1 /*
2 * Copyright (c) 2015-2022 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 /*
30 * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 *
41 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 */
53 #include <skywalk/os_skywalk_private.h>
54 #include <skywalk/nexus/flowswitch/nx_flowswitch.h>
55 #include <skywalk/nexus/flowswitch/fsw_var.h>
56 #include <skywalk/nexus/netif/nx_netif.h>
57 #include <skywalk/nexus/netif/nx_netif_compat.h>
58
59 #include <net/bpf.h>
60 #include <net/if.h>
61 #include <net/pktsched/pktsched_netem.h>
62 #include <sys/eventhandler.h>
63
64 #if (DEVELOPMENT || DEBUG)
65 SYSCTL_UINT(_kern_skywalk_flowswitch, OID_AUTO, chain_enqueue,
66 CTLFLAG_RW | CTLFLAG_LOCKED, &fsw_chain_enqueue, 0, "");
67 #endif /* !DEVELOPMENT && !DEBUG */
68
69 uint32_t fsw_chain_enqueue = 0;
70 static int __nx_fsw_inited = 0;
71 static eventhandler_tag __nx_fsw_ifnet_eventhandler_tag = NULL;
72 static eventhandler_tag __nx_fsw_protoctl_eventhandler_tag = NULL;
73
74 static ZONE_DEFINE(nx_fsw_zone, SKMEM_ZONE_PREFIX ".nx.fsw",
75 sizeof(struct nx_flowswitch), ZC_ZFREE_CLEARMEM);
76
77 static ZONE_DEFINE(nx_fsw_stats_zone, SKMEM_ZONE_PREFIX ".nx.fsw.stats",
78 sizeof(struct __nx_stats_fsw), ZC_ZFREE_CLEARMEM);
79
80 #define SKMEM_TAG_FSW_PORTS "com.apple.skywalk.fsw.ports"
81 SKMEM_TAG_DEFINE(skmem_tag_fsw_ports, SKMEM_TAG_FSW_PORTS);
82
83 #define SKMEM_TAG_FSW_FOB_HASH "com.apple.skywalk.fsw.fsw.fob.hash"
84 SKMEM_TAG_DEFINE(skmem_tag_fsw_fob_hash, SKMEM_TAG_FSW_FOB_HASH);
85
86 #define SKMEM_TAG_FSW_FRB_HASH "com.apple.skywalk.fsw.fsw.frb.hash"
87 SKMEM_TAG_DEFINE(skmem_tag_fsw_frb_hash, SKMEM_TAG_FSW_FRB_HASH);
88
89 #define SKMEM_TAG_FSW_FRIB_HASH "com.apple.skywalk.fsw.fsw.frib.hash"
90 SKMEM_TAG_DEFINE(skmem_tag_fsw_frib_hash, SKMEM_TAG_FSW_FRIB_HASH);
91
92 #define SKMEM_TAG_FSW_FRAG_MGR "com.apple.skywalk.fsw.fsw.frag.mgr"
93 SKMEM_TAG_DEFINE(skmem_tag_fsw_frag_mgr, SKMEM_TAG_FSW_FRAG_MGR);
94
95 /* 64-bit mask with range */
96 #define BMASK64(_beg, _end) \
97 ((NX_FSW_CHUNK_FREE >> (63 - (_end))) & ~((1ULL << (_beg)) - 1))
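/*
 * Worked example (assuming NX_FSW_CHUNK_FREE is the all-ones 64-bit
 * pattern): BMASK64(8, 11) == 0x0f00, i.e. bits 8 through 11 inclusive
 * are set and everything else is clear.
 */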
98
99 static int fsw_detach(struct nx_flowswitch *fsw, struct nexus_adapter *hwna,
100 boolean_t purge);
101
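/*
 * Attach a virtual-port adapter to the flowswitch on behalf of a channel
 * open request: reuse an adapter already occupying the requested nexus
 * port if one exists; otherwise create a new vp adapter, attach it to the
 * flowswitch, and bind it to its nexus port.  On failure the callee-held
 * adapter reference is dropped and *vpna is reset to NULL.
 */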
102 int
103 fsw_attach_vp(struct kern_nexus *nx, struct kern_channel *ch,
104 struct chreq *chr, struct nxbind *nxb, struct proc *p,
105 struct nexus_vp_adapter **vpna)
106 {
107 #pragma unused(ch)
108 struct nx_flowswitch *fsw = NX_FSW_PRIVATE(nx);
109 char *cr_name = chr->cr_name;
110 int err = 0;
111
112 SK_LOCK_ASSERT_HELD();
113 ASSERT(!(chr->cr_mode & CHMODE_CONFIG));
114 *vpna = NULL;
115
116 /* if there's an existing adapter on the nexus port then use it */
117 FSW_WLOCK(fsw);
118 err = fsw_port_alloc(fsw, nxb, vpna, chr->cr_port, p, FALSE, FALSE);
119 FSW_WUNLOCK(fsw);
120
121 if (err != 0) {
122 ASSERT(*vpna == NULL);
123 goto out;
124 } else if (*vpna != NULL) {
125 /*
126 * Use the existing adapter on that port; fsw_port_alloc()
127 * callback has retained a reference count on the adapter.
128 */
129 goto out;
130 }
131 ASSERT(*vpna == NULL);
132
133 /* create a virtual port; callee holds vpna ref */
134 err = fsw_vp_na_create(nx, chr, vpna);
135 if (err != 0) {
136 SK_ERR("vpna create failed (err %d)", err);
137 goto out;
138 }
139
140 /* attach vp to fsw */
141 err = fsw_vp_na_attach(nx, cr_name, &(*vpna)->vpna_up);
142 if (err != 0) {
143 SK_ERR("vpna \"%s\" fsw attach failed (err %d)",
144 (*vpna)->vpna_up.na_name, err);
145 goto out;
146 }
147
148 FSW_WLOCK(fsw);
149 err = fsw_port_alloc(fsw, nxb, vpna, (*vpna)->vpna_nx_port, p, FALSE, FALSE);
150 FSW_WUNLOCK(fsw);
151
152 out:
153 if ((*vpna) != NULL) {
154 SK_DF(err ? SK_VERB_ERROR : SK_VERB_FSW,
155 "vpna \"%s\" (0x%llx) refs %u to fsw \"%s\" "
156 "nx_port %d (err %d)", (*vpna)->vpna_up.na_name,
157 SK_KVA(&(*vpna)->vpna_up), (*vpna)->vpna_up.na_refcount,
158 cr_name, (int)(*vpna)->vpna_nx_port, err);
159
160 if (err != 0) {
161 na_release_locked(&(*vpna)->vpna_up);
162 *vpna = NULL;
163 }
164 }
165
166 return err;
167 }
168
169 static int
170 fsw_nx_check(struct nx_flowswitch *fsw, struct kern_nexus *hw_nx)
171 {
172 #pragma unused(fsw)
173 nexus_type_t hw_nxdom_type = NX_DOM(hw_nx)->nxdom_type;
174
175 if (hw_nxdom_type != NEXUS_TYPE_NET_IF) {
176 return EINVAL;
177 }
178
179 /* it's a netif below */
180 return 0;
181 }
182
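/*
 * Process NXCFG_CMD_FLOW_ADD.  User-issued requests are restricted to
 * non-special nexus ports and always get flow tracking and flow advisory
 * enabled; kernel-issued (BSD) flows get neither.  For user processes the
 * request is internalized before, and externalized after, the actual
 * fsw_flow_add() call.
 */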
183 static int
184 fsw_ctl_flow_add(struct nx_flowswitch *fsw, struct proc *p,
185 struct nx_flow_req *req)
186 {
187 struct flow_owner *fo;
188 int error = 0;
189
190 ASSERT(p != PROC_NULL);
191
192 if (p != kernproc) {
193 /* special port shouldn't be bound via this method */
194 if (req->nfr_nx_port < FSW_VP_USER_MIN) {
195 return EINVAL;
196 }
197 req->nfr_flags |= (NXFLOWREQF_TRACK | NXFLOWREQF_FLOWADV);
198 } else {
199 /* no flow track or advisory support for bsd flow */
200 ASSERT((req->nfr_flags & NXFLOWREQF_TRACK) == 0);
201 ASSERT((req->nfr_flags & NXFLOWREQF_FLOWADV) == 0);
202 ASSERT((req->nfr_flags & NXFLOWREQF_LOW_LATENCY) == 0);
203 }
204
205 /* init kernel only fields */
206 if (p != kernproc) {
207 nx_flow_req_internalize(req);
208 }
209 req->nfr_pid = proc_pid(p);
210 if (req->nfr_epid == -1) {
211 req->nfr_epid = proc_pid(p);
212 }
213
214 fo = fsw_flow_add(fsw, req, &error);
215 ASSERT(fo != NULL || error != 0);
216
217 if (error == 0) {
218 		// user space doesn't need these flow stats
219 flow_stats_release(req->nfr_flow_stats);
220 }
221 if (p != kernproc) {
222 nx_flow_req_externalize(req);
223 }
224
225 return error;
226 }
227
228 static int
229 fsw_ctl_flow_del(struct nx_flowswitch *fsw, struct proc *p,
230 struct nx_flow_req *req)
231 {
232 int err;
233
234 nx_flow_req_internalize(req);
235 req->nfr_pid = proc_pid(p);
236 err = fsw_flow_del(fsw, req, TRUE, NULL);
237
238 nx_flow_req_externalize(req);
239 return err;
240 }
241
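/*
 * Bind the flowswitch to the ifnet backing the netif host port: create the
 * IP fragment manager, install the family-specific framing/resolver, pick
 * single- vs multi-buflet packet copy routines, cache the ifnet and netif
 * adapter pointers, set up the classq, rename the reaper thread, and
 * register the flowswitch netagent.
 */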
242 static int
243 fsw_setup_ifp(struct nx_flowswitch *fsw, struct nexus_adapter *hwna)
244 {
245 int error = 0;
246 struct ifnet *ifp = hwna->na_ifp;
247 struct kern_pbufpool *pp = skmem_arena_nexus(hwna->na_arena)->arn_rx_pp;
248 size_t f_limit = pp->pp_kmd_region->skr_c_obj_cnt / 2;
249
250 ASSERT((hwna->na_type == NA_NETIF_HOST) ||
251 (hwna->na_type == NA_NETIF_COMPAT_HOST));
252
253 SK_LOCK_ASSERT_HELD();
254
255 /*
256 	 * XXX: we don't support non-TXSTART interfaces.
257 * There are assumptions in fsw_port_flush_enqueue_dst() about
258 * single threaded write to destination rings.
259 */
260 if ((ifp->if_eflags & IFEF_TXSTART) == 0) {
261 SK_ERR("non TXSTART interface not supported ifp(0x%llx)",
262 SK_KVA(ifp));
263 return ENOTSUP;
264 }
265
266 FSW_WLOCK(fsw);
267
268 ASSERT(fsw->fsw_ifp == NULL);
269 ASSERT(fsw->fsw_nifna == NULL);
270 ASSERT(fsw->fsw_resolve == NULL);
271 ASSERT(fsw->fsw_frame == NULL);
272 ASSERT(fsw->fsw_demux == NULL);
273 ASSERT(fsw->fsw_pkt_copy_from_pkt == NULL);
274 ASSERT(fsw->fsw_pkt_copy_from_mbuf == NULL);
275 ASSERT(fsw->fsw_pkt_copy_to_mbuf == NULL);
276
277 fsw->fsw_ipfm = fsw_ip_frag_mgr_create(fsw, ifp, f_limit);
278 if (fsw->fsw_ipfm == NULL) {
279 FSW_WUNLOCK(fsw);
280 return ENOMEM;
281 }
282
283 switch (ifp->if_family) {
284 case IFNET_FAMILY_ETHERNET:
285 error = fsw_ethernet_setup(fsw, ifp);
286 fsw->fsw_ifp_dlt = DLT_EN10MB;
287 break;
288
289 case IFNET_FAMILY_CELLULAR:
290 error = fsw_cellular_setup(fsw, ifp);
291 fsw->fsw_ifp_dlt = DLT_RAW;
292 break;
293
294 default:
295 if (ifp->if_family == IFNET_FAMILY_IPSEC ||
296 ifp->if_family == IFNET_FAMILY_UTUN) {
297 error = fsw_ip_setup(fsw, ifp);
298 fsw->fsw_ifp_dlt = DLT_RAW;
299 break;
300 }
301 error = ENOTSUP;
302 break;
303 }
304
305 if (error != 0) {
306 FSW_WUNLOCK(fsw);
307 return error;
308 }
309
310 ASSERT(fsw->fsw_resolve != NULL);
311
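	/*
	 * Select the packet copy routines: use the multi-buflet variants
	 * when either the flowswitch KMD region or the device packet pool
	 * allows more than one buflet per packet; otherwise use the
	 * single-buflet ones.
	 */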
312 if (NX_PROV(fsw->fsw_nx)->nxprov_region_params[SKMEM_REGION_KMD].
313 srp_max_frags > 1 || pp->pp_max_frags > 1) {
314 fsw->fsw_pkt_copy_from_pkt = pkt_copy_multi_buflet_from_pkt;
315 fsw->fsw_pkt_copy_from_mbuf = pkt_copy_multi_buflet_from_mbuf;
316 fsw->fsw_pkt_copy_to_mbuf = pkt_copy_multi_buflet_to_mbuf;
317 } else {
318 fsw->fsw_pkt_copy_from_pkt = pkt_copy_from_pkt;
319 fsw->fsw_pkt_copy_from_mbuf = pkt_copy_from_mbuf;
320 fsw->fsw_pkt_copy_to_mbuf = pkt_copy_to_mbuf;
321 }
322
323 /*
324 * Since it is possible for fsw to refer to the ifp after all
325 * underlying hwnas are freed (see fsw_teardown_ifp()), we need
326 * an extra reference to the ifp here.
327 *
328 * We also cache the netif adapter of the interface, as it's
329 * needed for each packet enqueued to the classq. There is no
330 * need to retain a refcnt for the same reason as above.
331 *
332 * We hold the busy lock across these, just in case an interface
333 * detach and reattach happens, as fsw_flow_bind() relies on the
334 * same lock as well before making its checks.
335 */
336 lck_mtx_lock(&fsw->fsw_detach_barrier_lock);
337
338 ASSERT((ifp->if_eflags & IFEF_TXSTART) != 0);
339 fsw->fsw_ifp = ifp;
340 fsw->fsw_nifna = &ifp->if_na->nifna_up;
341 ifp->if_na->nifna_netif->nif_fsw = fsw;
342 ifp->if_na->nifna_netif->nif_fsw_nxadv =
343 fsw->fsw_nx->nx_adv.flowswitch_nxv_adv;
344 (void) strlcpy(fsw->fsw_flow_mgr->fm_name,
345 if_name(ifp), IFNAMSIZ);
346
347 fsw_classq_setup(fsw, hwna);
348 fsw->fsw_classq_enabled = TRUE;
349 fsw->fsw_src_lla_gencnt = 0;
350
351 ASSERT(fsw->fsw_reap_thread != THREAD_NULL);
352 (void) snprintf(fsw->fsw_reap_name, sizeof(fsw->fsw_reap_name),
353 FSW_REAP_THREADNAME, ifp->if_xname, "");
354 thread_set_thread_name(fsw->fsw_reap_thread, fsw->fsw_reap_name);
355
356 error = fsw_netagent_register(fsw, ifp);
357 SK_DF(error ? SK_VERB_ERROR : SK_VERB_FSW,
358 "fsw_netagent_register %s (family %u) (err %d)",
359 if_name(ifp), ifp->if_family, error);
360
361 /*
362 * Clear NXF_REJECT to allow new channels to be opened
363 * to this nexus, in case this is an interface reattach.
364 * Otherwise this flag should already be cleared.
365 */
366 if (error == 0) {
367 atomic_bitclear_32(&fsw->fsw_nx->nx_flags, NXF_REJECT);
368 }
369
370 lck_mtx_unlock(&fsw->fsw_detach_barrier_lock);
371
372 /*
373 * Wake up the reaper thread.
374 */
375 if (error == 0) {
376 fsw_reap_sched(fsw);
377 }
378
379 /* init skoid */
380 skoid_create(&fsw->fsw_skoid,
381 SKOID_SNODE(_kern_skywalk_flowswitch), if_name(ifp),
382 CTLFLAG_RW);
383
384 FSW_WUNLOCK(fsw);
385
386 return error;
387 }
388
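/*
 * Undo fsw_setup_ifp(): unregister the netagent, tear down the fragment
 * manager, skoid and classq state, mark the nexus with NXF_REJECT so that
 * existing channels cease to function, and clear the cached interface
 * callbacks and pointers.
 */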
389 static void
390 fsw_teardown_ifp(struct nx_flowswitch *fsw, struct nexus_adapter *hwna)
391 {
392 struct ifnet *ifp;
393
394 SK_LOCK_ASSERT_HELD();
395
396 FSW_WLOCK_ASSERT_HELD(fsw);
397 ifp = fsw->fsw_ifp;
398 ASSERT(ifp != NULL);
399 ASSERT((ifp->if_eflags & IFEF_TXSTART) != 0);
400
401 fsw_netagent_unregister(fsw, ifp);
402
403 if (fsw->fsw_ipfm != NULL) {
404 fsw_ip_frag_mgr_destroy(fsw->fsw_ipfm);
405 }
406
407 skoid_destroy(&fsw->fsw_skoid);
408
409 SK_DF(SK_VERB_FSW, "%sdetached from %s (family %u)",
410 	    ((fsw->fsw_agent_session != NULL) ? "netagent " : ""),
411 if_name(ifp), ifp->if_family);
412
413 if (hwna != NULL) {
414 fsw_classq_teardown(fsw, hwna);
415 }
416
417 /*
418 * Set NXF_REJECT on the nexus, which would cause existing adapters
419 * to be marked similarly; channels associated with them would then
420 * cease to function.
421 */
422 atomic_bitset_32(&fsw->fsw_nx->nx_flags, NXF_REJECT);
423
424 /* see notes on fsw_na_attach() about I/O refcnt */
425 if (ifp->if_na != NULL) {
426 ifp->if_na->nifna_netif->nif_fsw = NULL;
427 ifp->if_na->nifna_netif->nif_fsw_nxadv = NULL;
428 membar_sync();
429 }
430
431 fsw->fsw_ifp = NULL;
432 fsw->fsw_nifna = NULL;
433 fsw->fsw_resolve = NULL;
434 fsw->fsw_frame = NULL;
435 fsw->fsw_frame_headroom = 0;
436 fsw->fsw_demux = NULL;
437 fsw->fsw_classq_enabled = FALSE;
438 fsw->fsw_pkt_copy_from_pkt = NULL;
439 fsw->fsw_pkt_copy_from_mbuf = NULL;
440 fsw->fsw_pkt_copy_to_mbuf = NULL;
441
442 if (ifp->if_input_netem != NULL) {
443 netem_destroy(ifp->if_input_netem);
444 ifp->if_input_netem = NULL;
445 }
446
447 ASSERT(fsw->fsw_reap_thread != THREAD_NULL);
448 (void) snprintf(fsw->fsw_reap_name, sizeof(fsw->fsw_reap_name),
449 FSW_REAP_THREADNAME, if_name(ifp), "_detached");
450 thread_set_thread_name(fsw->fsw_reap_thread, fsw->fsw_reap_name);
451 }
452
453 static int
454 fsw_host_setup(struct nx_flowswitch *fsw)
455 {
456 struct nexus_adapter *hwna;
457 struct ifnet *ifp;
458
459 SK_LOCK_ASSERT_HELD();
460
461 hwna = fsw->fsw_host_ch->ch_na;
462 ASSERT(hwna != NULL);
463
464
465 /* the netif below must have an ifnet attached (dev/host port) */
466 if ((ifp = hwna->na_ifp) == NULL) {
467 return ENXIO;
468 }
469
470 /*
471 * XXX: we don't support multiple rx rings yet.
472 * There are assumptions in fsw_port_flush_enqueue_dst() about
473 * single threaded write to destination rings.
474 */
475 if (SKYWALK_NATIVE(ifp) && (hwna->na_num_rx_rings > 1)) {
476 SK_ERR("ifp(0x%llx): multiple rx rings(%d) not supported",
477 SK_KVA(ifp), hwna->na_num_rx_rings);
478 return ENOTSUP;
479 }
480
481 lck_mtx_lock(&fsw->fsw_detach_barrier_lock);
482 if ((fsw->fsw_detach_flags & FSW_DETACHF_DETACHING) != 0) {
483 lck_mtx_unlock(&fsw->fsw_detach_barrier_lock);
484 return EBUSY;
485 }
486 fsw->fsw_detach_flags = 0;
487 lck_mtx_unlock(&fsw->fsw_detach_barrier_lock);
488
489 int error = fsw_setup_ifp(fsw, hwna);
490 ASSERT(error != 0 || fsw->fsw_ifp != NULL);
491 if (error != 0) {
492 return error;
493 }
494
495 /* update the interface index */
496 ASSERT(NX_PROV(fsw->fsw_nx)->nxprov_params->nxp_ifindex == 0);
497 NX_PROV(fsw->fsw_nx)->nxprov_params->nxp_ifindex = ifp->if_index;
498 return 0;
499 }
500
501 static int
502 fsw_host_teardown(struct nx_flowswitch *fsw)
503 {
504 struct nexus_adapter *hwna = fsw->fsw_host_ch->ch_na;
505
506 SK_LOCK_ASSERT_HELD();
507 return fsw_detach(fsw, hwna, FALSE);
508 }
509
510 #if SK_LOG
511 /* Hoisted out of line to reduce kernel stack footprint */
512 SK_LOG_ATTRIBUTE
513 static void
514 fsw_ctl_attach_log(const struct nx_spec_req *nsr,
515 const struct kern_nexus *nx, int err)
516 {
517 uuid_string_t uuidstr, ifuuidstr;
518 const char *nustr;
519
520 if (nsr->nsr_flags & NXSPECREQ_UUID) {
521 nustr = sk_uuid_unparse(nsr->nsr_uuid, uuidstr);
522 } else if (nsr->nsr_flags & NXSPECREQ_IFP) {
523 (void) snprintf((char *)uuidstr, sizeof(uuidstr), "0x%llx",
524 SK_KVA(nsr->nsr_ifp));
525 nustr = uuidstr;
526 } else {
527 nustr = nsr->nsr_name;
528 }
529
530 SK_DF(err ? SK_VERB_ERROR : SK_VERB_FSW,
531 "nexus 0x%llx (%s) name/uuid \"%s\" if_uuid %s flags 0x%x err %d",
532 SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name, nustr,
533 sk_uuid_unparse(nsr->nsr_if_uuid, ifuuidstr), nsr->nsr_flags, err);
534 }
535 #endif /* SK_LOG */
536
537 SK_NO_INLINE_ATTRIBUTE
538 static void
539 fsw_netif_set_callbacks_common(struct nx_flowswitch *fsw, boolean_t set)
540 {
541 struct nexus_adapter *hwna = fsw->fsw_dev_ch->ch_na;
542
543 ASSERT(hwna->na_type == NA_NETIF_DEV ||
544 hwna->na_type == NA_NETIF_COMPAT_DEV);
545
546 if (set) {
547 netif_hwna_set_mode(hwna, NETIF_MODE_FSW, fsw_devna_rx);
548 } else {
549 netif_hwna_clear_mode(hwna);
550 }
551 }
552
553 SK_NO_INLINE_ATTRIBUTE
554 static void
555 fsw_netif_set_callbacks(struct nx_flowswitch *fsw)
556 {
557 fsw_netif_set_callbacks_common(fsw, TRUE);
558 }
559
560 SK_NO_INLINE_ATTRIBUTE
561 static void
562 fsw_netif_clear_callbacks(struct nx_flowswitch *fsw)
563 {
564 fsw_netif_set_callbacks_common(fsw, FALSE);
565 }
566
567 SK_NO_INLINE_ATTRIBUTE
568 static void
569 fsw_dp_start(struct nx_flowswitch *fsw)
570 {
571 ASSERT(fsw->fsw_dev_ch != NULL);
572 ASSERT(fsw->fsw_host_ch != NULL);
573
574 fsw_netif_set_callbacks(fsw);
575 na_start_spec(fsw->fsw_dev_ch->ch_nexus, fsw->fsw_dev_ch);
576 na_start_spec(fsw->fsw_host_ch->ch_nexus, fsw->fsw_host_ch);
577 }
578
579 SK_NO_INLINE_ATTRIBUTE
580 static int
581 fsw_dp_stop(struct nx_flowswitch *fsw, struct ifnet **ifpp)
582 {
583 struct ifnet *ifp;
584
585 FSW_WLOCK(fsw);
586 if ((fsw->fsw_state_flags & FSW_STATEF_QUIESCED) != 0) {
587 FSW_WUNLOCK(fsw);
588 return EALREADY;
589 }
590 fsw->fsw_state_flags |= FSW_STATEF_QUIESCED;
591 FSW_WUNLOCK(fsw);
592
593 /*
594 * For regular kernel-attached interfaces, quiescing is handled by
595 * the ifnet detach thread, which calls dlil_quiesce_and_detach_nexuses().
596 * For interfaces created by skywalk test cases, flowswitch/netif nexuses
597 * are constructed on the fly and can also be torn down on the fly.
598 * dlil_quiesce_and_detach_nexuses() won't help here because any nexus
599 * can be detached while the interface is still attached.
600 */
601 if ((ifp = fsw->fsw_ifp) != NULL &&
602 ifnet_datamov_suspend_if_needed(ifp)) {
603 SK_UNLOCK();
604 ifnet_datamov_drain(ifp);
605 /* Reference will be released by caller */
606 *ifpp = ifp;
607 SK_LOCK();
608 }
609 ASSERT(fsw->fsw_dev_ch != NULL);
610 ASSERT(fsw->fsw_host_ch != NULL);
611 na_stop_spec(fsw->fsw_host_ch->ch_nexus, fsw->fsw_host_ch);
612 na_stop_spec(fsw->fsw_dev_ch->ch_nexus, fsw->fsw_dev_ch);
613 fsw_netif_clear_callbacks(fsw);
614 return 0;
615 }
616
617 SK_NO_INLINE_ATTRIBUTE
618 static int
619 fsw_netif_port_setup(struct nx_flowswitch *fsw, struct kern_nexus *hw_nx,
620 boolean_t host)
621 {
622 struct chreq chr;
623 struct kern_channel *ch;
624 int err;
625
626 bzero(&chr, sizeof(chr));
627 uuid_copy(chr.cr_spec_uuid, hw_nx->nx_uuid);
628 chr.cr_ring_id = CHANNEL_RING_ID_ANY;
629 chr.cr_port = host ? NEXUS_PORT_NET_IF_HOST : NEXUS_PORT_NET_IF_DEV;
630 chr.cr_mode |= CHMODE_CONFIG | (host ? CHMODE_HOST : 0);
631
632 err = 0;
633 ch = ch_open_special(hw_nx, &chr, FALSE, &err);
634 if (ch == NULL) {
635 SK_ERR("ch_open_special(%s) failed: %d",
636 host ? "host" : "dev", err);
637 return err;
638 }
639 if (host) {
640 fsw->fsw_host_ch = ch;
641 } else {
642 fsw->fsw_dev_ch = ch;
643 }
644 return 0;
645 }
646
647 SK_NO_INLINE_ATTRIBUTE
648 static int
649 fsw_netif_port_teardown(struct nx_flowswitch *fsw, boolean_t host)
650 {
651 struct kern_channel *ch;
652
653 ch = host ? fsw->fsw_host_ch : fsw->fsw_dev_ch;
654 if (ch == NULL) {
655 return EINVAL;
656 }
657 if (host) {
658 fsw->fsw_host_ch = NULL;
659 } else {
660 fsw->fsw_dev_ch = NULL;
661 }
662 ch_close_special(ch);
663 (void) ch_release_locked(ch);
664 return 0;
665 }
666
667 SK_NO_INLINE_ATTRIBUTE
668 static int
669 fsw_devna_setup(struct nx_flowswitch *fsw, struct kern_nexus *hw_nx)
670 {
671 return fsw_netif_port_setup(fsw, hw_nx, FALSE);
672 }
673
674 SK_NO_INLINE_ATTRIBUTE
675 static int
676 fsw_hostna_setup(struct nx_flowswitch *fsw, struct kern_nexus *hw_nx)
677 {
678 return fsw_netif_port_setup(fsw, hw_nx, TRUE);
679 }
680
681 SK_NO_INLINE_ATTRIBUTE
682 static int
683 fsw_devna_teardown(struct nx_flowswitch *fsw)
684 {
685 return fsw_netif_port_teardown(fsw, FALSE);
686 }
687
688 SK_NO_INLINE_ATTRIBUTE
689 static int
690 fsw_hostna_teardown(struct nx_flowswitch *fsw)
691 {
692 return fsw_netif_port_teardown(fsw, TRUE);
693 }
694
695 /* Process NXCFG_CMD_ATTACH */
696 SK_NO_INLINE_ATTRIBUTE
697 static int
698 fsw_ctl_attach(struct kern_nexus *nx, struct proc *p, struct nx_spec_req *nsr)
699 {
700 #pragma unused(p)
701 struct nx_flowswitch *fsw = NX_FSW_PRIVATE(nx);
702 struct kern_nexus *hw_nx = NULL;
703 int err = 0;
704
705 SK_LOCK_ASSERT_HELD();
706
707 /*
708 * The flowswitch only accepts UUID as an identifier, since it
709 * represents the UUID of the kernel object we are trying to
710 * attach to this flowswitch.
711 */
712 if ((nsr->nsr_flags & (NXSPECREQ_UUID | NXSPECREQ_IFP)) !=
713 NXSPECREQ_UUID || uuid_is_null(nsr->nsr_uuid)) {
714 err = EINVAL;
715 goto done;
716 }
717
718 if (fsw->fsw_dev_ch != NULL) {
719 ASSERT(fsw->fsw_host_ch != NULL);
720 err = EEXIST;
721 goto done;
722 }
723
724 hw_nx = nx_find(nsr->nsr_uuid, TRUE);
725 if (hw_nx == NULL) {
726 err = ENOENT;
727 goto done;
728 } else if (hw_nx == nx) {
729 err = EINVAL;
730 goto done;
731 }
732
733 /* preflight check to see if the nexus is attachable to us */
734 err = fsw_nx_check(fsw, hw_nx);
735 if (err != 0) {
736 goto done;
737 }
738
739 err = fsw_devna_setup(fsw, hw_nx);
740 if (err != 0) {
741 goto done;
742 }
743
744 err = fsw_hostna_setup(fsw, hw_nx);
745 if (err != 0) {
746 (void) fsw_devna_teardown(fsw);
747 goto done;
748 }
749
750 err = fsw_host_setup(fsw);
751 if (err != 0) {
752 (void) fsw_hostna_teardown(fsw);
753 (void) fsw_devna_teardown(fsw);
754 goto done;
755 }
756
757 fsw_dp_start(fsw);
758
759 /* return the devna UUID */
760 uuid_copy(nsr->nsr_if_uuid, fsw->fsw_dev_ch->ch_na->na_uuid);
761 ASSERT(!uuid_is_null(nsr->nsr_if_uuid));
762 done:
763 #if SK_LOG
764 if (__improbable(sk_verbose != 0)) {
765 fsw_ctl_attach_log(nsr, nx, err);
766 }
767 #endif /* SK_LOG */
768
769 if (hw_nx != NULL) {
770 nx_release_locked(hw_nx);
771 }
772
773 return err;
774 }
775
776 SK_NO_INLINE_ATTRIBUTE
777 static void
778 fsw_cleanup(struct nx_flowswitch *fsw)
779 {
780 int err;
781 struct ifnet *ifp = NULL;
782
783 if (fsw->fsw_dev_ch == NULL) {
784 ASSERT(fsw->fsw_host_ch == NULL);
785 return;
786 }
787 err = fsw_dp_stop(fsw, &ifp);
788 if (err != 0) {
789 return;
790 }
791 err = fsw_host_teardown(fsw);
792 VERIFY(err == 0);
793
794 err = fsw_hostna_teardown(fsw);
795 VERIFY(err == 0);
796
797 err = fsw_devna_teardown(fsw);
798 VERIFY(err == 0);
799
800 if (ifp != NULL) {
801 ifnet_datamov_resume(ifp);
802 }
803 }
804
805 int
806 fsw_ctl_detach(struct kern_nexus *nx, struct proc *p,
807 struct nx_spec_req *nsr)
808 {
809 #pragma unused(p)
810 struct nx_flowswitch *fsw = NX_FSW_PRIVATE(nx);
811 int err = 0;
812
813 SK_LOCK_ASSERT_HELD();
814
815 /*
816 * nsr is NULL when we're called from the destructor, and it
817 * implies that we'll detach everything that is attached.
818 */
819 if (nsr == NULL) {
820 fsw_cleanup(fsw);
821 ASSERT(fsw->fsw_dev_ch == NULL);
822 ASSERT(fsw->fsw_host_ch == NULL);
823 goto done;
824 }
825
826 if (uuid_is_null(nsr->nsr_if_uuid)) {
827 err = EINVAL;
828 goto done;
829 } else if (fsw->fsw_dev_ch == NULL || fsw->fsw_host_ch == NULL) {
830 err = ENXIO;
831 goto done;
832 }
833
834 /* check if the devna uuid is correct */
835 if (uuid_compare(nsr->nsr_if_uuid,
836 fsw->fsw_dev_ch->ch_na->na_uuid) != 0) {
837 err = ESRCH;
838 goto done;
839 }
840 fsw_cleanup(fsw);
841
842 done:
843 #if SK_LOG
844 if (nsr != NULL) {
845 uuid_string_t ifuuidstr;
846 SK_DF(err ? SK_VERB_ERROR : SK_VERB_FSW,
847 "nexus 0x%llx (%s) if_uuid %s flags 0x%x err %d",
848 SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name,
849 sk_uuid_unparse(nsr->nsr_if_uuid, ifuuidstr),
850 nsr->nsr_flags, err);
851 } else {
852 SK_DF(err ? SK_VERB_ERROR : SK_VERB_FSW,
853 "nexus 0x%llx (%s) ANY err %d", SK_KVA(nx),
854 NX_DOM_PROV(nx)->nxdom_prov_name, err);
855 }
856 #endif /* SK_LOG */
857
858 return err;
859 }
860
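/*
 * Process NXCFG_CMD_NETEM: configure (or reconfigure) the input network
 * emulation instance for the interface attached to this flowswitch.
 */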
861 static int
862 fsw_netem_config(struct nx_flowswitch *fsw, void *data)
863 {
864 struct ifnet *ifp = fsw->fsw_ifp;
865 struct if_netem_params *params = data;
866 int ret;
867
868 if (ifp == NULL) {
869 return ENODEV;
870 }
871
872 SK_LOCK_ASSERT_HELD();
873 #define fsw_INPUT_NETEM_THREADNAME "if_input_netem_%s@fsw"
874 #define fsw_INPUT_NETEM_THREADNAME_LEN 32
875 char netem_name[fsw_INPUT_NETEM_THREADNAME_LEN];
876 (void) snprintf(netem_name, sizeof(netem_name),
877 fsw_INPUT_NETEM_THREADNAME, if_name(ifp));
878 ret = netem_config(&ifp->if_input_netem, netem_name, params, fsw,
879 fsw_dev_input_netem_dequeue, FSW_VP_DEV_BATCH_MAX);
880
881 return ret;
882 }
883
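/*
 * Nexus configuration entry point for the flowswitch.  Flow add/delete
 * requests are validated first (flow UUID present, flags masked for user
 * requests); adding a flow on behalf of another process or executable UUID
 * additionally requires PRIV_NET_PRIVILEGED_SOCKET_DELEGATE.  The command
 * is then dispatched to the matching handler.
 */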
884 int
885 fsw_ctl(struct kern_nexus *nx, nxcfg_cmd_t nc_cmd, struct proc *p,
886 void *data)
887 {
888 struct nx_flowswitch *fsw = NX_FSW_PRIVATE(nx);
889 struct nx_spec_req *nsr = data;
890 struct nx_flow_req *req = data;
891 boolean_t need_check;
892 int error = 0;
893
894 switch (nc_cmd) {
895 case NXCFG_CMD_FLOW_ADD:
896 case NXCFG_CMD_FLOW_DEL:
897 if (uuid_is_null(req->nfr_flow_uuid)) {
898 error = EINVAL;
899 goto done;
900 }
901 if (p != kernproc) {
902 req->nfr_flags &= NXFLOWREQF_MASK;
903 }
904 req->nfr_flowadv_idx = FLOWADV_IDX_NONE;
905
906 if (nc_cmd == NXCFG_CMD_FLOW_DEL) {
907 break;
908 }
909
910 need_check = FALSE;
911 if (req->nfr_epid != -1 && proc_pid(p) != req->nfr_epid) {
912 need_check = TRUE;
913 } else if (!uuid_is_null(req->nfr_euuid)) {
914 uuid_t uuid;
915
916 /* get the UUID of the issuing process */
917 proc_getexecutableuuid(p, uuid, sizeof(uuid));
918
919 /*
920 * If this is not issued by a process for its own
921 * executable UUID and if the process does not have
922 * the necessary privilege, reject the request.
923 * The logic is similar to so_set_effective_uuid().
924 */
925 if (uuid_compare(req->nfr_euuid, uuid) != 0) {
926 need_check = TRUE;
927 }
928 }
929 if (need_check) {
930 kauth_cred_t cred = kauth_cred_proc_ref(p);
931 error = priv_check_cred(cred,
932 PRIV_NET_PRIVILEGED_SOCKET_DELEGATE, 0);
933 kauth_cred_unref(&cred);
934 if (error != 0) {
935 goto done;
936 }
937 }
938 break;
939
940 default:
941 break;
942 }
943
944 switch (nc_cmd) {
945 case NXCFG_CMD_ATTACH:
946 error = fsw_ctl_attach(nx, p, nsr);
947 break;
948
949 case NXCFG_CMD_DETACH:
950 error = fsw_ctl_detach(nx, p, nsr);
951 break;
952
953 case NXCFG_CMD_FLOW_ADD: /* struct nx_flow_req */
954 error = fsw_ctl_flow_add(fsw, p, data);
955 break;
956
957 case NXCFG_CMD_FLOW_DEL: /* struct nx_flow_req */
958 error = fsw_ctl_flow_del(fsw, p, data);
959 break;
960 case NXCFG_CMD_NETEM: /* struct if_netem_params */
961 error = fsw_netem_config(fsw, data);
962 break;
963
964 default:
965 SK_ERR("invalid cmd %u", nc_cmd);
966 error = EINVAL;
967 break;
968 }
969
970 done:
971 return error;
972 }
973
974 struct nx_flowswitch *
975 fsw_ifp_to_fsw(struct ifnet *ifp)
976 {
977 struct nx_flowswitch *fsw = NULL;
978
979 if (ifp->if_na != NULL) {
980 fsw = ifp->if_na->nifna_netif->nif_fsw;
981 }
982 return fsw;
983 }
984
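/*
 * Interface event handler: on a link-layer address update for an Ethernet
 * interface, refresh the cached source MAC and bump its generation count;
 * on a low-power mode transition, kick the reaper thread.
 */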
985 static void
986 fsw_ifnet_event_callback(struct eventhandler_entry_arg ee_arg __unused,
987 struct ifnet *ifp, struct sockaddr *ip_addr __unused,
988 intf_event_code_t intf_ev_code)
989 {
990 struct nx_flowswitch *fsw = NULL;
991
992 if (ifp->if_na == NULL) {
993 return;
994 }
995
996 SK_LOCK();
997 fsw = fsw_ifp_to_fsw(ifp);
998 if (fsw != NULL) {
999 switch (intf_ev_code) {
1000 case INTF_EVENT_CODE_LLADDR_UPDATE:
1001 if ((fsw->fsw_ifp == NULL) ||
1002 (fsw->fsw_ifp_dlt != DLT_EN10MB)) {
1003 break;
1004 }
1005
1006 VERIFY(fsw->fsw_ifp == ifp);
1007 SK_DF(SK_VERB_FSW, "MAC address change detected for %s",
1008 if_name(fsw->fsw_ifp));
1009 (void) ifnet_lladdr_copy_bytes(ifp, fsw->fsw_ether_shost,
1010 ETHER_ADDR_LEN);
1011 atomic_add_32(&fsw->fsw_src_lla_gencnt, 1);
1012 break;
1013
1014 case INTF_EVENT_CODE_LOW_POWER_UPDATE:
1015 if (fsw->fsw_ifp == NULL) {
1016 break;
1017 }
1018
1019 VERIFY(fsw->fsw_ifp == ifp);
1020
1021 if (ifp->if_xflags & IFXF_LOW_POWER) {
1022 SK_DF(SK_VERB_FSW,
1023 "Low power mode updated for %s",
1024 if_name(fsw->fsw_ifp));
1025
1026 fsw_reap_sched(fsw);
1027 }
1028 break;
1029
1030 default:
1031 break;
1032 }
1033 }
1034 SK_UNLOCK();
1035 }
1036
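/*
 * Protocol control event handler: for TCP/UDP events carrying a complete
 * 5-tuple, look up the matching flow entry and relay the event to the
 * netagent via netagent_update_flow_protoctl_event().
 */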
1037 static void
1038 fsw_protoctl_event_callback(struct eventhandler_entry_arg ee_arg,
1039 struct ifnet *ifp, struct sockaddr *p_laddr, struct sockaddr *p_raddr,
1040 uint16_t lport, uint16_t rport, uint8_t proto, uint32_t protoctl_event_code,
1041 struct protoctl_ev_val *p_val)
1042 {
1043 #pragma unused(ee_arg)
1044 struct nx_flowswitch *fsw = NULL;
1045 struct flow_entry *fe = NULL;
1046 boolean_t netagent_update_flow = FALSE;
1047 uuid_t fe_uuid;
1048
1049 if (proto != IPPROTO_TCP && proto != IPPROTO_UDP) {
1050 return;
1051 }
1052
1053 /*
1054 * XXX Right now only handle the event if we have enough
1055 * information to match the entire flow.
1056 */
1057 if (lport == 0 || rport == 0 || p_laddr == NULL || p_raddr == NULL) {
1058 return;
1059 }
1060
1061 SK_LOCK();
1062 fsw = fsw_ifp_to_fsw(ifp);
1063 if (fsw == NULL) {
1064 goto out;
1065 }
1066
1067 if (!fsw_detach_barrier_add(fsw)) {
1068 fsw = NULL;
1069 SK_ERR("netagent detached");
1070 goto out;
1071 }
1072
1073 struct flow_key fk __sk_aligned(16);
1074 FLOW_KEY_CLEAR(&fk);
1075 fk.fk_proto = proto;
1076 if (p_laddr->sa_family == AF_INET) {
1077 fk.fk_ipver = IPVERSION;
1078 fk.fk_src4 = SIN(p_laddr)->sin_addr;
1079 fk.fk_dst4 = SIN(p_raddr)->sin_addr;
1080 } else {
1081 fk.fk_ipver = IPV6_VERSION;
1082 fk.fk_src6 = SIN6(p_laddr)->sin6_addr;
1083 fk.fk_dst6 = SIN6(p_raddr)->sin6_addr;
1084 }
1085 fk.fk_sport = lport;
1086 fk.fk_dport = rport;
1087 fk.fk_mask = FKMASK_5TUPLE;
1088
1089 fe = flow_mgr_find_fe_by_key(fsw->fsw_flow_mgr, &fk);
1090 if (__improbable(fe == NULL)) {
1091 goto out;
1092 }
1093
1094 uuid_copy(fe_uuid, fe->fe_uuid);
1095 /*
1096 * If the protocol notification is for TCP, make sure
1097 	 * protocol event received is for bytes in flight.
1098 * XXX Redirect events are not delivered as protocol events
1099 * but as better route events.
1100 * Also redirect events do not indicate loss of the packet.
1101 */
1102 if (proto != IPPROTO_TCP) {
1103 p_val->tcp_seq_number = 0;
1104 }
1105
1106 netagent_update_flow = TRUE;
1107
1108 out:
1109 SK_UNLOCK();
1110
1111 if (netagent_update_flow) {
1112 int error = 0;
1113 #if SK_LOG
1114 char dbgbuf[FLOWENTRY_DBGBUF_SIZE];
1115 SK_DF(SK_VERB_FLOW, "Update flow entry \"%s\" for protocol "
1116 "event %d with value %d and tcp sequence number %d",
1117 fe_as_string(fe, dbgbuf, sizeof(dbgbuf)),
1118 protoctl_event_code, p_val->val, p_val->tcp_seq_number);
1119 #endif /* SK_LOG */
1120 if ((error = netagent_update_flow_protoctl_event(
1121 fsw->fsw_agent_session, fe_uuid, protoctl_event_code,
1122 p_val->val, p_val->tcp_seq_number)) != 0) {
1123 #if SK_LOG
1124 SK_DF(SK_VERB_FLOW, "Error: %d. Could not update "
1125 "flow entry \"%s\" for protocol event %d with "
1126 "value %d and tcp sequence number %d", error,
1127 dbgbuf, protoctl_event_code, p_val->val,
1128 p_val->tcp_seq_number);
1129 #endif /* SK_LOG */
1130 }
1131 }
1132
1133 if (fe != NULL) {
1134 flow_entry_release(&fe);
1135 }
1136
1137 if (fsw != NULL) {
1138 fsw_detach_barrier_remove(fsw);
1139 }
1140 }
1141
1142 int
1143 fsw_netagent_add_remove(struct kern_nexus *nx, boolean_t add)
1144 {
1145 struct nx_flowswitch *fsw = NULL;
1146 int error = 0;
1147
1148 SK_LOCK_ASSERT_HELD();
1149 VERIFY(nx != NULL);
1150 VERIFY(NX_PROV(nx) != NULL);
1151 VERIFY(NX_DOM_PROV(nx) != NULL);
1152
1153 if (NX_DOM(nx)->nxdom_type != NEXUS_TYPE_FLOW_SWITCH) {
1154 error = EINVAL;
1155 goto out;
1156 }
1157
1158 fsw = NX_FSW_PRIVATE(nx);
1159 VERIFY(fsw != NULL);
1160 FSW_WLOCK(fsw);
1161
1162 if (fsw->fsw_agent_session == NULL) {
1163 error = ENXIO;
1164 goto out;
1165 }
1166
1167 ASSERT(!uuid_is_null(fsw->fsw_agent_uuid));
1168
1169 if (add) {
1170 if (FSW_NETAGENT_ADDED(fsw)) {
1171 /* agent already added */
1172 error = EEXIST;
1173 } else {
1174 fsw->fsw_state_flags |= FSW_STATEF_NETAGENT_ADDED;
1175 if (if_is_fsw_netagent_enabled()) {
1176 fsw->fsw_state_flags
1177 |= FSW_STATEF_NETAGENT_ENABLED;
1178 }
1179 if_add_netagent(fsw->fsw_ifp, fsw->fsw_agent_uuid);
1180 SK_D("flowswitch netagent added for interface %s",
1181 if_name(fsw->fsw_ifp));
1182 }
1183 } else {
1184 if (!FSW_NETAGENT_ADDED(fsw)) {
1185 /* agent has not been added */
1186 error = ENOENT;
1187 } else {
1188 fsw->fsw_state_flags &= ~(FSW_STATEF_NETAGENT_ADDED |
1189 FSW_STATEF_NETAGENT_ENABLED);
1190 if_delete_netagent(fsw->fsw_ifp, fsw->fsw_agent_uuid);
1191 SK_D("flowswitch netagent removed for interface %s",
1192 if_name(fsw->fsw_ifp));
1193 }
1194 }
1195 out:
1196 if (fsw != NULL) {
1197 FSW_UNLOCK(fsw);
1198 }
1199 return error;
1200 }
1201
1202 void
1203 fsw_netagent_update(struct kern_nexus *nx)
1204 {
1205 struct nx_flowswitch *fsw = NULL;
1206
1207 SK_LOCK_ASSERT_HELD();
1208 VERIFY(nx != NULL);
1209 VERIFY(NX_PROV(nx) != NULL);
1210 VERIFY(NX_DOM_PROV(nx) != NULL);
1211
1212 if (NX_DOM(nx)->nxdom_type != NEXUS_TYPE_FLOW_SWITCH) {
1213 goto out;
1214 }
1215 fsw = NX_FSW_PRIVATE(nx);
1216 VERIFY(fsw != NULL);
1217 FSW_WLOCK(fsw);
1218 if (fsw->fsw_agent_session == NULL) {
1219 goto out;
1220 }
1221 ASSERT(!uuid_is_null(fsw->fsw_agent_uuid));
1222 uint32_t flags = netagent_get_flags(fsw->fsw_agent_uuid);
1223 const bool ip_agent = ifnet_needs_fsw_ip_netagent(fsw->fsw_ifp);
1224 const bool transport_agent = ifnet_needs_fsw_transport_netagent(fsw->fsw_ifp);
1225 if (ip_agent || transport_agent) {
1226 flags |= NETAGENT_FLAG_NEXUS_LISTENER;
1227 } else {
1228 flags &= ~NETAGENT_FLAG_NEXUS_LISTENER;
1229 }
1230 if (transport_agent) {
1231 flags |= NETAGENT_FLAG_NEXUS_PROVIDER;
1232 } else {
1233 flags &= ~NETAGENT_FLAG_NEXUS_PROVIDER;
1234 }
1235 if (ip_agent) {
1236 flags |= NETAGENT_FLAG_CUSTOM_IP_NEXUS;
1237 } else {
1238 flags &= ~NETAGENT_FLAG_CUSTOM_IP_NEXUS;
1239 }
1240 if (netagent_set_flags(fsw->fsw_agent_uuid, flags) == 0) {
1241 SK_D("flowswitch netagent updated for interface %s",
1242 if_name(fsw->fsw_ifp));
1243 }
1244 out:
1245 if (fsw != NULL) {
1246 FSW_UNLOCK(fsw);
1247 }
1248 }
1249
1250 static int
1251 fsw_port_ctor(struct nx_flowswitch *fsw, struct nexus_vp_adapter *vpna,
1252 const struct nxbind *nxb)
1253 {
1254 #pragma unused(nxb)
1255 int err = 0;
1256
1257 SK_LOCK_ASSERT_HELD();
1258 ASSERT(nxb == NULL || !(nxb->nxb_flags & NXBF_MATCH_UNIQUEID) ||
1259 vpna->vpna_pid == nxb->nxb_pid);
1260
1261 /*
1262 * Reject regular channel open requests unless there is
1263 * something attached to the host port of the flowswitch.
1264 */
1265 if (vpna->vpna_nx_port >= FSW_VP_USER_MIN) {
1266 struct nexus_adapter *na = &vpna->vpna_up;
1267 struct ifnet *ifp = fsw->fsw_ifp;
1268
1269 if (ifp == NULL) {
1270 err = ENXIO;
1271 goto done;
1272 }
1273
1274 /* if adapter supports mitigation, set default value */
1275 if (na->na_flags & (NAF_TX_MITIGATION | NAF_RX_MITIGATION)) {
1276 if (IFNET_IS_WIFI(ifp)) {
1277 na->na_ch_mit_ival = CH_MIT_IVAL_WIFI;
1278 } else if (IFNET_IS_CELLULAR(ifp)) {
1279 na->na_ch_mit_ival = CH_MIT_IVAL_CELLULAR;
1280 } else if (IFNET_IS_ETHERNET(ifp)) {
1281 na->na_ch_mit_ival = CH_MIT_IVAL_ETHERNET;
1282 } else {
1283 na->na_ch_mit_ival = CH_MIT_IVAL_DEFAULT;
1284 }
1285 }
1286 }
1287
1288 done:
1289 SK_DF(err ? SK_VERB_ERROR : SK_VERB_FSW,
1290 "fsw 0x%llx nx_port %d vpna_pid %d vpna_pid_bound %u mit_ival %llu "
1291 "(err %d)", SK_KVA(fsw), (int)vpna->vpna_nx_port, vpna->vpna_pid,
1292 vpna->vpna_pid_bound, vpna->vpna_up.na_ch_mit_ival, err);
1293
1294 return err;
1295 }
1296
1297 static bool
1298 fsw_port_dtor(struct nx_flowswitch *fsw, const struct nexus_vp_adapter *vpna)
1299 {
1300 struct flow_mgr *fm = fsw->fsw_flow_mgr;
1301 nexus_port_t nx_port = vpna->vpna_nx_port;
1302 uint32_t purge_cnt;
1303
1304 ASSERT(fsw == vpna->vpna_fsw);
1305 ASSERT(nx_port != NEXUS_PORT_ANY);
1306
1307 /*
1308 * If this nexus port was bound to a PID, we just need to look at a
1309 * single bucket and iterate from there. Note that in any case, we
1310 * can't just search for a single flow_owner based on the PID itself,
1311 * since a given process may be opening multiple channels to the
1312 * flowswitch; hence we search for the ones matching this nexus port.
1313 *
1314 * Close any open flows on the port and remove the flow owner and
1315 * nexus port binding.
1316 */
1317 purge_cnt = flow_owner_detach_nexus_port(fm, vpna->vpna_pid_bound,
1318 vpna->vpna_pid, nx_port, FALSE);
1319
1320 SK_DF(SK_VERB_FSW,
1321 "fsw 0x%llx nx_port %d pid %d pid_bound %u defunct %u "
1322 "purged %u", SK_KVA(fsw), (int)nx_port,
1323 vpna->vpna_pid, vpna->vpna_pid_bound, vpna->vpna_defunct,
1324 purge_cnt);
1325
1326 return purge_cnt != 0;
1327 }
1328
1329 /*
1330 * Flowswitch nexus port allocator.
1331 *
1332 * A nexus port is represented by a bit in the port bitmap; its state is
1333 * either free or allocated. A free state implies that the port has no
1334 * nxbind AND no nexus adapter association. An allocated state means that
1335 * either it has a nxbind OR a nexus adapter assocation. This routine
1336 * manages the nexus adapter association with a nexus port; nxbind is
1337 * handled separately via nx_fsw_port_bind().
1338 *
1339 * The caller of this routine may optionally pass in a NULL nexus adapter.
1340 * In such a case (*vpna is NULL), this routine checks to see if the port
1341 * has already been associated with an adapter, and returns a reference to
1342 * that adapter. No action is taken on a port that doesn't have an adapter
1343 * associated. Otherwise (*vpna is non-NULL), this routine associates that
1344 * adapter with a port that's not already associated with one; the reference
1345 * to the adapter is untouched here, as the caller is expected to handle it.
1346 *
1347 * The flowswitch code invokes this routine each time it is requested to
1348 * find an adapter via nx_fsw_na_find(). The counterpart of this routine,
1349 * nx_fsw_port_free(), is only executed ONCE by the adapter's destructor.
1350 * This allows for multiple channels to be opened to a nexus port, each
1351 * time holding a reference to that same nexus adapter. The releasing of
1352 * the nexus port only happens when the last channel closes.
1353 */
1354 static int
1355 fsw_port_alloc__(struct nx_flowswitch *fsw, struct nxbind *nxb,
1356 struct nexus_vp_adapter **vpna, nexus_port_t nx_port, struct proc *p)
1357 {
1358 struct kern_nexus *nx = fsw->fsw_nx;
1359 boolean_t refonly = FALSE;
1360 int error = 0;
1361
1362 FSW_WLOCK_ASSERT_HELD(fsw);
1363
1364 error = nx_port_alloc(nx, nx_port, nxb, (struct nexus_adapter **)vpna, p);
1365 if (error == 0 && *vpna != NULL && !refonly) {
1366 /* initialize the nexus port and the adapter occupying it */
1367 (*vpna)->vpna_fsw = fsw;
1368 (*vpna)->vpna_nx_port = nx_port;
1369 (*vpna)->vpna_pid = proc_pid(p);
1370 if (nxb != NULL && (nxb->nxb_flags & NXBF_MATCH_UNIQUEID)) {
1371 ASSERT((*vpna)->vpna_pid == nxb->nxb_pid);
1372 (*vpna)->vpna_pid_bound = TRUE;
1373 } else {
1374 (*vpna)->vpna_pid_bound = FALSE;
1375 }
1376
1377 error = fsw_port_ctor(fsw, *vpna, nxb);
1378 if (error != 0) {
1379 fsw_port_free(fsw, (*vpna),
1380 (*vpna)->vpna_nx_port, FALSE);
1381 }
1382 }
1383
1384 #if SK_LOG
1385 if (*vpna != NULL) {
1386 SK_DF(error ? SK_VERB_ERROR : SK_VERB_FSW,
1387 "+++ vpna \"%s\" (0x%llx) <-> fsw 0x%llx "
1388 "%sport %d refonly %u (err %d)",
1389 (*vpna)->vpna_up.na_name, SK_KVA(*vpna), SK_KVA(fsw),
1390 nx_fsw_dom_port_is_reserved(nx, nx_port) ?
1391 "[reserved] " : "", (int)nx_port, refonly, error);
1392 } else {
1393 SK_DF(error ? SK_VERB_ERROR : SK_VERB_FSW,
1394 "+++ fsw 0x%llx nx_port %d refonly %u "
1395 "(err %d)", SK_KVA(fsw), (int)nx_port, refonly, error);
1396 }
1397 #endif /* SK_LOG */
1398
1399 return error;
1400 }
1401
1402 int
1403 fsw_port_alloc(struct nx_flowswitch *fsw, struct nxbind *nxb,
1404 struct nexus_vp_adapter **vpna, nexus_port_t nx_port, struct proc *p,
1405 boolean_t ifattach, boolean_t host)
1406 {
1407 int err = 0;
1408
1409 FSW_WLOCK_ASSERT_HELD(fsw);
1410
1411 if (ifattach) {
1412 /* override port to either NX_FSW_{HOST,DEV} */
1413 nx_port = (host ? FSW_VP_HOST : FSW_VP_DEV);
1414 /* allocate reserved port for ifattach */
1415 err = fsw_port_alloc__(fsw, nxb, vpna, nx_port, p);
1416 } else if (host) {
1417 /* host is valid only for ifattach */
1418 err = EINVAL;
1419 } else {
1420 /* nexus port otherwise (reserve dev and host for ifattach) */
1421 err = fsw_port_alloc__(fsw, nxb, vpna, nx_port, p);
1422 }
1423
1424 return err;
1425 }
1426
1427 /*
1428 * Remove nexus port association from a nexus adapter. This call is
1429 * the opposite of fsw_port_alloc(), except that it is called only
1430 * at nx_fsw_vp_na_dtor() destructor time. See above notes
1431 * on fsw_port_alloc().
1432 */
1433 void
1434 fsw_port_free(struct nx_flowswitch *fsw, struct nexus_vp_adapter *vpna,
1435 nexus_port_t nx_port, boolean_t defunct)
1436 {
1437 struct kern_nexus *nx = fsw->fsw_nx;
1438
1439 FSW_WLOCK_ASSERT_HELD(fsw);
1440 ASSERT(vpna->vpna_fsw == fsw);
1441
1442 if (defunct) {
1443 vpna->vpna_defunct = TRUE;
1444 nx_port_defunct(nx, nx_port);
1445 }
1446
1447 bool destroyed = fsw_port_dtor(fsw, vpna);
1448 if (destroyed) {
1449 /*
1450 * If the extension's destructor no longer needs to be
1451 * bound to any channel client, release the binding.
1452 */
1453 nx_port_unbind(nx, nx_port);
1454 }
1455
1456 /*
1457 * If this is a defunct, then stop here as the port is still
1458 * occupied by the channel. We'll come here again later when
1459 * the actual close happens.
1460 */
1461 if (defunct) {
1462 return;
1463 }
1464
1465 SK_DF(SK_VERB_FSW, "--- vpna \"%s\" (0x%llx) -!- fsw 0x%llx "
1466 "nx_port %d defunct %u", vpna->vpna_up.na_name, SK_KVA(vpna),
1467 SK_KVA(fsw), (int)nx_port, vpna->vpna_defunct);
1468
1469 nx_port_free(nx, nx_port);
1470 vpna->vpna_fsw = NULL;
1471 vpna->vpna_nx_port = NEXUS_PORT_ANY;
1472 vpna->vpna_pid_bound = FALSE;
1473 vpna->vpna_pid = -1;
1474 vpna->vpna_defunct = FALSE;
1475 }
1476
1477 int
1478 fsw_port_na_activate(struct nx_flowswitch *fsw,
1479 struct nexus_vp_adapter *vpna, na_activate_mode_t mode)
1480 {
1481 struct flow_mgr *fm = fsw->fsw_flow_mgr;
1482 uint32_t fo_cnt = 0;
1483
1484 SK_LOCK_ASSERT_HELD();
1485
1486 /* The following code relies on the static value asserted below */
1487 _CASSERT(FSW_VP_DEV == 0);
1488 _CASSERT(FSW_VP_HOST == 1);
1489
1490 ASSERT(NA_IS_ACTIVE(&vpna->vpna_up));
1491 ASSERT(vpna->vpna_nx_port != NEXUS_PORT_ANY);
1492
1493 switch (mode) {
1494 case NA_ACTIVATE_MODE_ON:
1495 break;
1496
1497 case NA_ACTIVATE_MODE_DEFUNCT:
1498 break;
1499
1500 case NA_ACTIVATE_MODE_OFF:
1501 break;
1502
1503 default:
1504 VERIFY(0);
1505 /* NOTREACHED */
1506 __builtin_unreachable();
1507 }
1508
1509 /* nothing further to do for special ports */
1510 if (vpna->vpna_nx_port < FSW_VP_USER_MIN) {
1511 goto done;
1512 }
1513
1514 /* activate any flow owner related resources (e.g. flowadv), if any */
1515 fo_cnt = flow_owner_activate_nexus_port(fm, vpna->vpna_pid_bound,
1516 vpna->vpna_pid, vpna->vpna_nx_port, &vpna->vpna_up, mode);
1517
1518 done:
1519 SK_DF(SK_VERB_FSW,
1520 "fsw 0x%llx %s nx_port %d vpna_pid %d vpna_pid_bound %u fo_cnt %u",
1521 SK_KVA(fsw), na_activate_mode2str(mode), (int)vpna->vpna_nx_port,
1522 vpna->vpna_pid, vpna->vpna_pid_bound, fo_cnt);
1523
1524 return 0;
1525 }
1526
1527 int
1528 fsw_port_na_defunct(struct nx_flowswitch *fsw, struct nexus_vp_adapter *vpna)
1529 {
1530 int err = 0;
1531
1532 SK_LOCK_ASSERT_HELD();
1533 ASSERT(vpna->vpna_nx_port >= FSW_VP_USER_MIN);
1534
1535 /*
1536 * During defunct, we want to purge all flows associated to this
1537 * port and the flow owner as well. This is accomplished as part
1538 * of calling the port's destructor. However, we still want to
1539 * occupy the nexus port since there's a channel open to it.
1540 */
1541 FSW_WLOCK(fsw);
1542 if (!vpna->vpna_defunct) {
1543 fsw_port_free(fsw, vpna, vpna->vpna_nx_port, TRUE);
1544 } else {
1545 err = EALREADY;
1546 }
1547 FSW_WUNLOCK(fsw);
1548
1549 return err;
1550 }
1551
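/*
 * Export sk_stats_flow records: a single entry when filtered by flow ID or
 * by a full 5-tuple info-tuple, otherwise every flow entry including the
 * ones sitting on the linger (deferred free) list.  The returned size is
 * the space required, so callers may probe with a NULL buffer first.
 */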
1552 static size_t
1553 fsw_mib_get_flow(struct nx_flowswitch *fsw,
1554 struct nexus_mib_filter *filter, void *out, size_t len)
1555 {
1556 struct flow_mgr *fm = fsw->fsw_flow_mgr;
1557 size_t sf_size = sizeof(struct sk_stats_flow);
1558 __block size_t actual_space = 0;
1559 __block struct sk_stats_flow *sf = out;
1560 struct flow_entry *fe;
1561
1562 FSW_LOCK_ASSERT_HELD(fsw);
1563
1564 if (filter->nmf_bitmap & NXMIB_FILTER_FLOW_ID) {
1565 fe = flow_mgr_get_fe_by_uuid_rlock(fm, filter->nmf_flow_id);
1566 if (fe != NULL) {
1567 if (out != NULL && len >= sf_size) {
1568 flow_entry_stats_get(fe, sf);
1569 }
1570
1571 flow_entry_release(&fe);
1572 return sf_size;
1573 }
1574 return 0;
1575 } else if (filter->nmf_bitmap & NXMIB_FILTER_INFO_TUPLE) {
1576 struct info_tuple *itpl = &filter->nmf_info_tuple;
1577 struct flow_key fk;
1578 bzero(&fk, sizeof(fk));
1579 if (itpl->itpl_local_sa.sa_family == AF_INET &&
1580 itpl->itpl_remote_sa.sa_family == AF_INET) {
1581 fk.fk_mask = FKMASK_5TUPLE;
1582 fk.fk_ipver = IPVERSION;
1583 fk.fk_proto = itpl->itpl_proto;
1584 fk.fk_src4 = itpl->itpl_local_sin.sin_addr;
1585 fk.fk_dst4 = itpl->itpl_remote_sin.sin_addr;
1586 fk.fk_sport = itpl->itpl_local_sin.sin_port;
1587 fk.fk_dport = itpl->itpl_remote_sin.sin_port;
1588 } else if (itpl->itpl_local_sa.sa_family == AF_INET6 &&
1589 itpl->itpl_remote_sa.sa_family == AF_INET6) {
1590 fk.fk_mask = FKMASK_5TUPLE;
1591 fk.fk_ipver = IPV6_VERSION;
1592 fk.fk_proto = itpl->itpl_proto;
1593 fk.fk_src6 = itpl->itpl_local_sin6.sin6_addr;
1594 fk.fk_dst6 = itpl->itpl_remote_sin6.sin6_addr;
1595 fk.fk_sport = itpl->itpl_local_sin6.sin6_port;
1596 fk.fk_dport = itpl->itpl_remote_sin6.sin6_port;
1597 } else {
1598 SK_ERR("invalid info tuple: local af %d remote af %d",
1599 itpl->itpl_local_sa.sa_family,
1600 itpl->itpl_remote_sa.sa_family);
1601 return 0;
1602 }
1603
1604 fe = flow_mgr_find_fe_by_key(fsw->fsw_flow_mgr, &fk);
1605 if (fe != NULL) {
1606 if (out != NULL && len >= sf_size) {
1607 flow_entry_stats_get(fe, sf);
1608 }
1609 flow_entry_release(&fe);
1610 return sf_size;
1611 }
1612 return 0;
1613 }
1614
1615 flow_mgr_foreach_flow(fsw->fsw_flow_mgr, ^(struct flow_entry *_fe) {
1616 actual_space += sf_size;
1617
1618 if (out == NULL || actual_space > len) {
1619 return;
1620 }
1621
1622 flow_entry_stats_get(_fe, sf);
1623 sf++;
1624 });
1625
1626 /*
1627 * Also return the ones in deferred free list.
1628 */
1629 lck_mtx_lock(&fsw->fsw_linger_lock);
1630 TAILQ_FOREACH(fe, &fsw->fsw_linger_head, fe_linger_link) {
1631 actual_space += sf_size;
1632 if (out == NULL || actual_space > len) {
1633 continue;
1634 }
1635
1636 flow_entry_stats_get(fe, sf);
1637 sf++;
1638 }
1639 lck_mtx_unlock(&fsw->fsw_linger_lock);
1640
1641 return actual_space;
1642 }
1643
1644 static size_t
1645 fsw_mib_get_flow_adv(struct nx_flowswitch *fsw,
1646 struct nexus_mib_filter *filter, void *out, size_t len)
1647 {
1648 #pragma unused(filter)
1649 uint32_t fae_idx;
1650 size_t actual_space = 0;
1651 struct kern_channel *ch = NULL;
1652 struct sk_stats_flow_adv *sfa = NULL;
1653 struct sk_stats_flow_adv_ent *sfae = NULL;
1654 struct __flowadv_entry *fae = NULL;
1655 size_t sfa_size = sizeof(struct sk_stats_flow_adv);
1656 size_t sfae_size = sizeof(struct sk_stats_flow_adv_ent);
1657 uint32_t max_flowadv =
1658 fsw->fsw_nx->nx_prov->nxprov_params->nxp_flowadv_max;
1659
1660 SK_LOCK_ASSERT_HELD();
1661
1662 sfa = out;
1663 /* copyout flow advisory table (allocated entries only) */
1664 STAILQ_FOREACH(ch, &fsw->fsw_nx->nx_ch_head, ch_link) {
1665 struct skmem_arena *ar;
1666 struct skmem_arena_nexus *arn;
1667 struct nexus_adapter *na;
1668
1669 /* ch_lock isn't needed here since sk_lock is held */
1670 if ((ch->ch_flags & CHANF_CLOSING) ||
1671 (na = ch->ch_na) == NULL) {
1672 /* channel is closing */
1673 continue;
1674 }
1675
1676 ar = na->na_arena;
1677 arn = skmem_arena_nexus(ar);
1678
1679 AR_LOCK(ar);
1680 if (arn->arn_flowadv_obj == NULL) {
1681 ASSERT(ar->ar_flags & ARF_DEFUNCT);
1682 AR_UNLOCK(ar);
1683 continue;
1684 }
1685 actual_space += sfa_size;
1686 /* fill out flowadv_table info */
1687 if (out != NULL && actual_space <= len) {
1688 uuid_copy(sfa->sfa_nx_uuid, fsw->fsw_nx->nx_uuid);
1689 (void) strlcpy(sfa->sfa_if_name,
1690 fsw->fsw_flow_mgr->fm_name, IFNAMSIZ);
1691 sfa->sfa_owner_pid = ch->ch_pid;
1692 sfa->sfa_entries_count = 0;
1693 }
1694
1695 /* fill out flowadv_entries */
1696 sfae = &sfa->sfa_entries[0];
1697 for (fae_idx = 0; fae_idx < max_flowadv; fae_idx++) {
1698 fae = &arn->arn_flowadv_obj[fae_idx];
1699 if (!uuid_is_null(fae->fae_id)) {
1700 actual_space += sfae_size;
1701 if (out == NULL || actual_space > len) {
1702 continue;
1703 }
1704
1705 /* fill out entry */
1706 uuid_copy(sfae->sfae_flow_id, fae->fae_id);
1707 sfae->sfae_flags = fae->fae_flags;
1708 sfae++;
1709 sfa->sfa_entries_count++;
1710 }
1711 }
1712 sfa = (struct sk_stats_flow_adv *)
1713 ((uintptr_t)out + actual_space);
1714 AR_UNLOCK(ar);
1715 }
1716
1717 return actual_space;
1718 }
1719
1720 static inline void
1721 fsw_fo2sfo(struct nx_flowswitch *fsw, struct flow_owner *fo,
1722 struct sk_stats_flow_owner *sfo)
1723 {
1724 struct flow_mgr *fm = fsw->fsw_flow_mgr;
1725
1726 uuid_copy(sfo->sfo_nx_uuid, fsw->fsw_nx->nx_uuid);
1727 (void) strlcpy(sfo->sfo_if_name, fsw->fsw_flow_mgr->fm_name,
1728 IFNAMSIZ);
1729 sfo->sfo_bucket_idx = flow_mgr_get_fob_idx(fm, FO_BUCKET(fo));
1730
1731 (void) snprintf(sfo->sfo_name, sizeof(sfo->sfo_name), "%s",
1732 fo->fo_name);
1733 sfo->sfo_pid = fo->fo_pid;
1734 sfo->sfo_nx_port = fo->fo_nx_port;
1735 sfo->sfo_nx_port_pid_bound = fo->fo_nx_port_pid_bound;
1736 sfo->sfo_nx_port_destroyed = fo->fo_nx_port_destroyed;
1737 }
1738
1739 static size_t
1740 fsw_mib_get_flow_owner(struct nx_flowswitch *fsw,
1741 struct nexus_mib_filter *filter, void *out, size_t len)
1742 {
1743 #pragma unused(filter)
1744 uint32_t i;
1745 size_t actual_space = 0;
1746 struct flow_mgr *fm = fsw->fsw_flow_mgr;
1747 struct sk_stats_flow_owner *sfo = out;
1748 size_t sfo_size = sizeof(struct sk_stats_flow_owner);
1749 struct flow_owner *fo;
1750
1751 FSW_LOCK_ASSERT_HELD(fsw);
1752
1753 /*
1754 	 * Ideally we'd like to hide the bucket-level details from the flow
1755 	 * library user, but there is no simple way to iterate flow_owner
1756 	 * entries with the nested buckets/RB_TREE structure, so keep it as is.
1757 */
1758 for (i = 0; i < fm->fm_owner_buckets_cnt; i++) {
1759 struct flow_owner_bucket *fob = flow_mgr_get_fob_at_idx(fm, i);
1760 FOB_LOCK(fob);
1761 RB_FOREACH(fo, flow_owner_tree, &fob->fob_owner_head) {
1762 actual_space += sfo_size;
1763 if (out == NULL || actual_space > len) {
1764 continue;
1765 }
1766
1767 fsw_fo2sfo(fsw, fo, sfo);
1768 sfo++;
1769 }
1770 FOB_UNLOCK(fob);
1771 }
1772
1773 return actual_space;
1774 }
1775
1776 static inline void
1777 fsw_fr2sfr(struct nx_flowswitch *fsw, struct flow_route *fr,
1778 struct sk_stats_flow_route *sfr, boolean_t ll_scrub)
1779 {
1780 uuid_copy(sfr->sfr_nx_uuid, fsw->fsw_nx->nx_uuid);
1781 uuid_copy(sfr->sfr_uuid, fr->fr_uuid);
1782 (void) strlcpy(sfr->sfr_if_name, fsw->fsw_flow_mgr->fm_name,
1783 IFNAMSIZ);
1784
1785 sfr->sfr_bucket_idx = fr->fr_frb->frb_idx;
1786 sfr->sfr_id_bucket_idx = fr->fr_frib->frib_idx;
1787
	if (fr->fr_flags & FLOWRTF_ATTACHED) {
		sfr->sfr_flags |= SFLOWRTF_ATTACHED;
	}
	if (fr->fr_flags & FLOWRTF_ONLINK) {
		sfr->sfr_flags |= SFLOWRTF_ONLINK;
	}
	if (fr->fr_flags & FLOWRTF_GATEWAY) {
		sfr->sfr_flags |= SFLOWRTF_GATEWAY;
	}
	if (fr->fr_flags & FLOWRTF_RESOLVED) {
		sfr->sfr_flags |= SFLOWRTF_RESOLVED;
	}
	if (fr->fr_flags & FLOWRTF_HAS_LLINFO) {
		sfr->sfr_flags |= SFLOWRTF_HAS_LLINFO;
	}
	if (fr->fr_flags & FLOWRTF_DELETED) {
		sfr->sfr_flags |= SFLOWRTF_DELETED;
	}
	if (fr->fr_flags & FLOWRTF_DST_LL_MCAST) {
		sfr->sfr_flags |= SFLOWRTF_DST_LL_MCAST;
	}
	if (fr->fr_flags & FLOWRTF_DST_LL_BCAST) {
		sfr->sfr_flags |= SFLOWRTF_DST_LL_BCAST;
	}

	lck_spin_lock(&fr->fr_reflock);
	ASSERT(fr->fr_usecnt >= FLOW_ROUTE_MINREF);
	sfr->sfr_usecnt = fr->fr_usecnt - FLOW_ROUTE_MINREF;
	if (fr->fr_expire != 0) {
		sfr->sfr_expire = (int64_t)(fr->fr_expire - net_uptime());
	} else {
		sfr->sfr_expire = 0;
	}
	lck_spin_unlock(&fr->fr_reflock);

	sfr->sfr_laddr = fr->fr_laddr;
	sfr->sfr_faddr = fr->fr_faddr;
	sfr->sfr_gaddr = fr->fr_gaddr;

	if (ll_scrub) {
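		/*
		 * Scrubbed destination is reported as 02:00:00:00:00:00;
		 * the 0x02 in the first octet marks it as a locally
		 * administered (placeholder) address.
		 */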
		static const uint8_t unspec[ETHER_ADDR_LEN] = {[0] = 2 };
		bcopy(&unspec, &sfr->sfr_ether_dhost, ETHER_ADDR_LEN);
	} else {
		bcopy(&fr->fr_eth.ether_dhost, &sfr->sfr_ether_dhost,
		    ETHER_ADDR_LEN);
	}
}

#if CONFIG_MACF
extern int dlil_lladdr_ckreq;
#endif /* CONFIG_MACF */

static size_t
fsw_mib_get_flow_route(struct nx_flowswitch *fsw,
    struct nexus_mib_filter *filter, void *out, size_t len, struct proc *p)
{
#pragma unused(filter)
	uint32_t i;
	size_t actual_space = 0;
	struct flow_mgr *fm = fsw->fsw_flow_mgr;
	struct sk_stats_flow_route *sfr = out;
	size_t sfo_size = sizeof(struct sk_stats_flow_route);
	struct flow_route *fr;
	boolean_t ll_scrub;

	FSW_LOCK_ASSERT_HELD(fsw);

	/*
	 * To get the link-layer info, the caller must have the following
	 * in their sandbox profile (or not be sandboxed at all), else we
	 * scrub it clean just like dlil_ifaddr_bytes() does:
	 *
	 *	(allow system-info (info-type "net.link.addr"))
	 *
	 * If scrubbed, we return 02:00:00:00:00:00.
	 */
#if CONFIG_MACF
	ll_scrub = (dlil_lladdr_ckreq &&
	    skywalk_mac_system_check_proc_cred(p, "net.link.addr") != 0);
#else /* !CONFIG_MACF */
	ll_scrub = FALSE;
#endif /* !CONFIG_MACF */

	for (i = 0; i < fm->fm_route_buckets_cnt; i++) {
		struct flow_route_bucket *frb = flow_mgr_get_frb_at_idx(fm, i);
		FRB_RLOCK(frb);
		RB_FOREACH(fr, flow_route_tree, &frb->frb_head) {
			actual_space += sfo_size;
			if (out == NULL || actual_space > len) {
				continue;
			}

			fsw_fr2sfr(fsw, fr, sfr, ll_scrub);
			sfr++;
		}
		FRB_UNLOCK(frb);
	}

	return actual_space;
}

static inline void
fsw_nxs2nus(struct nx_flowswitch *fsw, struct nexus_mib_filter *filter,
    pid_t pid, struct __nx_stats_fsw *nxs, struct sk_stats_userstack *sus)
{
	uuid_copy(sus->sus_nx_uuid, fsw->fsw_nx->nx_uuid);
	(void) strlcpy(sus->sus_if_name, fsw->fsw_flow_mgr->fm_name,
	    IFNAMSIZ);
	sus->sus_owner_pid = pid;

	if (filter->nmf_type & NXMIB_IP_STATS) {
		sus->sus_ip = nxs->nxs_ipstat;
	}

	if (filter->nmf_type & NXMIB_IP6_STATS) {
		sus->sus_ip6 = nxs->nxs_ip6stat;
	}

	if (filter->nmf_type & NXMIB_TCP_STATS) {
		sus->sus_tcp = nxs->nxs_tcpstat;
	}

	if (filter->nmf_type & NXMIB_UDP_STATS) {
		sus->sus_udp = nxs->nxs_udpstat;
	}

	if (filter->nmf_type & NXMIB_QUIC_STATS) {
		sus->sus_quic = nxs->nxs_quicstat;
	}
}

static size_t
fsw_mib_get_userstack_stats(struct nx_flowswitch *fsw,
    struct nexus_mib_filter *filter, void *out, size_t len)
{
	size_t actual_space = 0;
	struct kern_channel *ch;
	struct __nx_stats_fsw *nxs;
	struct sk_stats_userstack *sus = out;
	size_t sus_size = sizeof(struct sk_stats_userstack);

	SK_LOCK_ASSERT_HELD();

	/* copyout saved stats from closed ports */
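	/*
	 * These are reported with sus_owner_pid of 0; include them when no
	 * PID filter is set, or when the filter explicitly asks for PID 0.
	 */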
	if (((filter->nmf_bitmap & NXMIB_FILTER_PID) &&
	    (filter->nmf_pid == 0)) ||
	    !(filter->nmf_bitmap & NXMIB_FILTER_PID)) {
		actual_space += sus_size;
		if (out != NULL && actual_space <= len) {
			nxs = fsw->fsw_closed_na_stats;
			fsw_nxs2nus(fsw, filter, 0, nxs, sus);
			sus++;
		}
	}

	/*
	 * XXX Currently a process opens only one channel to the nexus, so
	 * we don't do per-process aggregation of inet stats here; doing so
	 * would require a fair amount of additional code.
	 */
	/* copyout per-process stats */
	STAILQ_FOREACH(ch, &fsw->fsw_nx->nx_ch_head, ch_link) {
		struct skmem_arena *ar;
		struct nexus_adapter *na;

		/* ch_lock isn't needed here since sk_lock is held */
		if ((ch->ch_flags & CHANF_CLOSING) ||
		    (na = ch->ch_na) == NULL) {
			/* channel is closing */
			continue;
		}

		if ((filter->nmf_bitmap & NXMIB_FILTER_PID) &&
		    filter->nmf_pid != ch->ch_pid) {
			continue;
		}

		ar = na->na_arena;

		AR_LOCK(ar);
		nxs = skmem_arena_nexus(ar)->arn_stats_obj;
		if (nxs == NULL) {
			ASSERT(ar->ar_flags & ARF_DEFUNCT);
			AR_UNLOCK(ar);
			continue;
		}

		actual_space += sus_size;
		if (out == NULL || actual_space > len) {
			AR_UNLOCK(ar);
			continue;
		}

		fsw_nxs2nus(fsw, filter, ch->ch_pid, nxs, sus);
		sus++;
		AR_UNLOCK(ar);
	}

	return actual_space;
}

static size_t
fsw_mib_get_stats(struct nx_flowswitch *fsw, void *out, size_t len)
{
	struct sk_stats_flow_switch *sfs = out;
	size_t actual_space = sizeof(struct sk_stats_flow_switch);

	if (out != NULL && actual_space <= len) {
		uuid_copy(sfs->sfs_nx_uuid, fsw->fsw_nx->nx_uuid);
		(void) strlcpy(sfs->sfs_if_name,
		    fsw->fsw_flow_mgr->fm_name, IFNAMSIZ);
		sfs->sfs_fsws = fsw->fsw_stats;
	}

	return actual_space;
}

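/*
 * Copy out the MIB records requested by the filter.  Each of the
 * fsw_mib_get_* handlers above follows the same convention: it always
 * returns the total space required for the full result, and only copies
 * out as many records as fit in the caller's buffer.  A caller may thus
 * probe with out == NULL (or len == 0) to learn the required size,
 * allocate, and call again.
 */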
size_t
fsw_mib_get(struct nx_flowswitch *fsw, struct nexus_mib_filter *filter,
    void *out, size_t len, struct proc *p)
{
	size_t ret;

	switch (filter->nmf_type) {
	case NXMIB_FSW_STATS:
		ret = fsw_mib_get_stats(fsw, out, len);
		break;
	case NXMIB_FLOW:
		ret = fsw_mib_get_flow(fsw, filter, out, len);
		break;
	case NXMIB_FLOW_OWNER:
		ret = fsw_mib_get_flow_owner(fsw, filter, out, len);
		break;
	case NXMIB_FLOW_ROUTE:
		ret = fsw_mib_get_flow_route(fsw, filter, out, len, p);
		break;
	case NXMIB_TCP_STATS:
	case NXMIB_UDP_STATS:
	case NXMIB_IP_STATS:
	case NXMIB_IP6_STATS:
	case NXMIB_USERSTACK_STATS:
		ret = fsw_mib_get_userstack_stats(fsw, filter, out, len);
		break;
	case NXMIB_FLOW_ADV:
		ret = fsw_mib_get_flow_adv(fsw, filter, out, len);
		break;
	default:
		ret = 0;
		break;
	}

	return ret;
}

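/*
 * Fold the given stats block into the flowswitch: protocol stats are
 * accumulated into fsw_closed_na_stats (later reported as the closed-port
 * aggregate), while channel error stats are folded into fsw_stats.
 */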
void
fsw_fold_stats(struct nx_flowswitch *fsw,
    void *data, nexus_stats_type_t type)
{
	ASSERT(data != NULL);
	FSW_LOCK_ASSERT_HELD(fsw);

	switch (type) {
	case NEXUS_STATS_TYPE_FSW:
	{
		struct __nx_stats_fsw *d, *s;
		d = fsw->fsw_closed_na_stats;
		s = data;
		ip_stats_fold(&d->nxs_ipstat, &s->nxs_ipstat);
		ip6_stats_fold(&d->nxs_ip6stat, &s->nxs_ip6stat);
		tcp_stats_fold(&d->nxs_tcpstat, &s->nxs_tcpstat);
		udp_stats_fold(&d->nxs_udpstat, &s->nxs_udpstat);
		quic_stats_fold(&d->nxs_quicstat, &s->nxs_quicstat);
		break;
	}
	case NEXUS_STATS_TYPE_CHAN_ERRORS:
	{
		struct __nx_stats_channel_errors *s = data;
		fsw_vp_channel_error_stats_fold(&fsw->fsw_stats, s);
		break;
	}
	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}
}

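/*
 * Detach barrier: a thread that needs the flowswitch to remain attached
 * to its interface and netagent session brackets its critical section
 * with fsw_detach_barrier_add()/fsw_detach_barrier_remove().  The add
 * fails once a detach is in progress, and fsw_detach() waits for the
 * barrier count to drain to zero before tearing the instance down.
 */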
boolean_t
fsw_detach_barrier_add(struct nx_flowswitch *fsw)
{
	lck_mtx_lock_spin(&fsw->fsw_detach_barrier_lock);
	if (__improbable(fsw->fsw_detach_flags != 0 ||
	    fsw->fsw_ifp == NULL || fsw->fsw_agent_session == NULL)) {
		lck_mtx_unlock(&fsw->fsw_detach_barrier_lock);
		return FALSE;
	}
	fsw->fsw_detach_barriers++;
	lck_mtx_unlock(&fsw->fsw_detach_barrier_lock);

	return TRUE;
}

void
fsw_detach_barrier_remove(struct nx_flowswitch *fsw)
{
	lck_mtx_lock_spin(&fsw->fsw_detach_barrier_lock);
	ASSERT((fsw->fsw_detach_flags & FSW_DETACHF_DETACHED) == 0);
	ASSERT(fsw->fsw_detach_barriers != 0);
	fsw->fsw_detach_barriers--;
	/* if there's a thread waiting to detach the interface, let it know */
	if (__improbable((fsw->fsw_detach_waiters > 0) &&
	    (fsw->fsw_detach_barriers == 0))) {
		fsw->fsw_detach_waiters = 0;
		wakeup(&fsw->fsw_detach_waiters);
	}
	lck_mtx_unlock(&fsw->fsw_detach_barrier_lock);
}

/*
 * Generic resolver for non-Ethernet interfaces.
 */
int
fsw_generic_resolve(struct nx_flowswitch *fsw, struct flow_route *fr,
    struct __kern_packet *pkt)
{
#pragma unused(pkt)
#if SK_LOG
	char dst_s[MAX_IPv6_STR_LEN];
#endif /* SK_LOG */
	struct ifnet *ifp = fsw->fsw_ifp;
	struct rtentry *tgt_rt = NULL;
	int err = 0;

	ASSERT(fr != NULL);
	ASSERT(ifp != NULL);

	FR_LOCK(fr);
	/*
	 * If the destination is on-link, we use the final destination
	 * address as target.  If it's off-link, we use the gateway
	 * address instead.  Point tgt_rt to the destination or gateway
	 * route accordingly.
	 */
	if (fr->fr_flags & FLOWRTF_ONLINK) {
		tgt_rt = fr->fr_rt_dst;
	} else if (fr->fr_flags & FLOWRTF_GATEWAY) {
		tgt_rt = fr->fr_rt_gw;
	}

	/*
	 * Perform another routing table lookup if necessary.
	 */
	if (tgt_rt == NULL || !(tgt_rt->rt_flags & RTF_UP) ||
	    fr->fr_want_configure) {
		if (fr->fr_want_configure == 0) {
			atomic_add_32(&fr->fr_want_configure, 1);
		}
		err = flow_route_configure(fr, ifp, NULL);
		if (err != 0) {
			SK_ERR("failed to configure route to %s on %s (err %d)",
			    sk_sa_ntop(SA(&fr->fr_faddr), dst_s,
			    sizeof(dst_s)), ifp->if_xname, err);
			goto done;
		}

		/* refresh pointers */
		if (fr->fr_flags & FLOWRTF_ONLINK) {
			tgt_rt = fr->fr_rt_dst;
		} else if (fr->fr_flags & FLOWRTF_GATEWAY) {
			tgt_rt = fr->fr_rt_gw;
		}
	}

	if (__improbable(!(fr->fr_flags & (FLOWRTF_ONLINK | FLOWRTF_GATEWAY)))) {
		err = EHOSTUNREACH;
		SK_ERR("invalid route for %s on %s (err %d)",
		    sk_sa_ntop(SA(&fr->fr_faddr), dst_s,
		    sizeof(dst_s)), ifp->if_xname, err);
		goto done;
	}

	ASSERT(tgt_rt != NULL);

done:
	if (__probable(err == 0)) {
		/*
		 * There's no actual resolution taking place here, so just
		 * mark it with FLOWRTF_RESOLVED for consistency.
		 */
		atomic_bitset_32(&fr->fr_flags, FLOWRTF_RESOLVED);
		atomic_set_32(&fr->fr_want_probe, 0);
	} else {
		atomic_bitclear_32(&fr->fr_flags, FLOWRTF_RESOLVED);
		flow_route_cleanup(fr);
	}
	FR_UNLOCK(fr);

	return err;
}

void
fsw_init(void)
{
	_CASSERT(NX_FSW_CHUNK_FREE == (uint64_t)-1);
	_CASSERT(PKT_MAX_PROTO_HEADER_SIZE <= NX_FSW_MINBUFSIZE);

	if (!__nx_fsw_inited) {
		/*
		 * Register callbacks for interface & protocol events;
		 * use a dummy argument for the callback cookie.
		 */
		__nx_fsw_ifnet_eventhandler_tag =
		    EVENTHANDLER_REGISTER(&ifnet_evhdlr_ctxt,
		    ifnet_event, fsw_ifnet_event_callback,
		    eventhandler_entry_dummy_arg, EVENTHANDLER_PRI_ANY);
		VERIFY(__nx_fsw_ifnet_eventhandler_tag != NULL);

		__nx_fsw_protoctl_eventhandler_tag =
		    EVENTHANDLER_REGISTER(&protoctl_evhdlr_ctxt,
		    protoctl_event, fsw_protoctl_event_callback,
		    eventhandler_entry_dummy_arg, EVENTHANDLER_PRI_ANY);
		VERIFY(__nx_fsw_protoctl_eventhandler_tag != NULL);
		__nx_fsw_inited = 1;
	}
}

void
fsw_uninit(void)
{
	if (__nx_fsw_inited) {
		EVENTHANDLER_DEREGISTER(&ifnet_evhdlr_ctxt, ifnet_event,
		    __nx_fsw_ifnet_eventhandler_tag);
		EVENTHANDLER_DEREGISTER(&protoctl_evhdlr_ctxt, protoctl_event,
		    __nx_fsw_protoctl_eventhandler_tag);

		__nx_fsw_inited = 0;
	}
}

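/*
 * Allocate a flowswitch instance along with its closed-port stats block;
 * both come from their own zones and are zero-filled.  Returns NULL only
 * if the zone allocator fails under the given zalloc flags.
 */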
struct nx_flowswitch *
fsw_alloc(zalloc_flags_t how)
{
	struct nx_flowswitch *fsw;
	struct __nx_stats_fsw *nsfw;

	SK_LOCK_ASSERT_HELD();

	nsfw = zalloc_flags(nx_fsw_stats_zone, how | Z_ZERO);
	if (nsfw == NULL) {
		return NULL;
	}

	fsw = zalloc_flags(nx_fsw_zone, how | Z_ZERO);
	if (fsw == NULL) {
		zfree(nx_fsw_stats_zone, nsfw);
		return NULL;
	}

	FSW_RWINIT(fsw);
	fsw->fsw_dev_ch = NULL;
	fsw->fsw_host_ch = NULL;
	fsw->fsw_closed_na_stats = nsfw;

	SK_DF(SK_VERB_MEM, "fsw 0x%llx ALLOC", SK_KVA(fsw));

	return fsw;
}

static int
fsw_detach(struct nx_flowswitch *fsw, struct nexus_adapter *hwna,
    boolean_t purge)
{
	struct kern_nexus_provider *nx_prov = fsw->fsw_nx->nx_prov;
	boolean_t do_dtor = FALSE;

	SK_LOCK_ASSERT_HELD();

	/*
	 * Return an error if the host port detach is in progress or has
	 * already completed.  When freeing the flowswitch (i.e. purge is
	 * TRUE) we have to clean up everything, so block if needed.
	 */
	lck_mtx_lock(&fsw->fsw_detach_barrier_lock);
	if (!purge && fsw->fsw_detach_flags != 0) {
		SK_ERR("fsw detaching");
		lck_mtx_unlock(&fsw->fsw_detach_barrier_lock);
		return EBUSY;
	}
	VERIFY(purge || fsw->fsw_detach_flags == 0);
	/*
	 * Mark the flowswitch as detaching and release sk_lock while
	 * waiting for other threads to exit.  Maintain lock/unlock
	 * ordering between the two locks.
	 */
	fsw->fsw_detach_flags |= FSW_DETACHF_DETACHING;
	lck_mtx_unlock(&fsw->fsw_detach_barrier_lock);
	SK_UNLOCK();

	/*
	 * Wait until all threads needing access to the flowswitch
	 * netagent get out, and mark this as detached to prevent
	 * further access requests from being admitted.
	 */
	lck_mtx_lock(&fsw->fsw_detach_barrier_lock);
	while (fsw->fsw_detach_barriers != 0) {
		fsw->fsw_detach_waiters++;
		(void) msleep(&fsw->fsw_detach_waiters,
		    &fsw->fsw_detach_barrier_lock,
		    (PZERO + 1), __FUNCTION__, NULL);
	}
	VERIFY(fsw->fsw_detach_barriers == 0);
	VERIFY(fsw->fsw_detach_flags != 0);
	fsw->fsw_detach_flags &= ~FSW_DETACHF_DETACHING;
	/*
	 * If the NA detach thread and the flowswitch free thread were both
	 * waiting, the thread that wins the race is responsible for doing
	 * the dtor work.
	 */
	if (fsw->fsw_detach_flags == 0) {
		fsw->fsw_detach_flags |= FSW_DETACHF_DETACHED;
		do_dtor = TRUE;
	}
	VERIFY(fsw->fsw_detach_flags == FSW_DETACHF_DETACHED);
	lck_mtx_unlock(&fsw->fsw_detach_barrier_lock);
	SK_LOCK();

	FSW_WLOCK(fsw);
	if (do_dtor) {
		if (fsw->fsw_ifp != NULL) {
			fsw_teardown_ifp(fsw, hwna);
			ASSERT(fsw->fsw_ifp == NULL);
			ASSERT(fsw->fsw_nifna == NULL);
		}
		bzero(fsw->fsw_slla, sizeof(fsw->fsw_slla));
		nx_prov->nxprov_params->nxp_ifindex = 0;
		/* free any flow entries in the deferred list */
		fsw_linger_purge(fsw);
	}
	/*
	 * If we are destroying the instance, release the lock to let all
	 * outstanding agent threads enter, then wait until all of them
	 * have exited the critical section before continuing.
	 */
	if (purge) {
		FSW_UNLOCK(fsw);
		flow_mgr_terminate(fsw->fsw_flow_mgr);
		FSW_WLOCK(fsw);
	}
	FSW_WUNLOCK(fsw);
	return 0;
}

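/*
 * Free a flowswitch instance: force a detach (purge == TRUE), tear down
 * its data path, release the closed-port stats block, then return the
 * instance to its zone.
 */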
void
fsw_free(struct nx_flowswitch *fsw)
{
	int err;

	SK_LOCK_ASSERT_HELD();
	ASSERT(fsw != NULL);

	err = fsw_detach(fsw, NULL, TRUE);
	VERIFY(err == 0);

	fsw_dp_dtor(fsw);

	ASSERT(fsw->fsw_dev_ch == NULL);
	ASSERT(fsw->fsw_host_ch == NULL);
	ASSERT(fsw->fsw_closed_na_stats != NULL);
	zfree(nx_fsw_stats_zone, fsw->fsw_closed_na_stats);
	fsw->fsw_closed_na_stats = NULL;
	FSW_RWDESTROY(fsw);

	SK_DF(SK_VERB_MEM, "fsw 0x%llx FREE", SK_KVA(fsw));
	zfree(nx_fsw_zone, fsw);
}