/*
 * Copyright (c) 2015-2022 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

/*
 * The netif nexus domain has two domain providers: native and compat, with
 * the latter being the default provider of this domain. The compat provider
 * has special handlers for NXCFG_CMD_ATTACH and NXCFG_CMD_DETACH, etc.
 *
 * A netif nexus instance can be in a native or compat mode; in either case,
 * it is associated with two instances of a nexus_adapter structure, and allows
 * at most two channels opened to the nexus. The two adapters correspond to
 * the host and device ports, respectively.
 *
 * By itself, a netif nexus isn't associated with a network interface. The
 * association happens by attaching a network interface to the nexus instance.
 * A channel can only be successfully opened to a netif nexus after it has an
 * interface attached to it.
 *
 * During an attach, the interface is marked as Skywalk-capable, and its ifnet
 * structure refers to the attached netif nexus adapter via its if_na field.
 * The nexus also holds a reference to the interface on its na_ifp field. Note
 * that attaching to a netif_compat nexus does not alter the input/output data
 * path, nor does it remove any of the interface's hardware offload flags. It
 * merely associates the interface and netif nexus together.
 *
 * During a detach, the above references are dropped and the fields are cleared;
 * the interface is also marked as non-Skywalk-capable. This detach can happen
 * explicitly via a command down the nexus, or implicitly when the nexus goes
 * away (assuming there's no channel opened to it.)
 *
 * A userland channel can be opened to a netif nexus via the usual ch_open()
 * way, assuming the nexus provider is set up to allow access for the userland
 * process (either by binding the nexus port to a PID, etc., or by creating
 * the nexus in anonymous mode.)
 *
 * Alternatively, a kernel channel can also be opened to it by some kernel
 * subsystem, via ch_open_special(), e.g. by the flowswitch. Kernel channels
 * don't have any task mapping created, and the flag CHANF_KERNEL is used to
 * indicate that.
 *
 * Opening a channel to the host port of a native or compat netif causes the
 * ifnet output path to be redirected to nx_netif_host_transmit(). We also,
 * at present, disable any hardware offload features.
 *
 * Opening a channel to the device port of a compat netif causes the ifnet
 * input path to be redirected to nx_netif_compat_receive(). This is specific
 * to the compat variant, as the native variant's RX path already goes to
 * the native netif.
 *
 * During channel close, we restore the original I/O callbacks, as well as the
 * interface's offload flags.
 */
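
/*
 * An illustrative sketch (hypothetical caller, not an exported KPI
 * sequence): the kernel-side attach described above boils down to
 * something like the following, using the NXCFG_CMD_ATTACH handler
 * further down in this file:
 *
 *	struct nx_spec_req nsr;
 *
 *	bzero(&nsr, sizeof(nsr));
 *	(void) strlcpy(nsr.nsr_name, "en0", sizeof(nsr.nsr_name));
 *	err = nx_netif_ctl(nx, NXCFG_CMD_ATTACH, &nsr, p);
 *
 * On success, nsr.nsr_if_uuid is filled in with the adapter UUID,
 * which a subsequent NXCFG_CMD_DETACH must present.
 */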

#include <skywalk/os_skywalk_private.h>
#include <skywalk/nexus/netif/nx_netif.h>
#include <skywalk/nexus/upipe/nx_user_pipe.h>
#include <skywalk/nexus/flowswitch/nx_flowswitch.h>
#include <sys/kdebug.h>
#include <sys/sdt.h>
#include <os/refcnt.h>
#include <libkern/OSDebug.h>

#define NX_NETIF_MAXRINGS       NX_MAX_NUM_RING_PAIR
#define NX_NETIF_MINSLOTS       2       /* XXX same as above */
#define NX_NETIF_MAXSLOTS       NX_MAX_NUM_SLOT_PER_RING /* max # of slots */
#define NX_NETIF_TXRINGSIZE     512     /* default TX ring size */
#define NX_NETIF_RXRINGSIZE     1024    /* default RX ring size */
#define NX_NETIF_BUFSIZE        (2 * 1024)      /* default buffer size */
#define NX_NETIF_MINBUFSIZE     (128)           /* min buffer size */
#define NX_NETIF_MAXBUFSIZE     (32 * 1024)     /* max buffer size */

/*
 * TODO: [email protected] -- minimum buflets for now; we will need to
 * have a way to adjust this based on the underlying interface's
 * parameters, e.g. jumbo MTU, large segment offload, etc.
 */
#define NX_NETIF_UMD_SIZE       _USER_PACKET_SIZE(BUFLETS_MIN)
#define NX_NETIF_KMD_SIZE       _KERN_PACKET_SIZE(BUFLETS_MIN)

/*
 * minimum stack space required for IOSkywalkFamily and Driver execution.
 */
#if XNU_TARGET_OS_OSX
#define NX_NETIF_MIN_DRIVER_STACK_SIZE  (kernel_stack_size >> 1)
#else /* !XNU_TARGET_OS_OSX */
#define NX_NETIF_MIN_DRIVER_STACK_SIZE  (kernel_stack_size >> 2)
#endif /* XNU_TARGET_OS_OSX */
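
/*
 * e.g. assuming a 16KB kernel_stack_size, the driver path requires at
 * least 8KB of remaining stack on macOS (>> 1) and at least 4KB on
 * other platforms (>> 2) before calling into the driver.
 */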

static void nx_netif_dom_init(struct nxdom *);
static void nx_netif_dom_terminate(struct nxdom *);
static void nx_netif_dom_fini(struct nxdom *);
static int nx_netif_prov_params_adjust(
    const struct kern_nexus_domain_provider *, const struct nxprov_params *,
    struct nxprov_adjusted_params *);

static int nx_netif_dom_bind_port(struct kern_nexus *, nexus_port_t *,
    struct nxbind *, void *);
static int nx_netif_dom_unbind_port(struct kern_nexus *, nexus_port_t);
static int nx_netif_dom_connect(struct kern_nexus_domain_provider *,
    struct kern_nexus *, struct kern_channel *, struct chreq *,
    struct kern_channel *, struct nxbind *, struct proc *);
static void nx_netif_dom_disconnect(struct kern_nexus_domain_provider *,
    struct kern_nexus *, struct kern_channel *);
static void nx_netif_dom_defunct(struct kern_nexus_domain_provider *,
    struct kern_nexus *, struct kern_channel *, struct proc *);
static void nx_netif_dom_defunct_finalize(struct kern_nexus_domain_provider *,
    struct kern_nexus *, struct kern_channel *, boolean_t);

static void nx_netif_doorbell(struct ifnet *);
static int nx_netif_na_txsync(struct __kern_channel_ring *, struct proc *,
    uint32_t);
static int nx_netif_na_rxsync(struct __kern_channel_ring *, struct proc *,
    uint32_t);
static void nx_netif_na_dtor(struct nexus_adapter *na);
static int nx_netif_na_notify_tx(struct __kern_channel_ring *, struct proc *,
    uint32_t);
static int nx_netif_na_notify_rx(struct __kern_channel_ring *, struct proc *,
    uint32_t);
static int nx_netif_na_activate(struct nexus_adapter *, na_activate_mode_t);

static int nx_netif_ctl(struct kern_nexus *, nxcfg_cmd_t, void *,
    struct proc *);
static int nx_netif_ctl_attach(struct kern_nexus *, struct nx_spec_req *,
    struct proc *);
static int nx_netif_ctl_detach(struct kern_nexus *, struct nx_spec_req *);
static int nx_netif_attach(struct kern_nexus *, struct ifnet *);
static void nx_netif_flags_init(struct nx_netif *);
static void nx_netif_flags_fini(struct nx_netif *);
static void nx_netif_capabilities_fini(struct nx_netif *);
static errno_t nx_netif_interface_advisory_notify(void *,
    const struct ifnet_interface_advisory *);

struct nxdom nx_netif_dom_s = {
    .nxdom_prov_head =
        STAILQ_HEAD_INITIALIZER(nx_netif_dom_s.nxdom_prov_head),
    .nxdom_type = NEXUS_TYPE_NET_IF,
    .nxdom_md_type = NEXUS_META_TYPE_PACKET,
    .nxdom_md_subtype = NEXUS_META_SUBTYPE_RAW,
    .nxdom_name = "netif",
    .nxdom_ports = {
        .nb_def = 2,
        .nb_min = 2,
        .nb_max = NX_NETIF_MAXPORTS,
    },
    .nxdom_tx_rings = {
        .nb_def = 1,
        .nb_min = 1,
        .nb_max = NX_NETIF_MAXRINGS,
    },
    .nxdom_rx_rings = {
        .nb_def = 1,
        .nb_min = 1,
        .nb_max = NX_NETIF_MAXRINGS,
    },
    .nxdom_tx_slots = {
        .nb_def = NX_NETIF_TXRINGSIZE,
        .nb_min = NX_NETIF_MINSLOTS,
        .nb_max = NX_NETIF_MAXSLOTS,
    },
    .nxdom_rx_slots = {
        .nb_def = NX_NETIF_RXRINGSIZE,
        .nb_min = NX_NETIF_MINSLOTS,
        .nb_max = NX_NETIF_MAXSLOTS,
    },
    .nxdom_buf_size = {
        .nb_def = NX_NETIF_BUFSIZE,
        .nb_min = NX_NETIF_MINBUFSIZE,
        .nb_max = NX_NETIF_MAXBUFSIZE,
    },
    .nxdom_large_buf_size = {
        .nb_def = 0,
        .nb_min = 0,
        .nb_max = 0,
    },
    .nxdom_meta_size = {
        .nb_def = NX_NETIF_UMD_SIZE,
        .nb_min = NX_NETIF_UMD_SIZE,
        .nb_max = NX_METADATA_USR_MAX_SZ,
    },
    .nxdom_stats_size = {
        .nb_def = 0,
        .nb_min = 0,
        .nb_max = NX_STATS_MAX_SZ,
    },
    .nxdom_pipes = {
        .nb_def = 0,
        .nb_min = 0,
        .nb_max = NX_UPIPE_MAXPIPES,
    },
    .nxdom_flowadv_max = {
        .nb_def = 0,
        .nb_min = 0,
        .nb_max = NX_FLOWADV_MAX,
    },
    .nxdom_nexusadv_size = {
        .nb_def = 0,
        .nb_min = 0,
        .nb_max = NX_NEXUSADV_MAX_SZ,
    },
    .nxdom_capabilities = {
        .nb_def = NXPCAP_USER_CHANNEL,
        .nb_min = 0,
        .nb_max = NXPCAP_USER_CHANNEL,
    },
    .nxdom_qmap = {
        .nb_def = NEXUS_QMAP_TYPE_DEFAULT,
        .nb_min = NEXUS_QMAP_TYPE_DEFAULT,
        .nb_max = NEXUS_QMAP_TYPE_WMM,
    },
    .nxdom_max_frags = {
        .nb_def = NX_PBUF_FRAGS_DEFAULT,
        .nb_min = NX_PBUF_FRAGS_MIN,
        .nb_max = NX_PBUF_FRAGS_MAX,
    },
    .nxdom_init = nx_netif_dom_init,
    .nxdom_terminate = nx_netif_dom_terminate,
    .nxdom_fini = nx_netif_dom_fini,
    .nxdom_find_port = NULL,
    .nxdom_port_is_reserved = NULL,
    .nxdom_bind_port = nx_netif_dom_bind_port,
    .nxdom_unbind_port = nx_netif_dom_unbind_port,
    .nxdom_connect = nx_netif_dom_connect,
    .nxdom_disconnect = nx_netif_dom_disconnect,
    .nxdom_defunct = nx_netif_dom_defunct,
    .nxdom_defunct_finalize = nx_netif_dom_defunct_finalize,
};

struct kern_nexus_domain_provider nx_netif_prov_s = {
    .nxdom_prov_name = NEXUS_PROVIDER_NET_IF,
    /*
     * Don't install this as the default domain provider, i.e.
     * NXDOMPROVF_DEFAULT flag not set; we want netif_compat
     * provider to be the one handling userland-issued requests
     * coming down thru nxprov_create() instead.
     */
    .nxdom_prov_flags = 0,
    .nxdom_prov_cb = {
        .dp_cb_init = nx_netif_prov_init,
        .dp_cb_fini = nx_netif_prov_fini,
        .dp_cb_params = nx_netif_prov_params,
        .dp_cb_mem_new = nx_netif_prov_mem_new,
        .dp_cb_config = nx_netif_prov_config,
        .dp_cb_nx_ctor = nx_netif_prov_nx_ctor,
        .dp_cb_nx_dtor = nx_netif_prov_nx_dtor,
        .dp_cb_nx_mem_info = nx_netif_prov_nx_mem_info,
        .dp_cb_nx_mib_get = nx_netif_prov_nx_mib_get,
        .dp_cb_nx_stop = nx_netif_prov_nx_stop,
    },
};

struct nexus_ifnet_ops na_netif_ops = {
    .ni_finalize = na_netif_finalize,
    .ni_reap = nx_netif_reap,
    .ni_dequeue = nx_netif_native_tx_dequeue,
    .ni_get_len = nx_netif_native_tx_get_len,
    .ni_detach_notify = nx_netif_detach_notify
};

#define NX_NETIF_DOORBELL_MAX_DEQUEUE   64
uint32_t nx_netif_doorbell_max_dequeue = NX_NETIF_DOORBELL_MAX_DEQUEUE;

#define NQ_TRANSFER_DECAY       2       /* ilog2 of EWMA decay rate (4) */
static uint32_t nq_transfer_decay = NQ_TRANSFER_DECAY;
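
/*
 * Sketch of the assumed shift-based EWMA update this decay value
 * feeds (the actual accounting lives in the netif queue stats code,
 * not shown here):
 *
 *	avg += (sample - avg) >> nq_transfer_decay;
 *
 * i.e. with the default decay of 2, each new sample contributes 1/4
 * of its delta to the running average.
 */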

#define NQ_ACCUMULATE_INTERVAL  2       /* 2 seconds */
static uint32_t nq_accumulate_interval = NQ_ACCUMULATE_INTERVAL;

static uint32_t nq_stat_enable = 0;

SYSCTL_EXTENSIBLE_NODE(_kern_skywalk, OID_AUTO, netif,
    CTLFLAG_RW | CTLFLAG_LOCKED, 0, "Skywalk network interface");
#if (DEVELOPMENT || DEBUG)
SYSCTL_STRING(_kern_skywalk_netif, OID_AUTO, sk_ll_prefix,
    CTLFLAG_RW | CTLFLAG_LOCKED, sk_ll_prefix, sizeof(sk_ll_prefix),
    "ifname prefix for enabling low latency support");
static uint32_t nx_netif_force_ifnet_start = 0;
SYSCTL_UINT(_kern_skywalk_netif, OID_AUTO, force_ifnet_start,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nx_netif_force_ifnet_start, 0,
    "always use ifnet starter thread");
SYSCTL_UINT(_kern_skywalk_netif, OID_AUTO, doorbell_max_dequeue,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nx_netif_doorbell_max_dequeue,
    NX_NETIF_DOORBELL_MAX_DEQUEUE,
    "max packets to dequeue in doorbell context");
SYSCTL_UINT(_kern_skywalk_netif, OID_AUTO, netif_queue_transfer_decay,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nq_transfer_decay,
    NQ_TRANSFER_DECAY, "ilog2 of EWMA decay rate of netif queue transfers");
SYSCTL_UINT(_kern_skywalk_netif, OID_AUTO, netif_queue_stat_accumulate_interval,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nq_accumulate_interval,
    NQ_ACCUMULATE_INTERVAL, "accumulation interval for netif queue stats");
#endif /* DEVELOPMENT || DEBUG */

SYSCTL_UINT(_kern_skywalk_netif, OID_AUTO, netif_queue_stat_enable,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nq_stat_enable,
    0, "enable/disable stats collection for netif queue");

static SKMEM_TYPE_DEFINE(na_netif_zone, struct nexus_netif_adapter);

static SKMEM_TYPE_DEFINE(nx_netif_zone, struct nx_netif);

#define SKMEM_TAG_NETIF_MIT     "com.apple.skywalk.netif.mit"
static SKMEM_TAG_DEFINE(skmem_tag_netif_mit, SKMEM_TAG_NETIF_MIT);

#define SKMEM_TAG_NETIF_FILTER  "com.apple.skywalk.netif.filter"
SKMEM_TAG_DEFINE(skmem_tag_netif_filter, SKMEM_TAG_NETIF_FILTER);

#define SKMEM_TAG_NETIF_FLOW    "com.apple.skywalk.netif.flow"
SKMEM_TAG_DEFINE(skmem_tag_netif_flow, SKMEM_TAG_NETIF_FLOW);

#define SKMEM_TAG_NETIF_AGENT_FLOW      "com.apple.skywalk.netif.agent_flow"
SKMEM_TAG_DEFINE(skmem_tag_netif_agent_flow, SKMEM_TAG_NETIF_AGENT_FLOW);

#define SKMEM_TAG_NETIF_LLINK   "com.apple.skywalk.netif.llink"
SKMEM_TAG_DEFINE(skmem_tag_netif_llink, SKMEM_TAG_NETIF_LLINK);

#define SKMEM_TAG_NETIF_QSET    "com.apple.skywalk.netif.qset"
SKMEM_TAG_DEFINE(skmem_tag_netif_qset, SKMEM_TAG_NETIF_QSET);

#define SKMEM_TAG_NETIF_LLINK_INFO      "com.apple.skywalk.netif.llink_info"
SKMEM_TAG_DEFINE(skmem_tag_netif_llink_info, SKMEM_TAG_NETIF_LLINK_INFO);

/* use this for any temporary allocations */
#define SKMEM_TAG_NETIF_TEMP    "com.apple.skywalk.netif.temp"
static SKMEM_TAG_DEFINE(skmem_tag_netif_temp, SKMEM_TAG_NETIF_TEMP);

static void
nx_netif_dom_init(struct nxdom *nxdom)
{
    SK_LOCK_ASSERT_HELD();
    ASSERT(!(nxdom->nxdom_flags & NEXUSDOMF_INITIALIZED));

    _CASSERT(NEXUS_PORT_NET_IF_DEV == 0);
    _CASSERT(NEXUS_PORT_NET_IF_HOST == 1);
    _CASSERT(NEXUS_PORT_NET_IF_CLIENT == 2);
    _CASSERT(SK_NETIF_MIT_FORCE_OFF < SK_NETIF_MIT_FORCE_SIMPLE);
    _CASSERT(SK_NETIF_MIT_FORCE_SIMPLE < SK_NETIF_MIT_FORCE_ADVANCED);
    _CASSERT(SK_NETIF_MIT_FORCE_ADVANCED < SK_NETIF_MIT_AUTO);
    _CASSERT(SK_NETIF_MIT_AUTO == SK_NETIF_MIT_MAX);

    (void) nxdom_prov_add(nxdom, &nx_netif_prov_s);

    nx_netif_compat_init(nxdom);

    ASSERT(nxdom_prov_default[nxdom->nxdom_type] != NULL &&
        strcmp(nxdom_prov_default[nxdom->nxdom_type]->nxdom_prov_name,
        NEXUS_PROVIDER_NET_IF_COMPAT) == 0);

    netif_gso_init();
}

static void
nx_netif_dom_terminate(struct nxdom *nxdom)
{
    struct kern_nexus_domain_provider *nxdom_prov, *tnxdp;

    SK_LOCK_ASSERT_HELD();

    netif_gso_fini();
    nx_netif_compat_fini();

    STAILQ_FOREACH_SAFE(nxdom_prov, &nxdom->nxdom_prov_head,
        nxdom_prov_link, tnxdp) {
        (void) nxdom_prov_del(nxdom_prov);
    }
}

static void
nx_netif_dom_fini(struct nxdom *nxdom)
{
#pragma unused(nxdom)
}

int
nx_netif_prov_init(struct kern_nexus_domain_provider *nxdom_prov)
{
#pragma unused(nxdom_prov)
    SK_D("initializing %s", nxdom_prov->nxdom_prov_name);
    return 0;
}

static int
nx_netif_na_notify_drop(struct __kern_channel_ring *kring, struct proc *p,
    uint32_t flags)
{
#pragma unused(kring, p, flags)
    return ENXIO;
}

int
nx_netif_prov_nx_stop(struct kern_nexus *nx)
{
    uint32_t r;
    struct nexus_adapter *na = nx_port_get_na(nx, NEXUS_PORT_NET_IF_DEV);
    struct nexus_netif_adapter *nifna = (struct nexus_netif_adapter *)na;

    SK_LOCK_ASSERT_HELD();
    ASSERT(nx != NULL);

    /* place all rings in drop mode */
    na_kr_drop(na, TRUE);

    /* ensure global visibility */
    membar_sync();

    /* reset all TX notify callbacks */
    for (r = 0; r < na_get_nrings(na, NR_TX); r++) {
        while (!atomic_test_set_ptr(&na->na_tx_rings[r].ckr_na_notify,
            ptrauth_nop_cast(void *, na->na_tx_rings[r].ckr_na_notify),
            ptrauth_nop_cast(void *, &nx_netif_na_notify_drop))) {
            ;
        }
        membar_sync();
        if (nifna->nifna_tx_mit != NULL) {
            nx_netif_mit_cleanup(&nifna->nifna_tx_mit[r]);
        }
    }
    if (nifna->nifna_tx_mit != NULL) {
        skn_free_type_array(tx, struct nx_netif_mit,
            na_get_nrings(na, NR_TX), nifna->nifna_tx_mit);
        nifna->nifna_tx_mit = NULL;
    }

    /* reset all RX notify callbacks */
    for (r = 0; r < na_get_nrings(na, NR_RX); r++) {
        while (!atomic_test_set_ptr(&na->na_rx_rings[r].ckr_na_notify,
            ptrauth_nop_cast(void *, na->na_rx_rings[r].ckr_na_notify),
            ptrauth_nop_cast(void *, &nx_netif_na_notify_drop))) {
            ;
        }
        membar_sync();
        if (nifna->nifna_rx_mit != NULL) {
            nx_netif_mit_cleanup(&nifna->nifna_rx_mit[r]);
        }
    }
    if (nifna->nifna_rx_mit != NULL) {
        skn_free_type_array(rx, struct nx_netif_mit,
            na_get_nrings(na, NR_RX), nifna->nifna_rx_mit);
        nifna->nifna_rx_mit = NULL;
    }
    return 0;
}

static inline void
nx_netif_compat_adjust_ring_size(struct nxprov_adjusted_params *adj,
    ifnet_t ifp)
{
    if (IFNET_IS_CELLULAR(ifp) && (ifp->if_unit != 0)) {
        *(adj->adj_rx_slots) = sk_netif_compat_aux_cell_rx_ring_sz;
        *(adj->adj_tx_slots) = sk_netif_compat_aux_cell_tx_ring_sz;
    } else if (IFNET_IS_WIFI(ifp)) {
        if (ifp->if_name[0] == 'a' && ifp->if_name[1] == 'p' &&
            ifp->if_name[2] == '\0') {
            /* Wi-Fi Access Point */
            *(adj->adj_rx_slots) = sk_netif_compat_wap_rx_ring_sz;
            *(adj->adj_tx_slots) = sk_netif_compat_wap_tx_ring_sz;
        } else if (ifp->if_eflags & IFEF_AWDL) {
            /* AWDL */
            *(adj->adj_rx_slots) = sk_netif_compat_awdl_rx_ring_sz;
            *(adj->adj_tx_slots) = sk_netif_compat_awdl_tx_ring_sz;
        } else {
            /* Wi-Fi infrastructure */
            *(adj->adj_rx_slots) = sk_netif_compat_wif_rx_ring_sz;
            *(adj->adj_tx_slots) = sk_netif_compat_wif_tx_ring_sz;
        }
    } else if (IFNET_IS_ETHERNET(ifp)) {
#if !XNU_TARGET_OS_OSX
        /*
         * On non-macOS platforms, treat all compat Ethernet
         * interfaces as USB Ethernet with reduced ring sizes.
         */
        *(adj->adj_rx_slots) = sk_netif_compat_usb_eth_rx_ring_sz;
        *(adj->adj_tx_slots) = sk_netif_compat_usb_eth_tx_ring_sz;
#else /* XNU_TARGET_OS_OSX */
        if (ifp->if_subfamily == IFNET_SUBFAMILY_USB) {
            *(adj->adj_rx_slots) =
                sk_netif_compat_usb_eth_rx_ring_sz;
            *(adj->adj_tx_slots) =
                sk_netif_compat_usb_eth_tx_ring_sz;
        }
#endif /* XNU_TARGET_OS_OSX */
    }
}

static int
nx_netif_prov_params_adjust(const struct kern_nexus_domain_provider *nxdom_prov,
    const struct nxprov_params *nxp, struct nxprov_adjusted_params *adj)
{
    /*
     * For netif compat, adjust the following parameters for memory
     * optimization:
     * - change the size of the buffer object to 128 bytes.
     * - don't allocate an rx ring for the host port, nor a tx ring
     *   for the dev port.
     * - for cellular interfaces other than pdp_ip0, reduce the ring
     *   size. The assumption here is that pdp_ip0 is always used as
     *   the data interface.
     * - reduce the ring size for the AWDL interface.
     * - reduce the ring size for USB Ethernet interfaces.
     */
    if (strcmp(nxdom_prov->nxdom_prov_name,
        NEXUS_PROVIDER_NET_IF_COMPAT) == 0) {
        /*
         * Leave the parameters default if userspace access may be
         * needed. We can't use skywalk_direct_allowed() here because
         * the drivers have not attached yet.
         */
        if (skywalk_netif_direct_enabled()) {
            goto done;
        }

        *(adj->adj_buf_size) = NETIF_COMPAT_BUF_SIZE;
        *(adj->adj_tx_rings) = 1;
        if (IF_INDEX_IN_RANGE(nxp->nxp_ifindex)) {
            ifnet_t ifp;
            ifnet_head_lock_shared();
            ifp = ifindex2ifnet[nxp->nxp_ifindex];
            ifnet_head_done();
            VERIFY(ifp != NULL);
            nx_netif_compat_adjust_ring_size(adj, ifp);
        }
    } else { /* netif native */
        if (nxp->nxp_flags & NXPF_NETIF_LLINK) {
            *(adj->adj_tx_slots) = NX_NETIF_MINSLOTS;
            *(adj->adj_rx_slots) = NX_NETIF_MINSLOTS;
        }
        /*
         * Add another extra ring for the host port. Note that if the
         * nexus isn't configured to use the same pbufpool for all of
         * its ports, we'd end up allocating extra here.
         * Not a big deal since that case isn't the default.
         */
        *(adj->adj_tx_rings) += 1;
        *(adj->adj_rx_rings) += 1;

        if ((*(adj->adj_buf_size) < PKT_MAX_PROTO_HEADER_SIZE)) {
            SK_ERR("buf size too small, min (%d)",
                PKT_MAX_PROTO_HEADER_SIZE);
            return EINVAL;
        }
        _CASSERT(sizeof(struct __kern_netif_intf_advisory) ==
            NX_INTF_ADV_SIZE);
        *(adj->adj_nexusadv_size) = sizeof(struct netif_nexus_advisory);
    }
done:
    return 0;
}

int
nx_netif_prov_params(struct kern_nexus_domain_provider *nxdom_prov,
    const uint32_t req, const struct nxprov_params *nxp0,
    struct nxprov_params *nxp, struct skmem_region_params srp[SKMEM_REGIONS],
    uint32_t pp_region_config_flags)
{
    struct nxdom *nxdom = nxdom_prov->nxdom_prov_dom;

    return nxprov_params_adjust(nxdom_prov, req, nxp0, nxp, srp,
        nxdom, nxdom, nxdom, pp_region_config_flags,
        nx_netif_prov_params_adjust);
}

int
nx_netif_prov_mem_new(struct kern_nexus_domain_provider *nxdom_prov,
    struct kern_nexus *nx, struct nexus_adapter *na)
{
#pragma unused(nxdom_prov)
    int err = 0;
    boolean_t allow_direct;
    uint32_t pp_flags = 0;

    SK_DF(SK_VERB_NETIF,
        "nx 0x%llx (\"%s\":\"%s\") na \"%s\" (0x%llx)", SK_KVA(nx),
        NX_DOM(nx)->nxdom_name, nxdom_prov->nxdom_prov_name, na->na_name,
        SK_KVA(na));

    ASSERT(na->na_arena == NULL);
    if ((na->na_type == NA_NETIF_COMPAT_DEV) ||
        (na->na_type == NA_NETIF_COMPAT_HOST)) {
        pp_flags |= SKMEM_PP_FLAG_TRUNCATED_BUF;
    }
    /*
     * We do this check to determine whether to create the extra
     * regions needed for userspace access. This is per interface.
     * NX_USER_CHANNEL_PROV() is systemwide so it can't be used.
     */
    allow_direct = skywalk_netif_direct_allowed(na->na_name);

    /*
     * Both ports (host and dev) share the same packet buffer pool;
     * the first time a port gets opened will allocate the pp that
     * gets stored in the nexus, which will then be used by any
     * subsequent opens.
     */
    if (!allow_direct || !NX_USER_CHANNEL_PROV(nx)) {
        pp_flags |= SKMEM_PP_FLAG_KERNEL_ONLY;
    }
    na->na_arena = skmem_arena_create_for_nexus(na,
        NX_PROV(nx)->nxprov_region_params, &nx->nx_tx_pp,
        &nx->nx_rx_pp, pp_flags, &nx->nx_adv, &err);
    ASSERT(na->na_arena != NULL || err != 0);
    ASSERT(nx->nx_tx_pp == NULL || (nx->nx_tx_pp->pp_md_type ==
        NX_DOM(nx)->nxdom_md_type && nx->nx_tx_pp->pp_md_subtype ==
        NX_DOM(nx)->nxdom_md_subtype));

    return err;
}

SK_NO_INLINE_ATTRIBUTE
static int
nx_netif_get_llink_info(struct sockopt *sopt, struct kern_nexus *nx)
{
    struct nx_llink_info_req *nlir = NULL;
    struct nx_netif *nif;
    struct netif_llink *llink;
    uint16_t llink_cnt;
    size_t len, user_len;
    int err, i;

    nif = NX_NETIF_PRIVATE(nx);
    if (!NETIF_LLINK_ENABLED(nif)) {
        SK_ERR("llink mode not enabled");
        return ENOTSUP;
    }
    lck_rw_lock_shared(&nif->nif_llink_lock);
    llink_cnt = nif->nif_llink_cnt;
    if (llink_cnt == 0) {
        SK_ERR("zero llink cnt");
        err = ENXIO;
        goto done;
    }
    len = sizeof(*nlir) + (sizeof(struct nx_llink_info) * llink_cnt);
    /* preserve sopt_valsize because it gets overwritten by copyin */
    user_len = sopt->sopt_valsize;
    if (user_len < len) {
        SK_ERR("buffer too small");
        err = ENOBUFS;
        goto done;
    }
    nlir = sk_alloc_data(len, Z_WAITOK, skmem_tag_netif_llink_info);
    if (nlir == NULL) {
        SK_ERR("failed to allocate nlir");
        err = ENOMEM;
        goto done;
    }
    err = sooptcopyin(sopt, nlir, sizeof(*nlir), sizeof(*nlir));
    if (err != 0) {
        SK_ERR("copyin failed: %d", err);
        goto done;
    }
    if (nlir->nlir_version != NETIF_LLINK_INFO_VERSION) {
        SK_ERR("nlir version mismatch: %d != %d",
            nlir->nlir_version, NETIF_LLINK_INFO_VERSION);
        err = ENOTSUP;
        goto done;
    }
    nlir->nlir_llink_cnt = llink_cnt;
    i = 0;
    STAILQ_FOREACH(llink, &nif->nif_llink_list, nll_link) {
        struct nx_llink_info *nli;
        struct netif_qset *qset;
        uint16_t qset_cnt;
        int j;

        nli = &nlir->nlir_llink[i];
        nli->nli_link_id = llink->nll_link_id;
        nli->nli_link_id_internal = llink->nll_link_id_internal;
        nli->nli_state = llink->nll_state;
        nli->nli_flags = llink->nll_flags;

        qset_cnt = llink->nll_qset_cnt;
        ASSERT(qset_cnt <= NETIF_LLINK_MAX_QSETS);
        nli->nli_qset_cnt = qset_cnt;

        j = 0;
        SLIST_FOREACH(qset, &llink->nll_qset_list, nqs_list) {
            struct nx_qset_info *nqi;

            nqi = &nli->nli_qset[j];
            nqi->nqi_id = qset->nqs_id;
            nqi->nqi_flags = qset->nqs_flags;
            nqi->nqi_num_rx_queues = qset->nqs_num_rx_queues;
            nqi->nqi_num_tx_queues = qset->nqs_num_tx_queues;
            j++;
        }
        ASSERT(j == qset_cnt);
        i++;
    }
    ASSERT(i == llink_cnt);
    sopt->sopt_valsize = user_len;
    err = sooptcopyout(sopt, nlir, len);
    if (err != 0) {
        SK_ERR("sooptcopyout failed: %d", err);
    }
done:
    lck_rw_unlock_shared(&nif->nif_llink_lock);
    if (nlir != NULL) {
        sk_free_data(nlir, len);
    }
    return err;
}

int
nx_netif_prov_config(struct kern_nexus_domain_provider *nxdom_prov,
    struct kern_nexus *nx, struct nx_cfg_req *ncr, int sopt_dir,
    struct proc *p, kauth_cred_t cred)
{
#pragma unused(nxdom_prov)
    struct sockopt sopt;
    int err = 0;

    SK_LOCK_ASSERT_HELD();

    /* proceed only if the client possesses netif entitlement */
    if ((err = skywalk_priv_check_cred(p, cred,
        PRIV_SKYWALK_REGISTER_NET_IF)) != 0) {
        goto done;
    }

    if (ncr->nc_req == USER_ADDR_NULL) {
        err = EINVAL;
        goto done;
    }

    /* to make life easier for handling copies */
    bzero(&sopt, sizeof(sopt));
    sopt.sopt_dir = sopt_dir;
    sopt.sopt_val = ncr->nc_req;
    sopt.sopt_valsize = ncr->nc_req_len;
    sopt.sopt_p = p;

    switch (ncr->nc_cmd) {
    case NXCFG_CMD_ATTACH:
    case NXCFG_CMD_DETACH: {
        struct nx_spec_req nsr;

        bzero(&nsr, sizeof(nsr));
        err = sooptcopyin(&sopt, &nsr, sizeof(nsr), sizeof(nsr));
        if (err != 0) {
            goto done;
        }

        /*
         * Null-terminate in case this has an interface name;
         * the union is already large enough for uuid_t.
         */
        nsr.nsr_name[sizeof(nsr.nsr_name) - 1] = '\0';
        if (p != kernproc) {
            nsr.nsr_flags &= NXSPECREQ_MASK;
        }

        err = nx_netif_ctl(nx, ncr->nc_cmd, &nsr, p);
        if (err != 0) {
            goto done;
        }

        /* XXX: [email protected] -- can this copyout fail? */
        (void) sooptcopyout(&sopt, &nsr, sizeof(nsr));
        break;
    }
    case NXCFG_CMD_FLOW_ADD:
    case NXCFG_CMD_FLOW_DEL: {
        _CASSERT(offsetof(struct nx_flow_req, _nfr_kernel_field_end) ==
            offsetof(struct nx_flow_req, _nfr_common_field_end));
        struct nx_flow_req nfr;

        bzero(&nfr, sizeof(nfr));
        err = sooptcopyin(&sopt, &nfr, sizeof(nfr), sizeof(nfr));
        if (err != 0) {
            goto done;
        }

        err = nx_netif_ctl(nx, ncr->nc_cmd, &nfr, p);
        if (err != 0) {
            goto done;
        }

        /* XXX: [email protected] -- can this copyout fail? */
        (void) sooptcopyout(&sopt, &nfr, sizeof(nfr));
        break;
    }
    case NXCFG_CMD_GET_LLINK_INFO: {
        err = nx_netif_get_llink_info(&sopt, nx);
        break;
    }
    default:
        err = EINVAL;
        goto done;
    }
done:
    SK_DF(err ? SK_VERB_ERROR : SK_VERB_NETIF,
        "nexus 0x%llx (%s) cmd %d err %d", SK_KVA(nx),
        NX_DOM_PROV(nx)->nxdom_prov_name, ncr->nc_cmd, err);
    return err;
}

void
nx_netif_prov_fini(struct kern_nexus_domain_provider *nxdom_prov)
{
#pragma unused(nxdom_prov)
    SK_D("destroying %s", nxdom_prov->nxdom_prov_name);
}

int
nx_netif_prov_nx_ctor(struct kern_nexus *nx)
{
    struct nx_netif *n;
    char name[64];
    int error;

    SK_LOCK_ASSERT_HELD();
    ASSERT(nx->nx_arg == NULL);

    SK_D("nexus 0x%llx (%s)", SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name);

    nx->nx_arg = nx_netif_alloc(Z_WAITOK);
    n = NX_NETIF_PRIVATE(nx);
    if (NX_USER_CHANNEL_PROV(nx) &&
        NX_PROV(nx)->nxprov_params->nxp_nexusadv_size != 0) {
        (void) snprintf(name, sizeof(name), "netif_%llu", nx->nx_id);
        error = nx_advisory_alloc(nx, name,
            &NX_PROV(nx)->nxprov_region_params[SKMEM_REGION_NEXUSADV],
            NEXUS_ADVISORY_TYPE_NETIF);
        if (error != 0) {
            nx_netif_free(n);
            return error;
        }
    }
    n->nif_nx = nx;
    SK_D("create new netif 0x%llx for nexus 0x%llx",
        SK_KVA(NX_NETIF_PRIVATE(nx)), SK_KVA(nx));
    return 0;
}

void
nx_netif_prov_nx_dtor(struct kern_nexus *nx)
{
    struct nx_netif *n = NX_NETIF_PRIVATE(nx);

    SK_LOCK_ASSERT_HELD();

    SK_D("nexus 0x%llx (%s) netif 0x%llx", SK_KVA(nx),
        NX_DOM_PROV(nx)->nxdom_prov_name, SK_KVA(n));

    /*
     * XXX
     * detach should be done separately to be symmetrical with attach.
     */
    nx_advisory_free(nx);
    if (nx_port_get_na(nx, NEXUS_PORT_NET_IF_DEV) != NULL) {
        /* we're called by nx_detach(), so this cannot fail */
        int err = nx_netif_ctl_detach(nx, NULL);
        VERIFY(err == 0);
    }
    if (n->nif_dev_nxb != NULL) {
        nxb_free(n->nif_dev_nxb);
        n->nif_dev_nxb = NULL;
    }
    if (n->nif_host_nxb != NULL) {
        nxb_free(n->nif_host_nxb);
        n->nif_host_nxb = NULL;
    }
    SK_DF(SK_VERB_NETIF, "marking netif 0x%llx as free", SK_KVA(n));
    nx_netif_free(n);
    nx->nx_arg = NULL;
}

int
nx_netif_prov_nx_mem_info(struct kern_nexus *nx, struct kern_pbufpool **tpp,
    struct kern_pbufpool **rpp)
{
    ASSERT(nx->nx_tx_pp != NULL);
    ASSERT(nx->nx_rx_pp != NULL);

    if (tpp != NULL) {
        *tpp = nx->nx_tx_pp;
    }
    if (rpp != NULL) {
        *rpp = nx->nx_rx_pp;
    }

    return 0;
}

static size_t
__netif_mib_get_stats(struct kern_nexus *nx, void *out, size_t len)
{
    struct nx_netif *nif = NX_NETIF_PRIVATE(nx);
    struct ifnet *ifp = nif->nif_ifp;
    struct sk_stats_net_if *sns = out;
    size_t actual_space = sizeof(struct sk_stats_net_if);

    if (out != NULL && actual_space <= len) {
        uuid_copy(sns->sns_nx_uuid, nx->nx_uuid);
        if (ifp != NULL) {
            (void) strlcpy(sns->sns_if_name, if_name(ifp), IFNAMSIZ);
        }
        sns->sns_nifs = nif->nif_stats;
    }

    return actual_space;
}

static size_t
__netif_mib_get_llinks(struct kern_nexus *nx, void *out, size_t len)
{
    struct nx_netif *nif = NX_NETIF_PRIVATE(nx);
    struct nx_llink_info *nli_list = out;
    size_t actual_space = 0;

    if (NETIF_LLINK_ENABLED(nif)) {
        lck_rw_lock_shared(&nif->nif_llink_lock);
        actual_space += nif->nif_llink_cnt * sizeof(struct nx_llink_info);

        if (out != NULL && actual_space <= len) {
            struct netif_llink *llink;
            int i = 0;

            STAILQ_FOREACH(llink, &nif->nif_llink_list, nll_link) {
                struct nx_llink_info *nli;
                struct netif_qset *qset;
                uint16_t qset_cnt;
                int j;

                nli = &nli_list[i];
                uuid_copy(nli->nli_netif_uuid, nx->nx_uuid);
                nli->nli_link_id = llink->nll_link_id;
                nli->nli_link_id_internal = llink->nll_link_id_internal;
                nli->nli_state = llink->nll_state;
                nli->nli_flags = llink->nll_flags;

                qset_cnt = llink->nll_qset_cnt;
                ASSERT(qset_cnt <= NETIF_LLINK_MAX_QSETS);
                nli->nli_qset_cnt = qset_cnt;

                j = 0;
                SLIST_FOREACH(qset, &llink->nll_qset_list, nqs_list) {
                    struct nx_qset_info *nqi;

                    nqi = &nli->nli_qset[j];
                    nqi->nqi_id = qset->nqs_id;
                    nqi->nqi_flags = qset->nqs_flags;
                    nqi->nqi_num_rx_queues = qset->nqs_num_rx_queues;
                    nqi->nqi_num_tx_queues = qset->nqs_num_tx_queues;
                    j++;
                }
                ASSERT(j == qset_cnt);
                i++;
            }
            ASSERT(i == nif->nif_llink_cnt);
        }
        lck_rw_unlock_shared(&nif->nif_llink_lock);
    }

    return actual_space;
}

static size_t
__netif_mib_get_queue_stats(struct kern_nexus *nx, void *out, size_t len)
{
    struct nx_netif *nif = NX_NETIF_PRIVATE(nx);
    uint8_t *itr = out;
    size_t actual_space = 0;

    if (!NETIF_LLINK_ENABLED(nif)) {
        return actual_space;
    }

    lck_rw_lock_shared(&nif->nif_llink_lock);
    struct netif_llink *llink;
    struct netif_qset *qset;
    STAILQ_FOREACH(llink, &nif->nif_llink_list, nll_link) {
        SLIST_FOREACH(qset, &llink->nll_qset_list, nqs_list) {
            actual_space += sizeof(struct netif_qstats_info) *
                (qset->nqs_num_rx_queues + qset->nqs_num_tx_queues);
        }
    }
    if (out == NULL || actual_space > len) {
        lck_rw_unlock_shared(&nif->nif_llink_lock);
        return actual_space;
    }

    llink = NULL;
    qset = NULL;
    uint16_t i = 0, j = 0;
    STAILQ_FOREACH(llink, &nif->nif_llink_list, nll_link) {
        uint16_t qset_cnt;

        j = 0;
        qset_cnt = llink->nll_qset_cnt;
        ASSERT(qset_cnt <= NETIF_LLINK_MAX_QSETS);
        SLIST_FOREACH(qset, &llink->nll_qset_list, nqs_list) {
            int queue_cnt = qset->nqs_num_rx_queues +
                qset->nqs_num_tx_queues;
            for (uint16_t k = 0; k < queue_cnt; k++) {
                struct netif_qstats_info *nqi =
                    (struct netif_qstats_info *)(void *)itr;
                struct netif_queue *nq = &qset->nqs_driver_queues[k];

                nqi->nqi_qset_id = qset->nqs_id;
                nqi->nqi_queue_idx = k;
                if (KPKT_VALID_SVC(nq->nq_svc)) {
                    nqi->nqi_svc = nq->nq_svc;
                }
                if (nq->nq_flags & NETIF_QUEUE_IS_RX) {
                    nqi->nqi_queue_flag = NQI_QUEUE_FLAG_IS_RX;
                }

                struct netif_qstats *nq_out = &nqi->nqi_stats;
                struct netif_qstats *nq_src = &nq->nq_stats;
                memcpy(nq_out, nq_src, sizeof(struct netif_qstats));

                itr += sizeof(struct netif_qstats_info);
            }
            j++;
        }
        ASSERT(j == qset_cnt);
        i++;
    }
    ASSERT(i == nif->nif_llink_cnt);

    lck_rw_unlock_shared(&nif->nif_llink_lock);
    return actual_space;
}

size_t
nx_netif_prov_nx_mib_get(struct kern_nexus *nx, struct nexus_mib_filter *filter,
    void *out, size_t len, struct proc *p)
{
#pragma unused(p)
    size_t ret;

    if ((filter->nmf_bitmap & NXMIB_FILTER_NX_UUID) &&
        (uuid_compare(filter->nmf_nx_uuid, nx->nx_uuid)) != 0) {
        return 0;
    }

    switch (filter->nmf_type) {
    case NXMIB_NETIF_STATS:
        ret = __netif_mib_get_stats(nx, out, len);
        break;
    case NXMIB_LLINK_LIST:
        ret = __netif_mib_get_llinks(nx, out, len);
        break;
    case NXMIB_NETIF_QUEUE_STATS:
        ret = __netif_mib_get_queue_stats(nx, out, len);
        break;
    default:
        ret = 0;
        break;
    }
    return ret;
}

static int
nx_netif_dom_bind_port(struct kern_nexus *nx, nexus_port_t *nx_port,
    struct nxbind *nxb, void *info)
{
    struct nx_netif *nif = NX_NETIF_PRIVATE(nx);
    nexus_port_t first, last, port;
    int error;

    ASSERT(nx_port != NULL);
    ASSERT(nxb != NULL);

    port = *nx_port;

    /*
     * If port is:
     *   != NEXUS_PORT_ANY: attempt to bind to the specified port
     *   == NEXUS_PORT_ANY: find an available port, bind to it, and
     *       return the assigned port.
     */
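    /*
     * e.g. a caller passing *nx_port == NEXUS_PORT_ANY on a freshly
     * created netif would typically be assigned port 2
     * (NEXUS_PORT_NET_IF_CLIENT), the first client port past the
     * fixed dev (0) and host (1) ports.
     */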
    first = NEXUS_PORT_NET_IF_CLIENT;
    ASSERT(NXDOM_MAX(NX_DOM(nx), ports) <= NEXUS_PORT_MAX);
    last = (nexus_port_size_t)NXDOM_MAX(NX_DOM(nx), ports);
    ASSERT(first <= last);

    NETIF_WLOCK(nif);

    if (__improbable(first == last)) {
        error = ENOMEM;
    } else if (port != NEXUS_PORT_ANY) {
        error = nx_port_bind_info(nx, port, nxb, info);
        SK_DF(SK_VERB_NETIF, "port %d, bind err %d", port, error);
    } else {
        error = nx_port_find(nx, first, last - 1, &port);
        ASSERT(error != 0 || (port >= first && port < last));
        if (error == 0) {
            error = nx_port_bind_info(nx, port, nxb, info);
            SK_DF(SK_VERB_NETIF, "found port %d, bind err %d",
                port, error);
        }
    }
    NETIF_WUNLOCK(nif);

    ASSERT(*nx_port == NEXUS_PORT_ANY || *nx_port == port);
    if (error == 0) {
        *nx_port = port;
    }

    SK_DF(error ? SK_VERB_ERROR : SK_VERB_NETIF,
        "+++ netif 0x%llx nx_port %d, total %u active %u (err %d)",
        SK_KVA(nif), (int)*nx_port, NX_NETIF_MAXPORTS,
        nx->nx_active_ports, error);

    return error;
}

static int
nx_netif_dom_unbind_port(struct kern_nexus *nx, nexus_port_t nx_port)
{
    struct nx_netif *nif = NX_NETIF_PRIVATE(nx);
    int error = 0;

    ASSERT(nx_port != NEXUS_PORT_ANY);

    NETIF_WLOCK(nif);
    error = nx_port_unbind(nx, nx_port);
    NETIF_WUNLOCK(nif);

    return error;
}

static int
nx_netif_dom_connect(struct kern_nexus_domain_provider *nxdom_prov,
    struct kern_nexus *nx, struct kern_channel *ch, struct chreq *chr,
    struct kern_channel *ch0, struct nxbind *nxb, struct proc *p)
{
#pragma unused(nxdom_prov)
    int err = 0;

    SK_LOCK_ASSERT_HELD();

    ASSERT(NX_DOM_PROV(nx) == nxdom_prov);
    ASSERT(nx->nx_prov->nxprov_params->nxp_type ==
        nxdom_prov->nxdom_prov_dom->nxdom_type &&
        nx->nx_prov->nxprov_params->nxp_type == NEXUS_TYPE_NET_IF);
    ASSERT(!(ch->ch_flags & CHANF_HOST));

    switch (chr->cr_port) {
    case NEXUS_PORT_NET_IF_DEV:
        if (chr->cr_mode & CHMODE_HOST) {
            err = EINVAL;
            goto done;
        }
        break;

    case NEXUS_PORT_NET_IF_HOST:
        if (!(chr->cr_mode & CHMODE_HOST)) {
            if (ch->ch_flags & CHANF_KERNEL) {
                err = EINVAL;
                goto done;
            }
            chr->cr_mode |= CHMODE_HOST;
        }
        /*
         * This channel is exclusively opened to the host
         * rings; don't notify the external provider.
         */
        atomic_bitset_32(&ch->ch_flags, CHANF_HOST | CHANF_EXT_SKIP);
        break;

    default:
        /*
         * This channel is shared between netif and user process;
         * don't notify the external provider.
         */
        atomic_bitset_32(&ch->ch_flags, CHANF_EXT_SKIP);
        break;
    }

    chr->cr_ring_set = RING_SET_DEFAULT;
    chr->cr_real_endpoint = chr->cr_endpoint = CH_ENDPOINT_NET_IF;
    (void) snprintf(chr->cr_name, sizeof(chr->cr_name), "netif:%llu:%.*s",
        nx->nx_id, (int)nx->nx_prov->nxprov_params->nxp_namelen,
        nx->nx_prov->nxprov_params->nxp_name);

    if (ch->ch_flags & CHANF_KERNEL) {
        err = na_connect_spec(nx, ch, chr, p);
    } else {
        err = na_connect(nx, ch, chr, ch0, nxb, p);
    }

    if (err == 0) {
        /*
         * Mark the kernel slot descriptor region as busy; this
         * prevents it from being torn-down at channel defunct
         * time, as the (external) nexus owner may be calling
         * KPIs that require accessing the slots.
         */
        skmem_arena_nexus_sd_set_noidle(
            skmem_arena_nexus(ch->ch_na->na_arena), 1);
    }

done:
    return err;
}

static void
nx_netif_dom_disconnect(struct kern_nexus_domain_provider *nxdom_prov,
    struct kern_nexus *nx, struct kern_channel *ch)
{
#pragma unused(nxdom_prov)
    SK_LOCK_ASSERT_HELD();

    SK_D("channel 0x%llx -!- nexus 0x%llx (%s:\"%s\":%u:%d)", SK_KVA(ch),
        SK_KVA(nx), nxdom_prov->nxdom_prov_name, ch->ch_na->na_name,
        ch->ch_info->cinfo_nx_port, (int)ch->ch_info->cinfo_ch_ring_id);

    /*
     * Release busy assertion held earlier in nx_netif_dom_connect();
     * this allows for the final arena teardown to succeed.
     */
    skmem_arena_nexus_sd_set_noidle(
        skmem_arena_nexus(ch->ch_na->na_arena), -1);

    if (ch->ch_flags & CHANF_KERNEL) {
        na_disconnect_spec(nx, ch);
    } else {
        na_disconnect(nx, ch);
    }
}

static void
nx_netif_dom_defunct(struct kern_nexus_domain_provider *nxdom_prov,
    struct kern_nexus *nx, struct kern_channel *ch, struct proc *p)
{
#pragma unused(nxdom_prov, nx)
    LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);
    ASSERT(!(ch->ch_flags & CHANF_KERNEL));
    ASSERT(ch->ch_na->na_type == NA_NETIF_DEV ||
        ch->ch_na->na_type == NA_NETIF_HOST ||
        ch->ch_na->na_type == NA_NETIF_COMPAT_DEV ||
        ch->ch_na->na_type == NA_NETIF_COMPAT_HOST ||
        ch->ch_na->na_type == NA_NETIF_VP);

    na_ch_rings_defunct(ch, p);
}

static void
nx_netif_dom_defunct_finalize(struct kern_nexus_domain_provider *nxdom_prov,
    struct kern_nexus *nx, struct kern_channel *ch, boolean_t locked)
{
#pragma unused(nxdom_prov)
    struct ifnet *ifp;

    if (!locked) {
        SK_LOCK_ASSERT_NOTHELD();
        SK_LOCK();
        LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_NOTOWNED);
    } else {
        SK_LOCK_ASSERT_HELD();
        LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);
    }

    ASSERT(ch->ch_na->na_type == NA_NETIF_DEV ||
        ch->ch_na->na_type == NA_NETIF_HOST ||
        ch->ch_na->na_type == NA_NETIF_COMPAT_DEV ||
        ch->ch_na->na_type == NA_NETIF_COMPAT_HOST ||
        ch->ch_na->na_type == NA_NETIF_VP);

    na_defunct(nx, ch, ch->ch_na, locked);
    ifp = ch->ch_na->na_ifp;
    if (ch->ch_na->na_type == NA_NETIF_VP && ifp != NULL &&
        ifnet_is_low_latency(ifp)) {
        /*
         * We release the VPNA's ifp here instead of waiting for the
         * application to close the channel to trigger the release.
         */
        DTRACE_SKYWALK2(release__vpna__ifp, struct nexus_adapter *,
            ch->ch_na, struct ifnet *, ifp);
        ifnet_decr_iorefcnt(ifp);
        ch->ch_na->na_ifp = NULL;
    }
    SK_D("%s(%d): ch 0x%llx -/- nx 0x%llx (%s:\"%s\":%u:%d)",
        ch->ch_name, ch->ch_pid, SK_KVA(ch), SK_KVA(nx),
        nxdom_prov->nxdom_prov_name, ch->ch_na->na_name,
        ch->ch_info->cinfo_nx_port, (int)ch->ch_info->cinfo_ch_ring_id);

    if (!locked) {
        LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_NOTOWNED);
        SK_UNLOCK();
    } else {
        LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);
        SK_LOCK_ASSERT_HELD();
    }
}

struct nexus_netif_adapter *
na_netif_alloc(zalloc_flags_t how)
{
    _CASSERT(offsetof(struct nexus_netif_adapter, nifna_up) == 0);

    return zalloc_flags(na_netif_zone, how | Z_ZERO);
}

void
na_netif_free(struct nexus_adapter *na)
{
    struct nexus_netif_adapter *nifna = (struct nexus_netif_adapter *)na;

    SK_LOCK_ASSERT_HELD();
    SK_DF(SK_VERB_MEM, "nifna 0x%llx FREE", SK_KVA(nifna));

    ASSERT(na->na_refcount == 0);
    ASSERT(nifna->nifna_tx_mit == NULL);
    ASSERT(nifna->nifna_rx_mit == NULL);
    bzero(nifna, sizeof(*nifna));

    zfree(na_netif_zone, nifna);
}

/* Process NXCFG_CMD_ATTACH */
SK_NO_INLINE_ATTRIBUTE
static int
nx_netif_ctl_attach(struct kern_nexus *nx, struct nx_spec_req *nsr,
    struct proc *p)
{
    struct nx_netif *n = NX_NETIF_PRIVATE(nx);
    struct ifnet *ifp = NULL;
    boolean_t compat;
    int err = 0;

    SK_LOCK_ASSERT_HELD();

    ASSERT(NX_DOM(nx)->nxdom_type == NEXUS_TYPE_NET_IF);
    compat = (strcmp(NX_DOM_PROV(nx)->nxdom_prov_name,
        NEXUS_PROVIDER_NET_IF_COMPAT) == 0);

    uuid_clear(nsr->nsr_if_uuid);
    /*
     * The netif accepts either an interface name or a pointer to
     * an ifnet, but never a UUID.
     */
    if (nsr->nsr_flags & NXSPECREQ_UUID) {
        err = EINVAL;
        goto done;
    }
    if (nsr->nsr_flags & NXSPECREQ_IFP) {
        if (p != kernproc || (ifp = nsr->nsr_ifp) == NULL) {
            err = EINVAL;
            goto done;
        }
    } else if ((ifp = ifunit_ref(nsr->nsr_name)) == NULL) {
        err = ENXIO;
        goto done;
    }

    if ((compat && SKYWALK_NATIVE(ifp)) ||
        (!compat && !SKYWALK_NATIVE(ifp))) {
        /* native driver for netif; non-native for netif_compat */
        err = ENODEV;
    } else if (ifp->if_na != NULL || !uuid_is_null(n->nif_uuid)) {
        err = EBUSY;
    } else {
        ASSERT(uuid_is_null(n->nif_uuid));
        /*
         * Upon success, callee will hold its own ifnet iorefcnt
         * as well as a retain count on the nexus adapter.
         */
        if (compat) {
            err = nx_netif_compat_attach(nx, ifp);
        } else {
            err = nx_netif_attach(nx, ifp);
        }

        if (err == 0) {
            /* return the adapter UUID */
            uuid_generate_random(n->nif_uuid);
            uuid_copy(nsr->nsr_if_uuid, n->nif_uuid);
#if (DEVELOPMENT || DEBUG)
            skoid_create(&n->nif_skoid,
                SKOID_SNODE(_kern_skywalk_netif), if_name(ifp),
                CTLFLAG_RW);
#endif /* DEVELOPMENT || DEBUG */
        }
    }
done:
    /* drop I/O refcnt from ifunit_ref() */
    if (ifp != NULL && !(nsr->nsr_flags & NXSPECREQ_IFP)) {
        ifnet_decr_iorefcnt(ifp);
    }

#if SK_LOG
    uuid_string_t uuidstr, ifuuidstr;
    const char *nustr;
    if (nsr->nsr_flags & NXSPECREQ_UUID) {
        nustr = sk_uuid_unparse(nsr->nsr_uuid, uuidstr);
    } else if (nsr->nsr_flags & NXSPECREQ_IFP) {
        (void) snprintf((char *)uuidstr, sizeof(uuidstr), "0x%llx",
            SK_KVA(nsr->nsr_ifp));
        nustr = uuidstr;
    } else {
        nustr = nsr->nsr_name;
    }
    SK_DF(err ? SK_VERB_ERROR : SK_VERB_NETIF,
        "nexus 0x%llx (%s) name/uuid \"%s\" if_uuid %s flags 0x%x err %d",
        SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name, nustr,
        sk_uuid_unparse(nsr->nsr_if_uuid, ifuuidstr), nsr->nsr_flags, err);
#endif /* SK_LOG */

    return err;
}

SK_NO_INLINE_ATTRIBUTE
static int
nx_netif_clean(struct nx_netif *nif, boolean_t quiesce_needed)
{
    struct kern_nexus *nx = nif->nif_nx;
    struct ifnet *ifp;
    boolean_t suspended = FALSE;

    ifp = nif->nif_ifp;
    if (ifp == NULL) {
        return EALREADY;
    }
    /*
     * For regular kernel-attached interfaces, quiescing is handled by
     * the ifnet detach thread, which calls dlil_quiesce_and_detach_nexuses().
     * For interfaces created by skywalk test cases, flowswitch/netif nexuses
     * are constructed on the fly and can also be torn down on the fly.
     * dlil_quiesce_and_detach_nexuses() won't help here because any nexus
     * can be detached while the interface is still attached.
     */
    if (quiesce_needed && ifnet_datamov_suspend_if_needed(ifp)) {
        SK_UNLOCK();
        suspended = TRUE;
        ifnet_datamov_drain(ifp);
        SK_LOCK();
    }
    nx_netif_agent_fini(nif);
    nx_netif_capabilities_fini(nif);
    nx_netif_flow_fini(nif);
    nx_netif_filter_fini(nif);
    nx_netif_llink_fini(nif);
    nx_netif_flags_fini(nif);

    uuid_clear(nif->nif_uuid);
    /* nx_netif_{compat_}attach() held both references */
    na_release_locked(nx_port_get_na(nx, NEXUS_PORT_NET_IF_DEV));
    na_release_locked(nx_port_get_na(nx, NEXUS_PORT_NET_IF_HOST));
    nx_port_free(nx, NEXUS_PORT_NET_IF_DEV);
    nx_port_free(nx, NEXUS_PORT_NET_IF_HOST);

    ifp->if_na_ops = NULL;
    ifp->if_na = NULL;
    nif->nif_ifp = NULL;
    nif->nif_netif_nxadv = NULL;
    SKYWALK_CLEAR_CAPABLE(ifp);
    if (suspended) {
        ifnet_datamov_resume(ifp);
    }

#if (DEVELOPMENT || DEBUG)
    skoid_destroy(&nif->nif_skoid);
#endif /* DEVELOPMENT || DEBUG */
    return 0;
}

/* Process NXCFG_CMD_DETACH */
SK_NO_INLINE_ATTRIBUTE
static int
nx_netif_ctl_detach(struct kern_nexus *nx, struct nx_spec_req *nsr)
{
    struct nx_netif *nif = NX_NETIF_PRIVATE(nx);
    int err = 0;

    SK_LOCK_ASSERT_HELD();

    /*
     * nsr is NULL when we're called from the destructor, and it
     * implies that we'll detach whatever is attached.
     */
    if (nsr != NULL && uuid_is_null(nsr->nsr_if_uuid)) {
        err = EINVAL;
    } else if (nsr != NULL && uuid_compare(nsr->nsr_if_uuid,
        nif->nif_uuid) != 0) {
        err = ESRCH;
    } else if (nx_port_get_na(nx, NEXUS_PORT_NET_IF_DEV) == NULL) {
        /* nx_netif_ctl_attach() not yet done or already detached */
        err = ENXIO;
    } else if (nx->nx_ch_count != 0) {
        /*
         * There's at least one channel opened; we can't
         * yank the interface from underneath the nexus
         * since our dlil input/output handler may be
         * running now. Bail out and come back here
         * again when the nexus detaches.
         */
        err = EBUSY;
    } else {
        err = nx_netif_clean(nif, TRUE);
    }

#if SK_LOG
    if (nsr != NULL) {
        uuid_string_t ifuuidstr;
        SK_DF(err ? SK_VERB_ERROR : SK_VERB_NETIF,
            "nexus 0x%llx (%s) if_uuid %s flags 0x%x err %d",
            SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name,
            sk_uuid_unparse(nsr->nsr_if_uuid, ifuuidstr),
            nsr->nsr_flags, err);
    } else {
        SK_DF(err ? SK_VERB_ERROR : SK_VERB_NETIF,
            "nexus 0x%llx (%s) err %d", SK_KVA(nx),
            NX_DOM_PROV(nx)->nxdom_prov_name, err);
    }
#endif /* SK_LOG */

    return err;
}

/*
 * XXX
 * These checks are copied from fsw.c
 * There are no tests exercising this code. Do we still need this?
 */
SK_NO_INLINE_ATTRIBUTE
static int
nx_netif_ctl_flow_check(struct nx_netif *nif, nxcfg_cmd_t cmd,
    struct proc *p, struct nx_flow_req *req)
{
#pragma unused(nif)
    boolean_t need_check;
    int error;

    if (uuid_is_null(req->nfr_flow_uuid)) {
        return EINVAL;
    }
    req->nfr_flags &= NXFLOWREQF_MASK;
    req->nfr_flowadv_idx = FLOWADV_IDX_NONE;

    if (cmd == NXCFG_CMD_FLOW_DEL) {
        return 0;
    }
    need_check = FALSE;
    if (req->nfr_epid != -1 && proc_pid(p) != req->nfr_epid) {
        need_check = TRUE;
    } else if (!uuid_is_null(req->nfr_euuid)) {
        uuid_t uuid;

        /* get the UUID of the issuing process */
        proc_getexecutableuuid(p, uuid, sizeof(uuid));

        /*
         * If this is not issued by a process for its own
         * executable UUID and if the process does not have
         * the necessary privilege, reject the request.
         * The logic is similar to so_set_effective_uuid().
         */
        if (uuid_compare(req->nfr_euuid, uuid) != 0) {
            need_check = TRUE;
        }
    }
    if (need_check) {
        kauth_cred_t cred = kauth_cred_proc_ref(p);
        error = priv_check_cred(cred,
            PRIV_NET_PRIVILEGED_SOCKET_DELEGATE, 0);
        kauth_cred_unref(&cred);
        if (error != 0) {
            return error;
        }
    }
    return 0;
}

SK_NO_INLINE_ATTRIBUTE
static int
nx_netif_ctl_flow_add(struct nx_netif *nif, struct proc *p,
    struct nx_flow_req *req)
{
    int err;

    ASSERT(p != PROC_NULL);
    err = nx_netif_ctl_flow_check(nif, NXCFG_CMD_FLOW_ADD, p, req);
    if (err != 0) {
        return err;
    }

    /* init kernel-only fields */
    nx_flow_req_internalize(req);
    req->nfr_context = NULL;
    req->nfr_flow_stats = NULL;
    req->nfr_port_reservation = NULL;
    req->nfr_pid = proc_pid(p);

    err = nx_netif_netagent_flow_add(nif, req);
    nx_flow_req_externalize(req);
    return err;
}

SK_NO_INLINE_ATTRIBUTE
static int
nx_netif_ctl_flow_del(struct nx_netif *nif, struct proc *p,
    struct nx_flow_req *req)
{
    int err;

    err = nx_netif_ctl_flow_check(nif, NXCFG_CMD_FLOW_DEL, p, req);
    if (err != 0) {
        return err;
    }

    nx_flow_req_internalize(req);
    req->nfr_pid = proc_pid(p);

    err = nx_netif_netagent_flow_del(nif, req);
    nx_flow_req_externalize(req);
    return err;
}

SK_NO_INLINE_ATTRIBUTE
static int
nx_netif_ctl(struct kern_nexus *nx, nxcfg_cmd_t nc_cmd, void *data,
    struct proc *p)
{
    struct nx_netif *nif = NX_NETIF_PRIVATE(nx);
    struct nx_spec_req *nsr = data;
    struct nx_flow_req *nfr = data;
    int error = 0;

    SK_LOCK_ASSERT_HELD();

    switch (nc_cmd) {
    case NXCFG_CMD_ATTACH:
        error = nx_netif_ctl_attach(nx, nsr, p);
        break;

    case NXCFG_CMD_DETACH:
        error = nx_netif_ctl_detach(nx, nsr);
        break;

    case NXCFG_CMD_FLOW_ADD:
        error = nx_netif_ctl_flow_add(nif, p, nfr);
        break;

    case NXCFG_CMD_FLOW_DEL:
        error = nx_netif_ctl_flow_del(nif, p, nfr);
        break;

    default:
        SK_ERR("invalid cmd %u", nc_cmd);
        error = EINVAL;
        break;
    }
    return error;
}

static void
nx_netif_llink_notify(struct kern_nexus *nx, struct netif_llink *llink,
    uint32_t flags)
{
#pragma unused(flags)
    struct netif_qset *qset;

    SLIST_FOREACH(qset, &llink->nll_qset_list, nqs_list) {
        (void) nx_tx_qset_notify(nx, qset->nqs_ctx);
    }
}

static void
nx_netif_llink_notify_all(struct kern_nexus *nx, uint32_t flags)
{
    struct nx_netif *nif;
    struct netif_llink *llink;

    nif = NX_NETIF_PRIVATE(nx);

    lck_rw_lock_shared(&nif->nif_llink_lock);
    STAILQ_FOREACH(llink, &nif->nif_llink_list, nll_link) {
        nx_netif_llink_notify(nx, llink, flags);
    }
    lck_rw_unlock_shared(&nif->nif_llink_lock);
}
1692
1693 /*
1694 * if_start() callback for native Skywalk interfaces, registered
1695 * at ifnet_allocate_extended() time, and invoked by the ifnet
1696 * starter thread.
1697 */
1698 static void
nx_netif_doorbell_internal(struct ifnet * ifp,uint32_t flags)1699 nx_netif_doorbell_internal(struct ifnet *ifp, uint32_t flags)
1700 {
1701 if (__improbable(ifp->if_na == NULL)) {
1702 return;
1703 }
1704
1705 /*
1706 * Do this only if the nexus adapter is active, i.e. a channel
1707 * has been opened to it by the module above (flowswitch, etc.)
1708 */
1709 struct nexus_adapter *hwna = &NA(ifp)->nifna_up;
1710 if (__probable(NA_IS_ACTIVE(hwna))) {
1711 struct kern_nexus *nx = hwna->na_nx;
1712
1713 /* update our work timestamp */
1714 hwna->na_work_ts = _net_uptime;
1715
1716 if (NX_LLINK_PROV(nx)) {
1717 nx_netif_llink_notify_all(nx, flags);
1718 } else {
1719 struct __kern_channel_ring *kring;
1720
1721 /* for doorbell purposes, use TX ring 0 */
1722 kring = &hwna->na_tx_rings[0];
1723
1724 /* Issue a synchronous TX doorbell on the netif device ring */
1725 kring->ckr_na_sync(kring, PROC_NULL,
1726 (NA_SYNCF_NETIF_DOORBELL | NA_SYNCF_NETIF_IFSTART));
1727 }
1728 } else {
1729 struct netif_stats *nifs =
1730 &NX_NETIF_PRIVATE(hwna->na_nx)->nif_stats;
1731 STATS_INC(nifs, NETIF_STATS_DROP_NA_INACTIVE);
1732 }
1733 }
1734
1735 static void
nx_netif_doorbell(struct ifnet * ifp)1736 nx_netif_doorbell(struct ifnet *ifp)
1737 {
1738 nx_netif_doorbell_internal(ifp, NETIF_XMIT_FLAG_HOST);
1739 }
1740
1741 /*
1742 * TX sync callback, called from nx_netif_doorbell() where we'd expect to
1743 * perform synchronous TX doorbell to the driver, by invoking the driver's
1744 * doorbell callback directly in the same thread context. It is also called
1745 * when the layer above performs a TX sync operation, where we might need
1746 * to do an asynchronous doorbell instead, by simply calling ifnet_start().
1747 */
1748 static int
nx_netif_na_txsync(struct __kern_channel_ring * kring,struct proc * p,uint32_t flags)1749 nx_netif_na_txsync(struct __kern_channel_ring *kring, struct proc *p,
1750 uint32_t flags)
1751 {
1752 #pragma unused(p)
1753 struct ifnet *ifp = KRNA(kring)->na_ifp;
1754 boolean_t sync_only;
1755 int ret = 0;
1756
1757 ASSERT(ifp != NULL);
1758
1759 SK_DF(SK_VERB_NETIF | SK_VERB_SYNC | SK_VERB_TX,
1760 "%s(%d) kr \"%s\" (0x%llx) krflags 0x%b ring %u flags 0%x",
1761 sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name,
1762 SK_KVA(kring), kring->ckr_flags, CKRF_BITS, kring->ckr_ring_id,
1763 flags);
1764
1765 if (__improbable(!IF_FULLY_ATTACHED(ifp))) {
1766 SK_ERR("kr 0x%llx ifp %s (0x%llx), interface not attached",
1767 SK_KVA(kring), if_name(ifp), SK_KVA(ifp));
1768 return ENXIO;
1769 }
1770
1771 if (__improbable((ifp->if_start_flags & IFSF_FLOW_CONTROLLED) != 0)) {
1772 SK_DF(SK_VERB_SYNC | SK_VERB_TX, "kr 0x%llx ifp %s (0x%llx), "
1773 "flow control ON", SK_KVA(kring), if_name(ifp),
1774 SK_KVA(ifp));
1775 return ENXIO;
1776 }
1777
1778 /* update our work timestamp */
1779 KRNA(kring)->na_work_ts = _net_uptime;
1780
1781 sync_only = ((flags & NA_SYNCF_SYNC_ONLY) != 0) ||
1782 !KR_KERNEL_ONLY(kring);
1783 /* regular sync (reclaim) */
1784 if ((flags & NA_SYNCF_NETIF) != 0 || __improbable(sync_only)) {
1785 ret = nx_sync_tx(kring, (flags & NA_SYNCF_FORCE_RECLAIM) ||
1786 kring->ckr_pending_intr != 0);
1787 kring->ckr_pending_intr = 0;
1788
1789 /* direct user channels do not need to use the doorbell */
1790 if (__improbable(sync_only)) {
1791 return ret;
1792 }
1793 }
1794
1795 /*
1796 * Doorbell call. Here we do doorbell explicitly if the flag is
1797 * set or implicitly if we're opened directly by a user channel.
1798 * Synchronous vs. asynchronous depending on the context.
1799 */
1800 if (__probable((flags & NA_SYNCF_NETIF_DOORBELL) != 0)) {
1801 if ((flags & NA_SYNCF_NETIF_IFSTART) != 0) {
1802 ASSERT(!(flags & NA_SYNCF_NETIF_IFSTART) ||
1803 !(flags & NA_SYNCF_NETIF_ASYNC));
1804 nx_tx_doorbell(kring, (flags & NA_SYNCF_NETIF_ASYNC));
1805 } else {
1806 ifnet_start(ifp);
1807 }
1808 }
1809
1810 return ret;
1811 }
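
/*
 * Example flag combinations (illustrative, derived from the callers
 * above): the if_start path issues a synchronous doorbell via
 *
 *	kring->ckr_na_sync(kring, PROC_NULL,
 *	    (NA_SYNCF_NETIF_DOORBELL | NA_SYNCF_NETIF_IFSTART));
 *
 * whereas an upper layer that only wants a reclaim would pass
 * NA_SYNCF_NETIF (optionally with NA_SYNCF_FORCE_RECLAIM) without the
 * doorbell flag.
 */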

static int
nx_netif_na_rxsync(struct __kern_channel_ring *kring, struct proc *p,
    uint32_t flags)
{
#pragma unused(p)
	int ret;

	SK_DF(SK_VERB_NETIF | SK_VERB_SYNC | SK_VERB_RX,
	    "%s(%d) kr \"%s\" (0x%llx) krflags 0x%b ring %u flags 0x%x",
	    sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name,
	    SK_KVA(kring), kring->ckr_flags, CKRF_BITS, kring->ckr_ring_id,
	    flags);

	ASSERT(kring->ckr_rhead <= kring->ckr_lim);

	/* update our work timestamp */
	KRNA(kring)->na_work_ts = _net_uptime;

	ret = nx_sync_rx(kring, (flags & NA_SYNCF_FORCE_READ) ||
	    kring->ckr_pending_intr != 0);
	kring->ckr_pending_intr = 0;

	return ret;
}

static void
nx_netif_na_dtor(struct nexus_adapter *na)
{
	struct ifnet *ifp;
	struct nexus_netif_adapter *nifna = NIFNA(na);

	SK_LOCK_ASSERT_HELD();
	ASSERT(na->na_type == NA_NETIF_DEV || na->na_type == NA_NETIF_HOST);

	SK_DF(SK_VERB_NETIF, "na \"%s\" (0x%llx)", na->na_name, SK_KVA(na));

	/*
	 * If the finalizer callback hasn't been called for whatever
	 * reason, pick up the embryonic ifnet stored in na_private.
	 * Otherwise, release the I/O refcnt of a non-NULL na_ifp.
	 */
	if ((ifp = na->na_ifp) == NULL) {
		ifp = na->na_private;
		na->na_private = NULL;
	} else {
		ifnet_decr_iorefcnt(ifp);
		na->na_ifp = NULL;
	}

	if (nifna->nifna_netif != NULL) {
		nx_netif_release(nifna->nifna_netif);
		nifna->nifna_netif = NULL;
	}
	ASSERT(SKYWALK_NATIVE(ifp));
}

/*
 * Dispatch rx/tx interrupts to the channel rings.
 *
 * The 'notify' routine depends on what the ring is attached to.
 * - for a channel file descriptor, do an event wakeup on the individual
 *   waitqueue, plus one on the global one if needed (see na_notify)
 * - for a device port connected to a flowswitch, call the proper
 *   forwarding routine; see nx_fsw_tx_hwna_notify()
 *   or nx_fsw_rx_hwna_notify().
 */
int
nx_netif_common_intr(struct __kern_channel_ring *kring, struct proc *p,
    uint32_t flags, uint32_t *work_done)
{
	struct netif_stats *nifs =
	    &NX_NETIF_PRIVATE(KRNA(kring)->na_nx)->nif_stats;
	int (*notify)(struct __kern_channel_ring *kring,
	    struct proc *, uint32_t flags);
	int ret;

	KDBG((SK_KTRACE_NETIF_COMMON_INTR | DBG_FUNC_START), SK_KVA(kring));

	SK_DF(SK_VERB_NETIF | SK_VERB_INTR |
	    ((kring->ckr_tx == NR_RX) ? SK_VERB_RX : SK_VERB_TX),
	    "na \"%s\" (0x%llx) kr \"%s\" (0x%llx) krflags 0x%b",
	    KRNA(kring)->na_name, SK_KVA(KRNA(kring)), kring->ckr_name,
	    SK_KVA(kring), kring->ckr_flags, CKRF_BITS);

	/* update our work timestamp */
	KRNA(kring)->na_work_ts = _net_uptime;

	kring->ckr_pending_intr++;
	if (work_done != NULL) {
		*work_done = 1;	/* do not fire again */
	}
	/*
	 * We can't be calling ckr_na_notify here since we could already be
	 * intercepting it, else we'd end up recursively calling ourselves.
	 * Use the original na_notify callback saved during na_activate, or in
	 * the case when the module above us is the flowswitch, the notify
	 * routine that it has installed in place of our original one.
	 */
	if (__probable(!KR_DROP(kring) &&
	    (notify = kring->ckr_netif_notify) != NULL)) {
		ret = notify(kring, p, flags);
	} else {
		/*
		 * If the ring is in drop mode, pretend as if it's busy.
		 * This allows the mitigation thread to pause for a while
		 * before attempting again.
		 */
		ret = EBUSY;
	}
	if (__improbable(ret != 0)) {
		switch (kring->ckr_tx) {
		case NR_RX:
			if (ret == EBUSY) {
				STATS_INC(nifs, NETIF_STATS_RX_IRQ_BUSY);
			} else if (ret == EAGAIN) {
				STATS_INC(nifs, NETIF_STATS_RX_IRQ_AGAIN);
			} else {
				STATS_INC(nifs, NETIF_STATS_RX_IRQ_ERR);
			}
			break;

		case NR_TX:
			if (ret == EBUSY) {
				STATS_INC(nifs, NETIF_STATS_TX_IRQ_BUSY);
			} else if (ret == EAGAIN) {
				STATS_INC(nifs, NETIF_STATS_TX_IRQ_AGAIN);
			} else {
				STATS_INC(nifs, NETIF_STATS_TX_IRQ_ERR);
			}
			break;

		default:
			break;
		}
	}

	KDBG((SK_KTRACE_NETIF_COMMON_INTR | DBG_FUNC_END), SK_KVA(kring), ret);

	return ret;
}
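
/*
 * Illustrative flow (assumed): a device RX interrupt reaches this
 * routine through the notify intercept installed by
 * nx_netif_na_activate(), roughly:
 *
 *	nx_netif_na_notify_rx(kring, p, flags)
 *	    -> nx_netif_mit_rx_intr(kring, p, flags, &work_done)
 *	        -> nx_netif_common_intr(kring, p, flags, &work_done)
 *	            -> kring->ckr_netif_notify(kring, p, flags)
 *
 * where ckr_netif_notify is the original na_notify saved at
 * activation time (or the flowswitch-installed routine).
 */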

static int
nx_netif_na_notify_tx(struct __kern_channel_ring *kring, struct proc *p,
    uint32_t flags)
{
	return nx_netif_mit_tx_intr(kring, p, flags, NULL);
}

static int
nx_netif_na_notify_rx(struct __kern_channel_ring *kring, struct proc *p,
    uint32_t flags)
{
	int ret;

	/*
	 * In the event the mitigation thread is disabled, protect
	 * against recursion by detecting if we're already in the
	 * context of an RX notify. IOSkywalkFamily may invoke the
	 * notify callback as part of its RX sync callback.
	 */
	if (__probable(!sk_is_rx_notify_protected())) {
		sk_protect_t protect;
		uint32_t work_done;

		protect = sk_rx_notify_protect();
		ret = nx_netif_mit_rx_intr(kring, p, flags, &work_done);
		sk_sync_unprotect(protect);
	} else {
		ret = EAGAIN;
	}

	return ret;
}

void
nx_netif_mit_config(struct nexus_netif_adapter *nifna,
    boolean_t *tx_mit, boolean_t *tx_mit_simple,
    boolean_t *rx_mit, boolean_t *rx_mit_simple)
{
	struct nx_netif *nif = nifna->nifna_netif;

	/*
	 * TX mitigation is disabled by default, but can be
	 * overridden via "sk_netif_tx_mit=N" boot-arg, where
	 * N is one of SK_NETIF_MIT_FORCE_* values.
	 */
	*tx_mit = *tx_mit_simple = FALSE;
	switch (sk_netif_tx_mit) {
	case SK_NETIF_MIT_FORCE_SIMPLE:
		*tx_mit_simple = TRUE;
		OS_FALLTHROUGH;
	case SK_NETIF_MIT_FORCE_ADVANCED:
		*tx_mit = TRUE;
		break;
	case SK_NETIF_MIT_FORCE_OFF:
	case SK_NETIF_MIT_AUTO:
		ASSERT(*tx_mit == FALSE);
		break;
	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	/*
	 * RX mitigation is enabled by default only for BSD-style
	 * virtual network interfaces, but can be overridden
	 * via "sk_netif_rx_mit=N" boot-arg, where N is one of
	 * SK_NETIF_MIT_FORCE_* values.
	 */
	*rx_mit = *rx_mit_simple = FALSE;
	switch (sk_netif_rx_mit) {
	case SK_NETIF_MIT_FORCE_OFF:
		ASSERT(*rx_mit == FALSE);
		break;
	case SK_NETIF_MIT_FORCE_SIMPLE:
		*rx_mit_simple = TRUE;
		OS_FALLTHROUGH;
	case SK_NETIF_MIT_FORCE_ADVANCED:
		*rx_mit = TRUE;
		break;
	case SK_NETIF_MIT_AUTO:
		*rx_mit_simple = TRUE;
		/*
		 * Enable RX mitigation thread only for BSD-style virtual (and
		 * regular) interfaces, since otherwise we may run out of stack
		 * when subjected to IPsec processing, etc.
		 */
		*rx_mit = (NX_PROV(nifna->nifna_up.na_nx)->nxprov_flags &
		    NXPROVF_VIRTUAL_DEVICE) && !NETIF_IS_LOW_LATENCY(nif);
		break;
	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}
}
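
/*
 * Boot-arg example (hedged; the numeric SK_NETIF_MIT_FORCE_* values
 * are not shown here): booting with "sk_netif_rx_mit" set to the
 * value of SK_NETIF_MIT_FORCE_SIMPLE yields *rx_mit = *rx_mit_simple
 * = TRUE on every interface, bypassing the virtual-device heuristic
 * applied in the SK_NETIF_MIT_AUTO case above.
 */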

static int
nx_netif_na_activate(struct nexus_adapter *na, na_activate_mode_t mode)
{
	struct nexus_netif_adapter *nifna = (struct nexus_netif_adapter *)na;
	boolean_t tx_mit, rx_mit, tx_mit_simple, rx_mit_simple;
	struct nx_netif *nif = nifna->nifna_netif;
	struct ifnet *ifp = na->na_ifp;
	int error = 0;
	uint32_t r;

	ASSERT(na->na_type == NA_NETIF_DEV);
	ASSERT(!(na->na_flags & NAF_HOST_ONLY));

	SK_DF(SK_VERB_NETIF, "na \"%s\" (0x%llx) %s [%s]", na->na_name,
	    SK_KVA(na), ifp->if_xname, na_activate_mode2str(mode));

	switch (mode) {
	case NA_ACTIVATE_MODE_ON:
		ASSERT(SKYWALK_CAPABLE(ifp));

		nx_netif_mit_config(nifna, &tx_mit, &tx_mit_simple,
		    &rx_mit, &rx_mit_simple);

		/*
		 * Init the mitigation support on all the dev TX rings.
		 */
		if (tx_mit) {
			nifna->nifna_tx_mit =
			    skn_alloc_type_array(tx_on, struct nx_netif_mit,
			    na_get_nrings(na, NR_TX), Z_WAITOK,
			    skmem_tag_netif_mit);
			if (nifna->nifna_tx_mit == NULL) {
				SK_ERR("TX mitigation allocation failed");
				error = ENOMEM;
				goto out;
			}
		} else {
			ASSERT(nifna->nifna_tx_mit == NULL);
		}

		/*
		 * Init the mitigation support on all the dev RX rings.
		 */
		if (rx_mit) {
			nifna->nifna_rx_mit =
			    skn_alloc_type_array(rx_on, struct nx_netif_mit,
			    na_get_nrings(na, NR_RX), Z_WAITOK,
			    skmem_tag_netif_mit);
			if (nifna->nifna_rx_mit == NULL) {
				SK_ERR("RX mitigation allocation failed");
				if (nifna->nifna_tx_mit != NULL) {
					skn_free_type_array(rx_fail,
					    struct nx_netif_mit,
					    na_get_nrings(na, NR_TX),
					    nifna->nifna_tx_mit);
					nifna->nifna_tx_mit = NULL;
				}
				error = ENOMEM;
				goto out;
			}
		} else {
			ASSERT(nifna->nifna_rx_mit == NULL);
		}

		/* intercept na_notify callback on the TX rings */
		for (r = 0; r < na_get_nrings(na, NR_TX); r++) {
			na->na_tx_rings[r].ckr_netif_notify =
			    na->na_tx_rings[r].ckr_na_notify;
			na->na_tx_rings[r].ckr_na_notify =
			    nx_netif_na_notify_tx;
			if (nifna->nifna_tx_mit != NULL) {
				nx_netif_mit_init(nif, ifp,
				    &nifna->nifna_tx_mit[r],
				    &na->na_tx_rings[r], tx_mit_simple);
			}
		}

		/* intercept na_notify callback on the RX rings */
		for (r = 0; r < na_get_nrings(na, NR_RX); r++) {
			na->na_rx_rings[r].ckr_netif_notify =
			    na->na_rx_rings[r].ckr_na_notify;
			na->na_rx_rings[r].ckr_na_notify =
			    nx_netif_na_notify_rx;
			if (nifna->nifna_rx_mit != NULL) {
				nx_netif_mit_init(nif, ifp,
				    &nifna->nifna_rx_mit[r],
				    &na->na_rx_rings[r], rx_mit_simple);
			}
		}
		nx_netif_filter_enable(nif);
		nx_netif_flow_enable(nif);
		atomic_bitset_32(&na->na_flags, NAF_ACTIVE);

		/* steer all start requests to netif; this must not fail */
		lck_mtx_lock(&ifp->if_start_lock);
		error = ifnet_set_start_handler(ifp, nx_netif_doorbell);
		VERIFY(error == 0);
		lck_mtx_unlock(&ifp->if_start_lock);
		break;

	case NA_ACTIVATE_MODE_DEFUNCT:
		ASSERT(SKYWALK_CAPABLE(ifp));
		break;

	case NA_ACTIVATE_MODE_OFF:
		/*
		 * Note that here we cannot assert SKYWALK_CAPABLE()
		 * as we're called in the destructor path.
		 */
		atomic_bitclear_32(&na->na_flags, NAF_ACTIVE);
		nx_netif_flow_disable(nif);
		nx_netif_filter_disable(nif);

		/*
		 * Here we may block while holding sk_lock, but because
		 * we've cleared NAF_ACTIVE above, kern_channel_tx_refill()
		 * should immediately return. A better approach would be
		 * to drop sk_lock and add a monitor for this routine.
		 */
		lck_mtx_lock(&ifp->if_start_lock);
		while (ifp->if_start_active != 0) {
			++ifp->if_start_waiters;
			(void) msleep(&ifp->if_start_waiters,
			    &ifp->if_start_lock, (PZERO - 1),
			    na->na_name, NULL);
		}
		/* steer all start requests to default handler */
		ifnet_reset_start_handler(ifp);
		lck_mtx_unlock(&ifp->if_start_lock);

		/* reset all TX notify callbacks */
		for (r = 0; r < na_get_nrings(na, NR_TX); r++) {
			na->na_tx_rings[r].ckr_na_notify =
			    na->na_tx_rings[r].ckr_netif_notify;
			na->na_tx_rings[r].ckr_netif_notify = NULL;
			if (nifna->nifna_tx_mit != NULL) {
				na->na_tx_rings[r].ckr_netif_mit_stats = NULL;
				nx_netif_mit_cleanup(&nifna->nifna_tx_mit[r]);
			}
		}

		if (nifna->nifna_tx_mit != NULL) {
			skn_free_type_array(tx_off, struct nx_netif_mit,
			    na_get_nrings(na, NR_TX), nifna->nifna_tx_mit);
			nifna->nifna_tx_mit = NULL;
		}

		/* reset all RX notify callbacks */
		for (r = 0; r < na_get_nrings(na, NR_RX); r++) {
			na->na_rx_rings[r].ckr_na_notify =
			    na->na_rx_rings[r].ckr_netif_notify;
			na->na_rx_rings[r].ckr_netif_notify = NULL;
			if (nifna->nifna_rx_mit != NULL) {
				na->na_rx_rings[r].ckr_netif_mit_stats = NULL;
				nx_netif_mit_cleanup(&nifna->nifna_rx_mit[r]);
			}
		}
		if (nifna->nifna_rx_mit != NULL) {
			skn_free_type_array(rx_off, struct nx_netif_mit,
			    na_get_nrings(na, NR_RX), nifna->nifna_rx_mit);
			nifna->nifna_rx_mit = NULL;
		}
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}
out:
	return error;
}
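
/*
 * Note: the OFF path above undoes the ON path in reverse order
 * (quiesce if_start, restore the saved ckr_na_notify pointers, then
 * free the mitigation arrays), so the adapter is never left with an
 * intercepted notify callback after deactivation.
 */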

SK_NO_INLINE_ATTRIBUTE
static int
nx_netif_attach(struct kern_nexus *nx, struct ifnet *ifp)
__attribute__((optnone))
{
	struct nx_netif *nif = NX_NETIF_PRIVATE(nx);
	struct nxprov_params *nxp = NX_PROV(nx)->nxprov_params;
	struct nexus_netif_adapter *devnifna = NULL;
	struct nexus_netif_adapter *hostnifna = NULL;
	struct nexus_adapter *devna = NULL;
	struct nexus_adapter *hostna = NULL;
	boolean_t embryonic = FALSE;
	int retval = 0;
	uint32_t na_flags;

	SK_LOCK_ASSERT_HELD();
	ASSERT(SKYWALK_NATIVE(ifp));
	ASSERT(!SKYWALK_CAPABLE(ifp));
	ASSERT(ifp->if_na == NULL);
	ASSERT(ifp->if_na_ops == NULL);

	devnifna = na_netif_alloc(Z_WAITOK);
	hostnifna = na_netif_alloc(Z_WAITOK);

	/*
	 * We can be called for two different interface states:
	 *
	 * Fully attached: get an I/O ref count; upon success, this
	 * holds a reference to the ifnet for the ifp pointer stored
	 * in 'na_ifp' down below for both adapters.
	 *
	 * Embryonic: temporarily hold the ifnet in na_private, which
	 * upon a successful ifnet_attach(), will be moved over to
	 * 'na_ifp' with an I/O ref count held.
	 *
	 * The ifnet in 'na_ifp' will be released by na_release_locked().
	 */
	if (!ifnet_is_attached(ifp, 1)) {
		if (!(ifp->if_refflags & IFRF_EMBRYONIC)) {
			ifp = NULL;
			retval = ENXIO;
			goto err;
		}
		embryonic = TRUE;
	}

	/* initialize the device netif adapter */
	devnifna->nifna_netif = nif;
	nx_netif_retain(nif);
	devna = &devnifna->nifna_up;
	devna->na_type = NA_NETIF_DEV;
	devna->na_free = na_netif_free;
	(void) strncpy(devna->na_name, ifp->if_xname, sizeof(devna->na_name) - 1);
	devna->na_name[sizeof(devna->na_name) - 1] = '\0';
	uuid_generate_random(devna->na_uuid);
	if (embryonic) {
		/*
		 * We will move this over to na_ifp once
		 * the interface is fully attached.
		 */
		devna->na_private = ifp;
		ASSERT(devna->na_ifp == NULL);
	} else {
		ASSERT(devna->na_private == NULL);
		/* use I/O refcnt from ifnet_is_attached() */
		devna->na_ifp = ifp;
	}
	devna->na_activate = nx_netif_na_activate;
	devna->na_txsync = nx_netif_na_txsync;
	devna->na_rxsync = nx_netif_na_rxsync;
	devna->na_dtor = nx_netif_na_dtor;
	devna->na_krings_create = nx_netif_dev_krings_create;
	devna->na_krings_delete = nx_netif_dev_krings_delete;
	devna->na_special = nx_netif_na_special;

	na_flags = NAF_NATIVE;
	if (NX_PROV(nx)->nxprov_flags & NXPROVF_VIRTUAL_DEVICE) {
		na_flags |= NAF_VIRTUAL_DEVICE;
	}
	if (NX_LLINK_PROV(nx)) {
		/*
		 * While operating in logical link mode, we don't need to
		 * create backing memory regions for the rings as they are
		 * not used.
		 */
		na_flags |= NAF_MEM_NO_INIT;
	}
	atomic_bitset_32(&devna->na_flags, na_flags);
	*(nexus_stats_type_t *)(uintptr_t)&devna->na_stats_type =
	    NEXUS_STATS_TYPE_INVALID;

	na_set_nrings(devna, NR_TX, nxp->nxp_tx_rings);
	na_set_nrings(devna, NR_RX, nxp->nxp_rx_rings);
	na_set_nslots(devna, NR_TX, nxp->nxp_tx_slots);
	na_set_nslots(devna, NR_RX, nxp->nxp_rx_slots);
	/*
	 * Verify upper bounds; the parameters must have already been
	 * validated by nxdom_prov_params() by the time we get here.
	 */
	ASSERT(na_get_nrings(devna, NR_TX) <= NX_DOM(nx)->nxdom_tx_rings.nb_max);
	ASSERT(na_get_nrings(devna, NR_RX) <= NX_DOM(nx)->nxdom_rx_rings.nb_max);
	ASSERT(na_get_nslots(devna, NR_TX) <= NX_DOM(nx)->nxdom_tx_slots.nb_max);
	ASSERT(na_get_nslots(devna, NR_RX) <= NX_DOM(nx)->nxdom_rx_slots.nb_max);

	na_attach_common(devna, nx, &nx_netif_prov_s);

	if ((retval = NX_DOM_PROV(nx)->nxdom_prov_mem_new(NX_DOM_PROV(nx),
	    nx, devna)) != 0) {
		ASSERT(devna->na_arena == NULL);
		goto err;
	}
	ASSERT(devna->na_arena != NULL);

	*(uint32_t *)(uintptr_t)&devna->na_flowadv_max = nxp->nxp_flowadv_max;
	ASSERT(devna->na_flowadv_max == 0 ||
	    skmem_arena_nexus(devna->na_arena)->arn_flowadv_obj != NULL);

	/* setup packet copy routines */
	if (skmem_arena_nexus(devna->na_arena)->arn_rx_pp->pp_max_frags > 1) {
		nif->nif_pkt_copy_from_mbuf = pkt_copy_multi_buflet_from_mbuf;
		nif->nif_pkt_copy_to_mbuf = pkt_copy_multi_buflet_to_mbuf;
		nif->nif_pkt_copy_from_pkt = pkt_copy_multi_buflet_from_pkt;
	} else {
		nif->nif_pkt_copy_from_mbuf = pkt_copy_from_mbuf;
		nif->nif_pkt_copy_to_mbuf = pkt_copy_to_mbuf;
		nif->nif_pkt_copy_from_pkt = pkt_copy_from_pkt;
	}

	/* initialize the host netif adapter */
	hostnifna->nifna_netif = nif;
	nx_netif_retain(nif);
	hostna = &hostnifna->nifna_up;
	(void) snprintf(hostna->na_name, sizeof(hostna->na_name),
	    "%s^", devna->na_name);
	uuid_generate_random(hostna->na_uuid);
	if (embryonic) {
		/*
		 * We will move this over to na_ifp once
		 * the interface is fully attached.
		 */
		hostna->na_private = ifp;
		ASSERT(hostna->na_ifp == NULL);
	} else {
		ASSERT(hostna->na_private == NULL);
		hostna->na_ifp = devna->na_ifp;
		ifnet_incr_iorefcnt(hostna->na_ifp);
	}
	hostna->na_type = NA_NETIF_HOST;
	hostna->na_free = na_netif_free;
	hostna->na_activate = nx_netif_host_na_activate;
	hostna->na_txsync = nx_netif_host_na_txsync;
	hostna->na_rxsync = nx_netif_host_na_rxsync;
	hostna->na_dtor = nx_netif_na_dtor;
	hostna->na_krings_create = nx_netif_host_krings_create;
	hostna->na_krings_delete = nx_netif_host_krings_delete;
	hostna->na_special = nx_netif_host_na_special;

	na_flags = NAF_HOST_ONLY | NAF_NATIVE;
	if (NX_LLINK_PROV(nx)) {
		/*
		 * While operating in logical link mode, we don't need to
		 * create backing memory regions for the rings as they are
		 * not used.
		 */
		na_flags |= NAF_MEM_NO_INIT;
	}
	atomic_bitset_32(&hostna->na_flags, na_flags);
	*(nexus_stats_type_t *)(uintptr_t)&hostna->na_stats_type =
	    NEXUS_STATS_TYPE_INVALID;

	na_set_nrings(hostna, NR_TX, 1);
	na_set_nrings(hostna, NR_RX, 1);
	na_set_nslots(hostna, NR_TX, nxp->nxp_tx_slots);
	na_set_nslots(hostna, NR_RX, nxp->nxp_rx_slots);

	na_attach_common(hostna, nx, &nx_netif_prov_s);

	if ((retval = NX_DOM_PROV(nx)->nxdom_prov_mem_new(NX_DOM_PROV(nx),
	    nx, hostna)) != 0) {
		ASSERT(hostna->na_arena == NULL);
		goto err;
	}
	ASSERT(hostna->na_arena != NULL);

	*(uint32_t *)(uintptr_t)&hostna->na_flowadv_max = nxp->nxp_flowadv_max;
	ASSERT(hostna->na_flowadv_max == 0 ||
	    skmem_arena_nexus(hostna->na_arena)->arn_flowadv_obj != NULL);

	/* adjust the classq packet drop limit */
	if (embryonic) {
		uint32_t drop_lim;
		struct kern_pbufpool_memory_info pp_info;

		retval = kern_pbufpool_get_memory_info(nx->nx_tx_pp, &pp_info);
		VERIFY(retval == 0);

		/* set the drop limit to 80% of the packet pool size */
		drop_lim = (pp_info.kpm_packets * 4) / 5;
		VERIFY(drop_lim != 0);
		IFCQ_PKT_DROP_LIMIT(ifp->if_snd) = drop_lim;
	}

	/* these will be undone by the destructor */
	ifp->if_na_ops = &na_netif_ops;
	ifp->if_na = devnifna;
	na_retain_locked(devna);
	na_retain_locked(hostna);

	SKYWALK_SET_CAPABLE(ifp);

	NETIF_WLOCK(nif);
	nif->nif_ifp = ifp;
	nif->nif_netif_nxadv = nx->nx_adv.netif_nxv_adv;
	retval = nx_port_alloc(nx, NEXUS_PORT_NET_IF_DEV, NULL, &devna,
	    kernproc);
	ASSERT(retval == 0);
	retval = nx_port_alloc(nx, NEXUS_PORT_NET_IF_HOST, NULL, &hostna,
	    kernproc);
	ASSERT(retval == 0);
	NETIF_WUNLOCK(nif);

#if SK_LOG
	uuid_string_t uuidstr;
	SK_DF(SK_VERB_NETIF, "devna: \"%s\"", devna->na_name);
	SK_DF(SK_VERB_NETIF, " UUID: %s",
	    sk_uuid_unparse(devna->na_uuid, uuidstr));
	SK_DF(SK_VERB_NETIF, " nx: 0x%llx (\"%s\":\"%s\")",
	    SK_KVA(devna->na_nx), NX_DOM(devna->na_nx)->nxdom_name,
	    NX_DOM_PROV(devna->na_nx)->nxdom_prov_name);
	SK_DF(SK_VERB_NETIF, " flags: 0x%b", devna->na_flags, NAF_BITS);
	SK_DF(SK_VERB_NETIF, " flowadv_max: %u", devna->na_flowadv_max);
	SK_DF(SK_VERB_NETIF, " rings: tx %u rx %u",
	    na_get_nrings(devna, NR_TX), na_get_nrings(devna, NR_RX));
	SK_DF(SK_VERB_NETIF, " slots: tx %u rx %u",
	    na_get_nslots(devna, NR_TX), na_get_nslots(devna, NR_RX));
#if CONFIG_NEXUS_USER_PIPE
	SK_DF(SK_VERB_NETIF, " next_pipe: %u", devna->na_next_pipe);
	SK_DF(SK_VERB_NETIF, " max_pipes: %u", devna->na_max_pipes);
#endif /* CONFIG_NEXUS_USER_PIPE */
	SK_DF(SK_VERB_NETIF, " ifp: 0x%llx %s [ioref %u]",
	    SK_KVA(ifp), ifp->if_xname, ifp->if_refio);
	SK_DF(SK_VERB_NETIF, "hostna: \"%s\"", hostna->na_name);
	SK_DF(SK_VERB_NETIF, " UUID: %s",
	    sk_uuid_unparse(hostna->na_uuid, uuidstr));
	SK_DF(SK_VERB_NETIF, " nx: 0x%llx (\"%s\":\"%s\")",
	    SK_KVA(hostna->na_nx), NX_DOM(hostna->na_nx)->nxdom_name,
	    NX_DOM_PROV(hostna->na_nx)->nxdom_prov_name);
	SK_DF(SK_VERB_NETIF, " flags: 0x%b",
	    hostna->na_flags, NAF_BITS);
	SK_DF(SK_VERB_NETIF, " flowadv_max: %u", hostna->na_flowadv_max);
	SK_DF(SK_VERB_NETIF, " rings: tx %u rx %u",
	    na_get_nrings(hostna, NR_TX), na_get_nrings(hostna, NR_RX));
	SK_DF(SK_VERB_NETIF, " slots: tx %u rx %u",
	    na_get_nslots(hostna, NR_TX), na_get_nslots(hostna, NR_RX));
#if CONFIG_NEXUS_USER_PIPE
	SK_DF(SK_VERB_NETIF, " next_pipe: %u", hostna->na_next_pipe);
	SK_DF(SK_VERB_NETIF, " max_pipes: %u", hostna->na_max_pipes);
#endif /* CONFIG_NEXUS_USER_PIPE */
	SK_DF(SK_VERB_NETIF, " ifp: 0x%llx %s [ioref %u]",
	    SK_KVA(ifp), ifp->if_xname, ifp->if_refio);
#endif /* SK_LOG */

err:
	if (retval != 0) {
		if (ifp != NULL) {
			if (!embryonic) {
				ifnet_decr_iorefcnt(ifp);
			}
			ifp = NULL;
		}
		if (devna != NULL) {
			if (devna->na_arena != NULL) {
				skmem_arena_release(devna->na_arena);
				devna->na_arena = NULL;
			}
			if (devna->na_ifp != NULL) {
				ifnet_decr_iorefcnt(devna->na_ifp);
				devna->na_ifp = NULL;
			}
			devna->na_private = NULL;
		}
		if (hostna != NULL) {
			if (hostna->na_arena != NULL) {
				skmem_arena_release(hostna->na_arena);
				hostna->na_arena = NULL;
			}
			if (hostna->na_ifp != NULL) {
				ifnet_decr_iorefcnt(hostna->na_ifp);
				hostna->na_ifp = NULL;
			}
			hostna->na_private = NULL;
		}
		if (devnifna != NULL) {
			if (devnifna->nifna_netif != NULL) {
				nx_netif_release(devnifna->nifna_netif);
				devnifna->nifna_netif = NULL;
			}
			na_netif_free((struct nexus_adapter *)devnifna);
		}
		if (hostnifna != NULL) {
			if (hostnifna->nifna_netif != NULL) {
				nx_netif_release(hostnifna->nifna_netif);
				hostnifna->nifna_netif = NULL;
			}
			na_netif_free((struct nexus_adapter *)hostnifna);
		}
	}
	return retval;
}
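
/*
 * Attach lifecycle sketch (illustrative): for an embryonic ifnet the
 * sequence is roughly
 *
 *	nx_netif_attach(nx, ifp);	// ifp parked in na_private
 *	ifnet_attach(ifp);		// interface becomes fully attached
 *	na_netif_finalize(nifna, ifp);	// na_private moved to na_ifp,
 *					// with an I/O refcnt held
 *
 * whereas a fully-attached ifnet takes its I/O reference directly in
 * nx_netif_attach() via ifnet_is_attached(ifp, 1).
 */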

/*
 * Any per-netif state that can be discovered at attach time should be
 * initialized here.
 */
static void
nx_netif_flags_init(struct nx_netif *nif)
{
	ifnet_t ifp = nif->nif_ifp;
	struct kern_nexus *nx = nif->nif_nx;
	struct nexus_adapter *devna = nx_port_get_na(nx, NEXUS_PORT_NET_IF_DEV);

	switch (devna->na_type) {
	case NA_NETIF_DEV:
		if (strcmp(ifp->if_name, sk_ll_prefix) == 0) {
			nif->nif_flags |= NETIF_FLAG_LOW_LATENCY;
			if_set_xflags(ifp, IFXF_LOW_LATENCY);
		}
		break;
	case NA_NETIF_COMPAT_DEV:
		nif->nif_flags |= NETIF_FLAG_COMPAT;
		break;
	default:
		break;
	}
}

/*
 * This is also supposed to check for any inconsistent state at detach time.
 */
static void
nx_netif_flags_fini(struct nx_netif *nif)
{
	ifnet_t ifp = nif->nif_ifp;

	if (ifp != NULL) {
		if_clear_xflags(ifp, IFXF_LOW_LATENCY);
	}
	nif->nif_flags = 0;
}

static void
configure_capab_interface_advisory(struct nx_netif *nif,
    nxprov_capab_config_fn_t capab_fn)
{
	struct kern_nexus_capab_interface_advisory capab;
	struct kern_nexus *nx = nif->nif_nx;
	uint32_t capab_len;
	int error;

	/* check/configure interface advisory notifications */
	if ((nif->nif_ifp->if_eflags & IFEF_ADV_REPORT) == 0) {
		return;
	}
	bzero(&capab, sizeof(capab));
	capab.kncia_version =
	    KERN_NEXUS_CAPAB_INTERFACE_ADVISORY_VERSION_1;
	*__DECONST(kern_nexus_capab_interface_advisory_notify_fn_t *,
	    &(capab.kncia_notify)) = nx_netif_interface_advisory_notify;
	*__DECONST(void **, &(capab.kncia_kern_context)) = nx;
	capab_len = sizeof(capab);
	error = capab_fn(NX_PROV(nx), nx,
	    KERN_NEXUS_CAPAB_INTERFACE_ADVISORY, &capab, &capab_len);
	if (error != 0) {
		DTRACE_SKYWALK2(interface__advisory__capab__error,
		    struct nx_netif *, nif, int, error);
		return;
	}
	VERIFY(capab.kncia_config != NULL);
	VERIFY(capab.kncia_provider_context != NULL);
	nif->nif_intf_adv_config = capab.kncia_config;
	nif->nif_intf_adv_prov_ctx = capab.kncia_provider_context;
	nif->nif_extended_capabilities |= NETIF_CAPAB_INTERFACE_ADVISORY;
}

static void
unconfigure_capab_interface_advisory(struct nx_netif *nif)
{
	if ((nif->nif_extended_capabilities & NETIF_CAPAB_INTERFACE_ADVISORY) == 0) {
		return;
	}
	nif->nif_intf_adv_config = NULL;
	nif->nif_intf_adv_prov_ctx = NULL;
	nif->nif_extended_capabilities &= ~NETIF_CAPAB_INTERFACE_ADVISORY;
}

static void
configure_capab_qset_extensions(struct nx_netif *nif,
    nxprov_capab_config_fn_t capab_fn)
{
	struct kern_nexus_capab_qset_extensions capab;
	struct kern_nexus *nx = nif->nif_nx;
	uint32_t capab_len;
	int error;

	if (!NX_LLINK_PROV(nx)) {
		DTRACE_SKYWALK1(not__llink__prov, struct nx_netif *, nif);
		return;
	}
	bzero(&capab, sizeof(capab));
	capab.cqe_version = KERN_NEXUS_CAPAB_QSET_EXTENSIONS_VERSION_1;
	capab_len = sizeof(capab);
	error = capab_fn(NX_PROV(nx), nx,
	    KERN_NEXUS_CAPAB_QSET_EXTENSIONS, &capab, &capab_len);
	if (error != 0) {
		DTRACE_SKYWALK2(qset__extensions__capab__error,
		    struct nx_netif *, nif, int, error);
		return;
	}
	VERIFY(capab.cqe_notify_steering_info != NULL);
	VERIFY(capab.cqe_prov_ctx != NULL);
	nif->nif_qset_extensions.qe_notify_steering_info =
	    capab.cqe_notify_steering_info;
	nif->nif_qset_extensions.qe_prov_ctx = capab.cqe_prov_ctx;
	nif->nif_extended_capabilities |= NETIF_CAPAB_QSET_EXTENSIONS;
}

static void
unconfigure_capab_qset_extensions(struct nx_netif *nif)
{
	if ((nif->nif_extended_capabilities & NETIF_CAPAB_QSET_EXTENSIONS) == 0) {
		return;
	}
	bzero(&nif->nif_qset_extensions, sizeof(nif->nif_qset_extensions));
	nif->nif_extended_capabilities &= ~NETIF_CAPAB_QSET_EXTENSIONS;
}

int
nx_netif_notify_steering_info(struct nx_netif *nif, struct netif_qset *qset,
    struct ifnet_traffic_descriptor_common *td, bool add)
{
	struct netif_qset_extensions *qset_ext;
	int err;

	if ((nif->nif_extended_capabilities & NETIF_CAPAB_QSET_EXTENSIONS) == 0) {
		return ENOTSUP;
	}
	qset_ext = &nif->nif_qset_extensions;
	VERIFY(qset_ext->qe_prov_ctx != NULL);
	VERIFY(qset_ext->qe_notify_steering_info != NULL);
	err = qset_ext->qe_notify_steering_info(qset_ext->qe_prov_ctx,
	    qset->nqs_ctx, td, add);
	return err;
}

static void
nx_netif_capabilities_init(struct nx_netif *nif)
{
	struct kern_nexus *nx = nif->nif_nx;
	nxprov_capab_config_fn_t capab_fn;

	if ((NX_PROV(nx)->nxprov_netif_ext.nxnpi_version) ==
	    KERN_NEXUS_PROVIDER_VERSION_NETIF) {
		capab_fn = NX_PROV(nx)->nxprov_netif_ext.nxnpi_config_capab;
		ASSERT(capab_fn != NULL);
	} else {
		capab_fn = NX_PROV(nx)->nxprov_ext.nxpi_config_capab;
	}
	if (capab_fn == NULL) {
		return;
	}
	configure_capab_interface_advisory(nif, capab_fn);
	configure_capab_qset_extensions(nif, capab_fn);
}

static void
nx_netif_capabilities_fini(struct nx_netif *nif)
{
	unconfigure_capab_interface_advisory(nif);
	unconfigure_capab_qset_extensions(nif);
}

static void
nx_netif_verify_tso_config(struct nx_netif *nif, struct ifnet *ifp)
{
	uint32_t tso_v4_mtu = 0;
	uint32_t tso_v6_mtu = 0;

	if ((nif->nif_hwassist & IFNET_TSO_IPV4) != 0) {
		tso_v4_mtu = ifp->if_tso_v4_mtu;
	}
	if ((nif->nif_hwassist & IFNET_TSO_IPV6) != 0) {
		tso_v6_mtu = ifp->if_tso_v6_mtu;
	}
	VERIFY(PP_BUF_SIZE_DEF(nif->nif_nx->nx_tx_pp) >=
	    max(tso_v4_mtu, tso_v6_mtu));
}

void
na_netif_finalize(struct nexus_netif_adapter *nifna, struct ifnet *ifp)
{
	struct nx_netif *nif = nifna->nifna_netif;
	struct kern_nexus *nx = nif->nif_nx;
	struct nexus_adapter *devna = nx_port_get_na(nx, NEXUS_PORT_NET_IF_DEV);
	struct nexus_adapter *hostna = nx_port_get_na(nx,
	    NEXUS_PORT_NET_IF_HOST);

	ASSERT(devna != NULL);
	ASSERT(hostna != NULL);

	if (!ifnet_is_attached(ifp, 1)) {
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	ASSERT(devna->na_private == ifp);
	ASSERT(devna->na_ifp == NULL);
	/* use the I/O refcnt held by ifnet_is_attached() above */
	devna->na_ifp = devna->na_private;
	devna->na_private = NULL;

	ASSERT(hostna->na_private == ifp);
	ASSERT(hostna->na_ifp == NULL);
	hostna->na_ifp = hostna->na_private;
	hostna->na_private = NULL;
	ifnet_incr_iorefcnt(hostna->na_ifp);

	nx_netif_flags_init(nif);
	nx_netif_llink_init(nif);
	nx_netif_filter_init(nif);
	nx_netif_flow_init(nif);
	nx_netif_capabilities_init(nif);
	nx_netif_agent_init(nif);
	(void) nxctl_inet_traffic_rule_get_count(ifp->if_xname,
	    &ifp->if_traffic_rule_count);
	nx_netif_verify_tso_config(nif, ifp);
}

void
nx_netif_reap(struct nexus_netif_adapter *nifna, struct ifnet *ifp,
    uint32_t thres, boolean_t low)
{
#pragma unused(ifp)
	struct nx_netif *nif = nifna->nifna_netif;
	struct kern_nexus *nx = nif->nif_nx;
	struct nexus_adapter *devna = nx_port_get_na(nx, NEXUS_PORT_NET_IF_DEV);
	uint64_t now = _net_uptime;
	boolean_t purge;

	ASSERT(thres != 0);

	if (devna->na_work_ts == 0) {
		return;
	}

	/*
	 * Purge if it has been inactive for some time (twice the drain
	 * threshold), and clear the work timestamp to temporarily skip this
	 * adapter until it's active again. Purging cached objects can be
	 * expensive since we'd need to allocate and construct them again,
	 * so we do it only when necessary.
	 */
	if (low || (now - devna->na_work_ts) >= (thres << 1)) {
		devna->na_work_ts = 0;
		purge = TRUE;
	} else {
		purge = FALSE;
	}

	SK_DF(SK_VERB_NETIF, "%s: %s na %s", ifp->if_xname,
	    (purge ? "purging" : "pruning"), devna->na_name);

	/*
	 * Device and host adapters share the same packet buffer pool,
	 * so just reap the arena belonging to the device instance.
	 */
	skmem_arena_reap(devna->na_arena, purge);
}
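
/*
 * Worked example (assuming _net_uptime counts seconds): with
 * thres = 30, an adapter whose last work timestamp is 60 or more
 * seconds old (thres << 1) is purged rather than merely pruned, and
 * its na_work_ts is zeroed so subsequent reap passes skip it until it
 * does new work.
 */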

/*
 * The purpose of this callback is to forcibly remove resources held by VPNAs
 * in the event of an interface detach. Without this callback, an application
 * could prevent the detach from completing indefinitely. Note that this is
 * only needed for low latency VPNAs. Userspace does get notified about
 * interface detach events for other NA types (custom ether and filter) and
 * will do the necessary cleanup.
 * The cleanup is done in two phases:
 * 1) VPNA channels are defuncted. This releases the resources held by VPNAs
 *    and causes the device channel to be closed. All ifnet references held
 *    by VPNAs are also released.
 * 2) This cleans up the netif nexus and releases the two remaining ifnet
 *    references held by the device and host ports (nx_netif_clean()).
 */
void
nx_netif_detach_notify(struct nexus_netif_adapter *nifna)
{
	struct nx_netif *nif = nifna->nifna_netif;
	struct kern_nexus *nx = nif->nif_nx;
	struct kern_channel **ch_list = NULL;
	struct kern_channel *ch;
	int err, i, all_ch_cnt = 0, vp_ch_cnt = 0;
	struct proc *p;

	ASSERT(NETIF_IS_LOW_LATENCY(nif));
	/*
	 * kern_channel_defunct() requires sk_lock to not be held. We
	 * will first find the list of channels we want to defunct and
	 * then call kern_channel_defunct() on each of them. The number
	 * of channels cannot increase after sk_lock is released since
	 * this interface is being detached.
	 */
	SK_LOCK();
	all_ch_cnt = nx->nx_ch_count;
	if (all_ch_cnt == 0) {
		DTRACE_SKYWALK1(no__channel, struct kern_nexus *, nx);
		SK_UNLOCK();
		return;
	}
	ch_list = sk_alloc_type_array(struct kern_channel *, all_ch_cnt,
	    Z_WAITOK | Z_NOFAIL, skmem_tag_netif_temp);

	STAILQ_FOREACH(ch, &nx->nx_ch_head, ch_link) {
		struct nexus_adapter *na = ch->ch_na;

		if (na != NULL && na->na_type == NA_NETIF_VP) {
			ASSERT(vp_ch_cnt < all_ch_cnt);

			/* retain the channel to prevent it from being freed */
			ch_retain_locked(ch);
			ch_list[vp_ch_cnt] = ch;
			DTRACE_SKYWALK3(vp__ch__found, struct kern_nexus *, nx,
			    struct kern_channel *, ch, struct nexus_adapter *, na);
			vp_ch_cnt++;
		}
	}
	if (vp_ch_cnt == 0) {
		DTRACE_SKYWALK1(vp__ch__not__found, struct kern_nexus *, nx);
		sk_free_type_array(struct kern_channel *, all_ch_cnt, ch_list);
		SK_UNLOCK();
		return;
	}
	/* prevent the netif from being freed */
	nx_netif_retain(nif);
	SK_UNLOCK();

	for (i = 0; i < vp_ch_cnt; i++) {
		ch = ch_list[i];
		p = proc_find(ch->ch_pid);
		if (p == NULL) {
			SK_ERR("ch 0x%llx pid %d not found", SK_KVA(ch), ch->ch_pid);
			DTRACE_SKYWALK3(ch__pid__not__found, struct kern_nexus *, nx,
			    struct kern_channel *, ch, pid_t, ch->ch_pid);
			ch_release(ch);
			continue;
		}
		/*
		 * It is possible for the channel to be closed before defunct
		 * gets called. We need to take the fd lock here to ensure
		 * that the check for the closed state and the calling of
		 * channel defunct are done atomically.
		 */
		proc_fdlock(p);
		if ((ch->ch_flags & CHANF_ATTACHED) != 0) {
			kern_channel_defunct(p, ch);
		}
		proc_fdunlock(p);
		proc_rele(p);
		ch_release(ch);
	}
	sk_free_type_array(struct kern_channel *, all_ch_cnt, ch_list);

	SK_LOCK();
	/*
	 * Quiescing is not needed because:
	 * The defuncting above ensures that no more TX syncs can enter.
	 * The driver layer ensures that ifnet_detach() (this path) does not
	 * get called until RX upcalls have returned.
	 *
	 * Before sk_lock is reacquired above, userspace could close its
	 * channels and cause the nexus's destructor to be called. This is
	 * fine because we have retained the nif, so it can't disappear.
	 */
	err = nx_netif_clean(nif, FALSE);
	if (err != 0) {
		SK_ERR("netif clean failed: err %d", err);
		DTRACE_SKYWALK2(nif__clean__failed, struct nx_netif *, nif, int, err);
	}
	nx_netif_release(nif);
	SK_UNLOCK();
}

void
nx_netif_copy_stats(struct nexus_netif_adapter *nifna,
    struct if_netif_stats *if_ns)
{
	struct nx_netif_mit *mit;
	struct mit_cfg_tbl *mit_cfg;

	if ((mit = nifna->nifna_rx_mit) == NULL) {
		return;
	}

	if ((mit->mit_flags & NETIF_MITF_INITIALIZED) == 0) {
		return;
	}

	if_ns->ifn_rx_mit_interval = mit->mit_interval;
	if_ns->ifn_rx_mit_mode = mit->mit_mode;
	if_ns->ifn_rx_mit_packets_avg = mit->mit_packets_avg;
	if_ns->ifn_rx_mit_packets_min = mit->mit_packets_min;
	if_ns->ifn_rx_mit_packets_max = mit->mit_packets_max;
	if_ns->ifn_rx_mit_bytes_avg = mit->mit_bytes_avg;
	if_ns->ifn_rx_mit_bytes_min = mit->mit_bytes_min;
	if_ns->ifn_rx_mit_bytes_max = mit->mit_bytes_max;
	if_ns->ifn_rx_mit_cfg_idx = mit->mit_cfg_idx;

	VERIFY(if_ns->ifn_rx_mit_cfg_idx < mit->mit_cfg_idx_max);
	mit_cfg = &mit->mit_tbl[if_ns->ifn_rx_mit_cfg_idx];
	if_ns->ifn_rx_mit_cfg_packets_lowat = mit_cfg->cfg_plowat;
	if_ns->ifn_rx_mit_cfg_packets_hiwat = mit_cfg->cfg_phiwat;
	if_ns->ifn_rx_mit_cfg_bytes_lowat = mit_cfg->cfg_blowat;
	if_ns->ifn_rx_mit_cfg_bytes_hiwat = mit_cfg->cfg_bhiwat;
	if_ns->ifn_rx_mit_cfg_interval = mit_cfg->cfg_ival;
}

int
nx_netif_na_special(struct nexus_adapter *na, struct kern_channel *ch,
    struct chreq *chr, nxspec_cmd_t spec_cmd)
{
	ASSERT(na->na_type == NA_NETIF_DEV ||
	    na->na_type == NA_NETIF_COMPAT_DEV);
	return nx_netif_na_special_common(na, ch, chr, spec_cmd);
}

int
nx_netif_na_special_common(struct nexus_adapter *na, struct kern_channel *ch,
    struct chreq *chr, nxspec_cmd_t spec_cmd)
{
	int error = 0;

	ASSERT(na->na_type == NA_NETIF_DEV || na->na_type == NA_NETIF_HOST ||
	    na->na_type == NA_NETIF_COMPAT_DEV ||
	    na->na_type == NA_NETIF_COMPAT_HOST);
	SK_LOCK_ASSERT_HELD();

	switch (spec_cmd) {
	case NXSPEC_CMD_CONNECT:
		/*
		 * The netif adapter isn't created exclusively for the kernel.
		 * We set (and clear) the NAF_KERNEL_ONLY flag upon a
		 * successful na_special() connect and disconnect.
		 */
		if (NA_KERNEL_ONLY(na)) {
			error = EBUSY;
			goto done;
		}
		ASSERT(!(na->na_flags & NAF_SPEC_INIT));

		atomic_bitset_32(&na->na_flags, NAF_KERNEL_ONLY);
		error = na_bind_channel(na, ch, chr);
		if (error != 0) {
			atomic_bitclear_32(&na->na_flags, NAF_KERNEL_ONLY);
			goto done;
		}
		atomic_bitset_32(&na->na_flags, NAF_SPEC_INIT);
		break;

	case NXSPEC_CMD_DISCONNECT:
		ASSERT(NA_KERNEL_ONLY(na));
		ASSERT(na->na_channels > 0);
		ASSERT(na->na_flags & NAF_SPEC_INIT);
		na_unbind_channel(ch);
		atomic_bitclear_32(&na->na_flags,
		    (NAF_SPEC_INIT | NAF_KERNEL_ONLY));
		break;

	case NXSPEC_CMD_START:
		na_kr_drop(na, FALSE);
		break;

	case NXSPEC_CMD_STOP:
		na_kr_drop(na, TRUE);
		LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_NOTOWNED);
		lck_mtx_lock(&ch->ch_lock);
		nxprov_advise_disconnect(na->na_nx, ch);
		lck_mtx_unlock(&ch->ch_lock);
		break;

	default:
		error = EINVAL;
		break;
	}

done:
	SK_DF(error ? SK_VERB_ERROR : SK_VERB_NETIF,
	    "ch 0x%llx from na \"%s\" (0x%llx) naflags %b nx 0x%llx "
	    "spec_cmd %u (err %d)", SK_KVA(ch), na->na_name, SK_KVA(na),
	    na->na_flags, NAF_BITS, SK_KVA(ch->ch_nexus), spec_cmd, error);

	return error;
}
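
/*
 * Illustrative pairing (sketch, not a definitive call sequence): a
 * kernel client connecting to a netif port would drive this routine as
 *
 *	error = na->na_special(na, ch, chr, NXSPEC_CMD_CONNECT);
 *	...
 *	(void) na->na_special(na, ch, chr, NXSPEC_CMD_DISCONNECT);
 *
 * CONNECT marks the adapter NAF_KERNEL_ONLY for the lifetime of the
 * binding; START and STOP toggle the rings' drop mode around that
 * window via na_kr_drop().
 */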

/*
 * Get a Skywalk netif adapter for the port.
 */
int
nx_netif_na_find(struct kern_nexus *nx, struct kern_channel *ch,
    struct chreq *chr, struct nxbind *nxb, struct proc *p,
    struct nexus_adapter **nap, boolean_t create)
{
#pragma unused(ch)
	struct nx_netif *nif = NX_NETIF_PRIVATE(nx);
	boolean_t anon = NX_ANONYMOUS_PROV(nx);
	ch_endpoint_t ep = chr->cr_endpoint;
	nexus_port_t nx_port = chr->cr_port;
	struct nexus_adapter *na = NULL;
	struct ifnet *ifp;
	int err = 0;

	SK_LOCK_ASSERT_HELD();
	*nap = NULL;	/* default */

#if SK_LOG
	uuid_string_t uuidstr;
	SK_D("name \"%s\" spec_uuid \"%s\" port %d mode 0x%b pipe_id %u "
	    "ring_id %d ring_set %u ep_type %u:%u create %u%s",
	    chr->cr_name, sk_uuid_unparse(chr->cr_spec_uuid, uuidstr),
	    (int)chr->cr_port, chr->cr_mode, CHMODE_BITS,
	    chr->cr_pipe_id, (int)chr->cr_ring_id, chr->cr_ring_set,
	    chr->cr_real_endpoint, chr->cr_endpoint, create,
	    (ep != CH_ENDPOINT_NET_IF) ? " (skipped)" : "");
#endif /* SK_LOG */

	if (!create || ep != CH_ENDPOINT_NET_IF) {
		err = ENODEV;
		goto done;
	}

	ASSERT(NX_DOM(nx)->nxdom_type == NEXUS_TYPE_NET_IF);
	if (nx_port_get_na(nx, NEXUS_PORT_NET_IF_DEV) == NULL) {
		err = ENXIO;
		goto done;
	}
	ifp = nif->nif_ifp;
	if (!(SKYWALK_CAPABLE(ifp))) {
		SK_ERR("interface %s is no longer usable", if_name(ifp));
		err = ENOTSUP;
		goto done;
	}

	if (chr->cr_mode & CHMODE_LOW_LATENCY) {
		SK_ERR("low latency is not supported for netif channels");
		err = ENOTSUP;
		goto done;
	}

	switch (nx_port) {
	case NEXUS_PORT_NET_IF_DEV:
		/*
		 * We have to reject a direct user open that's not explicitly
		 * allowed because netif nexuses do not by default have
		 * user memory regions.
		 */
		if (p != kernproc &&
		    (!skywalk_netif_direct_allowed(ifp->if_xname) ||
		    (kauth_cred_issuser(kauth_cred_get()) == 0 &&
		    (anon || nif->nif_dev_nxb == NULL || nxb == NULL ||
		    !nxb_is_equal(nif->nif_dev_nxb, nxb))))) {
			DTRACE_SKYWALK2(direct__not__allowed, struct ifnet *,
			    ifp, struct chreq *, chr);
			err = ENOTSUP;
			goto done;
		}
		if (chr->cr_mode & CHMODE_EVENT_RING) {
			SK_ERR("event ring is not supported for netif dev port channels");
			err = ENOTSUP;
			goto done;
		}
		na = nx_port_get_na(nx, NEXUS_PORT_NET_IF_DEV);
		break;

	case NEXUS_PORT_NET_IF_HOST:
		if (p != kernproc) {
			err = ENOTSUP;
			goto done;
		}
		if (chr->cr_mode & CHMODE_EVENT_RING) {
			SK_ERR("event ring is not supported for netif host port channels");
			err = ENOTSUP;
			goto done;
		}
		na = nx_port_get_na(nx, NEXUS_PORT_NET_IF_HOST);
		break;

	default:
		ASSERT(!(chr->cr_mode & CHMODE_CONFIG));

		NETIF_WLOCK(nif);
		err = nx_port_alloc(nx, nx_port, nxb, &na, p);
		if (err != 0) {
			NETIF_WUNLOCK(nif);
			goto done;
		}

		if (na == NULL) {
			if (chr->cr_mode & CHMODE_FILTER) {
				err = netif_filter_na_create(nx, chr, &na);
			} else {
				err = netif_vp_na_create(nx, chr, &na);
			}
			if (err != 0) {
				NETIF_WUNLOCK(nif);
				goto done;
			}
			err = nx_port_alloc(nx, nx_port, nxb, &na, p);
			if (err != 0) {
				NETIF_WUNLOCK(nif);
				goto done;
			}
		}
		NETIF_WUNLOCK(nif);

		break;
	}

	ASSERT(err == 0);
	ASSERT(na != NULL);

#if CONFIG_NEXUS_USER_PIPE
	if (NA_OWNED_BY_ANY(na) || na->na_next_pipe > 0) {
#else /* !CONFIG_NEXUS_USER_PIPE */
	if (NA_OWNED_BY_ANY(na)) {
#endif /* !CONFIG_NEXUS_USER_PIPE */
		err = EBUSY;
		na = NULL;
		goto done;
	}

	*nap = na;
	na_retain_locked(na);

done:
	ASSERT(err != 0 || na != NULL);
	if (err) {
		SK_ERR("na not found, err(%d)", err);
	} else {
		SK_DF(SK_VERB_NETIF, "found na 0x%llx", SK_KVA(na));
	}
	return err;
}

/* na_krings_create callback for all netif device adapters */
int
nx_netif_dev_krings_create(struct nexus_adapter *na, struct kern_channel *ch)
{
	int ret;

	ASSERT(na->na_type == NA_NETIF_DEV ||
	    na->na_type == NA_NETIF_COMPAT_DEV);
	/*
	 * Allocate context structures for native netif only, for
	 * IOSkywalkFamily to store its object references.
	 */
	ret = na_rings_mem_setup(na, (na->na_flags & NAF_NATIVE), ch);

	/*
	 * We mark CKRF_DROP for kernel-only rings (kernel channel
	 * opened by the flowswitch, etc.) to prevent packets from
	 * going through until after the client of the kernel channel
	 * has fully plumbed things on its side. For userland-facing
	 * rings (regular channel opened to netif), this is not
	 * required, and so we don't mark CKRF_DROP there.
	 */
	if (ret == 0 && NA_KERNEL_ONLY(na)) {
		na_kr_drop(na, TRUE);
	}

	return ret;
}

/* call with SK_LOCK held */
void
nx_netif_dev_krings_delete(struct nexus_adapter *na, struct kern_channel *ch,
    boolean_t defunct)
{
	ASSERT(na->na_type == NA_NETIF_DEV ||
	    na->na_type == NA_NETIF_COMPAT_DEV);

	/* see comments in nx_netif_dev_krings_create() */
	if (NA_KERNEL_ONLY(na)) {
		na_kr_drop(na, TRUE);
	}

	na_rings_mem_teardown(na, ch, defunct);
}

struct nx_netif *
nx_netif_alloc(zalloc_flags_t how)
{
	struct nx_netif *n;

	SK_LOCK_ASSERT_HELD();

	n = zalloc_flags(nx_netif_zone, how | Z_ZERO);
	if (n == NULL) {
		return NULL;
	}

	NETIF_RWINIT(n);
	os_ref_init(&n->nif_refcnt, NULL);
	SK_DF(SK_VERB_MEM, "netif 0x%llx", SK_KVA(n));

	return n;
}

static void
nx_netif_destroy(struct nx_netif *n)
{
	ASSERT(n->nif_dev_nxb == NULL);
	ASSERT(n->nif_host_nxb == NULL);
	ASSERT(os_ref_get_count(&n->nif_refcnt) == 0);
	nx_netif_llink_config_free(n);
	SK_DF(SK_VERB_MEM, "netif 0x%llx", SK_KVA(n));
	NETIF_RWDESTROY(n);
	zfree(nx_netif_zone, n);
}

void
nx_netif_release(struct nx_netif *n)
{
	SK_LOCK_ASSERT_HELD();

	SK_DF(SK_VERB_MEM, "netif 0x%llx, refcnt %d", SK_KVA(n),
	    os_ref_get_count(&n->nif_refcnt));
	if (os_ref_release(&n->nif_refcnt) == 0) {
		nx_netif_destroy(n);
	}
}

void
nx_netif_retain(struct nx_netif *n)
{
	SK_LOCK_ASSERT_HELD();

	/* retaining an object with a zero refcount is not allowed */
	ASSERT(os_ref_get_count(&n->nif_refcnt) >= 1);
	os_ref_retain(&n->nif_refcnt);
	SK_DF(SK_VERB_MEM, "netif 0x%llx, refcnt %d", SK_KVA(n),
	    os_ref_get_count(&n->nif_refcnt));
}

void
nx_netif_free(struct nx_netif *n)
{
	nx_netif_release(n);
}

static int
nx_netif_interface_advisory_report(struct kern_nexus *nx,
    const struct ifnet_interface_advisory *advisory)
{
	struct kern_nexus *notify_nx;
	struct __kern_netif_intf_advisory *intf_adv;
	struct nx_netif *nif = NX_NETIF_PRIVATE(nx);

	if (nif->nif_fsw_nxadv != NULL) {
		ASSERT(nif->nif_fsw != NULL);
		intf_adv = &nif->nif_fsw_nxadv->_nxadv_intf_adv;
		notify_nx = nif->nif_fsw->fsw_nx;
	} else {
		intf_adv = &nif->nif_netif_nxadv->__kern_intf_adv;
		notify_nx = nif->nif_nx;
	}
	/*
	 * copy the advisory report into shared memory
	 */
	intf_adv->cksum = os_cpu_copy_in_cksum(advisory, &intf_adv->adv,
	    sizeof(*advisory), 0);
	STATS_INC(&nif->nif_stats, NETIF_STATS_IF_ADV_UPD_RECV);
	/*
	 * notify user channels of advisory report availability
	 */
	nx_interface_advisory_notify(notify_nx);
	return 0;
}

static errno_t
nx_netif_interface_advisory_notify(void *kern_ctx,
    const struct ifnet_interface_advisory *advisory)
{
	_CASSERT(offsetof(struct ifnet_interface_advisory, version) ==
	    offsetof(struct ifnet_interface_advisory, header.version));
	_CASSERT(offsetof(struct ifnet_interface_advisory, direction) ==
	    offsetof(struct ifnet_interface_advisory, header.direction));
	_CASSERT(offsetof(struct ifnet_interface_advisory, _reserved) ==
	    offsetof(struct ifnet_interface_advisory, header.interface_type));

	if (__improbable(kern_ctx == NULL || advisory == NULL)) {
		return EINVAL;
	}
	if (__improbable((advisory->header.version <
	    IF_INTERFACE_ADVISORY_VERSION_MIN) ||
	    (advisory->header.version > IF_INTERFACE_ADVISORY_VERSION_MAX))) {
		SK_ERR("Invalid advisory version %d", advisory->header.version);
		return EINVAL;
	}
	if (__improbable((advisory->header.direction !=
	    IF_INTERFACE_ADVISORY_DIRECTION_TX) &&
	    (advisory->header.direction !=
	    IF_INTERFACE_ADVISORY_DIRECTION_RX))) {
		SK_ERR("Invalid advisory direction %d",
		    advisory->header.direction);
		return EINVAL;
	}
	if (__improbable(((advisory->header.interface_type <
	    IF_INTERFACE_ADVISORY_INTERFACE_TYPE_MIN) ||
	    (advisory->header.interface_type >
	    IF_INTERFACE_ADVISORY_INTERFACE_TYPE_MAX)) &&
	    (advisory->header.version >= IF_INTERFACE_ADVISORY_VERSION_2))) {
		SK_ERR("Invalid advisory interface type %d",
		    advisory->header.interface_type);
		return EINVAL;
	}
	return nx_netif_interface_advisory_report(kern_ctx, advisory);
}
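
/*
 * Sketch of a well-formed advisory (illustrative; field values are
 * examples only): a driver report must pass the version, direction
 * and interface-type checks above, e.g.
 *
 *	struct ifnet_interface_advisory adv = { 0 };
 *	adv.header.version = IF_INTERFACE_ADVISORY_VERSION_MIN;
 *	adv.header.direction = IF_INTERFACE_ADVISORY_DIRECTION_TX;
 *	error = nx_netif_interface_advisory_notify(nx, &adv);
 *
 * Anything outside those ranges is rejected with EINVAL before the
 * report is checksummed into shared memory by
 * nx_netif_interface_advisory_report().
 */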
3348
3349 void
3350 nx_netif_config_interface_advisory(struct kern_nexus *nx, bool enable)
3351 {
3352 struct kern_nexus *nx_netif;
3353 struct nx_netif *nif;
3354
3355 if (NX_REJECT_ACT(nx) || (nx->nx_flags & NXF_CLOSED) != 0) {
3356 return;
3357 }
3358 if (NX_PROV(nx)->nxprov_params->nxp_type == NEXUS_TYPE_FLOW_SWITCH) {
3359 struct nx_flowswitch *fsw = NX_FSW_PRIVATE(nx);
3360 nx_netif = fsw->fsw_nifna->na_nx;
3361 } else {
3362 nx_netif = nx;
3363 }
3364 ASSERT(NX_PROV(nx_netif)->nxprov_params->nxp_type == NEXUS_TYPE_NET_IF);
3365 nif = NX_NETIF_PRIVATE(nx_netif);
3366 if (nif->nif_intf_adv_config != NULL) {
3367 nif->nif_intf_adv_config(nif->nif_intf_adv_prov_ctx, enable);
3368 }
3369 }
3370
3371 void
3372 nx_netif_get_interface_tso_capabilities(struct ifnet *ifp, uint32_t *tso_v4_mtu,
3373 uint32_t *tso_v6_mtu)
3374 {
3375 #pragma unused (ifp)
3376 *tso_v4_mtu = 0;
3377 *tso_v6_mtu = 0;
3378
3379 #ifdef XNU_TARGET_OS_OSX
3380 if (SKYWALK_CAPABLE(ifp) && SKYWALK_NATIVE(ifp)) {
3381 struct nx_netif *nif = NA(ifp)->nifna_netif;
3382
3383 if ((nif->nif_hwassist & IFNET_TSO_IPV4) != 0) {
3384 *tso_v4_mtu = ifp->if_tso_v4_mtu;
3385 }
3386 if ((nif->nif_hwassist & IFNET_TSO_IPV6) != 0) {
3387 *tso_v6_mtu = ifp->if_tso_v6_mtu;
3388 }
3389 }
3390 #endif /* XNU_TARGET_OS_OSX */
3391 }
3392
/*
 * This function is no longer needed since we now pass truncated packets
 * to filters. We keep the hook in case we ever need to prevent certain
 * packets from being passed to filters.
 */
3398 static boolean_t
3399 packet_is_filterable(struct nexus_netif_adapter *nifna,
3400 struct __kern_packet *pkt)
3401 {
3402 #pragma unused (nifna, pkt)
3403 return TRUE;
3404 }
3405
/*
 * This function only supports the RX path; the TX path never sends
 * packets larger than the MTU because TSO is disabled while filters
 * are enabled.
 */
3411 static void
3412 get_filterable_packets(struct nexus_netif_adapter *nifna,
3413 struct __kern_packet *pkt_chain, struct __kern_packet **fpkt_chain,
3414 struct __kern_packet **passthrough_chain)
3415 {
3416 struct nx_netif *nif = nifna->nifna_netif;
3417 struct netif_stats *nifs = &nif->nif_stats;
3418 struct __kern_packet *pkt = pkt_chain, *next, *fpkt;
3419 struct __kern_packet *fpkt_head = NULL, *passthrough_head = NULL;
3420 struct __kern_packet **fpkt_tailp = &fpkt_head;
3421 struct __kern_packet **passthrough_tailp = &passthrough_head;
3422 int fcnt = 0, pcnt = 0, dcnt = 0;
3423
3424 while (pkt != NULL) {
3425 next = pkt->pkt_nextpkt;
3426 pkt->pkt_nextpkt = NULL;
3427
3428 if (!packet_is_filterable(nifna, pkt)) {
3429 pcnt++;
3430 *passthrough_tailp = pkt;
3431 passthrough_tailp = &pkt->pkt_nextpkt;
3432 pkt = next;
3433 continue;
3434 }
3435 fpkt = nx_netif_pkt_to_filter_pkt(nifna, pkt, NETIF_CONVERT_RX);
3436 if (fpkt != NULL) {
3437 fcnt++;
3438 *fpkt_tailp = fpkt;
3439 fpkt_tailp = &fpkt->pkt_nextpkt;
3440 } else {
3441 dcnt++;
3442 }
3443 pkt = next;
3444 }
3445 *fpkt_chain = fpkt_head;
3446 *passthrough_chain = passthrough_head;
3447
3448 /*
3449 * No need to increment drop stats because that's already
3450 * done in nx_netif_pkt_to_filter_pkt.
3451 */
3452 STATS_ADD(nifs, NETIF_STATS_FILTER_RX_NOT_FILTERABLE, pcnt);
3453 DTRACE_SKYWALK6(filterable, struct nexus_netif_adapter *, nifna,
3454 int, fcnt, int, pcnt, int, dcnt, struct __kern_packet *,
3455 fpkt_head, struct __kern_packet *, passthrough_head);
3456 }
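
/*
 * The chain splitting above uses the tail-pointer idiom: appending to a
 * singly linked chain in O(1) without scanning for the tail. A minimal
 * standalone sketch of the pattern (next_pkt() is hypothetical):
 *
 *	struct __kern_packet *head = NULL, **tailp = &head;
 *	struct __kern_packet *pkt;
 *
 *	while ((pkt = next_pkt()) != NULL) {
 *		pkt->pkt_nextpkt = NULL;
 *		*tailp = pkt;                    // link at the current tail
 *		tailp = &pkt->pkt_nextpkt;       // advance the tail pointer
 *	}
 *	// head now points at the chain; *tailp is its NULL terminator
 */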
3457
3458 /*
 * This is used by the ring-based notify functions and also by the
 * qset-based RX enqueue path (kern_netif_queue_rx_enqueue) below.
3462 */
3463 void
3464 netif_receive(struct nexus_netif_adapter *nifna,
3465 struct __kern_packet *pkt_chain, struct nexus_pkt_stats *stats)
3466 {
3467 struct nx_netif *nif = nifna->nifna_netif;
3468 struct nexus_adapter *na = &nifna->nifna_up;
3469 struct netif_stats *nifs = &nif->nif_stats;
3470 int err, dropcnt, dropstat = -1;
3471
3472 /* update our work timestamp */
3473 na->na_work_ts = _net_uptime;
3474
3475 if (nif->nif_filter_cnt > 0) {
3476 struct __kern_packet *fpkt_chain = NULL;
3477 struct __kern_packet *passthrough_chain = NULL;
3478
3479 get_filterable_packets(nifna, pkt_chain, &fpkt_chain,
3480 &passthrough_chain);
3481 if (fpkt_chain != NULL) {
3482 (void) nx_netif_filter_inject(nifna, NULL, fpkt_chain,
3483 NETIF_FILTER_RX | NETIF_FILTER_SOURCE);
3484 }
3485 if (passthrough_chain != NULL) {
3486 pkt_chain = passthrough_chain;
3487 } else {
3488 return;
3489 }
3490 } else if (nx_netif_filter_default_drop != 0) {
3491 DTRACE_SKYWALK2(rx__default__drop, struct nx_netif *, nif,
3492 struct __kern_packet *, pkt_chain);
3493 dropstat = NETIF_STATS_FILTER_DROP_DEFAULT;
3494 goto drop;
3495 }
3496 if (nif->nif_flow_cnt > 0) {
3497 struct __kern_packet *remain = NULL;
3498
3499 err = nx_netif_demux(nifna, pkt_chain, &remain,
3500 NETIF_FLOW_SOURCE);
3501 if (remain == NULL) {
3502 return;
3503 }
3504 pkt_chain = remain;
3505 }
3506 if (na->na_rx != NULL) {
3507 na->na_rx(na, pkt_chain, stats);
3508 } else {
3509 DTRACE_SKYWALK2(no__rx__cb, struct nx_netif *, nif,
3510 struct __kern_packet *, pkt_chain);
3511 dropstat = NETIF_STATS_DROP_NO_RX_CB;
3512 goto drop;
3513 }
3514 return;
3515 drop:
3516 dropcnt = 0;
3517 nx_netif_free_packet_chain(pkt_chain, &dropcnt);
3518 if (dropstat != -1) {
3519 STATS_ADD(nifs, dropstat, dropcnt);
3520 }
3521 STATS_ADD(nifs, NETIF_STATS_DROP, dropcnt);
3522 }
3523
3524 static slot_idx_t
3525 netif_rate_limit(struct __kern_channel_ring *r, uint64_t rate,
3526 slot_idx_t begin, slot_idx_t end, boolean_t *rate_limited)
3527 {
3528 uint64_t elapsed;
3529 uint64_t now;
3530 struct __kern_packet *pkt;
3531 clock_sec_t sec;
3532 clock_usec_t usec;
3533 slot_idx_t i;
3534
3535 if (__probable(rate == 0)) {
3536 return end;
3537 }
3538
	/* initialize the token bucket regulator (TBR) if not already done */
3540 if (__improbable(r->ckr_tbr_token == CKR_TBR_TOKEN_INVALID)) {
3541 r->ckr_tbr_token = rate;
3542 r->ckr_tbr_depth = rate;
3543 r->ckr_tbr_last = mach_absolute_time();
3544 } else {
3545 now = mach_absolute_time();
3546 elapsed = now - r->ckr_tbr_last;
3547 absolutetime_to_microtime(elapsed, &sec, &usec);
3548 r->ckr_tbr_token +=
3549 ((sec * USEC_PER_SEC + usec) * rate / USEC_PER_SEC);
3550 if (__improbable(r->ckr_tbr_token > r->ckr_tbr_depth)) {
3551 r->ckr_tbr_token = r->ckr_tbr_depth;
3552 }
3553 r->ckr_tbr_last = now;
3554 }
3555
3556 *rate_limited = FALSE;
3557 for (i = begin; i != end; i = SLOT_NEXT(i, r->ckr_lim)) {
3558 pkt = KR_KSD(r, i)->sd_pkt;
3559 if (__improbable(pkt == NULL)) {
3560 continue;
3561 }
3562 if (__improbable(r->ckr_tbr_token <= 0)) {
3563 end = i;
3564 *rate_limited = TRUE;
3565 break;
3566 }
3567 r->ckr_tbr_token -= pkt->pkt_length * 8;
3568 }
3569
3570 SK_DF(SK_VERB_FSW | SK_VERB_RX, "ckr %p %s rate limited at %d",
3571 r, r->ckr_name, i);
3572
3573 return end;
3574 }
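
/*
 * The regulator above is a token bucket: tokens accrue at `rate' per
 * second of elapsed time (capped at ckr_tbr_depth) and each packet
 * consumes pkt_length * 8 tokens, so `rate' is effectively a
 * bits-per-second budget. A worked example, assuming rate = 10,000,000
 * (10 Mbit/s): after 10 ms of idle time the bucket gains
 * 10,000,000 * 10,000 usec / USEC_PER_SEC = 100,000 tokens, enough for
 * about eight 1500-byte packets (12,000 bits each) before the loop
 * clamps `end' and reports the ring as rate limited.
 */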
3575
3576 SK_NO_INLINE_ATTRIBUTE
3577 static struct __kern_packet *
3578 consume_pkts(struct __kern_channel_ring *ring, slot_idx_t end)
3579 {
3580 struct __kern_packet *pkt_chain = NULL, **tailp = &pkt_chain;
3581 slot_idx_t idx = ring->ckr_rhead;
3582
3583 while (idx != end) {
3584 struct __kern_slot_desc *ksd = KR_KSD(ring, idx);
3585 struct __kern_packet *pkt = ksd->sd_pkt;
3586
3587 ASSERT(pkt->pkt_nextpkt == NULL);
3588 KR_SLOT_DETACH_METADATA(ring, ksd);
3589 *tailp = pkt;
3590 tailp = &pkt->pkt_nextpkt;
3591 idx = SLOT_NEXT(idx, ring->ckr_lim);
3592 }
3593 ring->ckr_rhead = end;
3594 ring->ckr_rtail = ring->ckr_ktail;
3595 return pkt_chain;
3596 }
3597
3598 int
3599 netif_rx_notify_default(struct __kern_channel_ring *ring, struct proc *p,
3600 uint32_t flags)
3601 {
3602 struct nexus_adapter *hwna;
3603 struct nexus_netif_adapter *nifna;
3604 struct nx_netif *nif;
3605 struct __kern_packet *pkt_chain;
3606 struct nexus_pkt_stats stats;
3607 sk_protect_t protect;
3608 slot_idx_t ktail;
3609 int err = 0;
3610
3611 KDBG((SK_KTRACE_NETIF_RX_NOTIFY_DEFAULT | DBG_FUNC_START),
3612 SK_KVA(ring));
3613
3614 ASSERT(ring->ckr_tx == NR_RX);
3615 ASSERT(!NA_KERNEL_ONLY(KRNA(ring)) || KR_KERNEL_ONLY(ring));
3616
3617 err = kr_enter(ring, ((flags & NA_NOTEF_CAN_SLEEP) != 0));
3618 if (err != 0) {
3619 /* not a serious error, so no need to be chatty here */
3620 SK_DF(SK_VERB_FSW,
3621 "hwna \"%s\" (0x%llx) kr \"%s\" (0x%llx) krflags 0x%b "
3622 "(%d)", KRNA(ring)->na_name, SK_KVA(KRNA(ring)),
3623 ring->ckr_name, SK_KVA(ring), ring->ckr_flags,
3624 CKRF_BITS, err);
3625 goto out;
3626 }
3627 if (__improbable(KR_DROP(ring))) {
3628 kr_exit(ring);
3629 err = ENODEV;
3630 goto out;
3631 }
3632 hwna = KRNA(ring);
3633 nifna = NIFNA(hwna);
3634 nif = nifna->nifna_netif;
3635 if (__improbable(hwna->na_ifp == NULL)) {
3636 kr_exit(ring);
3637 err = ENODEV;
3638 goto out;
3639 }
3640 protect = sk_sync_protect();
3641 err = ring->ckr_na_sync(ring, p, 0);
3642 if (err != 0 && err != EAGAIN) {
3643 goto put_out;
3644 }
3645
3646 /* read the tail pointer once */
3647 ktail = ring->ckr_ktail;
3648 if (__improbable(ring->ckr_khead == ktail)) {
3649 SK_DF(SK_VERB_FSW | SK_VERB_NOTIFY | SK_VERB_RX,
3650 "how strange, interrupt with no packets on hwna "
3651 "\"%s\" (0x%llx)", KRNA(ring)->na_name, SK_KVA(KRNA(ring)));
3652 goto put_out;
3653 }
3654 ktail = netif_rate_limit(ring, nif->nif_input_rate, ring->ckr_rhead,
3655 ktail, &ring->ckr_rate_limited);
3656
3657 pkt_chain = consume_pkts(ring, ktail);
3658 if (pkt_chain != NULL) {
3659 netif_receive(nifna, pkt_chain, &stats);
3660
3661 if (ring->ckr_netif_mit_stats != NULL &&
3662 stats.nps_pkts != 0 && stats.nps_bytes != 0) {
3663 ring->ckr_netif_mit_stats(ring, stats.nps_pkts,
3664 stats.nps_bytes);
3665 }
3666 }
3667
3668 put_out:
3669 sk_sync_unprotect(protect);
3670 kr_exit(ring);
3671
3672 out:
3673 KDBG((SK_KTRACE_NETIF_RX_NOTIFY_DEFAULT | DBG_FUNC_END),
3674 SK_KVA(ring), err);
3675 return err;
3676 }
3677
3678 int
3679 netif_rx_notify_fast(struct __kern_channel_ring *ring, struct proc *p,
3680 uint32_t flags)
3681 {
3682 #pragma unused(p, flags)
3683 sk_protect_t protect;
3684 struct nexus_adapter *hwna;
3685 struct nexus_pkt_stats stats = {};
3686 uint32_t i, count;
3687 int err = 0;
3688
3689 KDBG((SK_KTRACE_NETIF_RX_NOTIFY_FAST | DBG_FUNC_START),
3690 SK_KVA(ring));
3691
	/*
	 * XXX
	 * sk_sync_protect() is not needed for this case because
	 * we are not using the dev ring. Unfortunately lots of
	 * macros used by fsw still require this.
	 */
3697 protect = sk_sync_protect();
3698 hwna = KRNA(ring);
3699 count = na_get_nslots(hwna, NR_RX);
3700 err = nx_rx_sync_packets(ring, ring->ckr_scratch, &count);
3701 if (__improbable(err != 0)) {
3702 SK_ERR("nx_rx_sync_packets failed: %d", err);
3703 DTRACE_SKYWALK2(rx__sync__packets__failed,
3704 struct __kern_channel_ring *, ring, int, err);
3705 goto out;
3706 }
3707 DTRACE_SKYWALK1(chain__count, uint32_t, count);
3708 for (i = 0; i < count; i++) {
3709 struct __kern_packet *pkt_chain;
3710
3711 pkt_chain = SK_PTR_ADDR_KPKT(ring->ckr_scratch[i]);
3712 ASSERT(pkt_chain != NULL);
3713 netif_receive(NIFNA(KRNA(ring)), pkt_chain, &stats);
3714
3715 if (ring->ckr_netif_mit_stats != NULL &&
3716 stats.nps_pkts != 0 && stats.nps_bytes != 0) {
3717 ring->ckr_netif_mit_stats(ring, stats.nps_pkts,
3718 stats.nps_bytes);
3719 }
3720 }
3721 out:
3722 sk_sync_unprotect(protect);
3723 KDBG((SK_KTRACE_NETIF_RX_NOTIFY_FAST | DBG_FUNC_END),
3724 SK_KVA(ring), err);
3725 return err;
3726 }
3727
3728
/*
 * Select the RX notify callback appropriate for the given netif mode.
 */
3732 static channel_ring_notify_t
3733 netif_hwna_get_notify(struct __kern_channel_ring *ring, netif_mode_t mode)
3734 {
3735 channel_ring_notify_t notify = NULL;
3736 boolean_t has_sync_pkts = (sk_rx_sync_packets != 0 &&
3737 nx_has_rx_sync_packets(ring));
3738
3739 if (mode == NETIF_MODE_FSW) {
3740 notify = (has_sync_pkts ? netif_rx_notify_fast :
3741 netif_rx_notify_default);
3742 } else if (mode == NETIF_MODE_LLW) {
3743 notify = (has_sync_pkts ? netif_llw_rx_notify_fast :
3744 netif_llw_rx_notify_default);
3745 }
3746 return notify;
3747 }
3748
3749
3750 static uint32_t
3751 netif_mode_to_flag(netif_mode_t mode)
3752 {
3753 uint32_t flag = 0;
3754
3755 if (mode == NETIF_MODE_FSW) {
3756 flag = NAF_MODE_FSW;
3757 } else if (mode == NETIF_MODE_LLW) {
3758 flag = NAF_MODE_LLW;
3759 }
3760 return flag;
3761 }
3762
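/*
 * Configure the NA to operate in a particular mode: on set, save each RX
 * ring's notify callback and install the mode-specific one; on clear,
 * restore the saved callbacks and clear the mode flags.
 */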
3763 static void
3764 netif_hwna_config_mode(struct nexus_adapter *hwna, netif_mode_t mode,
3765 void (*rx)(struct nexus_adapter *, struct __kern_packet *,
3766 struct nexus_pkt_stats *), boolean_t set)
3767 {
3768 uint32_t i;
3769 uint32_t flag;
3770
3771 ASSERT(hwna->na_type == NA_NETIF_DEV ||
3772 hwna->na_type == NA_NETIF_COMPAT_DEV);
3773
3774 for (i = 0; i < na_get_nrings(hwna, NR_RX); i++) {
3775 struct __kern_channel_ring *kr = &NAKR(hwna, NR_RX)[i];
3776 channel_ring_notify_t notify = netif_hwna_get_notify(kr, mode);
3777
3778 if (set) {
3779 kr->ckr_save_notify = kr->ckr_netif_notify;
3780 kr->ckr_netif_notify = notify;
3781 } else {
3782 kr->ckr_netif_notify = kr->ckr_save_notify;
3783 kr->ckr_save_notify = NULL;
3784 }
3785 }
3786 if (set) {
3787 hwna->na_rx = rx;
3788 flag = netif_mode_to_flag(mode);
3789 atomic_bitset_32(&hwna->na_flags, flag);
3790 } else {
3791 hwna->na_rx = NULL;
3792 atomic_bitclear_32(&hwna->na_flags,
3793 (NAF_MODE_FSW | NAF_MODE_LLW));
3794 }
3795 }
3796
3797 void
3798 netif_hwna_set_mode(struct nexus_adapter *hwna, netif_mode_t mode,
3799 void (*rx)(struct nexus_adapter *, struct __kern_packet *,
3800 struct nexus_pkt_stats *))
3801 {
3802 return netif_hwna_config_mode(hwna, mode, rx, TRUE);
3803 }
3804
3805 void
3806 netif_hwna_clear_mode(struct nexus_adapter *hwna)
3807 {
3808 return netif_hwna_config_mode(hwna, NETIF_MODE_NONE, NULL, FALSE);
3809 }
3810
3811 static void
3812 netif_inject_rx(struct nexus_adapter *na, struct __kern_packet *pkt_chain)
3813 {
3814 struct nexus_netif_adapter *nifna = NIFNA(na);
3815 struct nx_netif *nif = nifna->nifna_netif;
3816 struct netif_stats *nifs = &nif->nif_stats;
3817 struct __kern_channel_ring *r;
3818 struct nexus_pkt_stats stats;
3819 sk_protect_t protect;
3820 boolean_t ring_drop = FALSE;
3821 int err, dropcnt;
3822
3823 if (!NA_OWNED_BY_FSW(na)) {
3824 DTRACE_SKYWALK1(fsw__disabled, struct nexus_adapter *, na);
3825 goto fail;
3826 }
3827 ASSERT(na->na_rx != NULL);
3828
3829 /*
3830 * XXX
3831 * This function is called when a filter injects a packet back to the
3832 * regular RX path. We can assume the ring is 0 for now because RSS
3833 * is not supported. This needs to be revisited when we add support for
3834 * RSS.
3835 */
3836 r = &na->na_rx_rings[0];
3837 ASSERT(r->ckr_tx == NR_RX);
3838 err = kr_enter(r, TRUE);
3839 VERIFY(err == 0);
3840
3841 if (__improbable(KR_DROP(r))) {
3842 kr_exit(r);
3843 DTRACE_SKYWALK2(ring__drop, struct nexus_adapter *, na,
3844 struct __kern_channel_ring *, r);
3845 ring_drop = TRUE;
3846 goto fail;
3847 }
3848 protect = sk_sync_protect();
3849 na->na_rx(na, pkt_chain, &stats);
3850
3851 if (r->ckr_netif_mit_stats != NULL &&
3852 stats.nps_pkts != 0 && stats.nps_bytes != 0) {
3853 r->ckr_netif_mit_stats(r, stats.nps_pkts, stats.nps_bytes);
3854 }
3855 sk_sync_unprotect(protect);
3856
3857 kr_exit(r);
3858 return;
3859
3860 fail:
3861 dropcnt = 0;
3862 nx_netif_free_packet_chain(pkt_chain, &dropcnt);
3863 if (ring_drop) {
3864 STATS_ADD(nifs, NETIF_STATS_DROP_KRDROP_MODE, dropcnt);
3865 }
3866 STATS_ADD(nifs, NETIF_STATS_DROP, dropcnt);
3867 }
3868
3869 /*
3870 * This is called when an inbound packet has traversed all filters.
3871 */
3872 errno_t
3873 nx_netif_filter_rx_cb(struct nexus_netif_adapter *nifna,
3874 struct __kern_packet *fpkt_chain, uint32_t flags)
3875 {
3876 #pragma unused (flags)
3877 struct nx_netif *nif = nifna->nifna_netif;
3878 struct netif_stats *nifs = &nif->nif_stats;
3879 struct nexus_adapter *na = &nifna->nifna_up;
3880 struct __kern_packet *pkt_chain;
3881 int err;
3882
3883 pkt_chain = nx_netif_filter_pkt_to_pkt_chain(nifna,
3884 fpkt_chain, NETIF_CONVERT_RX);
3885 if (pkt_chain == NULL) {
3886 return ENOMEM;
3887 }
3888 if (nif->nif_flow_cnt > 0) {
3889 struct __kern_packet *remain = NULL;
3890
3891 err = nx_netif_demux(nifna, pkt_chain, &remain,
3892 NETIF_FLOW_INJECT);
3893 if (remain == NULL) {
3894 return err;
3895 }
3896 pkt_chain = remain;
3897 }
3898 if (na->na_rx != NULL) {
3899 netif_inject_rx(na, pkt_chain);
3900 } else {
3901 int dropcnt = 0;
3902 nx_netif_free_packet_chain(pkt_chain, &dropcnt);
3903 STATS_ADD(nifs,
3904 NETIF_STATS_FILTER_DROP_NO_RX_CB, dropcnt);
3905 STATS_ADD(nifs, NETIF_STATS_DROP, dropcnt);
3906 }
3907 return 0;
3908 }
3909
3910 /*
3911 * This is called when an outbound packet has traversed all filters.
3912 */
3913 errno_t
3914 nx_netif_filter_tx_cb(struct nexus_netif_adapter *nifna,
3915 struct __kern_packet *fpkt_chain, uint32_t flags)
3916 {
3917 #pragma unused (flags)
3918 struct nx_netif *nif = nifna->nifna_netif;
3919 struct nexus_adapter *na = &nifna->nifna_up;
3920 int err;
3921
3922 if (NETIF_IS_COMPAT(nif)) {
3923 struct mbuf *m_chain;
3924 mbuf_svc_class_t sc;
3925
3926 m_chain = nx_netif_filter_pkt_to_mbuf_chain(nifna,
3927 fpkt_chain, NETIF_CONVERT_TX);
3928 if (m_chain == NULL) {
3929 return ENOMEM;
3930 }
3931 /*
3932 * All packets in the chain have the same service class.
3933 * If the sc is missing or invalid, a valid value will be
3934 * returned.
3935 */
3936 sc = mbuf_get_service_class(m_chain);
3937 err = nx_netif_filter_tx_processed_mbuf_enqueue(nifna,
3938 sc, m_chain);
3939 } else {
3940 struct __kern_packet *pkt_chain;
3941 kern_packet_svc_class_t sc;
3942
3943 pkt_chain = nx_netif_filter_pkt_to_pkt_chain(nifna,
3944 fpkt_chain, NETIF_CONVERT_TX);
3945 if (pkt_chain == NULL) {
3946 return ENOMEM;
3947 }
3948 /*
3949 * All packets in the chain have the same service class.
3950 * If the sc is missing or invalid, a valid value will be
3951 * returned.
3952 */
3953 sc = kern_packet_get_service_class(SK_PKT2PH(pkt_chain));
3954 err = nx_netif_filter_tx_processed_pkt_enqueue(nifna,
3955 sc, pkt_chain);
3956 }
3957 /* Tell driver to resume dequeuing */
3958 ifnet_start(na->na_ifp);
3959 return err;
3960 }
3961
3962 void
3963 nx_netif_vp_region_params_adjust(struct nexus_adapter *na,
3964 struct skmem_region_params *srp)
3965 {
3966 #pragma unused(na, srp)
3967 return;
3968 }
3969
/* returns true if the starter thread is utilized */
3971 static bool
3972 netif_use_starter_thread(struct ifnet *ifp, uint32_t flags)
3973 {
3974 #if (DEVELOPMENT || DEBUG)
3975 if (__improbable(nx_netif_force_ifnet_start != 0)) {
3976 ifnet_start(ifp);
3977 return true;
3978 }
#endif /* DEVELOPMENT || DEBUG */
	/*
	 * Use the starter thread in any of the following cases:
	 * - the interface is not skywalk-native
	 * - the interface is attached to a virtual driver (ipsec, utun)
	 * - TBR is enabled
	 * - the delayed start mechanism is in use
	 * - the remaining stack space is not enough for the driver
	 * - the caller is in the rx workloop context
	 * - the caller is in the flowswitch path doing ARP resolution
	 * - the caller requires the starter thread (stack usage)
	 */
3991 if (!SKYWALK_NATIVE(ifp) || NA(ifp) == NULL ||
3992 !NA_IS_ACTIVE(&NA(ifp)->nifna_up) ||
3993 ((NA(ifp)->nifna_up.na_flags & NAF_VIRTUAL_DEVICE) != 0) ||
3994 IFCQ_TBR_IS_ENABLED(ifp->if_snd) ||
3995 (ifp->if_eflags & IFEF_ENQUEUE_MULTI) ||
3996 sk_is_rx_notify_protected() ||
3997 sk_is_async_transmit_protected() ||
3998 (sk_is_sync_protected() && (flags & NETIF_XMIT_FLAG_HOST) != 0)) {
3999 DTRACE_SKYWALK2(use__starter__thread, struct ifnet *, ifp,
4000 uint32_t, flags);
4001 ifnet_start(ifp);
4002 return true;
4003 }
4004 lck_mtx_lock_spin(&ifp->if_start_lock);
4005 /* interface is flow controlled */
4006 if (__improbable(ifp->if_start_flags & IFSF_FLOW_CONTROLLED)) {
4007 lck_mtx_unlock(&ifp->if_start_lock);
4008 return true;
4009 }
4010 /* if starter thread is active, utilize it */
4011 if (ifp->if_start_active) {
4012 ifp->if_start_req++;
4013 lck_mtx_unlock(&ifp->if_start_lock);
4014 return true;
4015 }
4016 lck_mtx_unlock(&ifp->if_start_lock);
4017 /* Check remaining stack space */
	if (OSKernelStackRemaining() < NX_NETIF_MIN_DRIVER_STACK_SIZE) {
4019 ifnet_start(ifp);
4020 return true;
4021 }
4022 return false;
4023 }
4024
4025 void
4026 netif_transmit(struct ifnet *ifp, uint32_t flags)
4027 {
4028 if (netif_use_starter_thread(ifp, flags)) {
4029 return;
4030 }
4031 /*
4032 * If no longer attached, don't issue doorbell as ifp
4033 * is being destroyed; else hold an IO refcnt to
4034 * prevent the interface from being detached.
4035 */
4036 if (!ifnet_datamov_begin(ifp)) {
4037 return;
4038 }
4039 nx_netif_doorbell_internal(ifp, flags);
4040 /*
4041 * Release the IO refcnt taken above.
4042 */
4043 ifnet_datamov_end(ifp);
4044 }
4045
4046 static struct ifclassq *
4047 netif_get_default_ifcq(struct nexus_adapter *hwna)
4048 {
4049 struct nx_netif *nif;
4050 struct ifclassq *ifcq;
4051
4052 nif = NX_NETIF_PRIVATE(hwna->na_nx);
4053 if (NETIF_LLINK_ENABLED(nif)) {
4054 struct netif_qset *qset;
4055
4056 /*
4057 * Use the default ifcq for now.
4058 * In the future this could be chosen by the caller.
4059 */
4060 qset = nx_netif_get_default_qset_noref(nif);
4061 ASSERT(qset != NULL);
4062 ifcq = qset->nqs_ifcq;
4063 } else {
4064 ifcq = nif->nif_ifp->if_snd;
4065 }
4066 return ifcq;
4067 }
4068
4069 static errno_t
4070 netif_deq_packets(struct nexus_adapter *hwna, struct ifclassq *ifcq,
4071 uint32_t pkt_limit, uint32_t byte_limit, struct __kern_packet **head,
4072 boolean_t *pkts_pending, kern_packet_svc_class_t sc,
4073 uint32_t *pkt_cnt, uint32_t *bytes, uint8_t qset_idx)
4074 {
4075 classq_pkt_t pkt_head = CLASSQ_PKT_INITIALIZER(pkt_head);
4076 struct ifnet *ifp = hwna->na_ifp;
4077 uint32_t pkts_cnt;
4078 uint32_t bytes_cnt;
4079 errno_t rc;
4080
4081 ASSERT(ifp != NULL);
4082 ASSERT(ifp->if_output_sched_model < IFNET_SCHED_MODEL_MAX);
4083 ASSERT((pkt_limit != 0) && (byte_limit != 0));
4084
4085 if (ifcq == NULL) {
4086 ifcq = netif_get_default_ifcq(hwna);
4087 }
4088 if (ifp->if_output_sched_model == IFNET_SCHED_MODEL_DRIVER_MANAGED) {
4089 rc = ifclassq_dequeue_sc(ifcq, (mbuf_svc_class_t)sc,
4090 pkt_limit, byte_limit, &pkt_head, NULL, pkt_cnt, bytes, qset_idx);
4091 } else {
4092 rc = ifclassq_dequeue(ifcq, pkt_limit, byte_limit,
4093 &pkt_head, NULL, pkt_cnt, bytes, qset_idx);
4094 }
4095 ASSERT((rc == 0) || (rc == EAGAIN));
4096 ASSERT((pkt_head.cp_ptype == QP_PACKET) || (pkt_head.cp_kpkt == NULL));
4097
4098 ifclassq_get_len(ifcq, (mbuf_svc_class_t)sc, qset_idx,
4099 &pkts_cnt, &bytes_cnt);
4100 *pkts_pending = pkts_cnt > 0;
4101
4102 *head = pkt_head.cp_kpkt;
4103 return rc;
4104 }
4105
4106 #if SK_LOG
4107 /* Hoisted out of line to reduce kernel stack footprint */
4108 SK_LOG_ATTRIBUTE
4109 static void
4110 netif_no_ring_space_log(const struct nexus_adapter *na,
4111 const kern_channel_ring_t ring)
4112 {
4113 SK_DF(SK_VERB_SYNC | SK_VERB_TX,
4114 "no ring space: na \"%s\" [%u] "
	    "\"%s\"(kh %u kt %u kl %u | rh %u rt %u)",
4117 na->na_name, ring->ckr_ring_id,
4118 ring->ckr_name, ring->ckr_khead,
4119 ring->ckr_ktail, ring->ckr_klease,
4120 ring->ckr_rhead, ring->ckr_rtail);
4121 }
4122 #endif /* SK_LOG */
4123
4124 /*
4125 * netif refill function for rings
4126 */
4127 errno_t
4128 netif_ring_tx_refill(const kern_channel_ring_t ring, uint32_t pkt_limit,
4129 uint32_t byte_limit, boolean_t tx_doorbell_ctxt, boolean_t *pkts_pending,
4130 boolean_t canblock)
4131 {
4132 struct nexus_adapter *hwna;
4133 struct ifnet *ifp;
4134 struct __kern_packet *head = NULL;
4135 sk_protect_t protect;
4136 errno_t rc = 0;
4137 errno_t sync_err = 0;
4138 uint32_t npkts = 0, consumed = 0;
4139 uint32_t flags;
4140 slot_idx_t idx, ktail;
4141 int ring_space = 0;
4142
4143 KDBG((SK_KTRACE_NETIF_RING_TX_REFILL | DBG_FUNC_START), SK_KVA(ring));
4144
4145 VERIFY(ring != NULL);
4146 hwna = KRNA(ring);
4147 ifp = hwna->na_ifp;
4148
4149 ASSERT(hwna->na_type == NA_NETIF_DEV);
4150 ASSERT(ring->ckr_tx == NR_TX);
4151 *pkts_pending = FALSE;
4152
4153 if (__improbable(pkt_limit == 0 || byte_limit == 0)) {
4154 SK_ERR("invalid limits plim %d, blim %d",
4155 pkt_limit, byte_limit);
4156 rc = EINVAL;
4157 goto out;
4158 }
4159
4160 if (__improbable(!IF_FULLY_ATTACHED(ifp))) {
4161 SK_ERR("hwna 0x%llx ifp %s (0x%llx), interface not attached",
4162 SK_KVA(hwna), if_name(ifp), SK_KVA(ifp));
4163 rc = ENXIO;
4164 goto out;
4165 }
4166
4167 if (__improbable((ifp->if_start_flags & IFSF_FLOW_CONTROLLED) != 0)) {
4168 SK_DF(SK_VERB_SYNC | SK_VERB_TX, "hwna 0x%llx ifp %s (0x%llx), "
4169 "flow control ON", SK_KVA(hwna), if_name(ifp), SK_KVA(ifp));
4170 rc = ENXIO;
4171 goto out;
4172 }
4173
4174 /*
4175 * if the ring is busy, it means another dequeue is in
4176 * progress, so ignore this request and return success.
4177 */
4178 if (kr_enter(ring, canblock) != 0) {
4179 rc = 0;
4180 goto out;
4181 }
4182 /* mark thread with sync-in-progress flag */
4183 protect = sk_sync_protect();
4184
4185 if (__improbable(KR_DROP(ring) ||
4186 !NA_IS_ACTIVE(ring->ckr_na))) {
4187 SK_ERR("hw-kr 0x%llx stopped", SK_KVA(ring));
4188 rc = ENXIO;
4189 goto done;
4190 }
4191
4192 idx = ring->ckr_rhead;
4193 ktail = ring->ckr_ktail;
4194 /* calculate available space on tx ring */
4195 ring_space = ktail - idx;
4196 if (ring_space < 0) {
4197 ring_space += ring->ckr_num_slots;
4198 }
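	/*
	 * Worked example of the wrap-around above (hypothetical numbers):
	 * with ckr_num_slots = 1024, rhead = 1000 and ktail = 40, the
	 * difference is 40 - 1000 = -960, so ring_space = -960 + 1024 = 64
	 * free slots.
	 */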
4199 if (ring_space == 0) {
4200 struct ifclassq *ifcq;
4201
4202 /* no space in ring, driver should retry */
4203 #if SK_LOG
4204 if (__improbable((sk_verbose &
4205 (SK_VERB_SYNC | SK_VERB_TX)) != 0)) {
4206 netif_no_ring_space_log(hwna, ring);
4207 }
4208 #endif /* SK_LOG */
4209 ifcq = netif_get_default_ifcq(hwna);
4210 if (IFCQ_LEN(ifcq) != 0) {
4211 *pkts_pending = TRUE;
4212 }
		/*
		 * We ran out of space in the ring, most probably because
		 * the driver is slow to drain its TX queue. We want
		 * another doorbell to be generated as soon as the TX
		 * notify completion happens; mark this through the
		 * ckr_pending_doorbell counter. Do this regardless of
		 * whether there's any pending packet.
		 */
4221 ring->ckr_pending_doorbell++;
4222 rc = EAGAIN;
4223 goto sync_ring;
4224 }
4225
4226 if ((uint32_t)ring_space < pkt_limit) {
4227 pkt_limit = ring_space;
4228 }
4229
4230 if (tx_doorbell_ctxt &&
4231 ((hwna->na_flags & NAF_VIRTUAL_DEVICE) == 0)) {
4232 pkt_limit = MIN(pkt_limit,
4233 nx_netif_doorbell_max_dequeue);
4234 }
4235
4236 rc = netif_deq_packets(hwna, NULL, pkt_limit, byte_limit,
4237 &head, pkts_pending, ring->ckr_svc, NULL, NULL, 0);
4238
4239 /*
4240 * There's room in ring; if we haven't dequeued everything,
4241 * mark ckr_pending_doorbell for the next TX notify to issue
4242 * a TX door bell; otherwise, clear it. The next packet that
4243 * gets enqueued will trigger a door bell again.
4244 */
4245 if (*pkts_pending) {
4246 ring->ckr_pending_doorbell++;
4247 } else if (ring->ckr_pending_doorbell != 0) {
4248 ring->ckr_pending_doorbell = 0;
4249 }
4250
4251 if (rc != 0) {
		/*
		 * This is expected occasionally, as the IOSkywalkFamily
		 * errs on the side of caution and performs an extra
		 * dequeue when multiple doorbells are pending. There is
		 * nothing to dequeue; do a sync if there are slots to
		 * reclaim, else just return.
		 */
4259 SK_DF(SK_VERB_SYNC | SK_VERB_TX,
4260 "nothing to dequeue, err %d", rc);
4261
4262 if ((uint32_t)ring_space == ring->ckr_lim) {
4263 goto done;
4264 } else {
4265 goto sync_ring;
4266 }
4267 }
4268 /* move the dequeued packets to tx ring */
4269 while (head != NULL && idx != ktail) {
4270 ASSERT(npkts <= pkt_limit);
4271 struct __kern_packet *pkt = head;
4272 KR_SLOT_ATTACH_METADATA(ring, KR_KSD(ring, idx),
4273 (struct __kern_quantum *)pkt);
4274 npkts++;
4275 if (__improbable(pkt->pkt_trace_id != 0)) {
4276 KDBG(SK_KTRACE_PKT_TX_AQM | DBG_FUNC_END, pkt->pkt_trace_id);
4277 KDBG(SK_KTRACE_PKT_TX_DRV | DBG_FUNC_START, pkt->pkt_trace_id);
4278 }
4279 idx = SLOT_NEXT(idx, ring->ckr_lim);
4280 head = pkt->pkt_nextpkt;
4281 pkt->pkt_nextpkt = NULL;
4282 }
4283
4284 /*
4285 * We checked for ring space earlier so the ring should have enough
4286 * space for the entire chain.
4287 */
4288 ASSERT(head == NULL);
4289 ring->ckr_rhead = idx;
4290
4291 sync_ring:
4292 flags = NA_SYNCF_NETIF;
4293 if (ring->ckr_pending_doorbell != 0) {
4294 flags |= (NA_SYNCF_NETIF_DOORBELL | NA_SYNCF_NETIF_ASYNC);
4295 }
4296
4297 ring->ckr_khead_pre = ring->ckr_khead;
4298 sync_err = ring->ckr_na_sync(ring, kernproc, flags);
4299 if (sync_err != 0 && sync_err != EAGAIN) {
4300 SK_ERR("unexpected sync err %d", sync_err);
4301 if (rc == 0) {
4302 rc = sync_err;
4303 }
4304 goto done;
4305 }
4306 /*
4307 * Verify that the driver has detached packets from the consumed slots.
4308 */
4309 idx = ring->ckr_khead_pre;
4310 consumed = 0;
4311 while (idx != ring->ckr_khead) {
4312 struct __kern_slot_desc *ksd = KR_KSD(ring, idx);
4313
4314 consumed++;
4315 VERIFY(!KSD_VALID_METADATA(ksd));
4316 idx = SLOT_NEXT(idx, ring->ckr_lim);
4317 }
4318 ring->ckr_khead_pre = ring->ckr_khead;
4319
4320 done:
4321 sk_sync_unprotect(protect);
4322 kr_exit(ring);
4323 out:
4324 KDBG((SK_KTRACE_NETIF_RING_TX_REFILL | DBG_FUNC_END),
4325 SK_KVA(ring), rc, 0, npkts);
4326
4327 return rc;
4328 }
4329
4330 #define NQ_EWMA(old, new, decay) do { \
4331 u_int64_t _avg; \
4332 if (__probable((_avg = (old)) > 0)) \
4333 _avg = (((_avg << (decay)) - _avg) + (new)) >> (decay); \
4334 else \
4335 _avg = (new); \
4336 (old) = _avg; \
4337 } while (0)
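
/*
 * NQ_EWMA computes an exponentially weighted moving average using only
 * shifts and adds: avg += (new - avg) / 2^decay, rearranged as
 * ((avg << decay) - avg + new) >> decay to stay in integer arithmetic.
 * A worked example with decay = 3 (new sample weighted 1/8): old = 800
 * and new = 1600 gives ((800 << 3) - 800 + 1600) >> 3 = 7200 >> 3 = 900,
 * i.e. 800 + (1600 - 800) / 8.
 */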
4338
4339 static void
4340 kern_netif_increment_queue_stats(kern_netif_queue_t queue,
4341 uint32_t pkt_count, uint32_t byte_count)
4342 {
4343 struct netif_llink *llink = queue->nq_qset->nqs_llink;
4344 struct ifnet *ifp = llink->nll_nif->nif_ifp;
4345 if ((queue->nq_flags & NETIF_QUEUE_IS_RX) == 0) {
4346 atomic_add_64(&ifp->if_data.ifi_opackets, pkt_count);
4347 atomic_add_64(&ifp->if_data.ifi_obytes, byte_count);
4348 } else {
4349 atomic_add_64(&ifp->if_data.ifi_ipackets, pkt_count);
4350 atomic_add_64(&ifp->if_data.ifi_ibytes, byte_count);
4351 }
4352
4353 if (ifp->if_data_threshold != 0) {
4354 ifnet_notify_data_threshold(ifp);
4355 }
4356
4357 uint64_t now;
4358 uint64_t diff_secs;
4359 struct netif_qstats *stats = &queue->nq_stats;
4360
4361 if (nq_stat_enable == 0) {
4362 return;
4363 }
4364
4365 if (__improbable(pkt_count == 0)) {
4366 return;
4367 }
4368
4369 stats->nq_num_xfers++;
4370 stats->nq_total_bytes += byte_count;
4371 stats->nq_total_pkts += pkt_count;
4372 if (pkt_count > stats->nq_max_pkts) {
4373 stats->nq_max_pkts = pkt_count;
4374 }
4375 if (stats->nq_min_pkts == 0 ||
4376 pkt_count < stats->nq_min_pkts) {
4377 stats->nq_min_pkts = pkt_count;
4378 }
4379
4380 now = net_uptime();
4381 if (__probable(queue->nq_accumulate_start != 0)) {
4382 diff_secs = now - queue->nq_accumulate_start;
4383 if (diff_secs >= nq_accumulate_interval) {
4384 uint64_t bps;
4385 uint64_t pps;
4386 uint64_t pps_ma;
4387
4388 /* bytes per second */
4389 bps = queue->nq_accumulated_bytes / diff_secs;
4390 NQ_EWMA(stats->nq_bytes_ps_ma,
4391 bps, nq_transfer_decay);
4392 stats->nq_bytes_ps = bps;
4393
4394 /* pkts per second */
4395 pps = queue->nq_accumulated_pkts / diff_secs;
4396 pps_ma = stats->nq_pkts_ps_ma;
4397 NQ_EWMA(pps_ma, pps, nq_transfer_decay);
4398 stats->nq_pkts_ps_ma = (uint32_t)pps_ma;
4399 stats->nq_pkts_ps = (uint32_t)pps;
4400
4401 /* start over */
4402 queue->nq_accumulate_start = now;
4403 queue->nq_accumulated_bytes = 0;
4404 queue->nq_accumulated_pkts = 0;
4405
4406 stats->nq_min_pkts = 0;
4407 stats->nq_max_pkts = 0;
4408 }
4409 } else {
4410 queue->nq_accumulate_start = now;
4411 }
4412 queue->nq_accumulated_bytes += byte_count;
4413 queue->nq_accumulated_pkts += pkt_count;
4414 }
4415
4416 void
4417 kern_netif_queue_rx_enqueue(kern_netif_queue_t queue, kern_packet_t ph_chain,
4418 uint32_t count, uint32_t flags)
4419 {
4420 #pragma unused (count)
4421 struct netif_queue *q = queue;
4422 struct netif_llink *llink = q->nq_qset->nqs_llink;
4423 struct __kern_packet *pkt_chain = SK_PTR_ADDR_KPKT(ph_chain);
4424 bool flush = ((flags & KERN_NETIF_QUEUE_RX_ENQUEUE_FLAG_FLUSH) != 0);
4425 struct pktq *pktq = &q->nq_pktq;
4426 struct netif_stats *nifs = &llink->nll_nif->nif_stats;
4427 struct nexus_pkt_stats stats;
4428 sk_protect_t protect;
4429
4430 ASSERT((q->nq_flags & NETIF_QUEUE_IS_RX) != 0);
4431 if (llink->nll_state == NETIF_LLINK_STATE_DESTROYED) {
4432 int drop_cnt = 0;
4433
4434 pp_free_packet_chain(pkt_chain, &drop_cnt);
4435 STATS_ADD(nifs, NETIF_STATS_LLINK_RX_DROP_BAD_STATE, drop_cnt);
4436 return;
4437 }
4438 KPKTQ_ENQUEUE_LIST(pktq, pkt_chain);
4439 if (flush) {
4440 pkt_chain = KPKTQ_FIRST(pktq);
4441 KPKTQ_INIT(pktq);
4442
4443 protect = sk_sync_protect();
4444 netif_receive(NA(llink->nll_nif->nif_ifp), pkt_chain, &stats);
4445 sk_sync_unprotect(protect);
4446 kern_netif_increment_queue_stats(queue, (uint32_t)stats.nps_pkts,
4447 (uint32_t)stats.nps_bytes);
4448 }
4449 }
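
/*
 * A minimal sketch (hypothetical driver-side code) of using the RX
 * enqueue API: batch intermediate chains without the flush flag, then
 * set it on the final call so the accumulated queue is pushed up in one
 * netif_receive() pass; `rxq' and the packet-handle chains come from the
 * driver's qset setup and its completed RX descriptors:
 *
 *	kern_netif_queue_rx_enqueue(rxq, ph_chain, count, 0);
 *	...
 *	kern_netif_queue_rx_enqueue(rxq, last_chain, last_count,
 *	    KERN_NETIF_QUEUE_RX_ENQUEUE_FLAG_FLUSH);
 */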
4450
4451 errno_t
4452 kern_netif_queue_tx_dequeue(kern_netif_queue_t queue, uint32_t pkt_limit,
4453 uint32_t byte_limit, boolean_t *pending, kern_packet_t *ph_chain)
4454 {
4455 struct netif_queue *q = queue;
4456 struct netif_llink *llink = q->nq_qset->nqs_llink;
4457 struct netif_stats *nifs = &llink->nll_nif->nif_stats;
4458 struct nexus_adapter *hwna;
4459 struct __kern_packet *pkt_chain = NULL;
4460 uint32_t bytes = 0, pkt_cnt = 0;
4461 errno_t rc;
4462
4463 ASSERT((q->nq_flags & NETIF_QUEUE_IS_RX) == 0);
4464 if (llink->nll_state == NETIF_LLINK_STATE_DESTROYED) {
4465 STATS_INC(nifs, NETIF_STATS_LLINK_AQM_DEQ_BAD_STATE);
4466 return ENXIO;
4467 }
4468 hwna = &NA(llink->nll_nif->nif_ifp)->nifna_up;
4469
4470 if (((hwna->na_flags & NAF_VIRTUAL_DEVICE) == 0) &&
4471 sk_is_tx_notify_protected()) {
4472 pkt_limit = MIN(pkt_limit, nx_netif_doorbell_max_dequeue);
4473 }
4474 rc = netif_deq_packets(hwna, q->nq_qset->nqs_ifcq, pkt_limit,
4475 byte_limit, &pkt_chain, pending, q->nq_svc, &pkt_cnt, &bytes,
4476 q->nq_qset->nqs_idx);
4477
4478 if (pkt_cnt > 0) {
4479 kern_netif_increment_queue_stats(queue, pkt_cnt, bytes);
4480 }
4481 if (pkt_chain != NULL) {
4482 *ph_chain = SK_PKT2PH(pkt_chain);
4483 }
4484 return rc;
4485 }
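
/*
 * A minimal sketch (hypothetical driver-side code) of a TX doorbell
 * handler built on the dequeue API: drain up to a budget, hand the chain
 * to hardware, and re-arm if packets remain queued; `txq' and the budget
 * values are the driver's own:
 *
 *	boolean_t pending = FALSE;
 *	kern_packet_t ph_chain = 0;
 *
 *	if (kern_netif_queue_tx_dequeue(txq, budget_pkts, budget_bytes,
 *	    &pending, &ph_chain) == 0 && ph_chain != 0) {
 *		// transmit the chain
 *	}
 *	if (pending) {
 *		// schedule another dequeue pass
 *	}
 */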
4486
4487 errno_t
kern_netif_qset_tx_queue_len(kern_netif_qset_t qset, uint32_t svc,
    uint32_t *pkts_cnt, uint32_t *bytes_cnt)
4490 {
4491 VERIFY(qset != NULL);
4492 VERIFY(pkts_cnt != NULL);
4493 VERIFY(bytes_cnt != NULL);
4494
4495 return ifclassq_get_len(qset->nqs_ifcq, svc, qset->nqs_idx, pkts_cnt,
4496 bytes_cnt);
4497 }
4498
4499 void
4500 kern_netif_set_qset_combined(kern_netif_qset_t qset)
4501 {
4502 VERIFY(qset != NULL);
4503 VERIFY(qset->nqs_ifcq != NULL);
4504
4505 ifclassq_set_grp_combined(qset->nqs_ifcq, qset->nqs_idx);
4506 }
4507
4508 void
4509 kern_netif_set_qset_separate(kern_netif_qset_t qset)
4510 {
4511 VERIFY(qset != NULL);
4512 VERIFY(qset->nqs_ifcq != NULL);
4513
4514 ifclassq_set_grp_separated(qset->nqs_ifcq, qset->nqs_idx);
4515 }
4516
4517 errno_t
4518 kern_nexus_netif_llink_add(struct kern_nexus *nx,
4519 struct kern_nexus_netif_llink_init *llink_init)
4520 {
4521 errno_t err;
4522 struct nx_netif *nif;
4523 struct netif_llink *llink;
4524 struct netif_stats *nifs;
4525
4526 VERIFY(nx != NULL);
4527 VERIFY(llink_init != NULL);
4528 VERIFY((nx->nx_flags & NXF_ATTACHED) != 0);
4529
4530 nif = NX_NETIF_PRIVATE(nx);
4531 nifs = &nif->nif_stats;
4532
4533 err = nx_netif_validate_llink_config(llink_init, false);
4534 if (err != 0) {
4535 SK_ERR("Invalid llink init params");
4536 STATS_INC(nifs, NETIF_STATS_LLINK_ADD_BAD_PARAMS);
4537 return err;
4538 }
4539
4540 err = nx_netif_llink_add(nif, llink_init, &llink);
4541 return err;
4542 }
4543
4544 errno_t
4545 kern_nexus_netif_llink_remove(struct kern_nexus *nx,
4546 kern_nexus_netif_llink_id_t llink_id)
4547 {
4548 struct nx_netif *nif;
4549
4550 VERIFY(nx != NULL);
4551 VERIFY((nx->nx_flags & NXF_ATTACHED) != 0);
4552
4553 nif = NX_NETIF_PRIVATE(nx);
4554 return nx_netif_llink_remove(nif, llink_id);
4555 }
4556
4557 errno_t
4558 kern_netif_queue_get_service_class(kern_netif_queue_t queue,
4559 kern_packet_svc_class_t *svc)
4560 {
4561 *svc = queue->nq_svc;
4562 return 0;
4563 }
4564