1 /*
2 * Copyright (c) 2015-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 /*
30 * The netif nexus domain has two domain providers: native and compat, with
31 * the latter being the default provider of this domain. The compat provider
32 * has special handlers for NXCFG_CMD_ATTACH and NXCFG_CMD_DETACH, etc.
33 *
34 * A netif nexus instance can be in a native or compat mode; in either case,
35 * it is associated with two instances of a nexus_adapter structure, and allows
36 * at most two channels opened to the nexus. The two adapters correspond to
37 * the host and device ports, respectively.
38 *
39 * By itself, a netif nexus isn't associated with a network interface. The
40 * association happens by attaching a network interface to the nexus instance.
41 * A channel can only be successfully opened to a netif nexus after it has an
42 * interface attached to it.
43 *
44 * During an attach, the interface is marked as Skywalk-capable, and its ifnet
45 * structure refers to the attached netif nexus adapter via its if_na field.
46 * The nexus also holds a reference to the interface on its na_ifp field. Note
47 * that attaching to a netif_compat nexus does not alter the input/output data
48 * path, nor does it remove any of the interface's hardware offload flags. It
49 * merely associates the interface and netif nexus together.
50 *
51 * During a detach, the above references are dropped and the fields are cleared;
52 * the interface is also marked as non-Skywalk-capable. This detach can happen
53 * explicitly via a command down the nexus, or implicitly when the nexus goes
54 * away (assuming there's no channel opened to it.)
55 *
56 * A userland channel can be opened to a netif nexus in the usual way via
57 * ch_open(), assuming the nexus provider is set up to allow access for the
58 * userland process (either by binding the nexus port to a PID, etc., or by
59 * creating the nexus in anonymous mode.)
60 *
61 * Alternatively, a kernel channel can also be opened to it by some kernel
62 * subsystem, via ch_open_special(), e.g. by the flowswitch. Kernel channels
63 * don't have any task mapping created, and the flag CHANF_KERNEL is used to
64 * indicate that.
65 *
66 * Opening a channel to the host port of a native or compat netif causes the
67 * ifnet output path to be redirected to nx_netif_host_transmit(). We also,
68 * at present, disable any hardware offload features.
69 *
70 * Opening a channel to the device port of a compat netif causes the ifnet
71 * input path to be redirected to nx_netif_compat_receive(). This is specific
72 * to the compat variant, as the native variant's RX path already goes to
73 * the native netif.
74 *
75 * During channel close, we restore the original I/O callbacks, as well as the
76 * interface's offload flags.
77 */
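/*
 * In terms of the handlers in this file, a typical life cycle looks roughly
 * like the following (sketch only; these entry points are reached through
 * the nexus/channel infrastructure rather than called directly):
 *
 *	nx_netif_ctl_attach()      <- NXCFG_CMD_ATTACH via nx_netif_prov_config()
 *	nx_netif_dom_connect()     <- ch_open() / ch_open_special()
 *	nx_netif_na_txsync() / nx_netif_na_rxsync() while the channel is active
 *	nx_netif_dom_disconnect()  <- channel close
 *	nx_netif_ctl_detach()      <- NXCFG_CMD_DETACH, or the nexus destructor
 */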
78
79 #include <skywalk/os_skywalk_private.h>
80 #include <skywalk/nexus/netif/nx_netif.h>
81 #include <skywalk/nexus/upipe/nx_user_pipe.h>
82 #include <skywalk/nexus/flowswitch/nx_flowswitch.h>
83 #include <sys/kdebug.h>
84 #include <sys/sdt.h>
85 #include <os/refcnt.h>
86 #include <libkern/OSDebug.h>
87
88 #define NX_NETIF_MAXRINGS NX_MAX_NUM_RING_PAIR
89 #define NX_NETIF_MINSLOTS 2 /* XXX same as above */
90 #define NX_NETIF_MAXSLOTS NX_MAX_NUM_SLOT_PER_RING /* max # of slots */
91 #define NX_NETIF_TXRINGSIZE 512 /* default TX ring size */
92 #define NX_NETIF_RXRINGSIZE 1024 /* default RX ring size */
93 #define NX_NETIF_BUFSIZE (2 * 1024) /* default buffer size */
94 #define NX_NETIF_MINBUFSIZE (128) /* min buffer size */
95 #define NX_NETIF_MAXBUFSIZE (16 * 1024) /* max buffer size */
96
97 /*
98 * TODO: [email protected] -- minimum buflets for now; we will need to
99 * have a way to adjust this based on the underlying interface's
100 * parameters, e.g. jumbo MTU, large segment offload, etc.
101 */
102 #define NX_NETIF_UMD_SIZE _USER_PACKET_SIZE(BUFLETS_MIN)
103 #define NX_NETIF_KMD_SIZE _KERN_PACKET_SIZE(BUFLETS_MIN)
104
105 /*
106 * minimum stack space required for IOSkywalkFamily and Driver execution.
107 */
108 #if XNU_TARGET_OS_OSX
109 #define NX_NETIF_MIN_DRIVER_STACK_SIZE (kernel_stack_size >> 1)
110 #else /* !XNU_TARGET_OS_OSX */
111 #define NX_NETIF_MIN_DRIVER_STACK_SIZE (kernel_stack_size >> 2)
112 #endif /* XNU_TARGET_OS_OSX */
113
114 static void nx_netif_dom_init(struct nxdom *);
115 static void nx_netif_dom_terminate(struct nxdom *);
116 static void nx_netif_dom_fini(struct nxdom *);
117 static int nx_netif_prov_params_adjust(
118 const struct kern_nexus_domain_provider *, const struct nxprov_params *,
119 struct nxprov_adjusted_params *);
120
121 static int nx_netif_dom_bind_port(struct kern_nexus *, nexus_port_t *,
122 struct nxbind *, void *);
123 static int nx_netif_dom_unbind_port(struct kern_nexus *, nexus_port_t);
124 static int nx_netif_dom_connect(struct kern_nexus_domain_provider *,
125 struct kern_nexus *, struct kern_channel *, struct chreq *,
126 struct kern_channel *, struct nxbind *, struct proc *);
127 static void nx_netif_dom_disconnect(struct kern_nexus_domain_provider *,
128 struct kern_nexus *, struct kern_channel *);
129 static void nx_netif_dom_defunct(struct kern_nexus_domain_provider *,
130 struct kern_nexus *, struct kern_channel *, struct proc *);
131 static void nx_netif_dom_defunct_finalize(struct kern_nexus_domain_provider *,
132 struct kern_nexus *, struct kern_channel *, boolean_t);
133
134 static void nx_netif_doorbell(struct ifnet *);
135 static int nx_netif_na_txsync(struct __kern_channel_ring *, struct proc *,
136 uint32_t);
137 static int nx_netif_na_rxsync(struct __kern_channel_ring *, struct proc *,
138 uint32_t);
139 static void nx_netif_na_dtor(struct nexus_adapter *na);
140 static int nx_netif_na_notify_tx(struct __kern_channel_ring *, struct proc *,
141 uint32_t);
142 static int nx_netif_na_notify_rx(struct __kern_channel_ring *, struct proc *,
143 uint32_t);
144 static int nx_netif_na_activate(struct nexus_adapter *, na_activate_mode_t);
145
146 static int nx_netif_ctl(struct kern_nexus *, nxcfg_cmd_t, void *,
147 struct proc *);
148 static int nx_netif_ctl_attach(struct kern_nexus *, struct nx_spec_req *,
149 struct proc *);
150 static int nx_netif_ctl_detach(struct kern_nexus *, struct nx_spec_req *);
151 static int nx_netif_attach(struct kern_nexus *, struct ifnet *);
152 static void nx_netif_flags_init(struct nx_netif *);
153 static void nx_netif_flags_fini(struct nx_netif *);
154 static int nx_netif_na_channel_event_notify(struct nexus_adapter *,
155 struct __kern_packet *, struct __kern_channel_event *, uint16_t);
156 static void nx_netif_capabilities_fini(struct nx_netif *);
157 static errno_t nx_netif_interface_advisory_notify(void *,
158 const struct ifnet_interface_advisory *);
159
160 struct nxdom nx_netif_dom_s = {
161 .nxdom_prov_head =
162 STAILQ_HEAD_INITIALIZER(nx_netif_dom_s.nxdom_prov_head),
163 .nxdom_type = NEXUS_TYPE_NET_IF,
164 .nxdom_md_type = NEXUS_META_TYPE_PACKET,
165 .nxdom_md_subtype = NEXUS_META_SUBTYPE_RAW,
166 .nxdom_name = "netif",
167 .nxdom_ports = {
168 .nb_def = 2,
169 .nb_min = 2,
170 .nb_max = NX_NETIF_MAXPORTS,
171 },
172 .nxdom_tx_rings = {
173 .nb_def = 1,
174 .nb_min = 1,
175 .nb_max = NX_NETIF_MAXRINGS,
176 },
177 .nxdom_rx_rings = {
178 .nb_def = 1,
179 .nb_min = 1,
180 .nb_max = NX_NETIF_MAXRINGS,
181 },
182 .nxdom_tx_slots = {
183 .nb_def = NX_NETIF_TXRINGSIZE,
184 .nb_min = NX_NETIF_MINSLOTS,
185 .nb_max = NX_NETIF_MAXSLOTS,
186 },
187 .nxdom_rx_slots = {
188 .nb_def = NX_NETIF_RXRINGSIZE,
189 .nb_min = NX_NETIF_MINSLOTS,
190 .nb_max = NX_NETIF_MAXSLOTS,
191 },
192 .nxdom_buf_size = {
193 .nb_def = NX_NETIF_BUFSIZE,
194 .nb_min = NX_NETIF_MINBUFSIZE,
195 .nb_max = NX_NETIF_MAXBUFSIZE,
196 },
197 .nxdom_meta_size = {
198 .nb_def = NX_NETIF_UMD_SIZE,
199 .nb_min = NX_NETIF_UMD_SIZE,
200 .nb_max = NX_METADATA_USR_MAX_SZ,
201 },
202 .nxdom_stats_size = {
203 .nb_def = 0,
204 .nb_min = 0,
205 .nb_max = NX_STATS_MAX_SZ,
206 },
207 .nxdom_pipes = {
208 .nb_def = 0,
209 .nb_min = 0,
210 .nb_max = NX_UPIPE_MAXPIPES,
211 },
212 .nxdom_flowadv_max = {
213 .nb_def = 0,
214 .nb_min = 0,
215 .nb_max = NX_FLOWADV_MAX,
216 },
217 .nxdom_nexusadv_size = {
218 .nb_def = 0,
219 .nb_min = 0,
220 .nb_max = NX_NEXUSADV_MAX_SZ,
221 },
222 .nxdom_capabilities = {
223 .nb_def = NXPCAP_USER_CHANNEL,
224 .nb_min = 0,
225 .nb_max = NXPCAP_USER_CHANNEL,
226 },
227 .nxdom_qmap = {
228 .nb_def = NEXUS_QMAP_TYPE_DEFAULT,
229 .nb_min = NEXUS_QMAP_TYPE_DEFAULT,
230 .nb_max = NEXUS_QMAP_TYPE_WMM,
231 },
232 .nxdom_max_frags = {
233 .nb_def = NX_PBUF_FRAGS_DEFAULT,
234 .nb_min = NX_PBUF_FRAGS_MIN,
235 .nb_max = NX_PBUF_FRAGS_MAX,
236 },
237 .nxdom_init = nx_netif_dom_init,
238 .nxdom_terminate = nx_netif_dom_terminate,
239 .nxdom_fini = nx_netif_dom_fini,
240 .nxdom_find_port = NULL,
241 .nxdom_port_is_reserved = NULL,
242 .nxdom_bind_port = nx_netif_dom_bind_port,
243 .nxdom_unbind_port = nx_netif_dom_unbind_port,
244 .nxdom_connect = nx_netif_dom_connect,
245 .nxdom_disconnect = nx_netif_dom_disconnect,
246 .nxdom_defunct = nx_netif_dom_defunct,
247 .nxdom_defunct_finalize = nx_netif_dom_defunct_finalize,
248 };
249
250 struct kern_nexus_domain_provider nx_netif_prov_s = {
251 .nxdom_prov_name = NEXUS_PROVIDER_NET_IF,
252 /*
253 * Don't install this as the default domain provider, i.e.
254 * NXDOMPROVF_DEFAULT flag not set; we want netif_compat
255 * provider to be the one handling userland-issued requests
256 * coming down thru nxprov_create() instead.
257 */
258 .nxdom_prov_flags = 0,
259 .nxdom_prov_cb = {
260 .dp_cb_init = nx_netif_prov_init,
261 .dp_cb_fini = nx_netif_prov_fini,
262 .dp_cb_params = nx_netif_prov_params,
263 .dp_cb_mem_new = nx_netif_prov_mem_new,
264 .dp_cb_config = nx_netif_prov_config,
265 .dp_cb_nx_ctor = nx_netif_prov_nx_ctor,
266 .dp_cb_nx_dtor = nx_netif_prov_nx_dtor,
267 .dp_cb_nx_mem_info = nx_netif_prov_nx_mem_info,
268 .dp_cb_nx_mib_get = nx_netif_prov_nx_mib_get,
269 .dp_cb_nx_stop = nx_netif_prov_nx_stop,
270 },
271 };
272
273 struct nexus_ifnet_ops na_netif_ops = {
274 .ni_finalize = na_netif_finalize,
275 .ni_reap = nx_netif_reap,
276 .ni_dequeue = nx_netif_native_tx_dequeue,
277 .ni_get_len = nx_netif_native_tx_get_len
278 };
279
280 #define NX_NETIF_DOORBELL_MAX_DEQUEUE 64
281 uint32_t nx_netif_doorbell_max_dequeue = NX_NETIF_DOORBELL_MAX_DEQUEUE;
282
283 SYSCTL_EXTENSIBLE_NODE(_kern_skywalk, OID_AUTO, netif,
284 CTLFLAG_RW | CTLFLAG_LOCKED, 0, "Skywalk network interface");
285 #if (DEVELOPMENT || DEBUG)
286 SYSCTL_STRING(_kern_skywalk_netif, OID_AUTO, sk_ll_prefix,
287 CTLFLAG_RW | CTLFLAG_LOCKED, sk_ll_prefix, sizeof(sk_ll_prefix),
288 "ifname prefix for enabling low latency support");
289 static uint32_t nx_netif_force_ifnet_start = 0;
290 SYSCTL_UINT(_kern_skywalk_netif, OID_AUTO, force_ifnet_start,
291 CTLFLAG_RW | CTLFLAG_LOCKED, &nx_netif_force_ifnet_start, 0,
292 "always use ifnet starter thread");
293 SYSCTL_UINT(_kern_skywalk_netif, OID_AUTO, doorbell_max_dequeue,
294 CTLFLAG_RW | CTLFLAG_LOCKED, &nx_netif_doorbell_max_dequeue,
295 NX_NETIF_DOORBELL_MAX_DEQUEUE,
296 "max packets to dequeue in doorbell context");
297 #endif /* !DEVELOPMENT && !DEBUG */
298
299 static ZONE_DECLARE(na_netif_zone, SKMEM_ZONE_PREFIX ".na.netif",
300 sizeof(struct nexus_netif_adapter), ZC_ZFREE_CLEARMEM);
301
302 static ZONE_DECLARE(nx_netif_zone, SKMEM_ZONE_PREFIX ".nx.netif",
303 sizeof(struct nx_netif), ZC_ZFREE_CLEARMEM);
304
305 #define SKMEM_TAG_NETIF_MIT "com.apple.skywalk.netif.mit"
306 static kern_allocation_name_t skmem_tag_netif_mit;
307
308 #define SKMEM_TAG_NETIF_FILTER "com.apple.skywalk.netif.filter"
309 kern_allocation_name_t skmem_tag_netif_filter;
310
311 #define SKMEM_TAG_NETIF_FLOW "com.apple.skywalk.netif.flow"
312 kern_allocation_name_t skmem_tag_netif_flow;
313
314 #define SKMEM_TAG_NETIF_AGENT_FLOW "com.apple.skywalk.netif.agent_flow"
315 kern_allocation_name_t skmem_tag_netif_agent_flow;
316
317 #define SKMEM_TAG_NETIF_LLINK "com.apple.skywalk.netif.llink"
318 kern_allocation_name_t skmem_tag_netif_llink;
319
320 #define SKMEM_TAG_NETIF_QSET "com.apple.skywalk.netif.qset"
321 kern_allocation_name_t skmem_tag_netif_qset;
322
323 #define SKMEM_TAG_NETIF_LLINK_INFO "com.apple.skywalk.netif.llink_info"
324 kern_allocation_name_t skmem_tag_netif_llink_info;
325
326 static void
327 nx_netif_dom_init(struct nxdom *nxdom)
328 {
329 SK_LOCK_ASSERT_HELD();
330 ASSERT(!(nxdom->nxdom_flags & NEXUSDOMF_INITIALIZED));
331
332 _CASSERT(NEXUS_PORT_NET_IF_DEV == 0);
333 _CASSERT(NEXUS_PORT_NET_IF_HOST == 1);
334 _CASSERT(NEXUS_PORT_NET_IF_CLIENT == 2);
335 _CASSERT(SK_NETIF_MIT_FORCE_OFF < SK_NETIF_MIT_FORCE_SIMPLE);
336 _CASSERT(SK_NETIF_MIT_FORCE_SIMPLE < SK_NETIF_MIT_FORCE_ADVANCED);
337 _CASSERT(SK_NETIF_MIT_FORCE_ADVANCED < SK_NETIF_MIT_AUTO);
338 _CASSERT(SK_NETIF_MIT_AUTO == SK_NETIF_MIT_MAX);
339
340 (void) nxdom_prov_add(nxdom, &nx_netif_prov_s);
341
342 ASSERT(skmem_tag_netif_mit == NULL);
343 skmem_tag_netif_mit =
344 kern_allocation_name_allocate(SKMEM_TAG_NETIF_MIT, 0);
345 ASSERT(skmem_tag_netif_mit != NULL);
346
347 ASSERT(skmem_tag_netif_filter == NULL);
348 skmem_tag_netif_filter =
349 kern_allocation_name_allocate(SKMEM_TAG_NETIF_FILTER, 0);
350 ASSERT(skmem_tag_netif_filter != NULL);
351
352 ASSERT(skmem_tag_netif_flow == NULL);
353 skmem_tag_netif_flow =
354 kern_allocation_name_allocate(SKMEM_TAG_NETIF_FLOW, 0);
355 ASSERT(skmem_tag_netif_flow != NULL);
356
357 ASSERT(skmem_tag_netif_agent_flow == NULL);
358 skmem_tag_netif_agent_flow =
359 kern_allocation_name_allocate(SKMEM_TAG_NETIF_AGENT_FLOW, 0);
360 ASSERT(skmem_tag_netif_agent_flow != NULL);
361
362 ASSERT(skmem_tag_netif_llink == NULL);
363 skmem_tag_netif_llink =
364 kern_allocation_name_allocate(SKMEM_TAG_NETIF_LLINK, 0);
365 ASSERT(skmem_tag_netif_llink != NULL);
366
367 ASSERT(skmem_tag_netif_qset == NULL);
368 skmem_tag_netif_qset =
369 kern_allocation_name_allocate(SKMEM_TAG_NETIF_QSET, 0);
370 ASSERT(skmem_tag_netif_qset != NULL);
371
372 ASSERT(skmem_tag_netif_llink_info == NULL);
373 skmem_tag_netif_llink_info =
374 kern_allocation_name_allocate(SKMEM_TAG_NETIF_LLINK_INFO, 0);
375 ASSERT(skmem_tag_netif_llink_info != NULL);
376
377 nx_netif_compat_init(nxdom);
378
379 ASSERT(nxdom_prov_default[nxdom->nxdom_type] != NULL &&
380 strcmp(nxdom_prov_default[nxdom->nxdom_type]->nxdom_prov_name,
381 NEXUS_PROVIDER_NET_IF_COMPAT) == 0);
382
383 netif_gso_init();
384 nx_netif_llink_module_init();
385 }
386
387 static void
388 nx_netif_dom_terminate(struct nxdom *nxdom)
389 {
390 struct kern_nexus_domain_provider *nxdom_prov, *tnxdp;
391
392 SK_LOCK_ASSERT_HELD();
393
394 nx_netif_llink_module_fini();
395 netif_gso_fini();
396 nx_netif_compat_fini();
397
398 if (skmem_tag_netif_llink_info != NULL) {
399 kern_allocation_name_release(skmem_tag_netif_llink_info);
400 skmem_tag_netif_llink_info = NULL;
401 }
402 if (skmem_tag_netif_qset != NULL) {
403 kern_allocation_name_release(skmem_tag_netif_qset);
404 skmem_tag_netif_qset = NULL;
405 }
406 if (skmem_tag_netif_llink != NULL) {
407 kern_allocation_name_release(skmem_tag_netif_llink);
408 skmem_tag_netif_llink = NULL;
409 }
410 if (skmem_tag_netif_agent_flow != NULL) {
411 kern_allocation_name_release(skmem_tag_netif_agent_flow);
412 skmem_tag_netif_agent_flow = NULL;
413 }
414 if (skmem_tag_netif_flow != NULL) {
415 kern_allocation_name_release(skmem_tag_netif_flow);
416 skmem_tag_netif_flow = NULL;
417 }
418 if (skmem_tag_netif_filter != NULL) {
419 kern_allocation_name_release(skmem_tag_netif_filter);
420 skmem_tag_netif_filter = NULL;
421 }
422 if (skmem_tag_netif_mit != NULL) {
423 kern_allocation_name_release(skmem_tag_netif_mit);
424 skmem_tag_netif_mit = NULL;
425 }
426
427 STAILQ_FOREACH_SAFE(nxdom_prov, &nxdom->nxdom_prov_head,
428 nxdom_prov_link, tnxdp) {
429 (void) nxdom_prov_del(nxdom_prov);
430 }
431 }
432
433 static void
434 nx_netif_dom_fini(struct nxdom *nxdom)
435 {
436 #pragma unused(nxdom)
437 }
438
439 int
440 nx_netif_prov_init(struct kern_nexus_domain_provider *nxdom_prov)
441 {
442 #pragma unused(nxdom_prov)
443 SK_D("initializing %s", nxdom_prov->nxdom_prov_name);
444 return 0;
445 }
446
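/*
 * Ring notify callback installed by nx_netif_prov_nx_stop(); once the nexus
 * has been stopped, any straggling ring notifications simply fail with ENXIO.
 */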
447 static int
448 nx_netif_na_notify_drop(struct __kern_channel_ring *kring, struct proc *p,
449 uint32_t flags)
450 {
451 #pragma unused(kring, p, flags)
452 return ENXIO;
453 }
454
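/*
 * Quiesce the device adapter when the nexus is stopping: place all rings in
 * drop mode, atomically swap each ring's notify callback to
 * nx_netif_na_notify_drop(), and tear down any TX/RX mitigation state.
 */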
455 int
456 nx_netif_prov_nx_stop(struct kern_nexus *nx)
457 {
458 uint32_t r;
459 struct nexus_adapter *na = nx_port_get_na(nx, NEXUS_PORT_NET_IF_DEV);
460 struct nexus_netif_adapter *nifna = (struct nexus_netif_adapter *)na;
461
462 SK_LOCK_ASSERT_HELD();
463 ASSERT(nx != NULL);
464
465 /* place all rings in drop mode */
466 na_kr_drop(na, TRUE);
467
468 /* ensure global visibility */
469 membar_sync();
470
471 /* reset all TX notify callbacks */
472 for (r = 0; r < na_get_nrings(na, NR_TX); r++) {
473 while (!atomic_test_set_ptr(&na->na_tx_rings[r].ckr_na_notify,
474 ptrauth_nop_cast(void *, na->na_tx_rings[r].ckr_na_notify),
475 ptrauth_nop_cast(void *, &nx_netif_na_notify_drop))) {
476 ;
477 }
478 membar_sync();
479 if (nifna->nifna_tx_mit != NULL) {
480 nx_netif_mit_cleanup(&nifna->nifna_tx_mit[r]);
481 }
482 }
483 if (nifna->nifna_tx_mit != NULL) {
484 skn_free_type_array(tx, struct nx_netif_mit,
485 na_get_nrings(na, NR_TX), nifna->nifna_tx_mit);
486 nifna->nifna_tx_mit = NULL;
487 }
488
489 /* reset all RX notify callbacks */
490 for (r = 0; r < na_get_nrings(na, NR_RX); r++) {
491 while (!atomic_test_set_ptr(&na->na_rx_rings[r].ckr_na_notify,
492 ptrauth_nop_cast(void *, na->na_rx_rings[r].ckr_na_notify),
493 ptrauth_nop_cast(void *, &nx_netif_na_notify_drop))) {
494 ;
495 }
496 membar_sync();
497 if (nifna->nifna_rx_mit != NULL) {
498 nx_netif_mit_cleanup(&nifna->nifna_rx_mit[r]);
499 }
500 }
501 if (nifna->nifna_rx_mit != NULL) {
502 skn_free_type_array(rx, struct nx_netif_mit,
503 na_get_nrings(na, NR_RX), nifna->nifna_rx_mit);
504 nifna->nifna_rx_mit = NULL;
505 }
506 return 0;
507 }
508
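/*
 * Shrink the default compat ring sizes for interfaces that don't need large
 * rings: auxiliary cellular, Wi-Fi (AP, AWDL, infrastructure) and USB
 * Ethernet.  The sizes come from the sk_netif_compat_* tunables.
 */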
509 static inline void
510 nx_netif_compat_adjust_ring_size(struct nxprov_adjusted_params *adj,
511 ifnet_t ifp)
512 {
513 if (IFNET_IS_CELLULAR(ifp) && (ifp->if_unit != 0)) {
514 *(adj->adj_rx_slots) = sk_netif_compat_aux_cell_rx_ring_sz;
515 *(adj->adj_tx_slots) = sk_netif_compat_aux_cell_tx_ring_sz;
516 } else if (IFNET_IS_WIFI(ifp)) {
517 if (ifp->if_name[0] == 'a' && ifp->if_name[1] == 'p' &&
518 ifp->if_name[2] == '\0') {
519 /* Wi-Fi Access Point */
520 *(adj->adj_rx_slots) = sk_netif_compat_wap_rx_ring_sz;
521 *(adj->adj_tx_slots) = sk_netif_compat_wap_tx_ring_sz;
522 } else if (ifp->if_eflags & IFEF_AWDL) {
523 /* AWDL */
524 *(adj->adj_rx_slots) = sk_netif_compat_awdl_rx_ring_sz;
525 *(adj->adj_tx_slots) = sk_netif_compat_awdl_tx_ring_sz;
526 } else {
527 /* Wi-Fi infrastructure */
528 *(adj->adj_rx_slots) = sk_netif_compat_wif_rx_ring_sz;
529 *(adj->adj_tx_slots) = sk_netif_compat_wif_tx_ring_sz;
530 }
531 } else if (IFNET_IS_ETHERNET(ifp)) {
532 #if !XNU_TARGET_OS_OSX
533 /*
534 * On non-macOS platforms, treat all compat Ethernet
535 * interfaces as USB Ethernet with reduced ring sizes.
536 */
537 *(adj->adj_rx_slots) = sk_netif_compat_usb_eth_rx_ring_sz;
538 *(adj->adj_tx_slots) = sk_netif_compat_usb_eth_tx_ring_sz;
539 #else /* XNU_TARGET_OS_OSX */
540 if (ifp->if_subfamily == IFNET_SUBFAMILY_USB) {
541 *(adj->adj_rx_slots) =
542 sk_netif_compat_usb_eth_rx_ring_sz;
543 *(adj->adj_tx_slots) =
544 sk_netif_compat_usb_eth_tx_ring_sz;
545 }
546 #endif /* XNU_TARGET_OS_OSX */
547 }
548 }
549
550 static int
551 nx_netif_prov_params_adjust(const struct kern_nexus_domain_provider *nxdom_prov,
552 const struct nxprov_params *nxp, struct nxprov_adjusted_params *adj)
553 {
554 /*
555 * for netif compat adjust the following parameters for memory
556 * optimization:
557 * - change the size of buffer object to 128 bytes.
558 * - don't allocate rx ring for host port and tx ring for dev port.
559 * - for cellular interfaces other than pdp_ip0 reduce the ring size.
560 * Assumption here is that pdp_ip0 is always used as the data
561 * interface.
562 * - reduce the ring size for AWDL interface.
563 * - reduce the ring size for USB ethernet interface.
564 */
565 if (strcmp(nxdom_prov->nxdom_prov_name,
566 NEXUS_PROVIDER_NET_IF_COMPAT) == 0) {
567 /*
568 * Leave the parameters default if userspace access may be
569 * needed. We can't use skywalk_direct_allowed() here because
570 * the drivers have not attached yet.
571 */
572 if (skywalk_netif_direct_enabled()) {
573 goto done;
574 }
575
576 *(adj->adj_buf_size) = NETIF_COMPAT_BUF_SIZE;
577 *(adj->adj_tx_rings) = 1;
578 if (IF_INDEX_IN_RANGE(nxp->nxp_ifindex)) {
579 ifnet_t ifp;
580 ifnet_head_lock_shared();
581 ifp = ifindex2ifnet[nxp->nxp_ifindex];
582 ifnet_head_done();
583 VERIFY(ifp != NULL);
584 nx_netif_compat_adjust_ring_size(adj, ifp);
585 }
586 if (adj->adj_buf_srp->srp_r_seg_size == 0) {
587 adj->adj_buf_srp->srp_r_seg_size =
588 skmem_usr_buf_seg_size;
589 }
590 } else { /* netif native */
591 if (nxp->nxp_flags & NXPF_NETIF_LLINK) {
592 *(adj->adj_tx_slots) = NX_NETIF_MINSLOTS;
593 *(adj->adj_rx_slots) = NX_NETIF_MINSLOTS;
594 }
595 /*
596  * Add an extra TX and RX ring for the host port. Note that if the
597 * nexus isn't configured to use the same pbufpool for all of
598 * its ports, we'd end up allocating extra here.
599 * Not a big deal since that case isn't the default.
600 */
601 *(adj->adj_tx_rings) += 1;
602 *(adj->adj_rx_rings) += 1;
603
604 if ((*(adj->adj_buf_size) < PKT_MAX_PROTO_HEADER_SIZE)) {
605 SK_ERR("buf size too small, min (%d)",
606 PKT_MAX_PROTO_HEADER_SIZE);
607 return EINVAL;
608 }
609 _CASSERT(sizeof(struct __kern_netif_intf_advisory) ==
610 NX_INTF_ADV_SIZE);
611 *(adj->adj_nexusadv_size) = sizeof(struct netif_nexus_advisory);
612 }
613 done:
614 /* enable magazines layer for metadata */
615 *(adj->adj_md_magazines) = TRUE;
616 return 0;
617 }
618
619 int
620 nx_netif_prov_params(struct kern_nexus_domain_provider *nxdom_prov,
621 const uint32_t req, const struct nxprov_params *nxp0,
622 struct nxprov_params *nxp, struct skmem_region_params srp[SKMEM_REGIONS])
623 {
624 struct nxdom *nxdom = nxdom_prov->nxdom_prov_dom;
625
626 return nxprov_params_adjust(nxdom_prov, req, nxp0, nxp, srp,
627 nxdom, nxdom, nxdom, nx_netif_prov_params_adjust);
628 }
629
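/*
 * Create the memory arena backing this adapter.  The host and device ports
 * share the same packet buffer pool, and userspace-mappable regions are
 * created only if direct userspace access is allowed for this interface.
 */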
630 int
631 nx_netif_prov_mem_new(struct kern_nexus_domain_provider *nxdom_prov,
632 struct kern_nexus *nx, struct nexus_adapter *na)
633 {
634 #pragma unused(nxdom_prov)
635 int err = 0;
636 boolean_t pp_truncated_buf = FALSE;
637 boolean_t allow_direct;
638 boolean_t kernel_only;
639
640 SK_DF(SK_VERB_NETIF,
641 "nx 0x%llx (\"%s\":\"%s\") na \"%s\" (0x%llx)", SK_KVA(nx),
642 NX_DOM(nx)->nxdom_name, nxdom_prov->nxdom_prov_name, na->na_name,
643 SK_KVA(na));
644
645 ASSERT(na->na_arena == NULL);
646 if ((na->na_type == NA_NETIF_COMPAT_DEV) ||
647 (na->na_type == NA_NETIF_COMPAT_HOST)) {
648 pp_truncated_buf = TRUE;
649 }
650 /*
651 * We do this check to determine whether to create the extra
652 * regions needed for userspace access. This is per interface.
653 * NX_USER_CHANNEL_PROV() is systemwide so it can't be used.
654 */
655 allow_direct = skywalk_netif_direct_allowed(na->na_name);
656
657 /*
658 * Both ports (host and dev) share the same packet buffer pool;
659 * the first time a port gets opened will allocate the pp that
660 * gets stored in the nexus, which will then be used by any
661 * subsequent opens.
662 */
663 kernel_only = !allow_direct || !NX_USER_CHANNEL_PROV(nx);
664 na->na_arena = skmem_arena_create_for_nexus(na,
665 NX_PROV(nx)->nxprov_region_params, &nx->nx_tx_pp,
666 &nx->nx_rx_pp, pp_truncated_buf, kernel_only, &nx->nx_adv, &err);
667 ASSERT(na->na_arena != NULL || err != 0);
668 ASSERT(nx->nx_tx_pp == NULL || (nx->nx_tx_pp->pp_md_type ==
669 NX_DOM(nx)->nxdom_md_type && nx->nx_tx_pp->pp_md_subtype ==
670 NX_DOM(nx)->nxdom_md_subtype));
671
672 return err;
673 }
674
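/*
 * Handler for NXCFG_CMD_GET_LLINK_INFO: copy out an nx_llink_info_req
 * describing each logical link and its queue sets.  The nexus must be
 * operating in logical link mode.
 */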
675 SK_NO_INLINE_ATTRIBUTE
676 static int
677 nx_netif_get_llink_info(struct sockopt *sopt, struct kern_nexus *nx)
678 {
679 struct nx_llink_info_req *nlir = NULL;
680 struct nx_netif *nif;
681 struct netif_llink *llink;
682 uint16_t llink_cnt;
683 size_t len, user_len;
684 int err, i;
685
686 nif = NX_NETIF_PRIVATE(nx);
687 if (!NETIF_LLINK_ENABLED(nif)) {
688 SK_ERR("llink mode not enabled");
689 return ENOTSUP;
690 }
691 lck_rw_lock_shared(&nif->nif_llink_lock);
692 llink_cnt = nif->nif_llink_cnt;
693 if (llink_cnt == 0) {
694 SK_ERR("zero llink cnt");
695 err = ENXIO;
696 goto done;
697 }
698 len = sizeof(*nlir) + (sizeof(struct nx_llink_info) * llink_cnt);
699 /* preserve sopt_valsize because it gets overwritten by copyin */
700 user_len = sopt->sopt_valsize;
701 if (user_len < len) {
702 SK_ERR("buffer too small");
703 err = ENOBUFS;
704 goto done;
705 }
706 nlir = sk_alloc_data(len, Z_WAITOK, skmem_tag_netif_llink_info);
707 if (nlir == NULL) {
708 SK_ERR("failed to allocate nlir");
709 err = ENOMEM;
710 goto done;
711 }
712 err = sooptcopyin(sopt, nlir, sizeof(*nlir), sizeof(*nlir));
713 if (err != 0) {
714 SK_ERR("copyin failed: %d", err);
715 goto done;
716 }
717 if (nlir->nlir_version != NETIF_LLINK_INFO_VERSION) {
718 SK_ERR("nlir version mismatch: %d != %d",
719 nlir->nlir_version, NETIF_LLINK_INFO_VERSION);
720 err = ENOTSUP;
721 goto done;
722 }
723 nlir->nlir_llink_cnt = llink_cnt;
724 i = 0;
725 STAILQ_FOREACH(llink, &nif->nif_llink_list, nll_link) {
726 struct nx_llink_info *nli;
727 struct netif_qset *qset;
728 uint16_t qset_cnt;
729 int j;
730
731 nli = &nlir->nlir_llink[i];
732 nli->nli_link_id = llink->nll_link_id;
733 nli->nli_link_id_internal = llink->nll_link_id_internal;
734 nli->nli_state = llink->nll_state;
735 nli->nli_flags = llink->nll_flags;
736
737 qset_cnt = llink->nll_qset_cnt;
738 ASSERT(qset_cnt <= NETIF_LLINK_MAX_QSETS);
739 nli->nli_qset_cnt = qset_cnt;
740
741 j = 0;
742 SLIST_FOREACH(qset, &llink->nll_qset_list, nqs_list) {
743 struct nx_qset_info *nqi;
744
745 nqi = &nli->nli_qset[j];
746 nqi->nqi_id = qset->nqs_id;
747 nqi->nqi_flags = qset->nqs_flags;
748 nqi->nqi_num_rx_queues = qset->nqs_num_rx_queues;
749 nqi->nqi_num_tx_queues = qset->nqs_num_tx_queues;
750 j++;
751 }
752 ASSERT(j == qset_cnt);
753 i++;
754 }
755 ASSERT(i == llink_cnt);
756 sopt->sopt_valsize = user_len;
757 err = sooptcopyout(sopt, nlir, len);
758 if (err != 0) {
759 SK_ERR("sooptcopyout failed: %d", err);
760 }
761 done:
762 lck_rw_unlock_shared(&nif->nif_llink_lock);
763 if (nlir != NULL) {
764 sk_free_data(nlir, len);
765 }
766 return err;
767 }
768
769 int
770 nx_netif_prov_config(struct kern_nexus_domain_provider *nxdom_prov,
771 struct kern_nexus *nx, struct nx_cfg_req *ncr, int sopt_dir,
772 struct proc *p, kauth_cred_t cred)
773 {
774 #pragma unused(nxdom_prov)
775 struct sockopt sopt;
776 int err = 0;
777
778 SK_LOCK_ASSERT_HELD();
779
780 /* proceed only if the client possesses netif entitlement */
781 if ((err = skywalk_priv_check_cred(p, cred,
782 PRIV_SKYWALK_REGISTER_NET_IF)) != 0) {
783 goto done;
784 }
785
786 if (ncr->nc_req == USER_ADDR_NULL) {
787 err = EINVAL;
788 goto done;
789 }
790
791 /* to make life easier for handling copies */
792 bzero(&sopt, sizeof(sopt));
793 sopt.sopt_dir = sopt_dir;
794 sopt.sopt_val = ncr->nc_req;
795 sopt.sopt_valsize = ncr->nc_req_len;
796 sopt.sopt_p = p;
797
798 switch (ncr->nc_cmd) {
799 case NXCFG_CMD_ATTACH:
800 case NXCFG_CMD_DETACH: {
801 struct nx_spec_req nsr;
802
803 bzero(&nsr, sizeof(nsr));
804 err = sooptcopyin(&sopt, &nsr, sizeof(nsr), sizeof(nsr));
805 if (err != 0) {
806 goto done;
807 }
808
809 /*
810 * Null-terminate in case this has an interface name;
811 * the union is already large enough for uuid_t.
812 */
813 nsr.nsr_name[sizeof(nsr.nsr_name) - 1] = '\0';
814 if (p != kernproc) {
815 nsr.nsr_flags &= NXSPECREQ_MASK;
816 }
817
818 err = nx_netif_ctl(nx, ncr->nc_cmd, &nsr, p);
819 if (err != 0) {
820 goto done;
821 }
822
823 /* XXX: [email protected] -- can this copyout fail? */
824 (void) sooptcopyout(&sopt, &nsr, sizeof(nsr));
825 break;
826 }
827 case NXCFG_CMD_FLOW_ADD:
828 case NXCFG_CMD_FLOW_DEL: {
829 _CASSERT(offsetof(struct nx_flow_req, _nfr_kernel_field_end) ==
830 offsetof(struct nx_flow_req, _nfr_common_field_end));
831 struct nx_flow_req nfr;
832
833 bzero(&nfr, sizeof(nfr));
834 err = sooptcopyin(&sopt, &nfr, sizeof(nfr), sizeof(nfr));
835 if (err != 0) {
836 goto done;
837 }
838
839 err = nx_netif_ctl(nx, ncr->nc_cmd, &nfr, p);
840 if (err != 0) {
841 goto done;
842 }
843
844 /* XXX: [email protected] -- can this copyout fail? */
845 (void) sooptcopyout(&sopt, &nfr, sizeof(nfr));
846 break;
847 }
848 case NXCFG_CMD_GET_LLINK_INFO: {
849 err = nx_netif_get_llink_info(&sopt, nx);
850 break;
851 }
852 default:
853 err = EINVAL;
854 goto done;
855 }
856 done:
857 SK_DF(err ? SK_VERB_ERROR : SK_VERB_NETIF,
858 "nexus 0x%llx (%s) cmd %d err %d", SK_KVA(nx),
859 NX_DOM_PROV(nx)->nxdom_prov_name, ncr->nc_cmd, err);
860 return err;
861 }
862
863 void
864 nx_netif_prov_fini(struct kern_nexus_domain_provider *nxdom_prov)
865 {
866 #pragma unused(nxdom_prov)
867 SK_D("destroying %s", nxdom_prov->nxdom_prov_name);
868 }
869
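/*
 * Nexus constructor: allocate the nx_netif instance and, when userspace
 * channels are allowed and an advisory size is configured, the shared
 * nexus advisory region as well.
 */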
870 int
871 nx_netif_prov_nx_ctor(struct kern_nexus *nx)
872 {
873 struct nx_netif *n;
874 char name[64];
875 int error;
876
877 SK_LOCK_ASSERT_HELD();
878 ASSERT(nx->nx_arg == NULL);
879
880 SK_D("nexus 0x%llx (%s)", SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name);
881
882 nx->nx_arg = nx_netif_alloc(Z_WAITOK);
883 n = NX_NETIF_PRIVATE(nx);
884 if (NX_USER_CHANNEL_PROV(nx) &&
885 NX_PROV(nx)->nxprov_params->nxp_nexusadv_size != 0) {
886 (void) snprintf(name, sizeof(name), "netif_%llu", nx->nx_id);
887 error = nx_advisory_alloc(nx, name,
888 &NX_PROV(nx)->nxprov_region_params[SKMEM_REGION_NEXUSADV],
889 NEXUS_ADVISORY_TYPE_NETIF);
890 if (error != 0) {
891 nx_netif_free(n);
892 return error;
893 }
894 }
895 n->nif_nx = nx;
896 SK_D("create new netif 0x%llx for nexus 0x%llx",
897 SK_KVA(NX_NETIF_PRIVATE(nx)), SK_KVA(nx));
898 return 0;
899 }
900
901 void
902 nx_netif_prov_nx_dtor(struct kern_nexus *nx)
903 {
904 struct nx_netif *n = NX_NETIF_PRIVATE(nx);
905
906 SK_LOCK_ASSERT_HELD();
907
908 SK_D("nexus 0x%llx (%s) netif 0x%llx", SK_KVA(nx),
909 NX_DOM_PROV(nx)->nxdom_prov_name, SK_KVA(n));
910
911 /*
912 * XXX
913 * detach should be done separately to be symmetrical with attach.
914 */
915 nx_advisory_free(nx);
916 if (nx_port_get_na(nx, NEXUS_PORT_NET_IF_DEV) != NULL) {
917 /* we're called by nx_detach(), so this cannot fail */
918 int err = nx_netif_ctl_detach(nx, NULL);
919 VERIFY(err == 0);
920 }
921 if (n->nif_dev_nxb != NULL) {
922 nxb_free(n->nif_dev_nxb);
923 n->nif_dev_nxb = NULL;
924 }
925 if (n->nif_host_nxb != NULL) {
926 nxb_free(n->nif_host_nxb);
927 n->nif_host_nxb = NULL;
928 }
929 SK_DF(SK_VERB_NETIF, "marking netif 0x%llx as free", SK_KVA(n));
930 nx_netif_free(n);
931 nx->nx_arg = NULL;
932 }
933
934 int
935 nx_netif_prov_nx_mem_info(struct kern_nexus *nx, struct kern_pbufpool **tpp,
936 struct kern_pbufpool **rpp)
937 {
938 ASSERT(nx->nx_tx_pp != NULL);
939 ASSERT(nx->nx_rx_pp != NULL);
940
941 if (tpp != NULL) {
942 *tpp = nx->nx_tx_pp;
943 }
944 if (rpp != NULL) {
945 *rpp = nx->nx_rx_pp;
946 }
947
948 return 0;
949 }
950
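/*
 * Fill out a single sk_stats_net_if record for this nexus; the space
 * required is returned regardless of whether the caller's buffer fits.
 */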
951 static size_t
952 __netif_mib_get_stats(struct kern_nexus *nx, void *out, size_t len)
953 {
954 struct nx_netif *nif = NX_NETIF_PRIVATE(nx);
955 struct ifnet *ifp = nif->nif_ifp;
956 struct sk_stats_net_if *sns = out;
957 size_t actual_space = sizeof(struct sk_stats_net_if);
958
959 if (out != NULL && actual_space <= len) {
960 uuid_copy(sns->sns_nx_uuid, nx->nx_uuid);
961 if (ifp != NULL) {
962 (void) strlcpy(sns->sns_if_name, if_name(ifp), IFNAMSIZ);
963 }
964 sns->sns_nifs = nif->nif_stats;
965 }
966
967 return actual_space;
968 }
969
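/*
 * Fill out one nx_llink_info record per logical link (including its queue
 * set summaries); returns the space required, or 0 if logical link mode
 * is disabled.
 */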
970 static size_t
971 __netif_mib_get_llinks(struct kern_nexus *nx, void *out, size_t len)
972 {
973 struct nx_netif *nif = NX_NETIF_PRIVATE(nx);
974 struct nx_llink_info *nli_list = out;
975 size_t actual_space = 0;
976 if (NETIF_LLINK_ENABLED(nif)) {
977 lck_rw_lock_shared(&nif->nif_llink_lock);
978 actual_space += nif->nif_llink_cnt * sizeof(struct nx_llink_info);
979
980 if (out != NULL && actual_space <= len) {
981 struct netif_llink *llink;
982 int i = 0;
983 STAILQ_FOREACH(llink, &nif->nif_llink_list, nll_link) {
984 struct nx_llink_info *nli;
985 struct netif_qset *qset;
986 uint16_t qset_cnt;
987 int j;
988
989 nli = &nli_list[i];
990 uuid_copy(nli->nli_netif_uuid, nx->nx_uuid);
991 nli->nli_link_id = llink->nll_link_id;
992 nli->nli_link_id_internal = llink->nll_link_id_internal;
993 nli->nli_state = llink->nll_state;
994 nli->nli_flags = llink->nll_flags;
995
996 qset_cnt = llink->nll_qset_cnt;
997 ASSERT(qset_cnt <= NETIF_LLINK_MAX_QSETS);
998 nli->nli_qset_cnt = qset_cnt;
999
1000 j = 0;
1001 SLIST_FOREACH(qset, &llink->nll_qset_list, nqs_list) {
1002 struct nx_qset_info *nqi;
1003
1004 nqi = &nli->nli_qset[j];
1005 nqi->nqi_id = qset->nqs_id;
1006 nqi->nqi_flags = qset->nqs_flags;
1007 nqi->nqi_num_rx_queues = qset->nqs_num_rx_queues;
1008 nqi->nqi_num_tx_queues = qset->nqs_num_tx_queues;
1009 j++;
1010 }
1011 ASSERT(j == qset_cnt);
1012 i++;
1013 }
1014 ASSERT(i == nif->nif_llink_cnt);
1015 }
1016 lck_rw_unlock_shared(&nif->nif_llink_lock);
1017 }
1018
1019 return actual_space;
1020 }
1021
1022 size_t
1023 nx_netif_prov_nx_mib_get(struct kern_nexus *nx, struct nexus_mib_filter *filter,
1024 void *out, size_t len, struct proc *p)
1025 {
1026 #pragma unused(p)
1027 size_t ret;
1028
1029 if ((filter->nmf_bitmap & NXMIB_FILTER_NX_UUID) &&
1030 (uuid_compare(filter->nmf_nx_uuid, nx->nx_uuid)) != 0) {
1031 return 0;
1032 }
1033
1034 switch (filter->nmf_type) {
1035 case NXMIB_NETIF_STATS:
1036 ret = __netif_mib_get_stats(nx, out, len);
1037 break;
1038 case NXMIB_LLINK_LIST:
1039 ret = __netif_mib_get_llinks(nx, out, len);
1040 break;
1041 default:
1042 ret = 0;
1043 break;
1044 }
1045 return ret;
1046 }
1047
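/*
 * Reserve and bind a client nexus port (NEXUS_PORT_NET_IF_CLIENT and up),
 * either at the requested port or at the first available one.
 */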
1048 static int
1049 nx_netif_dom_bind_port(struct kern_nexus *nx, nexus_port_t *nx_port,
1050 struct nxbind *nxb, void *info)
1051 {
1052 struct nx_netif *nif = NX_NETIF_PRIVATE(nx);
1053 nexus_port_t first, last, port;
1054 int error;
1055
1056 ASSERT(nx_port != NULL);
1057 ASSERT(nxb != NULL);
1058
1059 port = *nx_port;
1060
1061 /*
1062 * If port is:
1063 * != NEXUS_PORT_ANY: attempt to bind to the specified port
1064 * == NEXUS_PORT_ANY: find an available port, bind to it, and
1065 * return back the assigned port.
1066 */
1067 first = NEXUS_PORT_NET_IF_CLIENT;
1068 last = NXDOM_MAX(NX_DOM(nx), ports);
1069 ASSERT(first <= last);
1070
1071 NETIF_WLOCK(nif);
1072
1073 if (__improbable(first == last)) {
1074 error = ENOMEM;
1075 } else if (port != NEXUS_PORT_ANY) {
1076 error = nx_port_bind_info(nx, port, nxb, info);
1077 SK_DF(SK_VERB_NETIF, "port %d, bind err %d", port, error);
1078 } else {
1079 error = nx_port_find(nx, first, last - 1, &port);
1080 ASSERT(error != 0 || (port >= first && port < last));
1081 if (error == 0) {
1082 error = nx_port_bind_info(nx, port, nxb, info);
1083 SK_DF(SK_VERB_NETIF, "found port %d, bind err %d",
1084 port, error);
1085 }
1086 }
1087 NETIF_WUNLOCK(nif);
1088
1089 ASSERT(*nx_port == NEXUS_PORT_ANY || *nx_port == port);
1090 if (error == 0) {
1091 *nx_port = port;
1092 }
1093
1094 SK_DF(error ? SK_VERB_ERROR : SK_VERB_NETIF,
1095 "+++ netif 0x%llx nx_port %d, total %u active %u (err %d)",
1096 SK_KVA(nif), (int)*nx_port, NX_NETIF_MAXPORTS,
1097 nx->nx_active_ports, error);
1098
1099 return error;
1100 }
1101
1102 static int
1103 nx_netif_dom_unbind_port(struct kern_nexus *nx, nexus_port_t nx_port)
1104 {
1105 struct nx_netif *nif = NX_NETIF_PRIVATE(nx);
1106 int error = 0;
1107
1108 ASSERT(nx_port != NEXUS_PORT_ANY);
1109
1110 NETIF_WLOCK(nif);
1111 error = nx_port_unbind(nx, nx_port);
1112 NETIF_WUNLOCK(nif);
1113
1114 return error;
1115 }
1116
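/*
 * Connect a channel to the netif nexus.  Device-port opens may not request
 * CHMODE_HOST; host-port opens from userland are implicitly switched to
 * host mode.  Kernel channels (CHANF_KERNEL) connect via na_connect_spec().
 */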
1117 static int
1118 nx_netif_dom_connect(struct kern_nexus_domain_provider *nxdom_prov,
1119 struct kern_nexus *nx, struct kern_channel *ch, struct chreq *chr,
1120 struct kern_channel *ch0, struct nxbind *nxb, struct proc *p)
1121 {
1122 #pragma unused(nxdom_prov)
1123 int err = 0;
1124
1125 SK_LOCK_ASSERT_HELD();
1126
1127 ASSERT(NX_DOM_PROV(nx) == nxdom_prov);
1128 ASSERT(nx->nx_prov->nxprov_params->nxp_type ==
1129 nxdom_prov->nxdom_prov_dom->nxdom_type &&
1130 nx->nx_prov->nxprov_params->nxp_type == NEXUS_TYPE_NET_IF);
1131 ASSERT(!(ch->ch_flags & CHANF_HOST));
1132
1133 switch (chr->cr_port) {
1134 case NEXUS_PORT_NET_IF_DEV:
1135 if (chr->cr_mode & CHMODE_HOST) {
1136 err = EINVAL;
1137 goto done;
1138 }
1139 break;
1140
1141 case NEXUS_PORT_NET_IF_HOST:
1142 if (!(chr->cr_mode & CHMODE_HOST)) {
1143 if (ch->ch_flags & CHANF_KERNEL) {
1144 err = EINVAL;
1145 goto done;
1146 }
1147 chr->cr_mode |= CHMODE_HOST;
1148 }
1149 /*
1150 * This channel is exclusively opened to the host
1151 * rings; don't notify the external provider.
1152 */
1153 atomic_bitset_32(&ch->ch_flags, CHANF_HOST | CHANF_EXT_SKIP);
1154 break;
1155
1156 default:
1157 /*
1158 * This channel is shared between netif and user process;
1159 * don't notify the external provider.
1160 */
1161 atomic_bitset_32(&ch->ch_flags, CHANF_EXT_SKIP);
1162 break;
1163 }
1164
1165 chr->cr_ring_set = RING_SET_DEFAULT;
1166 chr->cr_real_endpoint = chr->cr_endpoint = CH_ENDPOINT_NET_IF;
1167 (void) snprintf(chr->cr_name, sizeof(chr->cr_name), "netif:%llu:%.*s",
1168 nx->nx_id, (int)nx->nx_prov->nxprov_params->nxp_namelen,
1169 nx->nx_prov->nxprov_params->nxp_name);
1170
1171 if (ch->ch_flags & CHANF_KERNEL) {
1172 err = na_connect_spec(nx, ch, chr, p);
1173 } else {
1174 err = na_connect(nx, ch, chr, ch0, nxb, p);
1175 }
1176
1177 if (err == 0) {
1178 /*
1179 * Mark the kernel slot descriptor region as busy; this
1180 * prevents it from being torn-down at channel defunct
1181 * time, as the (external) nexus owner may be calling
1182 * KPIs that require accessing the slots.
1183 */
1184 skmem_arena_nexus_sd_set_noidle(
1185 skmem_arena_nexus(ch->ch_na->na_arena), 1);
1186 }
1187
1188 done:
1189 return err;
1190 }
1191
1192 static void
1193 nx_netif_dom_disconnect(struct kern_nexus_domain_provider *nxdom_prov,
1194 struct kern_nexus *nx, struct kern_channel *ch)
1195 {
1196 #pragma unused(nxdom_prov)
1197 SK_LOCK_ASSERT_HELD();
1198
1199 SK_D("channel 0x%llx -!- nexus 0x%llx (%s:\"%s\":%u:%d)", SK_KVA(ch),
1200 SK_KVA(nx), nxdom_prov->nxdom_prov_name, ch->ch_na->na_name,
1201 ch->ch_info->cinfo_nx_port, (int)ch->ch_info->cinfo_ch_ring_id);
1202
1203 /*
1204 * Release busy assertion held earlier in nx_netif_dom_connect();
1205 * this allows for the final arena teardown to succeed.
1206 */
1207 skmem_arena_nexus_sd_set_noidle(
1208 skmem_arena_nexus(ch->ch_na->na_arena), -1);
1209
1210 if (ch->ch_flags & CHANF_KERNEL) {
1211 na_disconnect_spec(nx, ch);
1212 } else {
1213 na_disconnect(nx, ch);
1214 }
1215 }
1216
1217 static void
1218 nx_netif_dom_defunct(struct kern_nexus_domain_provider *nxdom_prov,
1219 struct kern_nexus *nx, struct kern_channel *ch, struct proc *p)
1220 {
1221 #pragma unused(nxdom_prov, nx)
1222 LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);
1223 ASSERT(!(ch->ch_flags & CHANF_KERNEL));
1224 ASSERT(ch->ch_na->na_type == NA_NETIF_DEV ||
1225 ch->ch_na->na_type == NA_NETIF_HOST ||
1226 ch->ch_na->na_type == NA_NETIF_COMPAT_DEV ||
1227 ch->ch_na->na_type == NA_NETIF_COMPAT_HOST);
1228
1229 na_ch_rings_defunct(ch, p);
1230 }
1231
1232 static void
1233 nx_netif_dom_defunct_finalize(struct kern_nexus_domain_provider *nxdom_prov,
1234 struct kern_nexus *nx, struct kern_channel *ch, boolean_t locked)
1235 {
1236 #pragma unused(nxdom_prov)
1237 if (!locked) {
1238 SK_LOCK_ASSERT_NOTHELD();
1239 SK_LOCK();
1240 LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_NOTOWNED);
1241 } else {
1242 SK_LOCK_ASSERT_HELD();
1243 LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);
1244 }
1245
1246 ASSERT(ch->ch_na->na_type == NA_NETIF_DEV ||
1247 ch->ch_na->na_type == NA_NETIF_HOST ||
1248 ch->ch_na->na_type == NA_NETIF_COMPAT_DEV ||
1249 ch->ch_na->na_type == NA_NETIF_COMPAT_HOST);
1250
1251 na_defunct(nx, ch, ch->ch_na, locked);
1252
1253 SK_D("%s(%d): ch 0x%llx -/- nx 0x%llx (%s:\"%s\":%u:%d)",
1254 ch->ch_name, ch->ch_pid, SK_KVA(ch), SK_KVA(nx),
1255 nxdom_prov->nxdom_prov_name, ch->ch_na->na_name,
1256 ch->ch_info->cinfo_nx_port, (int)ch->ch_info->cinfo_ch_ring_id);
1257
1258 if (!locked) {
1259 LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_NOTOWNED);
1260 SK_UNLOCK();
1261 } else {
1262 LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);
1263 SK_LOCK_ASSERT_HELD();
1264 }
1265 }
1266
1267 struct nexus_netif_adapter *
1268 na_netif_alloc(zalloc_flags_t how)
1269 {
1270 _CASSERT(offsetof(struct nexus_netif_adapter, nifna_up) == 0);
1271
1272 return zalloc_flags(na_netif_zone, how | Z_ZERO);
1273 }
1274
1275 void
1276 na_netif_free(struct nexus_adapter *na)
1277 {
1278 struct nexus_netif_adapter *nifna = (struct nexus_netif_adapter *)na;
1279
1280 SK_LOCK_ASSERT_HELD();
1281 SK_DF(SK_VERB_MEM, "nifna 0x%llx FREE", SK_KVA(nifna));
1282
1283 ASSERT(na->na_refcount == 0);
1284 ASSERT(nifna->nifna_tx_mit == NULL);
1285 ASSERT(nifna->nifna_rx_mit == NULL);
1286 bzero(nifna, sizeof(*nifna));
1287
1288 zfree(na_netif_zone, nifna);
1289 }
1290
1291 /* Process NXCFG_CMD_ATTACH */
1292 SK_NO_INLINE_ATTRIBUTE
1293 static int
1294 nx_netif_ctl_attach(struct kern_nexus *nx, struct nx_spec_req *nsr,
1295 struct proc *p)
1296 {
1297 struct nx_netif *n = NX_NETIF_PRIVATE(nx);
1298 struct ifnet *ifp = NULL;
1299 boolean_t compat;
1300 int err = 0;
1301
1302 SK_LOCK_ASSERT_HELD();
1303
1304 ASSERT(NX_DOM(nx)->nxdom_type == NEXUS_TYPE_NET_IF);
1305 compat = (strcmp(NX_DOM_PROV(nx)->nxdom_prov_name,
1306 NEXUS_PROVIDER_NET_IF_COMPAT) == 0);
1307
1308 uuid_clear(nsr->nsr_if_uuid);
1309 /*
1310 * The netif accepts either an interface name or a pointer to
1311 * an ifnet, but never a UUID.
1312 */
1313 if (nsr->nsr_flags & NXSPECREQ_UUID) {
1314 err = EINVAL;
1315 goto done;
1316 }
1317 if (nsr->nsr_flags & NXSPECREQ_IFP) {
1318 if (p != kernproc || (ifp = nsr->nsr_ifp) == NULL) {
1319 err = EINVAL;
1320 goto done;
1321 }
1322 } else if ((ifp = ifunit_ref(nsr->nsr_name)) == NULL) {
1323 err = ENXIO;
1324 goto done;
1325 }
1326
1327 if ((compat && SKYWALK_NATIVE(ifp)) ||
1328 (!compat && !SKYWALK_NATIVE(ifp))) {
1329 /* native driver for netif; non-native for netif_compat */
1330 err = ENODEV;
1331 } else if (ifp->if_na != NULL || !uuid_is_null(n->nif_uuid)) {
1332 err = EBUSY;
1333 } else {
1334 ASSERT(uuid_is_null(n->nif_uuid));
1335 /*
1336 * Upon success, callee will hold its own ifnet iorefcnt
1337 * as well as a retain count on the nexus adapter.
1338 */
1339 if (compat) {
1340 err = nx_netif_compat_attach(nx, ifp);
1341 } else {
1342 err = nx_netif_attach(nx, ifp);
1343 }
1344
1345 if (err == 0) {
1346 /* return the adapter UUID */
1347 uuid_generate_random(n->nif_uuid);
1348 uuid_copy(nsr->nsr_if_uuid, n->nif_uuid);
1349 #if (DEVELOPMENT || DEBUG)
1350 skoid_create(&n->nif_skoid,
1351 SKOID_SNODE(_kern_skywalk_netif), if_name(ifp),
1352 CTLFLAG_RW);
1353 #endif /* !DEVELOPMENT && !DEBUG */
1354 }
1355 }
1356 done:
1357 /* drop I/O refcnt from ifunit_ref() */
1358 if (ifp != NULL && !(nsr->nsr_flags & NXSPECREQ_IFP)) {
1359 ifnet_decr_iorefcnt(ifp);
1360 }
1361
1362 #if SK_LOG
1363 uuid_string_t uuidstr, ifuuidstr;
1364 const char *nustr;
1365 if (nsr->nsr_flags & NXSPECREQ_UUID) {
1366 nustr = sk_uuid_unparse(nsr->nsr_uuid, uuidstr);
1367 } else if (nsr->nsr_flags & NXSPECREQ_IFP) {
1368 (void) snprintf((char *)uuidstr, sizeof(uuidstr), "0x%llx",
1369 SK_KVA(nsr->nsr_ifp));
1370 nustr = uuidstr;
1371 } else {
1372 nustr = nsr->nsr_name;
1373 }
1374 SK_DF(err ? SK_VERB_ERROR : SK_VERB_NETIF,
1375 "nexus 0x%llx (%s) name/uuid \"%s\" if_uuid %s flags 0x%x err %d",
1376 SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name, nustr,
1377 sk_uuid_unparse(nsr->nsr_if_uuid, ifuuidstr), nsr->nsr_flags, err);
1378 #endif /* SK_LOG */
1379
1380 return err;
1381 }
1382
1383 /* process NXCFG_CMD_DETACH */
1384 SK_NO_INLINE_ATTRIBUTE
1385 static int
1386 nx_netif_ctl_detach(struct kern_nexus *nx, struct nx_spec_req *nsr)
1387 {
1388 struct nx_netif *nif = NX_NETIF_PRIVATE(nx);
1389 int err = 0;
1390
1391 SK_LOCK_ASSERT_HELD();
1392
1393 /*
1394 * nsr is NULL when we're called from the destructor, and it
1395  * implies that we'll detach whatever is attached.
1396 */
1397 if (nsr != NULL && uuid_is_null(nsr->nsr_if_uuid)) {
1398 err = EINVAL;
1399 } else if (nsr != NULL && uuid_compare(nsr->nsr_if_uuid,
1400 nif->nif_uuid) != 0) {
1401 err = ESRCH;
1402 } else if (nx_port_get_na(nx, NEXUS_PORT_NET_IF_DEV) == NULL) {
1403 /* nx_netif_ctl_attach() not yet done or already detached */
1404 err = ENXIO;
1405 } else if (nx->nx_ch_count != 0) {
1406 /*
1407 * There's at least a channel opened; we can't
1408 * yank the interface from underneath the nexus
1409 * since our dlil input/output handler may be
1410 * running now. Bail out and come back here
1411 * again when the nexus detaches.
1412 */
1413 err = EBUSY;
1414 } else {
1415 nx_netif_agent_fini(nif);
1416 nx_netif_capabilities_fini(nif);
1417 nx_netif_flow_fini(nif);
1418 nx_netif_filter_fini(nif);
1419 nx_netif_llink_fini(nif);
1420 nx_netif_flags_fini(nif);
1421
1422 uuid_clear(nif->nif_uuid);
1423 /* nx_netif_{compat_}attach() held both references */
1424 na_release_locked(nx_port_get_na(nx, NEXUS_PORT_NET_IF_DEV));
1425 na_release_locked(nx_port_get_na(nx, NEXUS_PORT_NET_IF_HOST));
1426 nx_port_free(nx, NEXUS_PORT_NET_IF_DEV);
1427 nx_port_free(nx, NEXUS_PORT_NET_IF_HOST);
1428
1429 nif->nif_ifp = NULL;
1430 nif->nif_netif_nxadv = NULL;
1431 #if (DEVELOPMENT || DEBUG)
1432 skoid_destroy(&nif->nif_skoid);
1433 #endif /* !DEVELOPMENT && !DEBUG */
1434 }
1435
1436 #if SK_LOG
1437 if (nsr != NULL) {
1438 uuid_string_t ifuuidstr;
1439 SK_DF(err ? SK_VERB_ERROR : SK_VERB_NETIF,
1440 "nexus 0x%llx (%s) if_uuid %s flags 0x%x err %d",
1441 SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name,
1442 sk_uuid_unparse(nsr->nsr_if_uuid, ifuuidstr),
1443 nsr->nsr_flags, err);
1444 } else {
1445 SK_DF(err ? SK_VERB_ERROR : SK_VERB_NETIF,
1446 "nexus 0x%llx (%s) err %d", SK_KVA(nx),
1447 NX_DOM_PROV(nx)->nxdom_prov_name, err);
1448 }
1449 #endif /* SK_LOG */
1450
1451 return err;
1452 }
1453
1454 /*
1455 * XXX
1456 * These checks are copied from fsw.c
1457 * There are no tests exercising this code. Do we still need this?
1458 */
1459 SK_NO_INLINE_ATTRIBUTE
1460 static int
1461 nx_netif_ctl_flow_check(struct nx_netif *nif, nxcfg_cmd_t cmd,
1462 struct proc *p, struct nx_flow_req *req)
1463 {
1464 #pragma unused(nif)
1465 boolean_t need_check;
1466 int error;
1467
1468 if (uuid_is_null(req->nfr_flow_uuid)) {
1469 return EINVAL;
1470 }
1471 req->nfr_flags &= NXFLOWREQF_MASK;
1472 req->nfr_flowadv_idx = FLOWADV_IDX_NONE;
1473
1474 if (cmd == NXCFG_CMD_FLOW_DEL) {
1475 return 0;
1476 }
1477 need_check = FALSE;
1478 if (req->nfr_epid != -1 && proc_pid(p) != req->nfr_epid) {
1479 need_check = TRUE;
1480 } else if (!uuid_is_null(req->nfr_euuid)) {
1481 uuid_t uuid;
1482
1483 /* get the UUID of the issuing process */
1484 proc_getexecutableuuid(p, uuid, sizeof(uuid));
1485
1486 /*
1487 * If this is not issued by a process for its own
1488 * executable UUID and if the process does not have
1489 * the necessary privilege, reject the request.
1490 * The logic is similar to so_set_effective_uuid().
1491 */
1492 if (uuid_compare(req->nfr_euuid, uuid) != 0) {
1493 need_check = TRUE;
1494 }
1495 }
1496 if (need_check) {
1497 kauth_cred_t cred = kauth_cred_proc_ref(p);
1498 error = priv_check_cred(cred,
1499 PRIV_NET_PRIVILEGED_SOCKET_DELEGATE, 0);
1500 kauth_cred_unref(&cred);
1501 if (error != 0) {
1502 return error;
1503 }
1504 }
1505 return 0;
1506 }
1507
1508 SK_NO_INLINE_ATTRIBUTE
1509 static int
1510 nx_netif_ctl_flow_add(struct nx_netif *nif, struct proc *p,
1511 struct nx_flow_req *req)
1512 {
1513 int err;
1514
1515 ASSERT(p != PROC_NULL);
1516 err = nx_netif_ctl_flow_check(nif, NXCFG_CMD_FLOW_ADD, p, req);
1517 if (err != 0) {
1518 return err;
1519 }
1520
1521 /* init kernel only fields */
1522 nx_flow_req_internalize(req);
1523 req->nfr_context = NULL;
1524 req->nfr_flow_stats = NULL;
1525 req->nfr_port_reservation = NULL;
1526 req->nfr_pid = proc_pid(p);
1527
1528 err = nx_netif_netagent_flow_add(nif, req);
1529 nx_flow_req_externalize(req);
1530 return err;
1531 }
1532
1533 SK_NO_INLINE_ATTRIBUTE
1534 static int
1535 nx_netif_ctl_flow_del(struct nx_netif *nif, struct proc *p,
1536 struct nx_flow_req *req)
1537 {
1538 int err;
1539
1540 err = nx_netif_ctl_flow_check(nif, NXCFG_CMD_FLOW_DEL, p, req);
1541 if (err != 0) {
1542 return err;
1543 }
1544
1545 nx_flow_req_internalize(req);
1546 req->nfr_pid = proc_pid(p);
1547
1548 err = nx_netif_netagent_flow_del(nif, req);
1549 nx_flow_req_externalize(req);
1550 return err;
1551 }
1552
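/*
 * Dispatch nexus configuration commands (attach/detach, flow add/delete)
 * issued through nx_netif_prov_config().
 */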
1553 SK_NO_INLINE_ATTRIBUTE
1554 static int
1555 nx_netif_ctl(struct kern_nexus *nx, nxcfg_cmd_t nc_cmd, void *data,
1556 struct proc *p)
1557 {
1558 struct nx_netif *nif = NX_NETIF_PRIVATE(nx);
1559 struct nx_spec_req *nsr = data;
1560 struct nx_flow_req *nfr = data;
1561 int error = 0;
1562
1563 SK_LOCK_ASSERT_HELD();
1564
1565 switch (nc_cmd) {
1566 case NXCFG_CMD_ATTACH:
1567 error = nx_netif_ctl_attach(nx, nsr, p);
1568 break;
1569
1570 case NXCFG_CMD_DETACH:
1571 error = nx_netif_ctl_detach(nx, nsr);
1572 break;
1573
1574 case NXCFG_CMD_FLOW_ADD:
1575 error = nx_netif_ctl_flow_add(nif, p, nfr);
1576 break;
1577
1578 case NXCFG_CMD_FLOW_DEL:
1579 error = nx_netif_ctl_flow_del(nif, p, nfr);
1580 break;
1581
1582 default:
1583 SK_ERR("invalid cmd %u", nc_cmd);
1584 error = EINVAL;
1585 break;
1586 }
1587 return error;
1588 }
1589
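/*
 * Doorbell for a single logical link: issue a TX notify on each of its
 * queue sets.
 */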
1590 static void
1591 nx_netif_llink_notify(struct kern_nexus *nx, struct netif_llink *llink,
1592 uint32_t flags)
1593 {
1594 #pragma unused(flags)
1595 struct netif_qset *qset;
1596
1597 SLIST_FOREACH(qset, &llink->nll_qset_list, nqs_list) {
1598 (void) nx_tx_qset_notify(nx, qset->nqs_ctx);
1599 }
1600 }
1601
1602 static void
1603 nx_netif_llink_notify_all(struct kern_nexus *nx, uint32_t flags)
1604 {
1605 struct nx_netif *nif;
1606 struct netif_llink *llink;
1607
1608 nif = NX_NETIF_PRIVATE(nx);
1609
1610 lck_rw_lock_shared(&nif->nif_llink_lock);
1611 STAILQ_FOREACH(llink, &nif->nif_llink_list, nll_link) {
1612 nx_netif_llink_notify(nx, llink, flags);
1613 }
1614 lck_rw_unlock_shared(&nif->nif_llink_lock);
1615 }
1616
1617 /*
1618 * if_start() callback for native Skywalk interfaces, registered
1619 * at ifnet_allocate_extended() time, and invoked by the ifnet
1620 * starter thread.
1621 */
1622 static void
1623 nx_netif_doorbell_internal(struct ifnet *ifp, uint32_t flags)
1624 {
1625 if (__improbable(ifp->if_na == NULL)) {
1626 return;
1627 }
1628
1629 /*
1630 * Do this only if the nexus adapter is active, i.e. a channel
1631 * has been opened to it by the module above (flowswitch, etc.)
1632 */
1633 struct nexus_adapter *hwna = &NA(ifp)->nifna_up;
1634 if (__probable(NA_IS_ACTIVE(hwna))) {
1635 struct kern_nexus *nx = hwna->na_nx;
1636
1637 /* update our work timestamp */
1638 hwna->na_work_ts = _net_uptime;
1639
1640 if (NX_LLINK_PROV(nx)) {
1641 nx_netif_llink_notify_all(nx, flags);
1642 } else {
1643 struct __kern_channel_ring *kring;
1644
1645 /* for doorbell purposes, use TX ring 0 */
1646 kring = &hwna->na_tx_rings[0];
1647
1648 /* Issue a synchronous TX doorbell on the netif device ring */
1649 kring->ckr_na_sync(kring, PROC_NULL,
1650 (NA_SYNCF_NETIF_DOORBELL | NA_SYNCF_NETIF_IFSTART));
1651 }
1652 } else {
1653 struct netif_stats *nifs =
1654 &NX_NETIF_PRIVATE(hwna->na_nx)->nif_stats;
1655 STATS_INC(nifs, NETIF_STATS_DROP_NA_INACTIVE);
1656 }
1657 }
1658
1659 static void
1660 nx_netif_doorbell(struct ifnet *ifp)
1661 {
1662 nx_netif_doorbell_internal(ifp, NETIF_XMIT_FLAG_HOST);
1663 }
1664
1665 /*
1666 * TX sync callback, called from nx_netif_doorbell() where we'd expect to
1667 * perform synchronous TX doorbell to the driver, by invoking the driver's
1668 * doorbell callback directly in the same thread context. It is also called
1669 * when the layer above performs a TX sync operation, where we might need
1670 * to do an asynchronous doorbell instead, by simply calling ifnet_start().
1671 */
1672 static int
1673 nx_netif_na_txsync(struct __kern_channel_ring *kring, struct proc *p,
1674 uint32_t flags)
1675 {
1676 #pragma unused(p)
1677 struct ifnet *ifp = KRNA(kring)->na_ifp;
1678 boolean_t sync_only;
1679 int ret = 0;
1680
1681 ASSERT(ifp != NULL);
1682
1683 SK_DF(SK_VERB_NETIF | SK_VERB_SYNC | SK_VERB_TX,
1684 "%s(%d) kr \"%s\" (0x%llx) krflags 0x%b ring %u flags 0%x",
1685 sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name,
1686 SK_KVA(kring), kring->ckr_flags, CKRF_BITS, kring->ckr_ring_id,
1687 flags);
1688
1689 if (__improbable(!IF_FULLY_ATTACHED(ifp))) {
1690 SK_ERR("kr 0x%llx ifp %s (0x%llx), interface not attached",
1691 SK_KVA(kring), if_name(ifp), SK_KVA(ifp));
1692 return ENXIO;
1693 }
1694
1695 if (__improbable((ifp->if_start_flags & IFSF_FLOW_CONTROLLED) != 0)) {
1696 SK_DF(SK_VERB_SYNC | SK_VERB_TX, "kr 0x%llx ifp %s (0x%llx), "
1697 "flow control ON", SK_KVA(kring), if_name(ifp),
1698 SK_KVA(ifp));
1699 return ENXIO;
1700 }
1701
1702 /* update our work timestamp */
1703 KRNA(kring)->na_work_ts = _net_uptime;
1704
1705 sync_only = ((flags & NA_SYNCF_SYNC_ONLY) != 0) ||
1706 !KR_KERNEL_ONLY(kring);
1707 /* regular sync (reclaim) */
1708 if ((flags & NA_SYNCF_NETIF) != 0 || __improbable(sync_only)) {
1709 ret = nx_sync_tx(kring, (flags & NA_SYNCF_FORCE_RECLAIM) ||
1710 kring->ckr_pending_intr != 0);
1711 kring->ckr_pending_intr = 0;
1712
1713 /* direct user channels do not need to use the doorbell */
1714 if (__improbable(sync_only)) {
1715 return ret;
1716 }
1717 }
1718
1719 /*
1720 * Doorbell call. Here we do doorbell explicitly if the flag is
1721 * set or implicitly if we're opened directly by a user channel.
1722 * Synchronous vs. asynchronous depending on the context.
1723 */
1724 if (__probable((flags & NA_SYNCF_NETIF_DOORBELL) != 0)) {
1725 if ((flags & NA_SYNCF_NETIF_IFSTART) != 0) {
1726 ASSERT(!(flags & NA_SYNCF_NETIF_IFSTART) ||
1727 !(flags & NA_SYNCF_NETIF_ASYNC));
1728 nx_tx_doorbell(kring, (flags & NA_SYNCF_NETIF_ASYNC));
1729 } else {
1730 ifnet_start(ifp);
1731 }
1732 }
1733
1734 return ret;
1735 }
1736
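/*
 * RX sync callback for native netif device adapters.  Unlike TX, there is
 * no doorbell to issue here; we simply refresh the work timestamp and let
 * nx_sync_rx() reclaim/advance the ring, forcing a read if an interrupt
 * is pending or NA_SYNCF_FORCE_READ is set.
 */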
1737 static int
1738 nx_netif_na_rxsync(struct __kern_channel_ring *kring, struct proc *p,
1739 uint32_t flags)
1740 {
1741 #pragma unused(p)
1742 int ret;
1743
1744 SK_DF(SK_VERB_NETIF | SK_VERB_SYNC | SK_VERB_RX,
1745 "%s(%d) kr \"%s\" (0x%llx) krflags 0x%b ring %u flags 0%x",
1746 sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name,
1747 SK_KVA(kring), kring->ckr_flags, CKRF_BITS, kring->ckr_ring_id,
1748 flags);
1749
1750 ASSERT(kring->ckr_rhead <= kring->ckr_lim);
1751
1752 /* update our work timestamp */
1753 KRNA(kring)->na_work_ts = _net_uptime;
1754
1755 ret = nx_sync_rx(kring, (flags & NA_SYNCF_FORCE_READ) ||
1756 kring->ckr_pending_intr != 0);
1757 kring->ckr_pending_intr = 0;
1758
1759 return ret;
1760 }
1761
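/*
 * Destructor shared by the device and host netif adapters.  It releases
 * the nx_netif reference taken at attach time; the ifnet I/O reference
 * held in na_ifp (or the embryonic ifnet parked in na_private) is dropped
 * by na_release_locked() on our behalf.
 */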
1762 static void
1763 nx_netif_na_dtor(struct nexus_adapter *na)
1764 {
1765 struct ifnet *ifp;
1766 struct nexus_netif_adapter *nifna = NIFNA(na);
1767
1768 SK_LOCK_ASSERT_HELD();
1769 ASSERT(na->na_type == NA_NETIF_DEV || na->na_type == NA_NETIF_HOST);
1770
1771 SK_DF(SK_VERB_NETIF, "na \"%s\" (0x%llx)", na->na_name, SK_KVA(na));
1772
1773 /*
1774 * If the finalizer callback hasn't been called for whatever
1775 * reasons, pick up the embryonic ifnet stored in na_private.
1776 * na_release_locked() will release the I/O refcnt of a
1777 * non-NULL na_ifp.
1778 */
1779 if ((ifp = na->na_ifp) == NULL) {
1780 ifp = na->na_private;
1781 }
1782 na->na_private = NULL;
1783
1784 if (nifna->nifna_netif != NULL) {
1785 nx_netif_release(nifna->nifna_netif);
1786 nifna->nifna_netif = NULL;
1787 }
1788 ASSERT(SKYWALK_NATIVE(ifp));
1789 }
1790
1791 /*
1792 * Dispatch rx/tx interrupts to the channel rings.
1793 *
1794 * The 'notify' routine depends on what the ring is attached to.
1795 * - for a channel file descriptor, do an event wakeup on the individual
1796 * waitqueue, plus one on the global one if needed (see na_notify)
1797 * - for a device port connected to a FlowSwitch, call the proper
1798 * forwarding routine; see nx_fsw_tx_hwna_notify()
1799 * or nx_fsw_rx_hwna_notify().
1800 */
1801 int
1802 nx_netif_common_intr(struct __kern_channel_ring *kring, struct proc *p,
1803 uint32_t flags, uint32_t *work_done)
1804 {
1805 struct netif_stats *nifs =
1806 &NX_NETIF_PRIVATE(KRNA(kring)->na_nx)->nif_stats;
1807 int (*notify)(struct __kern_channel_ring *kring,
1808 struct proc *, uint32_t flags);
1809 int ret;
1810
1811 KDBG((SK_KTRACE_NETIF_COMMON_INTR | DBG_FUNC_START), SK_KVA(kring));
1812
1813 SK_DF(SK_VERB_NETIF | SK_VERB_INTR |
1814 ((kring->ckr_tx == NR_RX) ? SK_VERB_RX : SK_VERB_TX),
1815 "na \"%s\" (0x%llx) kr \"%s\" (0x%llx) krflags 0x%b",
1816 KRNA(kring)->na_name, SK_KVA(KRNA(kring)), kring->ckr_name,
1817 SK_KVA(kring), kring->ckr_flags, CKRF_BITS);
1818
1819 /* update our work timestamp */
1820 KRNA(kring)->na_work_ts = _net_uptime;
1821
1822 kring->ckr_pending_intr++;
1823 if (work_done != NULL) {
1824 *work_done = 1; /* do not fire again */
1825 }
1826 /*
1827 * We can't be calling ckr_na_notify here since we could already be
1828 * intercepting it, else we'd end up recursively calling ourselves.
1829 * Use the original na_notify callback saved during na_activate, or in
1830 * the case when the module above us is the flowswitch, the notify
1831 * routine that it has installed in place of our original one.
1832 */
1833 if (__probable(!KR_DROP(kring) &&
1834 (notify = kring->ckr_netif_notify) != NULL)) {
1835 ret = notify(kring, p, flags);
1836 } else {
1837 /*
1838 * If the ring is in drop mode, pretend as if it's busy.
1839 * This allows the mitigation thread to pause for a while
1840 * before attempting again.
1841 */
1842 ret = EBUSY;
1843 }
1844 if (__improbable(ret != 0)) {
1845 switch (kring->ckr_tx) {
1846 case NR_RX:
1847 if (ret == EBUSY) {
1848 STATS_INC(nifs, NETIF_STATS_RX_IRQ_BUSY);
1849 } else if (ret == EAGAIN) {
1850 STATS_INC(nifs, NETIF_STATS_RX_IRQ_AGAIN);
1851 } else {
1852 STATS_INC(nifs, NETIF_STATS_RX_IRQ_ERR);
1853 }
1854 break;
1855
1856 case NR_TX:
1857 if (ret == EBUSY) {
1858 STATS_INC(nifs, NETIF_STATS_TX_IRQ_BUSY);
1859 } else if (ret == EAGAIN) {
1860 STATS_INC(nifs, NETIF_STATS_TX_IRQ_AGAIN);
1861 } else {
1862 STATS_INC(nifs, NETIF_STATS_TX_IRQ_ERR);
1863 }
1864 break;
1865
1866 default:
1867 break;
1868 }
1869 }
1870
1871 KDBG((SK_KTRACE_NETIF_COMMON_INTR | DBG_FUNC_END), SK_KVA(kring), ret);
1872
1873 return ret;
1874 }
1875
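/*
 * Per-ring notify callbacks installed by nx_netif_na_activate() in place
 * of the original ckr_na_notify.  Both funnel interrupts through the
 * mitigation layer; the RX variant additionally guards against recursion
 * when the driver invokes notify from within its RX sync callback.
 */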
1876 static int
1877 nx_netif_na_notify_tx(struct __kern_channel_ring *kring, struct proc *p,
1878 uint32_t flags)
1879 {
1880 return nx_netif_mit_tx_intr(kring, p, flags, NULL);
1881 }
1882
1883 static int
1884 nx_netif_na_notify_rx(struct __kern_channel_ring *kring, struct proc *p,
1885 uint32_t flags)
1886 {
1887 int ret;
1888
1889 /*
1890 * In the event the mitigation thread is disabled, protect
1891 * against recursion by detecting if we're already in the
1892 * context of an RX notify. IOSkywalkFamily may invoke the
1893 * notify callback as part of its RX sync callback.
1894 */
1895 if (__probable(!sk_is_rx_notify_protected())) {
1896 sk_protect_t protect;
1897 uint32_t work_done;
1898
1899 protect = sk_rx_notify_protect();
1900 ret = nx_netif_mit_rx_intr(kring, p, flags, &work_done);
1901 sk_sync_unprotect(protect);
1902 } else {
1903 ret = EAGAIN;
1904 }
1905
1906 return ret;
1907 }
1908
1909 void
1910 nx_netif_mit_config(struct nexus_netif_adapter *nifna,
1911 boolean_t *tx_mit, boolean_t *tx_mit_simple,
1912 boolean_t *rx_mit, boolean_t *rx_mit_simple)
1913 {
1914 struct nx_netif *nif = nifna->nifna_netif;
1915
1916 /*
1917 * TX mitigation is disabled by default, but can be
1918 * overridden via "sk_netif_tx_mit=N" boot-arg, where
1919 * N is one of SK_NETIF_MIT_FORCE_* values.
1920 */
1921 *tx_mit = *tx_mit_simple = FALSE;
1922 switch (sk_netif_tx_mit) {
1923 case SK_NETIF_MIT_FORCE_SIMPLE:
1924 *tx_mit_simple = TRUE;
1925 OS_FALLTHROUGH;
1926 case SK_NETIF_MIT_FORCE_ADVANCED:
1927 *tx_mit = TRUE;
1928 break;
1929 case SK_NETIF_MIT_FORCE_OFF:
1930 case SK_NETIF_MIT_AUTO:
1931 ASSERT(*tx_mit == FALSE);
1932 break;
1933 default:
1934 VERIFY(0);
1935 /* NOTREACHED */
1936 __builtin_unreachable();
1937 }
1938
1939 /*
1940 * RX mitigation is enabled by default only for BSD-style
1941 * virtual network interfaces, but can be overridden
1942 * via "sk_netif_rx_mit=N" boot-arg, where N is one of
1943 * SK_NETIF_MIT_FORCE_* values.
1944 */
1945 *rx_mit = *rx_mit_simple = FALSE;
1946 switch (sk_netif_rx_mit) {
1947 case SK_NETIF_MIT_FORCE_OFF:
1948 ASSERT(*rx_mit == FALSE);
1949 break;
1950 case SK_NETIF_MIT_FORCE_SIMPLE:
1951 *rx_mit_simple = TRUE;
1952 OS_FALLTHROUGH;
1953 case SK_NETIF_MIT_FORCE_ADVANCED:
1954 *rx_mit = TRUE;
1955 break;
1956 case SK_NETIF_MIT_AUTO:
1957 *rx_mit_simple = TRUE;
1958 #if !XNU_TARGET_OS_OSX
1959 /*
1960 * On non-macOS platforms, enable RX mitigation
1961 * thread only for BSD-style virtual (and regular)
1962 * interfaces, since otherwise we may run out of
1963 * stack when subjected to IPsec processing, etc.
1964 */
1965 *rx_mit = (NX_PROV(nifna->nifna_up.na_nx)->nxprov_flags &
1966 NXPROVF_VIRTUAL_DEVICE) && !NETIF_IS_LOW_LATENCY(nif);
1967 #else /* XNU_TARGET_OS_OSX */
1968 /*
1969 * On macOS platform, enable RX mitigation on all but
1970 * low-latency interfaces, since we could potentially
1971 * have filter providers, etc. Ideally this should
1972 * be detected and dealt with dynamically.
1973 */
1974 *rx_mit = !NETIF_IS_LOW_LATENCY(nif);
1975 #endif /* XNU_TARGET_OS_OSX */
1976 break;
1977 default:
1978 VERIFY(0);
1979 /* NOTREACHED */
1980 __builtin_unreachable();
1981 }
1982 }
1983
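/*
 * Activation callback for the netif device adapter.
 *
 * NA_ACTIVATE_MODE_ON: allocate per-ring mitigation state (as configured
 * by nx_netif_mit_config()), intercept the TX/RX notify callbacks, enable
 * filters and flows, mark the adapter NAF_ACTIVE, and steer the interface
 * start handler to nx_netif_doorbell().
 *
 * NA_ACTIVATE_MODE_OFF: undo the above in reverse order, waiting for any
 * in-flight if_start() to drain before resetting the start handler.
 */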
1984 static int
1985 nx_netif_na_activate(struct nexus_adapter *na, na_activate_mode_t mode)
1986 {
1987 struct nexus_netif_adapter *nifna = (struct nexus_netif_adapter *)na;
1988 boolean_t tx_mit, rx_mit, tx_mit_simple, rx_mit_simple;
1989 struct nx_netif *nif = nifna->nifna_netif;
1990 struct ifnet *ifp = na->na_ifp;
1991 int error = 0;
1992 uint32_t r;
1993
1994 ASSERT(na->na_type == NA_NETIF_DEV);
1995 ASSERT(!(na->na_flags & NAF_HOST_ONLY));
1996
1997 SK_DF(SK_VERB_NETIF, "na \"%s\" (0x%llx) %s [%s]", na->na_name,
1998 SK_KVA(na), ifp->if_xname, na_activate_mode2str(mode));
1999
2000 switch (mode) {
2001 case NA_ACTIVATE_MODE_ON:
2002 ASSERT(SKYWALK_CAPABLE(ifp));
2003
2004 nx_netif_mit_config(nifna, &tx_mit, &tx_mit_simple,
2005 &rx_mit, &rx_mit_simple);
2006
2007 /*
2008 * Init the mitigation support on all the dev TX rings.
2009 */
2010 if (tx_mit) {
2011 nifna->nifna_tx_mit =
2012 skn_alloc_type_array(tx_on, struct nx_netif_mit,
2013 na_get_nrings(na, NR_TX), Z_WAITOK,
2014 skmem_tag_netif_mit);
2015 if (nifna->nifna_tx_mit == NULL) {
2016 SK_ERR("TX mitigation allocation failed");
2017 error = ENOMEM;
2018 goto out;
2019 }
2020 } else {
2021 ASSERT(nifna->nifna_tx_mit == NULL);
2022 }
2023
2024 /*
2025 * Init the mitigation support on all the dev RX rings.
2026 */
2027 if (rx_mit) {
2028 nifna->nifna_rx_mit =
2029 skn_alloc_type_array(rx_on, struct nx_netif_mit,
2030 na_get_nrings(na, NR_RX), Z_WAITOK,
2031 skmem_tag_netif_mit);
2032 if (nifna->nifna_rx_mit == NULL) {
2033 SK_ERR("RX mitigation allocation failed");
2034 if (nifna->nifna_tx_mit != NULL) {
2035 skn_free_type_array(rx_fail,
2036 struct nx_netif_mit,
2037 na_get_nrings(na, NR_TX),
2038 nifna->nifna_tx_mit);
2039 nifna->nifna_tx_mit = NULL;
2040 }
2041 error = ENOMEM;
2042 goto out;
2043 }
2044 } else {
2045 ASSERT(nifna->nifna_rx_mit == NULL);
2046 }
2047
2048 /* intercept na_notify callback on the TX rings */
2049 for (r = 0; r < na_get_nrings(na, NR_TX); r++) {
2050 na->na_tx_rings[r].ckr_netif_notify =
2051 na->na_tx_rings[r].ckr_na_notify;
2052 na->na_tx_rings[r].ckr_na_notify =
2053 nx_netif_na_notify_tx;
2054 if (nifna->nifna_tx_mit != NULL) {
2055 nx_netif_mit_init(nif, ifp,
2056 &nifna->nifna_tx_mit[r],
2057 &na->na_tx_rings[r], tx_mit_simple);
2058 }
2059 }
2060
2061 /* intercept na_notify callback on the RX rings */
2062 for (r = 0; r < na_get_nrings(na, NR_RX); r++) {
2063 na->na_rx_rings[r].ckr_netif_notify =
2064 na->na_rx_rings[r].ckr_na_notify;
2065 na->na_rx_rings[r].ckr_na_notify =
2066 nx_netif_na_notify_rx;
2067 if (nifna->nifna_rx_mit != NULL) {
2068 nx_netif_mit_init(nif, ifp,
2069 &nifna->nifna_rx_mit[r],
2070 &na->na_rx_rings[r], rx_mit_simple);
2071 }
2072 }
2073 nx_netif_filter_enable(nif);
2074 nx_netif_flow_enable(nif);
2075 atomic_bitset_32(&na->na_flags, NAF_ACTIVE);
2076
2077 /* steer all start requests to netif; this must not fail */
2078 lck_mtx_lock(&ifp->if_start_lock);
2079 error = ifnet_set_start_handler(ifp, nx_netif_doorbell);
2080 VERIFY(error == 0);
2081 lck_mtx_unlock(&ifp->if_start_lock);
2082 break;
2083
2084 case NA_ACTIVATE_MODE_DEFUNCT:
2085 ASSERT(SKYWALK_CAPABLE(ifp));
2086 break;
2087
2088 case NA_ACTIVATE_MODE_OFF:
2089 /*
2090 * Note that here we cannot assert SKYWALK_CAPABLE()
2091 * as we're called in the destructor path.
2092 */
2093 atomic_bitclear_32(&na->na_flags, NAF_ACTIVE);
2094 nx_netif_flow_disable(nif);
2095 nx_netif_filter_disable(nif);
2096
2097 /*
2098 * Here we may block while holding sk_lock, but because
2099 * we've cleared NAF_ACTIVE above, kern_channel_tx_refill()
2100 * should immediately return. A better approach would be
2101 * to drop sk_lock and add a monitor for this routine.
2102 */
2103 lck_mtx_lock(&ifp->if_start_lock);
2104 while (ifp->if_start_active != 0) {
2105 ++ifp->if_start_waiters;
2106 (void) msleep(&ifp->if_start_waiters,
2107 &ifp->if_start_lock, (PZERO - 1),
2108 na->na_name, NULL);
2109 }
2110 /* steer all start requests to default handler */
2111 ifnet_reset_start_handler(ifp);
2112 lck_mtx_unlock(&ifp->if_start_lock);
2113
2114 /* reset all TX notify callbacks */
2115 for (r = 0; r < na_get_nrings(na, NR_TX); r++) {
2116 na->na_tx_rings[r].ckr_na_notify =
2117 na->na_tx_rings[r].ckr_netif_notify;
2118 na->na_tx_rings[r].ckr_netif_notify = NULL;
2119 if (nifna->nifna_tx_mit != NULL) {
2120 na->na_tx_rings[r].ckr_netif_mit_stats = NULL;
2121 nx_netif_mit_cleanup(&nifna->nifna_tx_mit[r]);
2122 }
2123 }
2124
2125 if (nifna->nifna_tx_mit != NULL) {
2126 skn_free_type_array(tx_off, struct nx_netif_mit,
2127 na_get_nrings(na, NR_TX), nifna->nifna_tx_mit);
2128 nifna->nifna_tx_mit = NULL;
2129 }
2130
2131 /* reset all RX notify callbacks */
2132 for (r = 0; r < na_get_nrings(na, NR_RX); r++) {
2133 na->na_rx_rings[r].ckr_na_notify =
2134 na->na_rx_rings[r].ckr_netif_notify;
2135 na->na_rx_rings[r].ckr_netif_notify = NULL;
2136 if (nifna->nifna_rx_mit != NULL) {
2137 na->na_rx_rings[r].ckr_netif_mit_stats = NULL;
2138 nx_netif_mit_cleanup(&nifna->nifna_rx_mit[r]);
2139 }
2140 }
2141 if (nifna->nifna_rx_mit != NULL) {
2142 skn_free_type_array(rx_off, struct nx_netif_mit,
2143 na_get_nrings(na, NR_RX), nifna->nifna_rx_mit);
2144 nifna->nifna_rx_mit = NULL;
2145 }
2146 break;
2147
2148 default:
2149 VERIFY(0);
2150 /* NOTREACHED */
2151 __builtin_unreachable();
2152 }
2153 out:
2154 return error;
2155 }
2156
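/*
 * Attach a native Skywalk interface to this netif nexus.  This creates
 * the device and host adapters, wires up their callbacks, carves out the
 * backing memory arenas, marks the ifnet Skywalk-capable, and binds the
 * adapters to the NEXUS_PORT_NET_IF_DEV and NEXUS_PORT_NET_IF_HOST ports.
 * On failure, everything allocated here is torn down again.
 */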
2157 SK_NO_INLINE_ATTRIBUTE
2158 static int
2159 nx_netif_attach(struct kern_nexus *nx, struct ifnet *ifp)
2160 __attribute__((optnone))
2161 {
2162 struct nx_netif *nif = NX_NETIF_PRIVATE(nx);
2163 struct nxprov_params *nxp = NX_PROV(nx)->nxprov_params;
2164 struct nexus_netif_adapter *devnifna = NULL;
2165 struct nexus_netif_adapter *hostnifna = NULL;
2166 struct nexus_adapter *devna = NULL;
2167 struct nexus_adapter *hostna = NULL;
2168 boolean_t embryonic = FALSE;
2169 int retval = 0;
2170 uint32_t na_flags;
2171
2172 SK_LOCK_ASSERT_HELD();
2173 ASSERT(SKYWALK_NATIVE(ifp));
2174 ASSERT(!SKYWALK_CAPABLE(ifp));
2175 ASSERT(ifp->if_na == NULL);
2176 ASSERT(ifp->if_na_ops == NULL);
2177
2178 devnifna = na_netif_alloc(Z_WAITOK);
2179 hostnifna = na_netif_alloc(Z_WAITOK);
2180
2181 /*
2182 * We can be called for two different interface states:
2183 *
2184 * Fully attached: get an io ref count; upon success, this
2185 * holds a reference to the ifnet for the ifp pointer stored
2186 * in 'na_ifp' down below for both adapters.
2187 *
2188 * Embryonic: temporarily hold the ifnet in na_private, which
2189 * upon a successful ifnet_attach(), will be moved over to
2190 * the 'na_ifp' with an io ref count held.
2191 *
2192 * The ifnet in 'na_ifp' will be released by na_release_locked().
2193 */
2194 if (!ifnet_is_attached(ifp, 1)) {
2195 if (!(ifp->if_refflags & IFRF_EMBRYONIC)) {
2196 ifp = NULL;
2197 retval = ENXIO;
2198 goto err;
2199 }
2200 embryonic = TRUE;
2201 }
2202
2203 /* initialize the device netif adapter */
2204 devnifna->nifna_netif = nif;
2205 nx_netif_retain(nif);
2206 devna = &devnifna->nifna_up;
2207 devna->na_type = NA_NETIF_DEV;
2208 devna->na_free = na_netif_free;
2209 (void) strncpy(devna->na_name, ifp->if_xname, sizeof(devna->na_name) - 1);
2210 devna->na_name[sizeof(devna->na_name) - 1] = '\0';
2211 uuid_generate_random(devna->na_uuid);
2212 if (embryonic) {
2213 /*
2214 * We will move this over to na_ifp once
2215 * the interface is fully attached.
2216 */
2217 devna->na_private = ifp;
2218 ASSERT(devna->na_ifp == NULL);
2219 } else {
2220 ASSERT(devna->na_private == NULL);
2221 /* use I/O refcnt from ifnet_is_attached() */
2222 devna->na_ifp = ifp;
2223 }
2224 devna->na_activate = nx_netif_na_activate;
2225 devna->na_channel_event_notify = nx_netif_na_channel_event_notify;
2226 devna->na_txsync = nx_netif_na_txsync;
2227 devna->na_rxsync = nx_netif_na_rxsync;
2228 devna->na_dtor = nx_netif_na_dtor;
2229 devna->na_krings_create = nx_netif_dev_krings_create;
2230 devna->na_krings_delete = nx_netif_dev_krings_delete;
2231 devna->na_special = nx_netif_na_special;
2232
2233 na_flags = NAF_NATIVE | NAF_ASYNC_DTOR;
2234 if (NX_PROV(nx)->nxprov_flags & NXPROVF_VIRTUAL_DEVICE) {
2235 na_flags |= NAF_VIRTUAL_DEVICE;
2236 }
2237 if (NX_LLINK_PROV(nx)) {
2238 /*
2239 * while operating in logical link mode, we don't need to
2240 * create backing memory regions for the rings as they are
2241 * not used.
2242 */
2243 na_flags |= NAF_MEM_NO_INIT;
2244 }
2245 atomic_bitset_32(&devna->na_flags, na_flags);
2246 *(nexus_stats_type_t *)(uintptr_t)&devna->na_stats_type =
2247 NEXUS_STATS_TYPE_INVALID;
2248
2249 na_set_nrings(devna, NR_TX, nxp->nxp_tx_rings);
2250 na_set_nrings(devna, NR_RX, nxp->nxp_rx_rings);
2251 na_set_nslots(devna, NR_TX, nxp->nxp_tx_slots);
2252 na_set_nslots(devna, NR_RX, nxp->nxp_rx_slots);
2253 /*
2254 * Verify upper bounds; the parameters must have already been
2255 * validated by nxdom_prov_params() by the time we get here.
2256 */
2257 ASSERT(na_get_nrings(devna, NR_TX) <= NX_DOM(nx)->nxdom_tx_rings.nb_max);
2258 ASSERT(na_get_nrings(devna, NR_RX) <= NX_DOM(nx)->nxdom_rx_rings.nb_max);
2259 ASSERT(na_get_nslots(devna, NR_TX) <= NX_DOM(nx)->nxdom_tx_slots.nb_max);
2260 ASSERT(na_get_nslots(devna, NR_RX) <= NX_DOM(nx)->nxdom_rx_slots.nb_max);
2261
2262 na_attach_common(devna, nx, &nx_netif_prov_s);
2263
2264 if ((retval = NX_DOM_PROV(nx)->nxdom_prov_mem_new(NX_DOM_PROV(nx),
2265 nx, devna)) != 0) {
2266 ASSERT(devna->na_arena == NULL);
2267 goto err;
2268 }
2269 ASSERT(devna->na_arena != NULL);
2270
2271 *(uint32_t *)(uintptr_t)&devna->na_flowadv_max = nxp->nxp_flowadv_max;
2272 ASSERT(devna->na_flowadv_max == 0 ||
2273 skmem_arena_nexus(devna->na_arena)->arn_flowadv_obj != NULL);
2274
2275 /* setup packet copy routines */
2276 if (skmem_arena_nexus(devna->na_arena)->arn_rx_pp->pp_max_frags > 1) {
2277 nif->nif_pkt_copy_from_mbuf = pkt_copy_multi_buflet_from_mbuf;
2278 nif->nif_pkt_copy_to_mbuf = pkt_copy_multi_buflet_to_mbuf;
2279 nif->nif_pkt_copy_from_pkt = pkt_copy_multi_buflet_from_pkt;
2280 } else {
2281 nif->nif_pkt_copy_from_mbuf = pkt_copy_from_mbuf;
2282 nif->nif_pkt_copy_to_mbuf = pkt_copy_to_mbuf;
2283 nif->nif_pkt_copy_from_pkt = pkt_copy_from_pkt;
2284 }
2285
2286 /* initialize the host netif adapter */
2287 hostnifna->nifna_netif = nif;
2288 nx_netif_retain(nif);
2289 hostna = &hostnifna->nifna_up;
2290 (void) snprintf(hostna->na_name, sizeof(hostna->na_name),
2291 "%s^", devna->na_name);
2292 uuid_generate_random(hostna->na_uuid);
2293 if (embryonic) {
2294 /*
2295 * We will move this over to na_ifp once
2296 * the interface is fully attached.
2297 */
2298 hostna->na_private = ifp;
2299 ASSERT(hostna->na_ifp == NULL);
2300 } else {
2301 ASSERT(hostna->na_private == NULL);
2302 hostna->na_ifp = devna->na_ifp;
2303 ifnet_incr_iorefcnt(hostna->na_ifp);
2304 }
2305 hostna->na_type = NA_NETIF_HOST;
2306 hostna->na_free = na_netif_free;
2307 hostna->na_activate = nx_netif_host_na_activate;
2308 hostna->na_txsync = nx_netif_host_na_txsync;
2309 hostna->na_rxsync = nx_netif_host_na_rxsync;
2310 hostna->na_dtor = nx_netif_na_dtor;
2311 hostna->na_krings_create = nx_netif_host_krings_create;
2312 hostna->na_krings_delete = nx_netif_host_krings_delete;
2313 hostna->na_special = nx_netif_host_na_special;
2314
2315 na_flags = NAF_HOST_ONLY | NAF_NATIVE | NAF_ASYNC_DTOR;
2316 if (NX_LLINK_PROV(nx)) {
2317 /*
2318 * while operating in logical link mode, we don't need to
2319 * create backing memory regions for the rings as they are
2320 * not used.
2321 */
2322 na_flags |= NAF_MEM_NO_INIT;
2323 }
2324 atomic_bitset_32(&hostna->na_flags, na_flags);
2325 *(nexus_stats_type_t *)(uintptr_t)&hostna->na_stats_type =
2326 NEXUS_STATS_TYPE_INVALID;
2327
2328 na_set_nrings(hostna, NR_TX, 1);
2329 na_set_nrings(hostna, NR_RX, 1);
2330 na_set_nslots(hostna, NR_TX, nxp->nxp_tx_slots);
2331 na_set_nslots(hostna, NR_RX, nxp->nxp_rx_slots);
2332
2333 na_attach_common(hostna, nx, &nx_netif_prov_s);
2334
2335 if ((retval = NX_DOM_PROV(nx)->nxdom_prov_mem_new(NX_DOM_PROV(nx),
2336 nx, hostna)) != 0) {
2337 ASSERT(hostna->na_arena == NULL);
2338 goto err;
2339 }
2340 ASSERT(hostna->na_arena != NULL);
2341
2342 *(uint32_t *)(uintptr_t)&hostna->na_flowadv_max = nxp->nxp_flowadv_max;
2343 ASSERT(hostna->na_flowadv_max == 0 ||
2344 skmem_arena_nexus(hostna->na_arena)->arn_flowadv_obj != NULL);
2345
2346 /* adjust the classq packet drop limit */
2347 if (embryonic) {
2348 uint32_t drop_lim;
2349 struct kern_pbufpool_memory_info pp_info;
2350
2351 retval = kern_pbufpool_get_memory_info(nx->nx_tx_pp, &pp_info);
2352 VERIFY(retval == 0);
2353
2354 /* set the drop limit to 80% of the packet pool size */
2355 drop_lim = (pp_info.kpm_packets * 4) / 5;
2356 VERIFY(drop_lim != 0);
2357 IFCQ_PKT_DROP_LIMIT(ifp->if_snd) = drop_lim;
2358 }
2359
2360 /* these will be undone by destructor */
2361 ifp->if_na_ops = &na_netif_ops;
2362 ifp->if_na = devnifna;
2363 na_retain_locked(devna);
2364 na_retain_locked(hostna);
2365
2366 SKYWALK_SET_CAPABLE(ifp, devna);
2367
2368 NETIF_WLOCK(nif);
2369 nif->nif_ifp = ifp;
2370 nif->nif_netif_nxadv = nx->nx_adv.netif_nxv_adv;
2371 retval = nx_port_alloc(nx, NEXUS_PORT_NET_IF_DEV, NULL, &devna,
2372 kernproc);
2373 ASSERT(retval == 0);
2374 retval = nx_port_alloc(nx, NEXUS_PORT_NET_IF_HOST, NULL, &hostna,
2375 kernproc);
2376 ASSERT(retval == 0);
2377 NETIF_WUNLOCK(nif);
2378
2379 #if SK_LOG
2380 uuid_string_t uuidstr;
2381 SK_DF(SK_VERB_NETIF, "devna: \"%s\"", devna->na_name);
2382 SK_DF(SK_VERB_NETIF, " UUID: %s",
2383 sk_uuid_unparse(devna->na_uuid, uuidstr));
2384 SK_DF(SK_VERB_NETIF, " nx: 0x%llx (\"%s\":\"%s\")",
2385 SK_KVA(devna->na_nx), NX_DOM(devna->na_nx)->nxdom_name,
2386 NX_DOM_PROV(devna->na_nx)->nxdom_prov_name);
2387 SK_DF(SK_VERB_NETIF, " flags: 0x%b", devna->na_flags, NAF_BITS);
2388 SK_DF(SK_VERB_NETIF, " flowadv_max: %u", devna->na_flowadv_max);
2389 SK_DF(SK_VERB_NETIF, " rings: tx %u rx %u",
2390 na_get_nrings(devna, NR_TX), na_get_nrings(devna, NR_RX));
2391 SK_DF(SK_VERB_NETIF, " slots: tx %u rx %u",
2392 na_get_nslots(devna, NR_TX), na_get_nslots(devna, NR_RX));
2393 #if CONFIG_NEXUS_USER_PIPE
2394 SK_DF(SK_VERB_NETIF, " next_pipe: %u", devna->na_next_pipe);
2395 SK_DF(SK_VERB_NETIF, " max_pipes: %u", devna->na_max_pipes);
2396 #endif /* CONFIG_NEXUS_USER_PIPE */
2397 SK_DF(SK_VERB_NETIF, " ifp: 0x%llx %s [ioref %u]",
2398 SK_KVA(ifp), ifp->if_xname, ifp->if_refio);
2399 SK_DF(SK_VERB_NETIF, "hostna: \"%s\"", hostna->na_name);
2400 SK_DF(SK_VERB_NETIF, " UUID: %s",
2401 sk_uuid_unparse(hostna->na_uuid, uuidstr));
2402 SK_DF(SK_VERB_NETIF, " nx: 0x%llx (\"%s\":\"%s\")",
2403 SK_KVA(hostna->na_nx), NX_DOM(hostna->na_nx)->nxdom_name,
2404 NX_DOM_PROV(hostna->na_nx)->nxdom_prov_name);
2405 SK_DF(SK_VERB_NETIF, " flags: 0x%b",
2406 hostna->na_flags, NAF_BITS);
2407 SK_DF(SK_VERB_NETIF, " flowadv_max: %u", hostna->na_flowadv_max);
2408 SK_DF(SK_VERB_NETIF, " rings: tx %u rx %u",
2409 na_get_nrings(hostna, NR_TX), na_get_nrings(hostna, NR_RX));
2410 SK_DF(SK_VERB_NETIF, " slots: tx %u rx %u",
2411 na_get_nslots(hostna, NR_TX), na_get_nslots(hostna, NR_RX));
2412 #if CONFIG_NEXUS_USER_PIPE
2413 SK_DF(SK_VERB_NETIF, " next_pipe: %u", hostna->na_next_pipe);
2414 SK_DF(SK_VERB_NETIF, " max_pipes: %u", hostna->na_max_pipes);
2415 #endif /* CONFIG_NEXUS_USER_PIPE */
2416 SK_DF(SK_VERB_NETIF, " ifp: 0x%llx %s [ioref %u]",
2417 SK_KVA(ifp), ifp->if_xname, ifp->if_refio);
2418 #endif /* SK_LOG */
2419
2420 err:
2421 if (retval != 0) {
2422 if (ifp != NULL) {
2423 if (!embryonic) {
2424 ifnet_decr_iorefcnt(ifp);
2425 }
2426 ifp = NULL;
2427 }
2428 if (devna != NULL) {
2429 if (devna->na_arena != NULL) {
2430 skmem_arena_release(devna->na_arena);
2431 devna->na_arena = NULL;
2432 }
2433 if (devna->na_ifp != NULL) {
2434 ifnet_decr_iorefcnt(devna->na_ifp);
2435 devna->na_ifp = NULL;
2436 }
2437 devna->na_private = NULL;
2438 }
2439 if (hostna != NULL) {
2440 if (hostna->na_arena != NULL) {
2441 skmem_arena_release(hostna->na_arena);
2442 hostna->na_arena = NULL;
2443 }
2444 if (hostna->na_ifp != NULL) {
2445 ifnet_decr_iorefcnt(hostna->na_ifp);
2446 hostna->na_ifp = NULL;
2447 }
2448 hostna->na_private = NULL;
2449 }
2450 if (devnifna != NULL) {
2451 if (devnifna->nifna_netif != NULL) {
2452 nx_netif_release(devnifna->nifna_netif);
2453 devnifna->nifna_netif = NULL;
2454 }
2455 na_netif_free((struct nexus_adapter *)devnifna);
2456 }
2457 if (hostnifna != NULL) {
2458 if (hostnifna->nifna_netif != NULL) {
2459 nx_netif_release(hostnifna->nifna_netif);
2460 hostnifna->nifna_netif = NULL;
2461 }
2462 na_netif_free((struct nexus_adapter *)hostnifna);
2463 }
2464 }
2465 return retval;
2466 }
2467
2468 /*
2469 * Any per-netif state that can be discovered at attach time should be
2470 * initialized here.
2471 */
2472 static void
2473 nx_netif_flags_init(struct nx_netif *nif)
2474 {
2475 ifnet_t ifp = nif->nif_ifp;
2476 struct kern_nexus *nx = nif->nif_nx;
2477 struct nexus_adapter *devna = nx_port_get_na(nx, NEXUS_PORT_NET_IF_DEV);
2478
2479 switch (devna->na_type) {
2480 case NA_NETIF_DEV:
2481 if (strcmp(ifp->if_name, sk_ll_prefix) == 0) {
2482 nif->nif_flags |= NETIF_FLAG_LOW_LATENCY;
2483 if_set_xflags(ifp, IFXF_LOW_LATENCY);
2484 }
2485 break;
2486 case NA_NETIF_COMPAT_DEV:
2487 nif->nif_flags |= NETIF_FLAG_COMPAT;
2488 break;
2489 default:
2490 break;
2491 }
2492 }
2493
2494 /*
2495 * This is also supposed to check for any inconsistent state at detach time.
2496 */
2497 static void
2498 nx_netif_flags_fini(struct nx_netif *nif)
2499 {
2500 ifnet_t ifp = nif->nif_ifp;
2501
2502 if (ifp != NULL) {
2503 if_clear_xflags(ifp, IFXF_LOW_LATENCY);
2504 }
2505 nif->nif_flags = 0;
2506 }
2507
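/*
 * Query the provider for optional capabilities.  Currently this only
 * negotiates interface advisory support: if the ifnet advertises
 * IFEF_ADV_REPORT, we hand the provider our notify callback and record
 * the config handler/context it returns for later enable/disable calls.
 */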
2508 static void
2509 nx_netif_capabilities_init(struct nx_netif *nif)
2510 {
2511 struct kern_nexus_capab_interface_advisory kncia;
2512 struct kern_nexus *nx = nif->nif_nx;
2513 nxprov_capab_config_fn_t capab_fn;
2514 uint32_t capab_len;
2515 int error;
2516
2517 if ((NX_PROV(nx)->nxprov_netif_ext.nxnpi_version) ==
2518 KERN_NEXUS_PROVIDER_VERSION_NETIF) {
2519 capab_fn = NX_PROV(nx)->nxprov_netif_ext.nxnpi_config_capab;
2520 ASSERT(capab_fn != NULL);
2521 } else {
2522 capab_fn = NX_PROV(nx)->nxprov_ext.nxpi_config_capab;
2523 }
2524 if (capab_fn == NULL) {
2525 return;
2526 }
2527 /* check/configure interface advisory notifications */
2528 if ((nif->nif_ifp->if_eflags & IFEF_ADV_REPORT) != 0) {
2529 bzero(&kncia, sizeof(kncia));
2530 kncia.kncia_version =
2531 KERN_NEXUS_CAPAB_INTERFACE_ADVISORY_VERSION_1;
2532 *__DECONST(kern_nexus_capab_interface_advisory_notify_fn_t *,
2533 &(kncia.kncia_notify)) = nx_netif_interface_advisory_notify;
2534 *__DECONST(void **, &(kncia.kncia_kern_context)) = nx;
2535 capab_len = sizeof(kncia);
2536 error = capab_fn(NX_PROV(nx), nx,
2537 KERN_NEXUS_CAPAB_INTERFACE_ADVISORY, &kncia, &capab_len);
2538 if (error == 0) {
2539 VERIFY(kncia.kncia_config != NULL);
2540 VERIFY(kncia.kncia_provider_context != NULL);
2541 nif->nif_intf_adv_config = kncia.kncia_config;
2542 nif->nif_intf_adv_prov_ctx =
2543 kncia.kncia_provider_context;
2544 }
2545 }
2546 }
2547
2548 static void
2549 nx_netif_capabilities_fini(struct nx_netif *nif)
2550 {
2551 nif->nif_intf_adv_config = NULL;
2552 nif->nif_intf_adv_prov_ctx = NULL;
2553 }
2554
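/*
 * Called once ifnet_attach() has completed for an interface that was
 * attached to the nexus while still embryonic.  The ifnet parked in
 * na_private is moved to na_ifp (using the I/O reference taken here),
 * and the per-netif subsystems (logical links, filters, flows,
 * capabilities, agent) are initialized.
 */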
2555 void
2556 na_netif_finalize(struct nexus_netif_adapter *nifna, struct ifnet *ifp)
2557 {
2558 struct nx_netif *nif = nifna->nifna_netif;
2559 struct kern_nexus *nx = nif->nif_nx;
2560 struct nexus_adapter *devna = nx_port_get_na(nx, NEXUS_PORT_NET_IF_DEV);
2561 struct nexus_adapter *hostna = nx_port_get_na(nx,
2562 NEXUS_PORT_NET_IF_HOST);
2563
2564 ASSERT(devna != NULL);
2565 ASSERT(hostna != NULL);
2566
2567 if (!ifnet_is_attached(ifp, 1)) {
2568 VERIFY(0);
2569 /* NOTREACHED */
2570 __builtin_unreachable();
2571 }
2572
2573 ASSERT(devna->na_private == ifp);
2574 ASSERT(devna->na_ifp == NULL);
2575 /* use I/O refcnt held by ifnet_is_attached() above */
2576 devna->na_ifp = devna->na_private;
2577 devna->na_private = NULL;
2578
2579 ASSERT(hostna->na_private == ifp);
2580 ASSERT(hostna->na_ifp == NULL);
2581 hostna->na_ifp = hostna->na_private;
2582 hostna->na_private = NULL;
2583 ifnet_incr_iorefcnt(hostna->na_ifp);
2584
2585 nx_netif_flags_init(nif);
2586 nx_netif_llink_init(nif);
2587 nx_netif_filter_init(nif);
2588 nx_netif_flow_init(nif);
2589 nx_netif_capabilities_init(nif);
2590 nx_netif_agent_init(nif);
2591 }
2592
2593 void
2594 nx_netif_reap(struct nexus_netif_adapter *nifna, struct ifnet *ifp,
2595 uint32_t thres, boolean_t low)
2596 {
2597 #pragma unused(ifp)
2598 struct nx_netif *nif = nifna->nifna_netif;
2599 struct kern_nexus *nx = nif->nif_nx;
2600 struct nexus_adapter *devna = nx_port_get_na(nx, NEXUS_PORT_NET_IF_DEV);
2601 uint64_t now = _net_uptime;
2602 boolean_t purge;
2603
2604 ASSERT(thres != 0);
2605
2606 if (devna->na_work_ts == 0) {
2607 return;
2608 }
2609
2610 /*
2611 * Purge if it has been inactive for some time (twice the drain
2612 * threshold), and clear the work timestamp to temporarily skip this
2613 * adapter until it's active again. Purging cached objects can be
2614 * expensive since we'd need to allocate and construct them again,
2615 * so we do it only when necessary.
2616 */
2617 if (low || (now - devna->na_work_ts) >= (thres << 1)) {
2618 devna->na_work_ts = 0;
2619 purge = TRUE;
2620 } else {
2621 purge = FALSE;
2622 }
2623
2624 SK_DF(SK_VERB_NETIF, "%s: %s na %s", ifp->if_xname,
2625 (purge ? "purging" : "pruning"), devna->na_name);
2626
2627 /*
2628 * Device and host adapters share the same packet buffer pool,
2629 * so just reap the arena belonging to the device instance.
2630 */
2631 skmem_arena_reap(devna->na_arena, purge);
2632
2633 /*
2634 * Reap any caches configured for classq.
2635 */
2636 ifclassq_reap_caches(purge);
2637 }
2638
2639 void
2640 nx_netif_copy_stats(struct nexus_netif_adapter *nifna,
2641 struct if_netif_stats *if_ns)
2642 {
2643 struct nx_netif_mit *mit;
2644 struct mit_cfg_tbl *mit_cfg;
2645
2646 if ((mit = nifna->nifna_rx_mit) == NULL) {
2647 return;
2648 }
2649
2650 if ((mit->mit_flags & NETIF_MITF_INITIALIZED) == 0) {
2651 return;
2652 }
2653
2654 if_ns->ifn_rx_mit_interval = mit->mit_interval;
2655 if_ns->ifn_rx_mit_mode = mit->mit_mode;
2656 if_ns->ifn_rx_mit_packets_avg = mit->mit_packets_avg;
2657 if_ns->ifn_rx_mit_packets_min = mit->mit_packets_min;
2658 if_ns->ifn_rx_mit_packets_max = mit->mit_packets_max;
2659 if_ns->ifn_rx_mit_bytes_avg = mit->mit_bytes_avg;
2660 if_ns->ifn_rx_mit_bytes_min = mit->mit_bytes_min;
2661 if_ns->ifn_rx_mit_bytes_max = mit->mit_bytes_max;
2662 if_ns->ifn_rx_mit_cfg_idx = mit->mit_cfg_idx;
2663
2664 VERIFY(if_ns->ifn_rx_mit_cfg_idx < mit->mit_cfg_idx_max);
2665 mit_cfg = &mit->mit_tbl[if_ns->ifn_rx_mit_cfg_idx];
2666 if_ns->ifn_rx_mit_cfg_packets_lowat = mit_cfg->cfg_plowat;
2667 if_ns->ifn_rx_mit_cfg_packets_hiwat = mit_cfg->cfg_phiwat;
2668 if_ns->ifn_rx_mit_cfg_bytes_lowat = mit_cfg->cfg_blowat;
2669 if_ns->ifn_rx_mit_cfg_bytes_hiwat = mit_cfg->cfg_bhiwat;
2670 if_ns->ifn_rx_mit_cfg_interval = mit_cfg->cfg_ival;
2671 }
2672
2673 int
2674 nx_netif_na_special(struct nexus_adapter *na, struct kern_channel *ch,
2675 struct chreq *chr, nxspec_cmd_t spec_cmd)
2676 {
2677 ASSERT(na->na_type == NA_NETIF_DEV ||
2678 na->na_type == NA_NETIF_COMPAT_DEV);
2679 return nx_netif_na_special_common(na, ch, chr, spec_cmd);
2680 }
2681
2682 int
2683 nx_netif_na_special_common(struct nexus_adapter *na, struct kern_channel *ch,
2684 struct chreq *chr, nxspec_cmd_t spec_cmd)
2685 {
2686 int error = 0;
2687
2688 ASSERT(na->na_type == NA_NETIF_DEV || na->na_type == NA_NETIF_HOST ||
2689 na->na_type == NA_NETIF_COMPAT_DEV ||
2690 na->na_type == NA_NETIF_COMPAT_HOST);
2691 SK_LOCK_ASSERT_HELD();
2692
2693 switch (spec_cmd) {
2694 case NXSPEC_CMD_CONNECT:
2695 /*
2696 * The netif adapter isn't created exclusively for the kernel.
2697 * We mark (and clear) the NAF_KERNEL_ONLY flag upon a successful
2698 * na_special() connect and disconnect.
2699 */
2700 if (NA_KERNEL_ONLY(na)) {
2701 error = EBUSY;
2702 goto done;
2703 }
2704 ASSERT(!(na->na_flags & NAF_SPEC_INIT));
2705
2706 atomic_bitset_32(&na->na_flags, NAF_KERNEL_ONLY);
2707 error = na_bind_channel(na, ch, chr);
2708 if (error != 0) {
2709 atomic_bitclear_32(&na->na_flags, NAF_KERNEL_ONLY);
2710 goto done;
2711 }
2712 atomic_bitset_32(&na->na_flags, NAF_SPEC_INIT);
2713 break;
2714
2715 case NXSPEC_CMD_DISCONNECT:
2716 ASSERT(NA_KERNEL_ONLY(na));
2717 ASSERT(na->na_channels > 0);
2718 ASSERT(na->na_flags & NAF_SPEC_INIT);
2719 na_unbind_channel(ch);
2720 atomic_bitclear_32(&na->na_flags,
2721 (NAF_SPEC_INIT | NAF_KERNEL_ONLY));
2722 break;
2723
2724 case NXSPEC_CMD_START:
2725 na_kr_drop(na, FALSE);
2726 break;
2727
2728 case NXSPEC_CMD_STOP:
2729 na_kr_drop(na, TRUE);
2730 LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_NOTOWNED);
2731 lck_mtx_lock(&ch->ch_lock);
2732 nxprov_advise_disconnect(na->na_nx, ch);
2733 lck_mtx_unlock(&ch->ch_lock);
2734 break;
2735
2736 default:
2737 error = EINVAL;
2738 break;
2739 }
2740
2741 done:
2742 SK_DF(error ? SK_VERB_ERROR : SK_VERB_NETIF,
2743 "ch 0x%llx from na \"%s\" (0x%llx) naflags %b nx 0x%llx "
2744 "spec_cmd %u (err %d)", SK_KVA(ch), na->na_name, SK_KVA(na),
2745 na->na_flags, NAF_BITS, SK_KVA(ch->ch_nexus), spec_cmd, error);
2746
2747 return error;
2748 }
2749
2750 /*
2751 * Get a skywalk netif adapter for the port.
2752 */
2753 int
2754 nx_netif_na_find(struct kern_nexus *nx, struct kern_channel *ch,
2755 struct chreq *chr, struct nxbind *nxb, struct proc *p,
2756 struct nexus_adapter **nap, boolean_t create)
2757 {
2758 #pragma unused(ch)
2759 struct nx_netif *nif = NX_NETIF_PRIVATE(nx);
2760 boolean_t anon = NX_ANONYMOUS_PROV(nx);
2761 ch_endpoint_t ep = chr->cr_endpoint;
2762 nexus_port_t nx_port = chr->cr_port;
2763 struct nexus_adapter *na = NULL;
2764 struct ifnet *ifp;
2765 int err = 0;
2766
2767 SK_LOCK_ASSERT_HELD();
2768 *nap = NULL; /* default */
2769
2770 #if SK_LOG
2771 uuid_string_t uuidstr;
2772 SK_D("name \"%s\" spec_uuid \"%s\" port %d mode 0x%b pipe_id %u "
2773 "ring_id %d ring_set %u ep_type %u:%u create %u%s",
2774 chr->cr_name, sk_uuid_unparse(chr->cr_spec_uuid, uuidstr),
2775 (int)chr->cr_port, chr->cr_mode, CHMODE_BITS,
2776 chr->cr_pipe_id, (int)chr->cr_ring_id, chr->cr_ring_set,
2777 chr->cr_real_endpoint, chr->cr_endpoint, create,
2778 (ep != CH_ENDPOINT_NET_IF) ? " (skipped)" : "");
2779 #endif /* SK_LOG */
2780
2781 if (!create || ep != CH_ENDPOINT_NET_IF) {
2782 err = ENODEV;
2783 goto done;
2784 }
2785
2786 ASSERT(NX_DOM(nx)->nxdom_type == NEXUS_TYPE_NET_IF);
2787 if (nx_port_get_na(nx, NEXUS_PORT_NET_IF_DEV) == NULL) {
2788 err = ENXIO;
2789 goto done;
2790 }
2791 ifp = nif->nif_ifp;
2792 if (!(SKYWALK_CAPABLE(ifp))) {
2793 SK_ERR("interface %s is no longer usable", if_name(ifp));
2794 err = ENOTSUP;
2795 goto done;
2796 }
2797
2798 if (chr->cr_mode & CHMODE_LOW_LATENCY) {
2799 SK_ERR("low latency is not supported for netif channel");
2800 err = ENOTSUP;
2801 goto done;
2802 }
2803
2804 switch (nx_port) {
2805 case NEXUS_PORT_NET_IF_DEV:
2806 /*
2807 * We have to reject direct user open that's not explicitly
2808 * allowed because netif nexuses do not by default have
2809 * user memory regions.
2810 */
2811 if (p != kernproc &&
2812 (!skywalk_netif_direct_allowed(ifp->if_xname) ||
2813 (kauth_cred_issuser(kauth_cred_get()) == 0 &&
2814 (anon || nif->nif_dev_nxb == NULL || nxb == NULL ||
2815 !nxb_is_equal(nif->nif_dev_nxb, nxb))))) {
2816 DTRACE_SKYWALK2(direct__not__allowed, struct ifnet *,
2817 ifp, struct chreq *, chr);
2818 err = ENOTSUP;
2819 goto done;
2820 }
2821 if (chr->cr_mode & CHMODE_EVENT_RING) {
2822 SK_ERR("event ring is not supported for netif dev port channel");
2823 err = ENOTSUP;
2824 goto done;
2825 }
2826 na = nx_port_get_na(nx, NEXUS_PORT_NET_IF_DEV);
2827 break;
2828
2829 case NEXUS_PORT_NET_IF_HOST:
2830 if (p != kernproc) {
2831 err = ENOTSUP;
2832 goto done;
2833 }
2834 if (chr->cr_mode & CHMODE_EVENT_RING) {
2835 SK_ERR("event ring is not supported for netif host port channel");
2836 err = ENOTSUP;
2837 goto done;
2838 }
2839 na = nx_port_get_na(nx, NEXUS_PORT_NET_IF_HOST);
2840 break;
2841
2842 default:
2843 ASSERT(!(chr->cr_mode & CHMODE_CONFIG));
2844
2845 NETIF_WLOCK(nif);
2846 err = nx_port_alloc(nx, nx_port, nxb, &na, p);
2847 if (err != 0) {
2848 NETIF_WUNLOCK(nif);
2849 goto done;
2850 }
2851
2852 if (na == NULL) {
2853 if (chr->cr_mode & CHMODE_FILTER) {
2854 err = netif_filter_na_create(nx, chr, &na);
2855 } else {
2856 err = netif_vp_na_create(nx, chr, &na);
2857 }
2858 if (err != 0) {
2859 NETIF_WUNLOCK(nif);
2860 goto done;
2861 }
2862 err = nx_port_alloc(nx, nx_port, nxb, &na, p);
2863 if (err != 0) {
2864 NETIF_WUNLOCK(nif);
2865 goto done;
2866 }
2867 }
2868 NETIF_WUNLOCK(nif);
2869
2870 break;
2871 }
2872
2873 ASSERT(err == 0);
2874 ASSERT(na != NULL);
2875
2876 #if CONFIG_NEXUS_USER_PIPE
2877 if (NA_OWNED_BY_ANY(na) || na->na_next_pipe > 0) {
2878 #else /* !CONFIG_NEXUS_USER_PIPE */
2879 if (NA_OWNED_BY_ANY(na)) {
2880 #endif /* !CONFIG_NEXUS_USER_PIPE */
2881 err = EBUSY;
2882 na = NULL;
2883 goto done;
2884 }
2885
2886 *nap = na;
2887 na_retain_locked(na);
2888
2889 done:
2890 ASSERT(err != 0 || na != NULL);
2891 if (err) {
2892 SK_ERR("na not found, err(%d)", err);
2893 } else {
2894 SK_DF(SK_VERB_NETIF, "found na 0x%llu", na);
2895 }
2896 return err;
2897 }
2898
2899 /* na_krings_create callback for all netif device adapters */
2900 int
2901 nx_netif_dev_krings_create(struct nexus_adapter *na, struct kern_channel *ch)
2902 {
2903 int ret;
2904
2905 ASSERT(na->na_type == NA_NETIF_DEV ||
2906 na->na_type == NA_NETIF_COMPAT_DEV);
2907 /*
2908 * Allocate context structures for native netif only, for
2909 * IOSkywalkFamily to store its object references.
2910 */
2911 ret = na_rings_mem_setup(na, 0, (na->na_flags & NAF_NATIVE), ch);
2912
2913 /*
2914 * We mark CKRF_DROP for kernel-only rings (kernel channel
2915 * opened by the flowswitch, etc.) to prevent packets from
2916 * going thru until after the client of the kernel channel
2917 * has fully plumbed things on its side. For userland-facing
2918 * rings (regular channel opened to netif), this is not
2919 * required, so we don't mark CKRF_DROP there.
2920 */
2921 if (ret == 0 && NA_KERNEL_ONLY(na)) {
2922 na_kr_drop(na, TRUE);
2923 }
2924
2925 return ret;
2926 }
2927
2928 /* call with SK_LOCK held */
2929 void
2930 nx_netif_dev_krings_delete(struct nexus_adapter *na, struct kern_channel *ch,
2931 boolean_t defunct)
2932 {
2933 ASSERT(na->na_type == NA_NETIF_DEV ||
2934 na->na_type == NA_NETIF_COMPAT_DEV);
2935
2936 /* see comments in nx_netif_dev_krings_create() */
2937 if (NA_KERNEL_ONLY(na)) {
2938 na_kr_drop(na, TRUE);
2939 }
2940
2941 na_rings_mem_teardown(na, ch, defunct);
2942 }
2943
2944 struct nx_netif *
2945 nx_netif_alloc(zalloc_flags_t how)
2946 {
2947 struct nx_netif *n;
2948
2949 SK_LOCK_ASSERT_HELD();
2950
2951 n = zalloc_flags(nx_netif_zone, how | Z_ZERO);
2952 if (n == NULL) {
2953 return NULL;
2954 }
2955
2956 NETIF_RWINIT(n);
2957 os_ref_init(&n->nif_refcnt, NULL);
2958 SK_DF(SK_VERB_MEM, "netif 0x%llx", SK_KVA(n));
2959
2960 return n;
2961 }
2962
2963 static void
2964 nx_netif_destroy(struct nx_netif *n)
2965 {
2966 ASSERT(n->nif_dev_nxb == NULL);
2967 ASSERT(n->nif_host_nxb == NULL);
2968 ASSERT(os_ref_get_count(&n->nif_refcnt) == 0);
2969 nx_netif_llink_config_free(n);
2970 SK_DF(SK_VERB_MEM, "netif 0x%llx", SK_KVA(n));
2971 NETIF_RWDESTROY(n);
2972 zfree(nx_netif_zone, n);
2973 }
2974
2975 void
2976 nx_netif_release(struct nx_netif *n)
2977 {
2978 SK_LOCK_ASSERT_HELD();
2979
2980 SK_DF(SK_VERB_MEM, "netif 0x%llx, refcnt %d", SK_KVA(n),
2981 os_ref_get_count(&n->nif_refcnt));
2982 if (os_ref_release(&n->nif_refcnt) == 0) {
2983 nx_netif_destroy(n);
2984 }
2985 }
2986
2987 void
2988 nx_netif_retain(struct nx_netif *n)
2989 {
2990 SK_LOCK_ASSERT_HELD();
2991
2992 /* retaining an object with a zero refcount is not allowed */
2993 ASSERT(os_ref_get_count(&n->nif_refcnt) >= 1);
2994 os_ref_retain(&n->nif_refcnt);
2995 SK_DF(SK_VERB_MEM, "netif 0x%llx, refcnt %d", SK_KVA(n),
2996 os_ref_get_count(&n->nif_refcnt));
2997 }
2998
2999 void
3000 nx_netif_free(struct nx_netif *n)
3001 {
3002 nx_netif_release(n);
3003 }
3004
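/*
 * Deliver a channel event on a low-latency netif.  The packet is
 * classified against the outbound netif flows to find the owning VPNA,
 * and the event is forwarded to that adapter's own event notify
 * callback; anything that cannot be classified or delivered is counted
 * as a dropped event.
 */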
3005 static int
3006 nx_netif_na_channel_event_notify(struct nexus_adapter *na,
3007 struct __kern_packet *kpkt, struct __kern_channel_event *ev,
3008 uint16_t ev_len)
3009 {
3010 int err;
3011 struct netif_flow *nf;
3012 struct nexus_adapter *netif_vpna;
3013 struct nx_netif *nif = NIFNA(na)->nifna_netif;
3014 struct netif_stats *nifs = &NIFNA(na)->nifna_netif->nif_stats;
3015
3016 NETIF_RLOCK(nif);
3017 if (!NETIF_IS_LOW_LATENCY(nif)) {
3018 err = ENOTSUP;
3019 goto error;
3020 }
3021 if (__improbable(!NA_IS_ACTIVE(na))) {
3022 STATS_INC(nifs, NETIF_STATS_EV_DROP_NA_INACTIVE);
3023 err = ENXIO;
3024 goto error;
3025 }
3026 if (__improbable(NA_IS_DEFUNCT(na))) {
3027 STATS_INC(nifs, NETIF_STATS_EV_DROP_NA_DEFUNCT);
3028 err = ENXIO;
3029 goto error;
3030 }
3031 if (__improbable(nif->nif_vp_cnt == 0)) {
3032 STATS_INC(nifs, NETIF_STATS_EV_DROP_NO_VPNA);
3033 err = ENXIO;
3034 goto error;
3035 }
3036 /* The returned netif flow is refcounted. */
3037 nf = nx_netif_flow_classify(nif, kpkt, NETIF_FLOW_OUTBOUND);
3038 if (nf == NULL) {
3039 SK_ERR("unclassified event (%d) dropped", ev->ev_type);
3040 STATS_INC(nifs, NETIF_STATS_EV_DROP_DEMUX_ERR);
3041 err = ENOENT;
3042 goto error;
3043 }
3044 netif_vpna = (struct nexus_adapter *)nf->nf_cb_arg;
3045 if (netif_vpna->na_channel_event_notify != NULL) {
3046 err = netif_vpna->na_channel_event_notify(netif_vpna, kpkt,
3047 ev, ev_len);
3048 } else {
3049 STATS_INC(nifs, NETIF_STATS_EV_DROP_EV_VPNA_NOTSUP);
3050 err = ENOTSUP;
3051 }
3052 nx_netif_flow_release(nif, nf);
3053 NETIF_RUNLOCK(nif);
3054 nf = NULL;
3055 return err;
3056
3057 error:
3058 STATS_INC(nifs, NETIF_STATS_EV_DROP);
3059 NETIF_RUNLOCK(nif);
3060 return err;
3061 }
3062
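/*
 * Copy an interface advisory report into the shared nexus advisory
 * region -- the flowswitch's region if one is attached to this netif,
 * otherwise the netif's own -- and wake up any user channels monitoring
 * that nexus.
 */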
3063 static int
3064 nx_netif_interface_advisory_notify_common(struct kern_nexus *nx,
3065 const struct ifnet_interface_advisory *advisory)
3066 {
3067 struct kern_nexus *notify_nx;
3068 struct __kern_netif_intf_advisory *intf_adv;
3069 struct nx_netif *nif = NX_NETIF_PRIVATE(nx);
3070
3071 if (nif->nif_fsw_nxadv != NULL) {
3072 ASSERT(nif->nif_fsw != NULL);
3073 intf_adv = &nif->nif_fsw_nxadv->_nxadv_intf_adv;
3074 notify_nx = nif->nif_fsw->fsw_nx;
3075 } else {
3076 intf_adv = &nif->nif_netif_nxadv->__kern_intf_adv;
3077 notify_nx = nif->nif_nx;
3078 }
3079 /*
3080 * copy the advisory report into shared memory
3081 */
3082 intf_adv->cksum = os_cpu_copy_in_cksum(advisory, &intf_adv->adv,
3083 sizeof(*advisory), 0);
3084 STATS_INC(&nif->nif_stats, NETIF_STATS_IF_ADV_UPD_RECV);
3085 /*
3086 * notify user channels on advisory report availability
3087 */
3088 nx_interface_advisory_notify(notify_nx);
3089 return 0;
3090 }
3091
3092 int
3093 nx_netif_interface_advisory_report(struct nexus_adapter *devna,
3094 const struct ifnet_interface_advisory *advisory)
3095 {
3096 ASSERT(devna->na_type == NA_NETIF_DEV);
3097 if (__improbable(!NA_IS_ACTIVE(devna))) {
3098 return ENXIO;
3099 }
3100 if (__improbable(NA_IS_DEFUNCT(devna))) {
3101 return ENXIO;
3102 }
3103 return nx_netif_interface_advisory_notify_common(devna->na_nx,
3104 advisory);
3105 }
3106
3107 static errno_t
3108 nx_netif_interface_advisory_notify(void *kern_ctx,
3109 const struct ifnet_interface_advisory *advisory)
3110 {
3111 if (__improbable(kern_ctx == NULL || advisory == NULL ||
3112 advisory->version != IF_INTERFACE_ADVISORY_VERSION_CURRENT)) {
3113 return EINVAL;
3114 }
3115 if (__improbable((advisory->direction !=
3116 IF_INTERFACE_ADVISORY_DIRECTION_TX) &&
3117 (advisory->direction != IF_INTERFACE_ADVISORY_DIRECTION_RX))) {
3118 return EINVAL;
3119 }
3120 return nx_netif_interface_advisory_notify_common(kern_ctx, advisory);
3121 }
3122
3123 void
3124 nx_netif_config_interface_advisory(struct kern_nexus *nx, bool enable)
3125 {
3126 struct kern_nexus *nx_netif;
3127 struct nx_netif *nif;
3128
3129 if (NX_REJECT_ACT(nx) || (nx->nx_flags & NXF_CLOSED) != 0) {
3130 return;
3131 }
3132 if (NX_PROV(nx)->nxprov_params->nxp_type == NEXUS_TYPE_FLOW_SWITCH) {
3133 struct nx_flowswitch *fsw = NX_FSW_PRIVATE(nx);
3134 nx_netif = fsw->fsw_nifna->na_nx;
3135 } else {
3136 nx_netif = nx;
3137 }
3138 ASSERT(NX_PROV(nx_netif)->nxprov_params->nxp_type == NEXUS_TYPE_NET_IF);
3139 nif = NX_NETIF_PRIVATE(nx_netif);
3140 if (nif->nif_intf_adv_config != NULL) {
3141 nif->nif_intf_adv_config(nif->nif_intf_adv_prov_ctx, enable);
3142 }
3143 }
3144
3145 /*
3146 * This function has no use anymore since we are now passing truncated packets
3147 * to filters. We keep this logic just in case we need to prevent certain
3148 * packets from being passed to filters.
3149 */
3150 static boolean_t
3151 packet_is_filterable(struct nexus_netif_adapter *nifna,
3152 struct __kern_packet *pkt)
3153 {
3154 #pragma unused (nifna, pkt)
3155 return TRUE;
3156 }
3157
3158 /*
3159 * This function is only meant for supporting the RX path because the TX path
3160 * will not send packets > MTU size due to the disabling of TSO when filters
3161 * are enabled.
3162 */
3163 static void
3164 get_filterable_packets(struct nexus_netif_adapter *nifna,
3165 struct __kern_packet *pkt_chain, struct __kern_packet **fpkt_chain,
3166 struct __kern_packet **passthrough_chain)
3167 {
3168 struct nx_netif *nif = nifna->nifna_netif;
3169 struct netif_stats *nifs = &nif->nif_stats;
3170 struct __kern_packet *pkt = pkt_chain, *next, *fpkt;
3171 struct __kern_packet *fpkt_head = NULL, *passthrough_head = NULL;
3172 struct __kern_packet **fpkt_tailp = &fpkt_head;
3173 struct __kern_packet **passthrough_tailp = &passthrough_head;
3174 int fcnt = 0, pcnt = 0, dcnt = 0;
3175
3176 while (pkt != NULL) {
3177 next = pkt->pkt_nextpkt;
3178 pkt->pkt_nextpkt = NULL;
3179
3180 if (!packet_is_filterable(nifna, pkt)) {
3181 pcnt++;
3182 *passthrough_tailp = pkt;
3183 passthrough_tailp = &pkt->pkt_nextpkt;
3184 pkt = next;
3185 continue;
3186 }
3187 fpkt = nx_netif_pkt_to_filter_pkt(nifna, pkt, NETIF_CONVERT_RX);
3188 if (fpkt != NULL) {
3189 fcnt++;
3190 *fpkt_tailp = fpkt;
3191 fpkt_tailp = &fpkt->pkt_nextpkt;
3192 } else {
3193 dcnt++;
3194 }
3195 pkt = next;
3196 }
3197 *fpkt_chain = fpkt_head;
3198 *passthrough_chain = passthrough_head;
3199
3200 /*
3201 * No need to increment drop stats because that's already
3202 * done in nx_netif_pkt_to_filter_pkt.
3203 */
3204 STATS_ADD(nifs, NETIF_STATS_FILTER_RX_NOT_FILTERABLE, pcnt);
3205 DTRACE_SKYWALK6(filterable, struct nexus_netif_adapter *, nifna,
3206 int, fcnt, int, pcnt, int, dcnt, struct __kern_packet *,
3207 fpkt_head, struct __kern_packet *, passthrough_head);
3208 }
3209
3210 /*
3211 * This is only used by ring-based notify functions for now.
3212 * When a qset-based notify becomes available, this function can be used
3213 * unmodified.
3214 */
3215 void
3216 netif_receive(struct nexus_netif_adapter *nifna,
3217 struct __kern_packet *pkt_chain, struct nexus_pkt_stats *stats)
3218 {
3219 struct nx_netif *nif = nifna->nifna_netif;
3220 struct nexus_adapter *na = &nifna->nifna_up;
3221 struct netif_stats *nifs = &nif->nif_stats;
3222 int err, dropcnt, dropstat = -1;
3223
3224 /* update our work timestamp */
3225 na->na_work_ts = _net_uptime;
3226
3227 if (nif->nif_filter_cnt > 0) {
3228 struct __kern_packet *fpkt_chain = NULL;
3229 struct __kern_packet *passthrough_chain = NULL;
3230
3231 get_filterable_packets(nifna, pkt_chain, &fpkt_chain,
3232 &passthrough_chain);
3233 if (fpkt_chain != NULL) {
3234 (void) nx_netif_filter_inject(nifna, NULL, fpkt_chain,
3235 NETIF_FILTER_RX | NETIF_FILTER_SOURCE);
3236 }
3237 if (passthrough_chain != NULL) {
3238 pkt_chain = passthrough_chain;
3239 } else {
3240 return;
3241 }
3242 } else if (nx_netif_filter_default_drop != 0) {
3243 DTRACE_SKYWALK2(rx__default__drop, struct nx_netif *, nif,
3244 struct __kern_packet *, pkt_chain);
3245 dropstat = NETIF_STATS_FILTER_DROP_DEFAULT;
3246 goto drop;
3247 }
3248 if (nif->nif_flow_cnt > 0) {
3249 struct __kern_packet *remain = NULL;
3250
3251 err = nx_netif_demux(nifna, pkt_chain, &remain,
3252 NETIF_FLOW_SOURCE);
3253 if (remain == NULL) {
3254 return;
3255 }
3256 pkt_chain = remain;
3257 }
3258 if (na->na_rx != NULL) {
3259 na->na_rx(na, pkt_chain, stats);
3260 } else {
3261 DTRACE_SKYWALK2(no__rx__cb, struct nx_netif *, nif,
3262 struct __kern_packet *, pkt_chain);
3263 dropstat = NETIF_STATS_DROP_NO_RX_CB;
3264 goto drop;
3265 }
3266 return;
3267 drop:
3268 dropcnt = 0;
3269 nx_netif_free_packet_chain(pkt_chain, &dropcnt);
3270 if (dropstat != -1) {
3271 STATS_ADD(nifs, dropstat, dropcnt);
3272 }
3273 STATS_ADD(nifs, NETIF_STATS_DROP, dropcnt);
3274 }
3275
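/*
 * Simple token-bucket rate limiter over the slot range [begin, end) of an
 * RX ring.  Tokens are measured in bits: the bucket refills at `rate'
 * bits per second (capped at a depth of `rate'), and each packet charges
 * pkt_length * 8 bits.  For example, at rate = 1,000,000 a 1500-byte
 * packet consumes 12,000 tokens.  The return value is the slot index at
 * which processing should stop; *rate_limited is set when the bucket ran
 * dry before `end' was reached.  A rate of 0 disables limiting.
 */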
3276 static slot_idx_t
3277 netif_rate_limit(struct __kern_channel_ring *r, uint64_t rate,
3278 slot_idx_t begin, slot_idx_t end, boolean_t *rate_limited)
3279 {
3280 uint64_t elapsed;
3281 uint64_t now;
3282 struct __kern_packet *pkt;
3283 clock_sec_t sec;
3284 clock_usec_t usec;
3285 slot_idx_t i;
3286
3287 if (__probable(rate == 0)) {
3288 return end;
3289 }
3290
3291 /* init tbr if not so */
3292 if (__improbable(r->ckr_tbr_token == CKR_TBR_TOKEN_INVALID)) {
3293 r->ckr_tbr_token = rate;
3294 r->ckr_tbr_depth = rate;
3295 r->ckr_tbr_last = mach_absolute_time();
3296 } else {
3297 now = mach_absolute_time();
3298 elapsed = now - r->ckr_tbr_last;
3299 absolutetime_to_microtime(elapsed, &sec, &usec);
3300 r->ckr_tbr_token +=
3301 ((sec * USEC_PER_SEC + usec) * rate / USEC_PER_SEC);
3302 if (__improbable(r->ckr_tbr_token > r->ckr_tbr_depth)) {
3303 r->ckr_tbr_token = r->ckr_tbr_depth;
3304 }
3305 r->ckr_tbr_last = now;
3306 }
3307
3308 *rate_limited = FALSE;
3309 for (i = begin; i != end; i = SLOT_NEXT(i, r->ckr_lim)) {
3310 pkt = KR_KSD(r, i)->sd_pkt;
3311 if (__improbable(pkt == NULL)) {
3312 continue;
3313 }
3314 if (__improbable(r->ckr_tbr_token <= 0)) {
3315 end = i;
3316 *rate_limited = TRUE;
3317 break;
3318 }
3319 r->ckr_tbr_token -= pkt->pkt_length * 8;
3320 }
3321
3322 SK_DF(SK_VERB_FSW | SK_VERB_RX, "ckr %p %s rate limited at %d",
3323 r, r->ckr_name, i);
3324
3325 return end;
3326 }
3327
3328 SK_NO_INLINE_ATTRIBUTE
3329 static struct __kern_packet *
3330 consume_pkts(struct __kern_channel_ring *ring, slot_idx_t end)
3331 {
3332 struct __kern_packet *pkt_chain = NULL, **tailp = &pkt_chain;
3333 slot_idx_t idx = ring->ckr_rhead;
3334
3335 while (idx != end) {
3336 struct __kern_slot_desc *ksd = KR_KSD(ring, idx);
3337 struct __kern_packet *pkt = ksd->sd_pkt;
3338
3339 ASSERT(pkt->pkt_nextpkt == NULL);
3340 KR_SLOT_DETACH_METADATA(ring, ksd);
3341 *tailp = pkt;
3342 tailp = &pkt->pkt_nextpkt;
3343 idx = SLOT_NEXT(idx, ring->ckr_lim);
3344 }
3345 ring->ckr_rhead = end;
3346 ring->ckr_rtail = ring->ckr_ktail;
3347 return pkt_chain;
3348 }
3349
3350 int
3351 netif_rx_notify_default(struct __kern_channel_ring *ring, struct proc *p,
3352 uint32_t flags)
3353 {
3354 struct nexus_adapter *hwna;
3355 struct nexus_netif_adapter *nifna;
3356 struct nx_netif *nif;
3357 struct __kern_packet *pkt_chain;
3358 struct nexus_pkt_stats stats;
3359 sk_protect_t protect;
3360 slot_idx_t ktail;
3361 int err = 0;
3362
3363 KDBG((SK_KTRACE_NETIF_RX_NOTIFY_DEFAULT | DBG_FUNC_START),
3364 SK_KVA(ring));
3365
3366 ASSERT(ring->ckr_tx == NR_RX);
3367 ASSERT(!NA_KERNEL_ONLY(KRNA(ring)) || KR_KERNEL_ONLY(ring));
3368
3369 err = kr_enter(ring, ((flags & NA_NOTEF_CAN_SLEEP) != 0));
3370 if (err != 0) {
3371 /* not a serious error, so no need to be chatty here */
3372 SK_DF(SK_VERB_FSW,
3373 "hwna \"%s\" (0x%llx) kr \"%s\" (0x%llx) krflags 0x%b "
3374 "(%d)", KRNA(ring)->na_name, SK_KVA(KRNA(ring)),
3375 ring->ckr_name, SK_KVA(ring), ring->ckr_flags,
3376 CKRF_BITS, err);
3377 goto out;
3378 }
3379 if (__improbable(KR_DROP(ring))) {
3380 kr_exit(ring);
3381 err = ENODEV;
3382 goto out;
3383 }
3384 hwna = KRNA(ring);
3385 nifna = NIFNA(hwna);
3386 nif = nifna->nifna_netif;
3387 if (__improbable(hwna->na_ifp == NULL)) {
3388 kr_exit(ring);
3389 err = ENODEV;
3390 goto out;
3391 }
3392 protect = sk_sync_protect();
3393 err = ring->ckr_na_sync(ring, p, 0);
3394 if (err != 0 && err != EAGAIN) {
3395 goto put_out;
3396 }
3397
3398 /* read the tail pointer once */
3399 ktail = ring->ckr_ktail;
3400 if (__improbable(ring->ckr_khead == ktail)) {
3401 SK_DF(SK_VERB_FSW | SK_VERB_NOTIFY | SK_VERB_RX,
3402 "how strange, interrupt with no packets on hwna "
3403 "\"%s\" (0x%llx)", KRNA(ring)->na_name, SK_KVA(KRNA(ring)));
3404 goto put_out;
3405 }
3406 ktail = netif_rate_limit(ring, nif->nif_input_rate, ring->ckr_rhead,
3407 ktail, &ring->ckr_rate_limited);
3408
3409 pkt_chain = consume_pkts(ring, ktail);
3410 if (pkt_chain != NULL) {
3411 netif_receive(nifna, pkt_chain, &stats);
3412
3413 if (ring->ckr_netif_mit_stats != NULL &&
3414 stats.nps_pkts != 0 && stats.nps_bytes != 0) {
3415 ring->ckr_netif_mit_stats(ring, stats.nps_pkts,
3416 stats.nps_bytes);
3417 }
3418 }
3419
3420 put_out:
3421 sk_sync_unprotect(protect);
3422 kr_exit(ring);
3423
3424 out:
3425 KDBG((SK_KTRACE_NETIF_RX_NOTIFY_DEFAULT | DBG_FUNC_END),
3426 SK_KVA(ring), err);
3427 return err;
3428 }
3429
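/*
 * Fast-path RX notify used when the driver supports rx_sync_packets.
 * Instead of syncing the dev ring and consuming slots, the completed
 * packet chains are pulled directly into ckr_scratch via
 * nx_rx_sync_packets() and handed to netif_receive() one chain at a
 * time.
 */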
3430 int
3431 netif_rx_notify_fast(struct __kern_channel_ring *ring, struct proc *p,
3432 uint32_t flags)
3433 {
3434 #pragma unused(p, flags)
3435 sk_protect_t protect;
3436 struct nexus_adapter *hwna;
3437 struct nexus_pkt_stats stats;
3438 uint32_t i, count;
3439 int err = 0;
3440
3441 KDBG((SK_KTRACE_NETIF_RX_NOTIFY_FAST | DBG_FUNC_START),
3442 SK_KVA(ring));
3443
3444 /* XXX
3445 * sk_sync_protect() is not needed for this case because
3446 * we are not using the dev ring. Unfortunately lots of
3447 * macros used by fsw still require this.
3448 */
3449 protect = sk_sync_protect();
3450 hwna = KRNA(ring);
3451 count = na_get_nslots(hwna, NR_RX);
3452 err = nx_rx_sync_packets(ring, ring->ckr_scratch, &count);
3453 if (__improbable(err != 0)) {
3454 SK_ERR("nx_rx_sync_packets failed: %d", err);
3455 DTRACE_SKYWALK2(rx__sync__packets__failed,
3456 struct __kern_channel_ring *, ring, int, err);
3457 goto out;
3458 }
3459 DTRACE_SKYWALK1(chain__count, uint32_t, count);
3460 for (i = 0; i < count; i++) {
3461 struct __kern_packet *pkt_chain;
3462
3463 pkt_chain = SK_PTR_ADDR_KPKT(ring->ckr_scratch[i]);
3464 ASSERT(pkt_chain != NULL);
3465 netif_receive(NIFNA(KRNA(ring)), pkt_chain, &stats);
3466
3467 if (ring->ckr_netif_mit_stats != NULL &&
3468 stats.nps_pkts != 0 && stats.nps_bytes != 0) {
3469 ring->ckr_netif_mit_stats(ring, stats.nps_pkts,
3470 stats.nps_bytes);
3471 }
3472 }
3473 out:
3474 sk_sync_unprotect(protect);
3475 KDBG((SK_KTRACE_NETIF_RX_NOTIFY_FAST | DBG_FUNC_END),
3476 SK_KVA(ring), err);
3477 return err;
3478 }
3479
3480
/*
 * Helpers for configuring the NA to operate in a particular mode:
 * netif_hwna_get_notify() picks the RX notify callback for a ring in
 * the given mode, and netif_hwna_config_mode() applies or clears that
 * mode on the adapter.
 */
3484 static channel_ring_notify_t
3485 netif_hwna_get_notify(struct __kern_channel_ring *ring, netif_mode_t mode)
3486 {
3487 channel_ring_notify_t notify = NULL;
3488 boolean_t has_sync_pkts = (sk_rx_sync_packets != 0 &&
3489 nx_has_rx_sync_packets(ring));
3490
3491 if (mode == NETIF_MODE_FSW) {
3492 notify = (has_sync_pkts ? netif_rx_notify_fast :
3493 netif_rx_notify_default);
3494 } else if (mode == NETIF_MODE_LLW) {
3495 notify = (has_sync_pkts ? netif_llw_rx_notify_fast :
3496 netif_llw_rx_notify_default);
3497 }
3498 return notify;
3499 }
3500
3501
3502 static uint32_t
3503 netif_mode_to_flag(netif_mode_t mode)
3504 {
3505 uint32_t flag = 0;
3506
3507 if (mode == NETIF_MODE_FSW) {
3508 flag = NAF_MODE_FSW;
3509 } else if (mode == NETIF_MODE_LLW) {
3510 flag = NAF_MODE_LLW;
3511 }
3512 return flag;
3513 }
3514
3515 static void
3516 netif_hwna_config_mode(struct nexus_adapter *hwna, netif_mode_t mode,
3517 void (*rx)(struct nexus_adapter *, struct __kern_packet *,
3518 struct nexus_pkt_stats *), boolean_t set)
3519 {
3520 uint32_t i;
3521 uint32_t flag;
3522
3523 ASSERT(hwna->na_type == NA_NETIF_DEV ||
3524 hwna->na_type == NA_NETIF_COMPAT_DEV);
3525
3526 for (i = 0; i < na_get_nrings(hwna, NR_RX); i++) {
3527 struct __kern_channel_ring *kr = &NAKR(hwna, NR_RX)[i];
3528 channel_ring_notify_t notify = netif_hwna_get_notify(kr, mode);
3529
3530 if (set) {
3531 kr->ckr_save_notify = kr->ckr_netif_notify;
3532 kr->ckr_netif_notify = notify;
3533 } else {
3534 kr->ckr_netif_notify = kr->ckr_save_notify;
3535 kr->ckr_save_notify = NULL;
3536 }
3537 }
3538 if (set) {
3539 hwna->na_rx = rx;
3540 flag = netif_mode_to_flag(mode);
3541 atomic_bitset_32(&hwna->na_flags, flag);
3542 } else {
3543 hwna->na_rx = NULL;
3544 atomic_bitclear_32(&hwna->na_flags,
3545 (NAF_MODE_FSW | NAF_MODE_LLW));
3546 }
3547 }
3548
3549 void
3550 netif_hwna_set_mode(struct nexus_adapter *hwna, netif_mode_t mode,
3551 void (*rx)(struct nexus_adapter *, struct __kern_packet *,
3552 struct nexus_pkt_stats *))
3553 {
	netif_hwna_config_mode(hwna, mode, rx, TRUE);
3555 }
3556
3557 void
3558 netif_hwna_clear_mode(struct nexus_adapter *hwna)
3559 {
	netif_hwna_config_mode(hwna, NETIF_MODE_NONE, NULL, FALSE);
3561 }
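
/*
 * Illustrative sketch (not part of the build): a client that takes over
 * the device adapter, e.g. the flowswitch, is expected to pair the set
 * and clear calls around its ownership of the adapter.  "client_rx_cb"
 * below is a placeholder with the na_rx signature, not a function that
 * exists in this file.
 *
 *	static void
 *	client_rx_cb(struct nexus_adapter *hwna, struct __kern_packet *chain,
 *	    struct nexus_pkt_stats *stats);
 *
 *	netif_hwna_set_mode(hwna, NETIF_MODE_FSW, client_rx_cb);
 *	...
 *	netif_hwna_clear_mode(hwna);
 */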
3562
3563 static void
3564 netif_inject_rx(struct nexus_adapter *na, struct __kern_packet *pkt_chain)
3565 {
3566 struct nexus_netif_adapter *nifna = NIFNA(na);
3567 struct nx_netif *nif = nifna->nifna_netif;
3568 struct netif_stats *nifs = &nif->nif_stats;
3569 struct __kern_channel_ring *r;
3570 struct nexus_pkt_stats stats;
3571 sk_protect_t protect;
3572 boolean_t ring_drop = FALSE;
3573 int err, dropcnt;
3574
3575 if (!NA_OWNED_BY_FSW(na)) {
3576 DTRACE_SKYWALK1(fsw__disabled, struct nexus_adapter *, na);
3577 goto fail;
3578 }
3579 ASSERT(na->na_rx != NULL);
3580
3581 /*
3582 * XXX
3583 * This function is called when a filter injects a packet back to the
3584 * regular RX path. We can assume the ring is 0 for now because RSS
3585 * is not supported. This needs to be revisited when we add support for
3586 * RSS.
3587 */
3588 r = &na->na_rx_rings[0];
3589 ASSERT(r->ckr_tx == NR_RX);
3590 err = kr_enter(r, TRUE);
3591 VERIFY(err == 0);
3592
3593 if (__improbable(KR_DROP(r))) {
3594 kr_exit(r);
3595 DTRACE_SKYWALK2(ring__drop, struct nexus_adapter *, na,
3596 struct __kern_channel_ring *, r);
3597 ring_drop = TRUE;
3598 goto fail;
3599 }
3600 protect = sk_sync_protect();
3601 na->na_rx(na, pkt_chain, &stats);
3602
3603 if (r->ckr_netif_mit_stats != NULL &&
3604 stats.nps_pkts != 0 && stats.nps_bytes != 0) {
3605 r->ckr_netif_mit_stats(r, stats.nps_pkts, stats.nps_bytes);
3606 }
3607 sk_sync_unprotect(protect);
3608
3609 kr_exit(r);
3610 return;
3611
3612 fail:
3613 dropcnt = 0;
3614 nx_netif_free_packet_chain(pkt_chain, &dropcnt);
3615 if (ring_drop) {
3616 STATS_ADD(nifs, NETIF_STATS_DROP_KRDROP_MODE, dropcnt);
3617 }
3618 STATS_ADD(nifs, NETIF_STATS_DROP, dropcnt);
3619 }
3620
3621 /*
3622 * This is called when an inbound packet has traversed all filters.
3623 */
3624 errno_t
3625 nx_netif_filter_rx_cb(struct nexus_netif_adapter *nifna,
3626 struct __kern_packet *fpkt_chain, uint32_t flags)
3627 {
3628 #pragma unused (flags)
3629 struct nx_netif *nif = nifna->nifna_netif;
3630 struct netif_stats *nifs = &nif->nif_stats;
3631 struct nexus_adapter *na = &nifna->nifna_up;
3632 struct __kern_packet *pkt_chain;
3633 int err;
3634
3635 pkt_chain = nx_netif_filter_pkt_to_pkt_chain(nifna,
3636 fpkt_chain, NETIF_CONVERT_RX);
3637 if (pkt_chain == NULL) {
3638 return ENOMEM;
3639 }
3640 if (nif->nif_flow_cnt > 0) {
3641 struct __kern_packet *remain = NULL;
3642
3643 err = nx_netif_demux(nifna, pkt_chain, &remain,
3644 NETIF_FLOW_INJECT);
3645 if (remain == NULL) {
3646 return err;
3647 }
3648 pkt_chain = remain;
3649 }
3650 if (na->na_rx != NULL) {
3651 netif_inject_rx(na, pkt_chain);
3652 } else {
3653 int dropcnt = 0;
3654 nx_netif_free_packet_chain(pkt_chain, &dropcnt);
3655 STATS_ADD(nifs,
3656 NETIF_STATS_FILTER_DROP_NO_RX_CB, dropcnt);
3657 STATS_ADD(nifs, NETIF_STATS_DROP, dropcnt);
3658 }
3659 return 0;
3660 }
3661
3662 /*
3663 * This is called when an outbound packet has traversed all filters.
3664 */
3665 errno_t
3666 nx_netif_filter_tx_cb(struct nexus_netif_adapter *nifna,
3667 struct __kern_packet *fpkt_chain, uint32_t flags)
3668 {
3669 #pragma unused (flags)
3670 struct nx_netif *nif = nifna->nifna_netif;
3671 struct nexus_adapter *na = &nifna->nifna_up;
3672 int err;
3673
3674 if (NETIF_IS_COMPAT(nif)) {
3675 struct mbuf *m_chain;
3676 mbuf_svc_class_t sc;
3677
3678 m_chain = nx_netif_filter_pkt_to_mbuf_chain(nifna,
3679 fpkt_chain, NETIF_CONVERT_TX);
3680 if (m_chain == NULL) {
3681 return ENOMEM;
3682 }
3683 /*
3684 * All packets in the chain have the same service class.
3685 * If the sc is missing or invalid, a valid value will be
3686 * returned.
3687 */
3688 sc = mbuf_get_service_class(m_chain);
3689 err = nx_netif_filter_tx_processed_mbuf_enqueue(nifna,
3690 sc, m_chain);
3691 } else {
3692 struct __kern_packet *pkt_chain;
3693 kern_packet_svc_class_t sc;
3694
3695 pkt_chain = nx_netif_filter_pkt_to_pkt_chain(nifna,
3696 fpkt_chain, NETIF_CONVERT_TX);
3697 if (pkt_chain == NULL) {
3698 return ENOMEM;
3699 }
3700 /*
3701 * All packets in the chain have the same service class.
3702 * If the sc is missing or invalid, a valid value will be
3703 * returned.
3704 */
3705 sc = kern_packet_get_service_class(SK_PKT2PH(pkt_chain));
3706 err = nx_netif_filter_tx_processed_pkt_enqueue(nifna,
3707 sc, pkt_chain);
3708 }
3709 /* Tell driver to resume dequeuing */
3710 ifnet_start(na->na_ifp);
3711 return err;
3712 }
3713
3714 void
3715 nx_netif_vp_region_params_adjust(struct nexus_adapter *na,
3716 struct skmem_region_params *srp)
3717 {
3718 #pragma unused(na, srp)
3719 return;
3720 }
3721
/* Returns true if the starter thread is utilized. */
3723 static bool
3724 netif_use_starter_thread(struct ifnet *ifp, uint32_t flags)
3725 {
3726 #if (DEVELOPMENT || DEBUG)
3727 if (__improbable(nx_netif_force_ifnet_start != 0)) {
3728 ifnet_start(ifp);
3729 return true;
3730 }
#endif /* DEVELOPMENT || DEBUG */
	/*
	 * Use the starter thread in the following cases:
	 * - the interface is not Skywalk-native
	 * - the interface is attached to a virtual driver (ipsec, utun)
	 * - TBR is enabled
	 * - the delayed start mechanism is in use
	 * - the remaining stack space on this thread is not enough for the driver
	 * - the caller is in the RX workloop context
	 * - the caller is on the flowswitch path doing ARP resolution
	 * - the caller requires the starter thread (stack usage)
	 */
3743 if (!SKYWALK_NATIVE(ifp) || NA(ifp) == NULL ||
3744 !NA_IS_ACTIVE(&NA(ifp)->nifna_up) ||
3745 ((NA(ifp)->nifna_up.na_flags & NAF_VIRTUAL_DEVICE) != 0) ||
3746 IFCQ_TBR_IS_ENABLED(ifp->if_snd) ||
3747 (ifp->if_eflags & IFEF_ENQUEUE_MULTI) ||
3748 sk_is_rx_notify_protected() ||
3749 sk_is_async_transmit_protected() ||
3750 (sk_is_sync_protected() && (flags & NETIF_XMIT_FLAG_HOST) != 0)) {
3751 DTRACE_SKYWALK2(use__starter__thread, struct ifnet *, ifp,
3752 uint32_t, flags);
3753 ifnet_start(ifp);
3754 return true;
3755 }
3756 lck_mtx_lock_spin(&ifp->if_start_lock);
3757 /* interface is flow controlled */
3758 if (__improbable(ifp->if_start_flags & IFSF_FLOW_CONTROLLED)) {
3759 lck_mtx_unlock(&ifp->if_start_lock);
3760 return true;
3761 }
3762 /* if starter thread is active, utilize it */
3763 if (ifp->if_start_active) {
3764 ifp->if_start_req++;
3765 lck_mtx_unlock(&ifp->if_start_lock);
3766 return true;
3767 }
3768 lck_mtx_unlock(&ifp->if_start_lock);
3769 /* Check remaining stack space */
	if (OSKernelStackRemaining() < NX_NETIF_MIN_DRIVER_STACK_SIZE) {
3771 ifnet_start(ifp);
3772 return true;
3773 }
3774 return false;
3775 }
3776
3777 void
3778 netif_transmit(struct ifnet *ifp, uint32_t flags)
3779 {
3780 if (netif_use_starter_thread(ifp, flags)) {
3781 return;
3782 }
3783 /*
3784 * If no longer attached, don't issue doorbell as ifp
3785 * is being destroyed; else hold an IO refcnt to
3786 * prevent the interface from being detached.
3787 */
3788 if (!ifnet_datamov_begin(ifp)) {
3789 return;
3790 }
3791 nx_netif_doorbell_internal(ifp, flags);
3792 /*
3793 * Release the IO refcnt taken above.
3794 */
3795 ifnet_datamov_end(ifp);
3796 }
3797
3798 static struct ifclassq *
3799 netif_get_default_ifcq(struct nexus_adapter *hwna)
3800 {
3801 struct nx_netif *nif;
3802 struct ifclassq *ifcq;
3803
3804 nif = NX_NETIF_PRIVATE(hwna->na_nx);
3805 if (NETIF_LLINK_ENABLED(nif)) {
3806 struct netif_qset *qset;
3807
3808 /*
3809 * Use the default ifcq for now.
3810 * In the future this could be chosen by the caller.
3811 */
3812 qset = nx_netif_get_default_qset_noref(nif);
3813 ASSERT(qset != NULL);
3814 ifcq = qset->nqs_ifcq;
3815 } else {
3816 ifcq = nif->nif_ifp->if_snd;
3817 }
3818 return ifcq;
3819 }
3820
3821 static errno_t
3822 netif_deq_packets(struct nexus_adapter *hwna, struct ifclassq *ifcq,
3823 uint32_t pkt_limit, uint32_t byte_limit, struct __kern_packet **head,
3824 boolean_t *pkts_pending, kern_packet_svc_class_t sc)
3825 {
3826 classq_pkt_t pkt_head = CLASSQ_PKT_INITIALIZER(pkt_head);
3827 struct ifnet *ifp = hwna->na_ifp;
3828 errno_t rc;
3829
3830 ASSERT(ifp != NULL);
3831 ASSERT(ifp->if_output_sched_model < IFNET_SCHED_MODEL_MAX);
3832 ASSERT((pkt_limit != 0) && (byte_limit != 0));
3833
3834 if (ifcq == NULL) {
3835 ifcq = netif_get_default_ifcq(hwna);
3836 }
3837 if (ifp->if_output_sched_model == IFNET_SCHED_MODEL_DRIVER_MANAGED) {
3838 rc = ifclassq_dequeue_sc(ifcq, (mbuf_svc_class_t)sc,
3839 pkt_limit, byte_limit, &pkt_head, NULL, NULL, NULL);
3840 } else {
3841 rc = ifclassq_dequeue(ifcq, pkt_limit, byte_limit,
3842 &pkt_head, NULL, NULL, NULL);
3843 }
3844 ASSERT((rc == 0) || (rc == EAGAIN));
3845 ASSERT((pkt_head.cp_ptype == QP_PACKET) || (pkt_head.cp_kpkt == NULL));
3846
3847 if (IFCQ_LEN(ifcq) != 0) {
3848 *pkts_pending = TRUE;
3849 } else {
3850 *pkts_pending = FALSE;
3851 }
3852
3853 *head = pkt_head.cp_kpkt;
3854 return rc;
3855 }
3856
3857 #if SK_LOG
3858 /* Hoisted out of line to reduce kernel stack footprint */
3859 SK_LOG_ATTRIBUTE
3860 static void
3861 netif_no_ring_space_log(const struct nexus_adapter *na,
3862 const kern_channel_ring_t ring)
3863 {
	SK_DF(SK_VERB_SYNC | SK_VERB_TX,
	    "no ring space: na \"%s\" [%u] "
	    "\"%s\"(kh %u kt %u kl %u | rh %u rt %u)",
	    na->na_name, ring->ckr_ring_id,
	    ring->ckr_name, ring->ckr_khead,
	    ring->ckr_ktail, ring->ckr_klease,
	    ring->ckr_rhead, ring->ckr_rtail);
3872 }
3873 #endif /* SK_LOG */
3874
/*
 * netif refill function for TX rings: dequeues packets from the
 * interface output queues (AQM) and attaches them to slots on the
 * device TX ring.
 */
3878 errno_t
3879 netif_ring_tx_refill(const kern_channel_ring_t ring, uint32_t pkt_limit,
3880 uint32_t byte_limit, boolean_t tx_doorbell_ctxt, boolean_t *pkts_pending,
3881 boolean_t canblock)
3882 {
3883 struct nexus_adapter *hwna;
3884 struct ifnet *ifp;
3885 struct __kern_packet *head = NULL;
3886 sk_protect_t protect;
3887 errno_t rc = 0;
3888 errno_t sync_err = 0;
3889 uint32_t npkts = 0, consumed = 0;
3890 uint32_t flags;
3891 slot_idx_t idx, ktail;
3892 int ring_space = 0;
3893
3894 KDBG((SK_KTRACE_NETIF_RING_TX_REFILL | DBG_FUNC_START), SK_KVA(ring));
3895
3896 VERIFY(ring != NULL);
3897 hwna = KRNA(ring);
3898 ifp = hwna->na_ifp;
3899
3900 ASSERT(hwna->na_type == NA_NETIF_DEV);
3901 ASSERT(ring->ckr_tx == NR_TX);
3902 *pkts_pending = FALSE;
3903
3904 if (__improbable(pkt_limit == 0 || byte_limit == 0)) {
3905 SK_ERR("invalid limits plim %d, blim %d",
3906 pkt_limit, byte_limit);
3907 rc = EINVAL;
3908 goto out;
3909 }
3910
3911 if (__improbable(!IF_FULLY_ATTACHED(ifp))) {
3912 SK_ERR("hwna 0x%llx ifp %s (0x%llx), interface not attached",
3913 SK_KVA(hwna), if_name(ifp), SK_KVA(ifp));
3914 rc = ENXIO;
3915 goto out;
3916 }
3917
3918 if (__improbable((ifp->if_start_flags & IFSF_FLOW_CONTROLLED) != 0)) {
3919 SK_DF(SK_VERB_SYNC | SK_VERB_TX, "hwna 0x%llx ifp %s (0x%llx), "
3920 "flow control ON", SK_KVA(hwna), if_name(ifp), SK_KVA(ifp));
3921 rc = ENXIO;
3922 goto out;
3923 }
3924
	/*
	 * If the ring is busy, another dequeue is already in progress;
	 * ignore this request and return success.
	 */
3929 if (kr_enter(ring, canblock) != 0) {
3930 rc = 0;
3931 goto out;
3932 }
3933 /* mark thread with sync-in-progress flag */
3934 protect = sk_sync_protect();
3935
3936 if (__improbable(KR_DROP(ring) ||
3937 !NA_IS_ACTIVE(ring->ckr_na))) {
3938 SK_ERR("hw-kr 0x%llx stopped", SK_KVA(ring));
3939 rc = ENXIO;
3940 goto done;
3941 }
3942
3943 idx = ring->ckr_rhead;
3944 ktail = ring->ckr_ktail;
3945 /* calculate available space on tx ring */
3946 ring_space = ktail - idx;
3947 if (ring_space < 0) {
3948 ring_space += ring->ckr_num_slots;
3949 }
3950 if (ring_space == 0) {
3951 struct ifclassq *ifcq;
3952
3953 /* no space in ring, driver should retry */
3954 #if SK_LOG
3955 if (__improbable((sk_verbose &
3956 (SK_VERB_SYNC | SK_VERB_TX)) != 0)) {
3957 netif_no_ring_space_log(hwna, ring);
3958 }
3959 #endif /* SK_LOG */
3960 ifcq = netif_get_default_ifcq(hwna);
3961 if (IFCQ_LEN(ifcq) != 0) {
3962 *pkts_pending = TRUE;
3963 }
		/*
		 * We ran out of space in the ring, most probably because
		 * the driver is slow to drain its TX queue. We want another
		 * doorbell to be generated as soon as the TX notify
		 * completion happens; mark this via the ckr_pending_doorbell
		 * counter. Do this regardless of whether there are any
		 * pending packets.
		 */
3972 ring->ckr_pending_doorbell++;
3973 rc = EAGAIN;
3974 goto sync_ring;
3975 }
3976
3977 if ((uint32_t)ring_space < pkt_limit) {
3978 pkt_limit = ring_space;
3979 }
3980
3981 if (tx_doorbell_ctxt &&
3982 ((hwna->na_flags & NAF_VIRTUAL_DEVICE) == 0)) {
3983 pkt_limit = MIN(pkt_limit,
3984 nx_netif_doorbell_max_dequeue);
3985 }
3986
3987 rc = netif_deq_packets(hwna, NULL, pkt_limit, byte_limit,
3988 &head, pkts_pending, ring->ckr_svc);
3989
3990 /*
3991 * There's room in ring; if we haven't dequeued everything,
3992 * mark ckr_pending_doorbell for the next TX notify to issue
3993 * a TX door bell; otherwise, clear it. The next packet that
3994 * gets enqueued will trigger a door bell again.
3995 */
3996 if (*pkts_pending) {
3997 ring->ckr_pending_doorbell++;
3998 } else if (ring->ckr_pending_doorbell != 0) {
3999 ring->ckr_pending_doorbell = 0;
4000 }
4001
4002 if (rc != 0) {
		/*
		 * This is occasionally expected, as IOSkywalkFamily errs on
		 * the side of caution and performs an extra dequeue when
		 * multiple doorbells are pending. There is nothing to
		 * dequeue; do a sync if there are slots to reclaim,
		 * otherwise just return.
		 */
4010 SK_DF(SK_VERB_SYNC | SK_VERB_TX,
4011 "nothing to dequeue, err %d", rc);
4012
4013 if ((uint32_t)ring_space == ring->ckr_lim) {
4014 goto done;
4015 } else {
4016 goto sync_ring;
4017 }
4018 }
4019 /* move the dequeued packets to tx ring */
4020 while (head != NULL && idx != ktail) {
4021 ASSERT(npkts <= pkt_limit);
4022 struct __kern_packet *pkt = head;
4023 KR_SLOT_ATTACH_METADATA(ring, KR_KSD(ring, idx),
4024 (struct __kern_quantum *)pkt);
4025 npkts++;
4026 if (__improbable(pkt->pkt_trace_id != 0)) {
4027 KDBG(SK_KTRACE_PKT_TX_AQM | DBG_FUNC_END, pkt->pkt_trace_id);
4028 KDBG(SK_KTRACE_PKT_TX_DRV | DBG_FUNC_START, pkt->pkt_trace_id);
4029 }
4030 idx = SLOT_NEXT(idx, ring->ckr_lim);
4031 head = pkt->pkt_nextpkt;
4032 pkt->pkt_nextpkt = NULL;
4033 }
4034
4035 /*
4036 * We checked for ring space earlier so the ring should have enough
4037 * space for the entire chain.
4038 */
4039 ASSERT(head == NULL);
4040 ring->ckr_rhead = idx;
4041
4042 sync_ring:
4043 flags = NA_SYNCF_NETIF;
4044 if (ring->ckr_pending_doorbell != 0) {
4045 flags |= (NA_SYNCF_NETIF_DOORBELL | NA_SYNCF_NETIF_ASYNC);
4046 }
4047
4048 ring->ckr_khead_pre = ring->ckr_khead;
4049 sync_err = ring->ckr_na_sync(ring, kernproc, flags);
4050 if (sync_err != 0 && sync_err != EAGAIN) {
4051 SK_ERR("unexpected sync err %d", sync_err);
4052 if (rc == 0) {
4053 rc = sync_err;
4054 }
4055 goto done;
4056 }
4057 /*
4058 * Verify that the driver has detached packets from the consumed slots.
4059 */
4060 idx = ring->ckr_khead_pre;
4061 consumed = 0;
4062 while (idx != ring->ckr_khead) {
4063 struct __kern_slot_desc *ksd = KR_KSD(ring, idx);
4064
4065 consumed++;
4066 VERIFY(!KSD_VALID_METADATA(ksd));
4067 idx = SLOT_NEXT(idx, ring->ckr_lim);
4068 }
4069 ring->ckr_khead_pre = ring->ckr_khead;
4070
4071 done:
4072 sk_sync_unprotect(protect);
4073 kr_exit(ring);
4074 out:
4075 KDBG((SK_KTRACE_NETIF_RING_TX_REFILL | DBG_FUNC_END),
4076 SK_KVA(ring), rc, 0, npkts);
4077
4078 return rc;
4079 }
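
/*
 * Illustrative sketch (not part of the build): a native driver's TX
 * doorbell or TX-completion path typically reaches this function through
 * the kern_channel_tx_refill() KPI, assumed here to wrap
 * netif_ring_tx_refill() with canblock set to FALSE:
 *
 *	boolean_t pending = FALSE;
 *	errno_t err;
 *
 *	err = kern_channel_tx_refill(ring, UINT32_MAX, UINT32_MAX,
 *	    FALSE, &pending);
 *	if (err == EAGAIN || pending) {
 *		// packets remain in AQM or the ring filled up; the next
 *		// TX notify completion will trigger another doorbell
 *	}
 */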
4080
4081 void
4082 kern_netif_queue_rx_enqueue(kern_netif_queue_t queue, kern_packet_t ph_chain,
4083 uint32_t count, uint32_t flags)
4084 {
4085 #pragma unused (count)
4086 struct netif_queue *q = queue;
4087 struct netif_llink *llink = q->nq_qset->nqs_llink;
4088 struct __kern_packet *pkt_chain = SK_PTR_ADDR_KPKT(ph_chain);
4089 bool flush = ((flags & KERN_NETIF_QUEUE_RX_ENQUEUE_FLAG_FLUSH) != 0);
4090 struct pktq *pktq = &q->nq_pktq;
4091 struct netif_stats *nifs = &llink->nll_nif->nif_stats;
4092 struct nexus_pkt_stats stats;
4093 sk_protect_t protect;
4094
4095 ASSERT((q->nq_flags & NETIF_QUEUE_IS_RX) != 0);
4096 if (llink->nll_state == NETIF_LLINK_STATE_DESTROYED) {
4097 int drop_cnt = 0;
4098
4099 pp_free_packet_chain(pkt_chain, &drop_cnt);
4100 STATS_ADD(nifs, NETIF_STATS_LLINK_RX_DROP_BAD_STATE, drop_cnt);
4101 return;
4102 }
4103 KPKTQ_ENQUEUE_LIST(pktq, pkt_chain);
4104 if (flush) {
4105 pkt_chain = KPKTQ_FIRST(pktq);
4106 KPKTQ_INIT(pktq);
4107
4108 protect = sk_sync_protect();
4109 netif_receive(NA(llink->nll_nif->nif_ifp), pkt_chain, &stats);
4110 sk_sync_unprotect(protect);
4111 }
4112 }
4113
4114 errno_t
4115 kern_netif_queue_tx_dequeue(kern_netif_queue_t queue, uint32_t pkt_limit,
4116 uint32_t byte_limit, boolean_t *pending, kern_packet_t *ph_chain)
4117 {
4118 struct netif_queue *q = queue;
4119 struct netif_llink *llink = q->nq_qset->nqs_llink;
4120 struct netif_stats *nifs = &llink->nll_nif->nif_stats;
4121 struct nexus_adapter *hwna;
4122 struct __kern_packet *pkt_chain = NULL;
4123 errno_t rc;
4124
4125 ASSERT((q->nq_flags & NETIF_QUEUE_IS_RX) == 0);
4126 if (llink->nll_state == NETIF_LLINK_STATE_DESTROYED) {
4127 STATS_INC(nifs, NETIF_STATS_LLINK_AQM_DEQ_BAD_STATE);
4128 return ENXIO;
4129 }
4130 hwna = &NA(llink->nll_nif->nif_ifp)->nifna_up;
4131
4132 if (((hwna->na_flags & NAF_VIRTUAL_DEVICE) == 0) &&
4133 sk_is_tx_notify_protected()) {
4134 pkt_limit = MIN(pkt_limit, nx_netif_doorbell_max_dequeue);
4135 }
4136 rc = netif_deq_packets(hwna, q->nq_qset->nqs_ifcq, pkt_limit,
4137 byte_limit, &pkt_chain, pending, q->nq_svc);
4138
4139 if (pkt_chain != NULL) {
4140 *ph_chain = SK_PKT2PH(pkt_chain);
4141 }
4142 return rc;
4143 }
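
/*
 * Illustrative sketch (not part of the build): a logical-link capable
 * driver pairs the two queue KPIs above from its own RX and TX paths;
 * "rxq", "txq", "pkt_limit" and "byte_limit" are placeholders supplied
 * by the driver.
 *
 *	// RX: hand a received chain to the netif RX queue and flush it
 *	kern_netif_queue_rx_enqueue(rxq, ph_chain, count,
 *	    KERN_NETIF_QUEUE_RX_ENQUEUE_FLAG_FLUSH);
 *
 *	// TX: pull packets destined for this queue out of AQM
 *	kern_packet_t chain = 0;
 *	boolean_t pending = FALSE;
 *	if (kern_netif_queue_tx_dequeue(txq, pkt_limit, byte_limit,
 *	    &pending, &chain) == 0 && chain != 0) {
 *		// hand "chain" to the hardware for transmission
 *	}
 */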
4144
4145 errno_t
4146 kern_nexus_netif_llink_add(struct kern_nexus *nx,
4147 struct kern_nexus_netif_llink_init *llink_init)
4148 {
4149 errno_t err;
4150 struct nx_netif *nif;
4151 struct netif_llink *llink;
4152 struct netif_stats *nifs;
4153
4154 VERIFY(nx != NULL);
4155 VERIFY(llink_init != NULL);
4156 VERIFY((nx->nx_flags & NXF_ATTACHED) != 0);
4157
4158 nif = NX_NETIF_PRIVATE(nx);
4159 nifs = &nif->nif_stats;
4160
4161 err = nx_netif_validate_llink_config(llink_init, false);
4162 if (err != 0) {
4163 SK_ERR("Invalid llink init params");
4164 STATS_INC(nifs, NETIF_STATS_LLINK_ADD_BAD_PARAMS);
4165 return err;
4166 }
4167
4168 err = nx_netif_llink_add(nif, llink_init, &llink);
4169 return err;
4170 }
4171
4172 errno_t
4173 kern_nexus_netif_llink_remove(struct kern_nexus *nx,
4174 kern_nexus_netif_llink_id_t llink_id)
4175 {
4176 struct nx_netif *nif;
4177
4178 VERIFY(nx != NULL);
4179 VERIFY((nx->nx_flags & NXF_ATTACHED) != 0);
4180
4181 nif = NX_NETIF_PRIVATE(nx);
4182 return nx_netif_llink_remove(nif, llink_id);
4183 }
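
/*
 * Illustrative sketch (not part of the build): a driver that creates a
 * logical link at runtime pairs the add and remove calls; the identifier
 * passed to remove is the one the driver supplied in llink_init.
 *
 *	struct kern_nexus_netif_llink_init llink_init;
 *	kern_nexus_netif_llink_id_t llink_id;
 *
 *	// fill in llink_init (link ID, queue set layout, etc.)
 *	err = kern_nexus_netif_llink_add(nx, &llink_init);
 *	...
 *	err = kern_nexus_netif_llink_remove(nx, llink_id);
 */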
4184
4185 errno_t
4186 kern_netif_queue_get_service_class(kern_netif_queue_t queue,
4187 kern_packet_svc_class_t *svc)
4188 {
4189 *svc = queue->nq_svc;
4190 return 0;
4191 }
4192