1 /*
2 * Copyright (c) 2015-2022 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 /*
30 * if_fake.c
31 * - fake network interface used for testing
32 * - "feth" (e.g. "feth0", "feth1") is a virtual ethernet interface that allows
33 * two instances to have their output/input paths "crossed-over" so that
34 * output on one is input on the other
35 */
36
37 /*
38 * Modification History:
39 *
40 * September 9, 2015 Dieter Siegmund ([email protected])
41 * - created
42 */
43
44 #include <sys/param.h>
45 #include <sys/kernel.h>
46 #include <sys/malloc.h>
47 #include <sys/mbuf.h>
48 #include <sys/queue.h>
49 #include <sys/socket.h>
50 #include <sys/sockio.h>
51 #include <sys/sysctl.h>
52 #include <sys/systm.h>
53 #include <sys/kern_event.h>
54 #include <sys/mcache.h>
55 #include <sys/syslog.h>
56
57 #include <net/bpf.h>
58 #include <net/ethernet.h>
59 #include <net/if.h>
60 #include <net/if_vlan_var.h>
61 #include <net/if_fake_var.h>
62 #include <net/if_arp.h>
63 #include <net/if_dl.h>
64 #include <net/if_ether.h>
65 #include <net/if_types.h>
66 #include <libkern/OSAtomic.h>
67
68 #include <net/dlil.h>
69
70 #include <net/kpi_interface.h>
71 #include <net/kpi_protocol.h>
72
73 #include <kern/locks.h>
74 #include <kern/zalloc.h>
75
76 #ifdef INET
77 #include <netinet/in.h>
78 #include <netinet/if_ether.h>
79 #endif
80
81 #include <net/if_media.h>
82 #include <net/ether_if_module.h>
83 #if SKYWALK
84 #include <skywalk/os_skywalk_private.h>
85 #include <skywalk/nexus/netif/nx_netif.h>
86 #include <skywalk/channel/channel_var.h>
87 #endif /* SKYWALK */
88
89 static boolean_t
is_power_of_two(unsigned int val)90 is_power_of_two(unsigned int val)
91 {
92 return (val & (val - 1)) == 0;
93 }
94
#define FAKE_ETHER_NAME "feth"

/*
 * net.link.fake sysctl tunables.  These simple integer knobs are read
 * when a fake interface is created/configured (debug/trace knobs are
 * consulted at runtime).
 */
SYSCTL_DECL(_net_link);
SYSCTL_NODE(_net_link, OID_AUTO, fake, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
    "Fake interface");

static int if_fake_txstart = 1;
SYSCTL_INT(_net_link_fake, OID_AUTO, txstart, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_txstart, 0, "Fake interface TXSTART mode");

static int if_fake_hwcsum = 0;
SYSCTL_INT(_net_link_fake, OID_AUTO, hwcsum, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_hwcsum, 0, "Fake interface simulate hardware checksum");

static int if_fake_nxattach = 0;
SYSCTL_INT(_net_link_fake, OID_AUTO, nxattach, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_nxattach, 0, "Fake interface auto-attach nexus");

static int if_fake_bsd_mode = 1;
SYSCTL_INT(_net_link_fake, OID_AUTO, bsd_mode, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_bsd_mode, 0, "Fake interface attach as BSD interface");

static int if_fake_debug = 0;
SYSCTL_INT(_net_link_fake, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_debug, 0, "Fake interface debug logs");

static int if_fake_wmm_mode = 0;
SYSCTL_INT(_net_link_fake, OID_AUTO, wmm_mode, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_wmm_mode, 0, "Fake interface in 802.11 WMM mode");

static int if_fake_multibuflet = 0;
SYSCTL_INT(_net_link_fake, OID_AUTO, multibuflet, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_multibuflet, 0, "Fake interface using multi-buflet packets");

static int if_fake_low_latency = 0;
SYSCTL_INT(_net_link_fake, OID_AUTO, low_latency, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_low_latency, 0, "Fake interface with a low latency qset");

static int if_fake_switch_combined_mode = 0;
SYSCTL_INT(_net_link_fake, OID_AUTO, switch_combined_mode,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_fake_switch_combined_mode, 0,
    "Switch a qset between combined and separate mode during dequeues");

static int if_fake_switch_mode_frequency = 10;
SYSCTL_INT(_net_link_fake, OID_AUTO, switch_mode_frequency,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_fake_switch_mode_frequency, 0,
    "The number of dequeues before we switch between the combined and separated mode");

static int if_fake_tso_support = 0;
SYSCTL_INT(_net_link_fake, OID_AUTO, tso_support, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_tso_support, 0, "Fake interface with support for TSO offload");
146
147 typedef enum {
148 IFF_PP_MODE_GLOBAL = 0, /* share a global pool */
149 IFF_PP_MODE_PRIVATE = 1, /* creates its own rx/tx pool */
150 IFF_PP_MODE_PRIVATE_SPLIT = 2, /* creates its own split rx & tx pool */
151 } iff_pktpool_mode_t;
152 static iff_pktpool_mode_t if_fake_pktpool_mode = IFF_PP_MODE_GLOBAL;
153 SYSCTL_INT(_net_link_fake, OID_AUTO, pktpool_mode, CTLFLAG_RW | CTLFLAG_LOCKED,
154 &if_fake_pktpool_mode, IFF_PP_MODE_GLOBAL,
155 "Fake interface packet pool mode (0 global, 1 private, 2 private split");
156
/*
 * sysctl net.link.fake.link_layer_aggregation_factor
 * Scales the rx buffer size in private-split pool mode (see
 * feth_packet_pool_make()).
 * NOTE(review): "AGGRETATION" is a typo in these macro names; left
 * as-is since the macros may be referenced beyond this chunk.
 */
#define FETH_LINK_LAYER_AGGRETATION_FACTOR_MAX 512
#define FETH_LINK_LAYER_AGGRETATION_FACTOR_DEF 96
static int if_fake_link_layer_aggregation_factor =
    FETH_LINK_LAYER_AGGRETATION_FACTOR_DEF;
/*
 * Validate and set the aggregation factor: must be in (0, MAX].
 * Returns EINVAL on out-of-range input, otherwise the
 * sysctl_io_number() status.
 */
static int
feth_link_layer_aggregation_factor_sysctl SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	unsigned int new_value;
	int changed;
	int error;

	error = sysctl_io_number(req, if_fake_link_layer_aggregation_factor,
	    sizeof(if_fake_link_layer_aggregation_factor), &new_value,
	    &changed);
	if (error == 0 && changed != 0) {
		/* new_value is unsigned, so "<= 0" only rejects zero */
		if (new_value <= 0 ||
		    new_value > FETH_LINK_LAYER_AGGRETATION_FACTOR_MAX) {
			return EINVAL;
		}
		if_fake_link_layer_aggregation_factor = new_value;
	}
	return error;
}

SYSCTL_PROC(_net_link_fake, OID_AUTO, link_layer_aggregation_factor,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    0, 0, feth_link_layer_aggregation_factor_sysctl, "IU",
    "Fake interface link layer aggregation factor");
186
187 #define FETH_TX_HEADROOM_MAX 32
188 static unsigned int if_fake_tx_headroom = FETH_TX_HEADROOM_MAX;
189 static int
190 feth_tx_headroom_sysctl SYSCTL_HANDLER_ARGS
191 {
192 #pragma unused(oidp, arg1, arg2)
193 unsigned int new_value;
194 int changed;
195 int error;
196
197 error = sysctl_io_number(req, if_fake_tx_headroom,
198 sizeof(if_fake_tx_headroom), &new_value, &changed);
199 if (error == 0 && changed != 0) {
200 if (new_value > FETH_TX_HEADROOM_MAX ||
201 (new_value % 8) != 0) {
202 return EINVAL;
203 }
204 if_fake_tx_headroom = new_value;
205 }
206 return 0;
207 }
208
209 SYSCTL_PROC(_net_link_fake, OID_AUTO, tx_headroom,
210 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
211 0, 0, feth_tx_headroom_sysctl, "IU", "Fake ethernet Tx headroom");
212
/* sysctl net.link.fake.fcs: append a frame check sequence to frames */
static int if_fake_fcs = 0;
SYSCTL_INT(_net_link_fake, OID_AUTO, fcs, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_fcs, 0, "Fake interface using frame check sequence");
216
217 #define FETH_TRAILER_LENGTH_MAX 28
218 char feth_trailer[FETH_TRAILER_LENGTH_MAX + 1] = "trailertrailertrailertrailer";
219 static unsigned int if_fake_trailer_length = 0;
220 static int
221 feth_trailer_length_sysctl SYSCTL_HANDLER_ARGS
222 {
223 #pragma unused(oidp, arg1, arg2)
224 unsigned int new_value;
225 int changed;
226 int error;
227
228 error = sysctl_io_number(req, if_fake_trailer_length,
229 sizeof(if_fake_trailer_length), &new_value, &changed);
230 if (error == 0 && changed != 0) {
231 if (new_value > FETH_TRAILER_LENGTH_MAX) {
232 return EINVAL;
233 }
234 if_fake_trailer_length = new_value;
235 }
236 return 0;
237 }
238
239 SYSCTL_PROC(_net_link_fake, OID_AUTO, trailer_length,
240 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
241 feth_trailer_length_sysctl, "IU", "Fake interface frame trailer length");
242
/* sysctl net.link.fake.max_mtu */
#define FETH_MAX_MTU_DEFAULT 2048
#define FETH_MAX_MTU_MAX ((16 * 1024) - ETHER_HDR_LEN)

static unsigned int if_fake_max_mtu = FETH_MAX_MTU_DEFAULT;

/* sysctl net.link.fake.buflet_size */
#define FETH_BUFLET_SIZE_MIN 512
#define FETH_BUFLET_SIZE_MAX (32 * 1024)
/* default buflet size when TSO support is enabled */
#define FETH_TSO_BUFLET_SIZE (16 * 1024)

static unsigned int if_fake_buflet_size = FETH_BUFLET_SIZE_MIN;
static unsigned int if_fake_tso_buffer_size = FETH_TSO_BUFLET_SIZE;
256
257 static int
258 feth_tso_buffer_size_sysctl SYSCTL_HANDLER_ARGS
259 {
260 #pragma unused(oidp, arg1, arg2)
261 unsigned int new_value;
262 int changed;
263 int error;
264
265 error = sysctl_io_number(req, if_fake_tso_buffer_size,
266 sizeof(if_fake_tso_buffer_size), &new_value, &changed);
267 if (error == 0 && changed != 0) {
268 /* must be a power of 2 between min and max */
269 if (new_value > FETH_BUFLET_SIZE_MAX ||
270 new_value < FETH_BUFLET_SIZE_MIN ||
271 !is_power_of_two(new_value)) {
272 return EINVAL;
273 }
274 if_fake_tso_buffer_size = new_value;
275 }
276 return 0;
277 }
278
279 SYSCTL_PROC(_net_link_fake, OID_AUTO, tso_buf_size,
280 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
281 0, 0, feth_tso_buffer_size_sysctl, "IU", "Fake interface TSO buffer size");
282
283 static int
284 feth_max_mtu_sysctl SYSCTL_HANDLER_ARGS
285 {
286 #pragma unused(oidp, arg1, arg2)
287 unsigned int new_value;
288 int changed;
289 int error;
290
291 error = sysctl_io_number(req, if_fake_max_mtu,
292 sizeof(if_fake_max_mtu), &new_value, &changed);
293 if (error == 0 && changed != 0) {
294 if (new_value > FETH_MAX_MTU_MAX ||
295 new_value < ETHERMTU ||
296 new_value <= if_fake_buflet_size) {
297 return EINVAL;
298 }
299 if_fake_max_mtu = new_value;
300 }
301 return 0;
302 }
303
304 SYSCTL_PROC(_net_link_fake, OID_AUTO, max_mtu,
305 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
306 0, 0, feth_max_mtu_sysctl, "IU", "Fake interface maximum MTU");
307
308 static int
309 feth_buflet_size_sysctl SYSCTL_HANDLER_ARGS
310 {
311 #pragma unused(oidp, arg1, arg2)
312 unsigned int new_value;
313 int changed;
314 int error;
315
316 error = sysctl_io_number(req, if_fake_buflet_size,
317 sizeof(if_fake_buflet_size), &new_value, &changed);
318 if (error == 0 && changed != 0) {
319 /* must be a power of 2 between min and max */
320 if (new_value > FETH_BUFLET_SIZE_MAX ||
321 new_value < FETH_BUFLET_SIZE_MIN ||
322 !is_power_of_two(new_value) ||
323 new_value >= if_fake_max_mtu) {
324 return EINVAL;
325 }
326 if_fake_buflet_size = new_value;
327 }
328 return 0;
329 }
330
331 SYSCTL_PROC(_net_link_fake, OID_AUTO, buflet_size,
332 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
333 0, 0, feth_buflet_size_sysctl, "IU", "Fake interface buflet size");
334
335 static unsigned int if_fake_user_access = 0;
336
337 static int
338 feth_user_access_sysctl SYSCTL_HANDLER_ARGS
339 {
340 #pragma unused(oidp, arg1, arg2)
341 unsigned int new_value;
342 int changed;
343 int error;
344
345 error = sysctl_io_number(req, if_fake_user_access,
346 sizeof(if_fake_user_access), &new_value, &changed);
347 if (error == 0 && changed != 0) {
348 if (new_value != 0) {
349 if (new_value != 1) {
350 return EINVAL;
351 }
352 }
353 if_fake_user_access = new_value;
354 }
355 return 0;
356 }
357
358 SYSCTL_PROC(_net_link_fake, OID_AUTO, user_access,
359 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
360 0, 0, feth_user_access_sysctl, "IU", "Fake interface user access");
361
362 /* sysctl net.link.fake.if_adv_intvl (unit: millisecond) */
363 #define FETH_IF_ADV_INTVL_MIN 10
364 #define FETH_IF_ADV_INTVL_MAX INT_MAX
365
366 static int if_fake_if_adv_interval = 0; /* no interface advisory */
367 static int
368 feth_if_adv_interval_sysctl SYSCTL_HANDLER_ARGS
369 {
370 #pragma unused(oidp, arg1, arg2)
371 unsigned int new_value;
372 int changed;
373 int error;
374
375 error = sysctl_io_number(req, if_fake_if_adv_interval,
376 sizeof(if_fake_if_adv_interval), &new_value, &changed);
377 if (error == 0 && changed != 0) {
378 if ((new_value != 0) && (new_value > FETH_IF_ADV_INTVL_MAX ||
379 new_value < FETH_IF_ADV_INTVL_MIN)) {
380 return EINVAL;
381 }
382 if_fake_if_adv_interval = new_value;
383 }
384 return 0;
385 }
386
387 SYSCTL_PROC(_net_link_fake, OID_AUTO, if_adv_intvl,
388 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
389 feth_if_adv_interval_sysctl, "IU",
390 "Fake interface will generate interface advisories reports at the specified interval in ms");
391
392 /* sysctl net.link.fake.tx_drops */
393 /*
394 * Fake ethernet will drop packet on the transmit path at the specified
395 * rate, i.e drop one in every if_fake_tx_drops number of packets.
396 */
397 #define FETH_TX_DROPS_MIN 0
398 #define FETH_TX_DROPS_MAX INT_MAX
399 static int if_fake_tx_drops = 0; /* no packets are dropped */
400 static int
401 feth_fake_tx_drops_sysctl SYSCTL_HANDLER_ARGS
402 {
403 #pragma unused(oidp, arg1, arg2)
404 unsigned int new_value;
405 int changed;
406 int error;
407
408 error = sysctl_io_number(req, if_fake_tx_drops,
409 sizeof(if_fake_tx_drops), &new_value, &changed);
410 if (error == 0 && changed != 0) {
411 if (new_value > FETH_TX_DROPS_MAX ||
412 new_value < FETH_TX_DROPS_MIN) {
413 return EINVAL;
414 }
415 if_fake_tx_drops = new_value;
416 }
417 return 0;
418 }
419
420 SYSCTL_PROC(_net_link_fake, OID_AUTO, tx_drops,
421 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
422 feth_fake_tx_drops_sysctl, "IU",
423 "Fake interface will intermittently drop packets on Tx path");
424
425 /* sysctl net.link.fake.tx_completion_mode */
426 typedef enum {
427 IFF_TX_COMPL_MODE_SYNC = 0,
428 IFF_TX_COMPL_MODE_ASYNC = 1,
429 } iff_tx_completion_mode_t;
430 static iff_tx_completion_mode_t if_tx_completion_mode = IFF_TX_COMPL_MODE_SYNC;
431 static int
432 feth_fake_tx_completion_mode_sysctl SYSCTL_HANDLER_ARGS
433 {
434 #pragma unused(oidp, arg1, arg2)
435 unsigned int new_value;
436 int changed;
437 int error;
438
439 error = sysctl_io_number(req, if_tx_completion_mode,
440 sizeof(if_tx_completion_mode), &new_value, &changed);
441 if (error == 0 && changed != 0) {
442 if (new_value > IFF_TX_COMPL_MODE_ASYNC ||
443 new_value < IFF_TX_COMPL_MODE_SYNC) {
444 return EINVAL;
445 }
446 if_tx_completion_mode = new_value;
447 }
448 return 0;
449 }
450 SYSCTL_PROC(_net_link_fake, OID_AUTO, tx_completion_mode,
451 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
452 feth_fake_tx_completion_mode_sysctl, "IU",
453 "Fake interface tx completion mode (0 synchronous, 1 asynchronous)");
454
455 /* sysctl net.link.fake.llink_cnt */
456
457 /* The maximum number of logical links (including default link) */
458 #define FETH_MAX_LLINKS 16
459 /*
460 * The default number of logical links (including default link).
461 * Zero means logical link mode is disabled.
462 */
463 #define FETH_DEF_LLINKS 0
464
465 static uint32_t if_fake_llink_cnt = FETH_DEF_LLINKS;
466 static int
467 feth_fake_llink_cnt_sysctl SYSCTL_HANDLER_ARGS
468 {
469 #pragma unused(oidp, arg1, arg2)
470 unsigned int new_value;
471 int changed;
472 int error;
473
474 error = sysctl_io_number(req, if_fake_llink_cnt,
475 sizeof(if_fake_llink_cnt), &new_value, &changed);
476 if (error == 0 && changed != 0) {
477 if (new_value > FETH_MAX_LLINKS) {
478 return EINVAL;
479 }
480 if_fake_llink_cnt = new_value;
481 }
482 return 0;
483 }
484
485 SYSCTL_PROC(_net_link_fake, OID_AUTO, llink_cnt,
486 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
487 feth_fake_llink_cnt_sysctl, "IU",
488 "Fake interface logical link count");
489
490 /* sysctl net.link.fake.qset_cnt */
491
492 /* The maximum number of qsets for each logical link */
493 #define FETH_MAX_QSETS 16
494 /* The default number of qsets for each logical link */
495 #define FETH_DEF_QSETS 4
496
497 static uint32_t if_fake_qset_cnt = FETH_DEF_QSETS;
498 static int
499 feth_fake_qset_cnt_sysctl SYSCTL_HANDLER_ARGS
500 {
501 #pragma unused(oidp, arg1, arg2)
502 unsigned int new_value;
503 int changed;
504 int error;
505
506 error = sysctl_io_number(req, if_fake_qset_cnt,
507 sizeof(if_fake_qset_cnt), &new_value, &changed);
508 if (error == 0 && changed != 0) {
509 if (new_value == 0 ||
510 new_value > FETH_MAX_QSETS) {
511 return EINVAL;
512 }
513 if_fake_qset_cnt = new_value;
514 }
515 return 0;
516 }
517
518 SYSCTL_PROC(_net_link_fake, OID_AUTO, qset_cnt,
519 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
520 feth_fake_qset_cnt_sysctl, "IU",
521 "Fake interface queue set count");
522
523 /**
524 ** virtual ethernet structures, types
525 **/
526
527 #define IFF_NUM_TX_RINGS_WMM_MODE 4
528 #define IFF_NUM_RX_RINGS_WMM_MODE 1
529 #define IFF_MAX_TX_RINGS IFF_NUM_TX_RINGS_WMM_MODE
530 #define IFF_MAX_RX_RINGS IFF_NUM_RX_RINGS_WMM_MODE
531 #define IFF_NUM_TX_QUEUES_WMM_MODE 4
532 #define IFF_NUM_RX_QUEUES_WMM_MODE 1
533 #define IFF_MAX_TX_QUEUES IFF_NUM_TX_QUEUES_WMM_MODE
534 #define IFF_MAX_RX_QUEUES IFF_NUM_RX_QUEUES_WMM_MODE
535
536 #define IFF_MAX_BATCH_SIZE 32
537
538 typedef uint16_t iff_flags_t;
539 #define IFF_FLAGS_HWCSUM 0x0001
540 #define IFF_FLAGS_BSD_MODE 0x0002
541 #define IFF_FLAGS_DETACHING 0x0004
542 #define IFF_FLAGS_WMM_MODE 0x0008
543 #define IFF_FLAGS_MULTIBUFLETS 0x0010
544 #define IFF_FLAGS_TSO_SUPPORT 0x0020
545
#if SKYWALK

/* netif nexus provider/instance UUIDs for one fake interface */
typedef struct {
	uuid_t fnx_provider;
	uuid_t fnx_instance;
} fake_nx, *fake_nx_t;

/* a single netif queue belonging to a qset */
typedef struct {
	kern_netif_queue_t fq_queue;
} fake_queue;

/* a queue set within a logical link */
typedef struct {
	kern_netif_qset_t fqs_qset; /* provided by xnu */
	fake_queue fqs_rx_queue[IFF_MAX_RX_QUEUES];
	fake_queue fqs_tx_queue[IFF_MAX_TX_QUEUES];
	uint32_t fqs_rx_queue_cnt;
	uint32_t fqs_tx_queue_cnt;
	uint32_t fqs_llink_idx;         /* index of the owning llink */
	uint32_t fqs_idx;               /* index within the llink */
	uint32_t fqs_dequeue_cnt;
	uint64_t fqs_id;
	boolean_t fqs_combined_mode;
} fake_qset;

/* a logical link and its queue sets */
typedef struct {
	uint64_t fl_id;
	uint32_t fl_idx;
	uint32_t fl_qset_cnt;
	fake_qset fl_qset[FETH_MAX_QSETS];
} fake_llink;

/* shared packet pool used by IFF_PP_MODE_GLOBAL interfaces */
static kern_pbufpool_t S_pp;

#define IFF_TT_OUTPUT 0x01 /* generate trace_tag on output */
#define IFF_TT_INPUT 0x02 /* generate trace_tag on input */
static int if_fake_trace_tag_flags = 0;
SYSCTL_INT(_net_link_fake, OID_AUTO, trace_tag, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_trace_tag_flags, 0, "Fake interface generate trace_tag");
/* next tag value to assign; wraps skipping 0 (see feth_packet_set_trace_tag) */
static packet_trace_tag_t if_fake_trace_tag_current = 1;

#endif /* SKYWALK */
587
/*
 * Per-interface soft state for a fake ethernet ("feth") instance.
 * NOTE(review): most fields appear to be guarded by the global feth
 * mutex (see feth_lock()) -- confirm against the accessors below.
 */
struct if_fake {
	char iff_name[IFNAMSIZ]; /* our unique id */
	ifnet_t iff_ifp;
	iff_flags_t iff_flags;
	uint32_t iff_retain_count;      /* feth_retain()/feth_release() */
	ifnet_t iff_peer; /* the other end */
	int iff_media_current;
	int iff_media_active;
	uint32_t iff_media_count;
	int iff_media_list[IF_FAKE_MEDIA_LIST_MAX];
	struct mbuf * iff_pending_tx_packet;    /* deferred tx (BSD mode) */
	boolean_t iff_start_busy;
	unsigned int iff_max_mtu;
	uint32_t iff_fcs;
	uint32_t iff_trailer_length;
#if SKYWALK
	fake_nx iff_nx;
	struct netif_stats *iff_nifs;
	uint32_t iff_nifs_ref;
	uint32_t iff_llink_cnt;
	kern_channel_ring_t iff_rx_ring[IFF_MAX_RX_RINGS];
	kern_channel_ring_t iff_tx_ring[IFF_MAX_TX_RINGS];
	fake_llink *iff_llink __counted_by(FETH_MAX_LLINKS);
	thread_call_t iff_doorbell_tcall;
	thread_call_t iff_if_adv_tcall;
	boolean_t iff_doorbell_tcall_active;
	boolean_t iff_waiting_for_tcall;
	boolean_t iff_channel_connected;
	iff_pktpool_mode_t iff_pp_mode;
	kern_pbufpool_t iff_rx_pp;      /* rx pool (may equal iff_tx_pp) */
	kern_pbufpool_t iff_tx_pp;      /* tx pool */
	uint32_t iff_tx_headroom;
	unsigned int iff_adv_interval;  /* advisory interval, ms; 0 = off */
	uint32_t iff_tx_drop_rate;
	uint32_t iff_tx_pkts_count;
	iff_tx_completion_mode_t iff_tx_completion_mode;
	bool iff_intf_adv_enabled;
	void *iff_intf_adv_kern_ctx;
	kern_nexus_capab_interface_advisory_notify_fn_t iff_intf_adv_notify;
#endif /* SKYWALK */
};

typedef struct if_fake * if_fake_ref;
631
632 static if_fake_ref
633 ifnet_get_if_fake(ifnet_t ifp);
634
635 #define FETH_DPRINTF(fmt, ...) \
636 { if (if_fake_debug != 0) printf("%s " fmt, __func__, ## __VA_ARGS__); }
637
638 static inline boolean_t
feth_in_bsd_mode(if_fake_ref fakeif)639 feth_in_bsd_mode(if_fake_ref fakeif)
640 {
641 return (fakeif->iff_flags & IFF_FLAGS_BSD_MODE) != 0;
642 }
643
644 static inline void
feth_set_detaching(if_fake_ref fakeif)645 feth_set_detaching(if_fake_ref fakeif)
646 {
647 fakeif->iff_flags |= IFF_FLAGS_DETACHING;
648 }
649
650 static inline boolean_t
feth_is_detaching(if_fake_ref fakeif)651 feth_is_detaching(if_fake_ref fakeif)
652 {
653 return (fakeif->iff_flags & IFF_FLAGS_DETACHING) != 0;
654 }
655
656 static int
feth_enable_dequeue_stall(ifnet_t ifp,uint32_t enable)657 feth_enable_dequeue_stall(ifnet_t ifp, uint32_t enable)
658 {
659 int error;
660
661 if (enable != 0) {
662 error = ifnet_disable_output(ifp);
663 } else {
664 error = ifnet_enable_output(ifp);
665 }
666
667 return error;
668 }
669
#if SKYWALK
/* TRUE when the interface is operating in 802.11 WMM mode. */
static inline boolean_t
feth_in_wmm_mode(if_fake_ref fakeif)
{
	iff_flags_t flags = fakeif->iff_flags;

	return (flags & IFF_FLAGS_WMM_MODE) != 0;
}

/* TRUE when packets are built from multiple buflets. */
static inline boolean_t
feth_using_multibuflets(if_fake_ref fakeif)
{
	iff_flags_t flags = fakeif->iff_flags;

	return (flags & IFF_FLAGS_MULTIBUFLETS) != 0;
}
static void feth_detach_netif_nexus(if_fake_ref fakeif);

/* TRUE when a non-zero interface-advisory interval was configured. */
static inline boolean_t
feth_has_intf_advisory_configured(if_fake_ref fakeif)
{
	return fakeif->iff_adv_interval != 0;
}

/* TRUE when TSO offload support was requested for this interface. */
static inline bool
feth_supports_tso(if_fake_ref fakeif)
{
	iff_flags_t flags = fakeif->iff_flags;

	return (flags & IFF_FLAGS_TSO_SUPPORT) != 0;
}
#endif /* SKYWALK */
696
#define FETH_MAXUNIT IF_MAXUNIT
#define FETH_ZONE_MAX_ELEM MIN(IFNETS_MAX, FETH_MAXUNIT)

/* forward declarations for the cloner and interface callbacks */
static int feth_clone_create(struct if_clone *, u_int32_t, void *);
static int feth_clone_destroy(ifnet_t);
static int feth_output(ifnet_t ifp, struct mbuf *m);
static void feth_start(ifnet_t ifp);
static int feth_ioctl(ifnet_t ifp, u_long cmd, void * addr);
static int feth_config(ifnet_t ifp, ifnet_t peer);
static void feth_if_free(ifnet_t ifp);
static void feth_ifnet_set_attrs(if_fake_ref fakeif, ifnet_t ifp);
static void feth_free(if_fake_ref fakeif);

/* "feth" interface cloner: backs `ifconfig feth<N> create/destroy` */
static struct if_clone
    feth_cloner = IF_CLONE_INITIALIZER(FAKE_ETHER_NAME,
    feth_clone_create,
    feth_clone_destroy,
    0,
    FETH_MAXUNIT);
static void interface_link_event(ifnet_t ifp, u_int32_t event_code);
717
/* some media words to pretend to be ethernet */
static int default_media_words[] = {
	IFM_MAKEWORD(IFM_ETHER, 0, 0, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_10G_T, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_2500_T, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_5000_T, IFM_FDX, 0),

	IFM_MAKEWORD(IFM_ETHER, IFM_10G_KX4, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_20G_KR2, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_2500_SX, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_25G_KR, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_40G_SR4, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_50G_CR2, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_56G_R4, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_100G_CR4, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_400G_AUI8, IFM_FDX, 0),
};
/* element count of default_media_words (array is in scope here) */
#define default_media_words_count (sizeof(default_media_words) \
	/ sizeof (default_media_words[0]))
737
738 /**
739 ** veth locks
740 **/
741
742 static LCK_GRP_DECLARE(feth_lck_grp, "fake");
743 static LCK_MTX_DECLARE(feth_lck_mtx, &feth_lck_grp);
744
745 static inline void
feth_lock(void)746 feth_lock(void)
747 {
748 lck_mtx_lock(&feth_lck_mtx);
749 }
750
751 static inline void
feth_unlock(void)752 feth_unlock(void)
753 {
754 lck_mtx_unlock(&feth_lck_mtx);
755 }
756
757 static inline int
get_max_mtu(int bsd_mode,unsigned int max_mtu)758 get_max_mtu(int bsd_mode, unsigned int max_mtu)
759 {
760 unsigned int mtu;
761
762 if (bsd_mode != 0) {
763 mtu = (njcl > 0) ? (M16KCLBYTES - ETHER_HDR_LEN)
764 : MBIGCLBYTES - ETHER_HDR_LEN;
765 if (mtu > max_mtu) {
766 mtu = max_mtu;
767 }
768 } else {
769 mtu = max_mtu;
770 }
771 return mtu;
772 }
773
774 static inline unsigned int
feth_max_mtu(ifnet_t ifp)775 feth_max_mtu(ifnet_t ifp)
776 {
777 if_fake_ref fakeif;
778 unsigned int max_mtu = ETHERMTU;
779
780 feth_lock();
781 fakeif = ifnet_get_if_fake(ifp);
782 if (fakeif != NULL) {
783 max_mtu = fakeif->iff_max_mtu;
784 }
785 feth_unlock();
786 return max_mtu;
787 }
788
/*
 * Final teardown of an if_fake instance; called from feth_release()
 * once the retain count has dropped to zero.  Frees any deferred tx
 * mbuf (BSD mode) or drops the packet-pool references (skywalk mode),
 * then frees the llink array and the structure itself.
 */
static void
feth_free(if_fake_ref fakeif)
{
	VERIFY(fakeif->iff_retain_count == 0);
	if (feth_in_bsd_mode(fakeif)) {
		/* BSD mode may be holding a deferred tx mbuf */
		if (fakeif->iff_pending_tx_packet) {
			m_freem(fakeif->iff_pending_tx_packet);
		}
	}
#if SKYWALK
	else {
		if (fakeif->iff_pp_mode == IFF_PP_MODE_GLOBAL) {
			/* drop this interface's two refs on the shared pool */
			VERIFY(fakeif->iff_rx_pp == S_pp);
			VERIFY(fakeif->iff_tx_pp == S_pp);
			pp_release(fakeif->iff_rx_pp);
			fakeif->iff_rx_pp = NULL;
			pp_release(fakeif->iff_tx_pp);
			fakeif->iff_tx_pp = NULL;
			feth_lock();
			/*
			 * NOTE(review): refcnt == 1 appears to mean only
			 * the creation reference remains, so the shared
			 * pool is destroyed -- confirm pp_refcnt semantics.
			 */
			if (S_pp->pp_refcnt == 1) {
				pp_release(S_pp);
				S_pp = NULL;
			}
			feth_unlock();
		} else {
			/* private pool(s); tx may alias rx in PRIVATE mode */
			if (fakeif->iff_rx_pp != NULL) {
				pp_release(fakeif->iff_rx_pp);
				fakeif->iff_rx_pp = NULL;
			}
			if (fakeif->iff_tx_pp != NULL) {
				pp_release(fakeif->iff_tx_pp);
				fakeif->iff_tx_pp = NULL;
			}
		}
	}
#endif /* SKYWALK */

	FETH_DPRINTF("%s\n", fakeif->iff_name);
	kfree_type(fake_llink, FETH_MAX_LLINKS, fakeif->iff_llink);
	kfree_type(struct if_fake, fakeif);
}
830
831 static void
feth_release(if_fake_ref fakeif)832 feth_release(if_fake_ref fakeif)
833 {
834 u_int32_t old_retain_count;
835
836 old_retain_count = OSDecrementAtomic(&fakeif->iff_retain_count);
837 switch (old_retain_count) {
838 case 0:
839 VERIFY(old_retain_count != 0);
840 break;
841 case 1:
842 feth_free(fakeif);
843 break;
844 default:
845 break;
846 }
847 return;
848 }
849
850 #if SKYWALK
851
/* Take a reference on the if_fake instance (pairs with feth_release()). */
static void
feth_retain(if_fake_ref fakeif)
{
	OSIncrementAtomic(&fakeif->iff_retain_count);
}
857
/*
 * Fill in a kern_pbufpool_init for this interface's packet pool(s).
 * Buflet sizing depends on the TSO and multi-buflet settings:
 *  - multi-buflet: fixed-size buflets, enough fragments to cover the
 *    maximum MTU, allocated on demand
 *  - single buflet: one buflet large enough for the maximum MTU
 */
static void
feth_packet_pool_init_prepare(if_fake_ref fakeif,
    struct kern_pbufpool_init *pp_init)
{
	uint32_t max_mtu = fakeif->iff_max_mtu;
	uint32_t buflet_size = if_fake_buflet_size;

	bzero(pp_init, sizeof(*pp_init));
	pp_init->kbi_version = KERN_PBUFPOOL_CURRENT_VERSION;
	pp_init->kbi_flags |= KBIF_VIRTUAL_DEVICE;
	pp_init->kbi_packets = 1024; /* TBD configurable */
	if (feth_supports_tso(fakeif)) {
		/* TSO uses the (larger) TSO buffer size */
		buflet_size = if_fake_tso_buffer_size;
	}
	if (feth_using_multibuflets(fakeif)) {
		pp_init->kbi_bufsize = buflet_size;
		pp_init->kbi_max_frags = howmany(max_mtu, buflet_size);
		pp_init->kbi_buflets = pp_init->kbi_packets *
		    pp_init->kbi_max_frags;
		pp_init->kbi_flags |= KBIF_BUFFER_ON_DEMAND;
	} else {
		pp_init->kbi_bufsize = max(max_mtu, buflet_size);
		pp_init->kbi_max_frags = 1;
		pp_init->kbi_buflets = pp_init->kbi_packets;
	}
	pp_init->kbi_buf_seg_size = skmem_usr_buf_seg_size;
	if (if_fake_user_access != 0) {
		pp_init->kbi_flags |= KBIF_USER_ACCESS;
	}
	/* no pool context or context retain/release callbacks */
	pp_init->kbi_ctx = NULL;
	pp_init->kbi_ctx_retain = NULL;
	pp_init->kbi_ctx_release = NULL;
}
891
892 static errno_t
feth_packet_pool_make(if_fake_ref fakeif)893 feth_packet_pool_make(if_fake_ref fakeif)
894 {
895 struct kern_pbufpool_init pp_init;
896 errno_t err;
897
898 feth_packet_pool_init_prepare(fakeif, &pp_init);
899
900 switch (fakeif->iff_pp_mode) {
901 case IFF_PP_MODE_GLOBAL:
902 feth_lock();
903 if (S_pp == NULL) {
904 (void)snprintf((char *)pp_init.kbi_name,
905 sizeof(pp_init.kbi_name), "%s", "feth shared pp");
906 err = kern_pbufpool_create(&pp_init, &S_pp, NULL);
907 }
908 pp_retain(S_pp);
909 feth_unlock();
910 fakeif->iff_rx_pp = S_pp;
911 pp_retain(S_pp);
912 fakeif->iff_tx_pp = S_pp;
913 break;
914 case IFF_PP_MODE_PRIVATE:
915 (void)snprintf((char *)pp_init.kbi_name,
916 sizeof(pp_init.kbi_name), "%s pp", fakeif->iff_name);
917 err = kern_pbufpool_create(&pp_init, &fakeif->iff_rx_pp, NULL);
918 pp_retain(fakeif->iff_rx_pp);
919 fakeif->iff_tx_pp = fakeif->iff_rx_pp;
920 break;
921 case IFF_PP_MODE_PRIVATE_SPLIT:
922 (void)snprintf((char *)pp_init.kbi_name,
923 sizeof(pp_init.kbi_name), "%s rx pp", fakeif->iff_name);
924 pp_init.kbi_flags &= ~(KBIF_IODIR_IN | KBIF_IODIR_OUT |
925 KBIF_BUFFER_ON_DEMAND | KBIF_KERNEL_READONLY);
926 pp_init.kbi_flags |= (KBIF_IODIR_IN | KBIF_BUFFER_ON_DEMAND);
927 pp_init.kbi_packets = 1024;
928 pp_init.kbi_bufsize = if_fake_link_layer_aggregation_factor * 1024;
929 err = kern_pbufpool_create(&pp_init, &fakeif->iff_rx_pp, NULL);
930 if (err != 0) {
931 printf("%s: rx pp create failed %d\n", __func__, err);
932 return err;
933 }
934 pp_init.kbi_flags &= ~(KBIF_IODIR_IN | KBIF_IODIR_OUT |
935 KBIF_BUFFER_ON_DEMAND | KBIF_KERNEL_READONLY);
936 pp_init.kbi_flags |= KBIF_IODIR_OUT;
937 pp_init.kbi_packets = 1024; /* TBD configurable */
938 pp_init.kbi_bufsize = fakeif->iff_max_mtu;
939 (void)snprintf((char *)pp_init.kbi_name,
940 sizeof(pp_init.kbi_name), "%s tx pp", fakeif->iff_name);
941 err = kern_pbufpool_create(&pp_init, &fakeif->iff_tx_pp, NULL);
942 if (err != 0) {
943 printf("%s: tx pp create failed %d\n", __func__, err);
944 pp_release(fakeif->iff_rx_pp);
945 return err;
946 }
947 break;
948 default:
949 VERIFY(0);
950 __builtin_unreachable();
951 }
952
953 return 0;
954 }
955
956 static void
feth_packet_set_trace_tag(kern_packet_t ph,int flag)957 feth_packet_set_trace_tag(kern_packet_t ph, int flag)
958 {
959 if (if_fake_trace_tag_flags & flag) {
960 if (++if_fake_trace_tag_current == 0) {
961 if_fake_trace_tag_current = 1;
962 }
963 kern_packet_set_trace_tag(ph, if_fake_trace_tag_current);
964 }
965 }
966
/*
 * Copy the single-buflet source packet `sph` into a packet drawn from
 * `dif`'s rx pool, returned via *pdph.
 *
 * On entry *pdph may reference a previously produced destination
 * packet (dph0).  If the buffer object backing dph0 still has room
 * beyond its current data limit, the copy is placed there via a
 * light-weight clone sharing the same buffer (packing several small
 * frames into one large rx buffer); otherwise a fresh buffer is
 * allocated from the pool.  Returns 0 on success, or the
 * allocation/clone error (drop stats are bumped on alloc failure).
 */
static errno_t
feth_clone_packet(if_fake_ref dif, kern_packet_t sph, kern_packet_t *pdph)
{
	errno_t err = 0;
	kern_pbufpool_t pp = dif->iff_rx_pp;
	kern_packet_t dph = 0, dph0 = 0;
	kern_buflet_t sbuf, dbuf0 = NULL, dbuf;
	void *saddr, *daddr;
	uint32_t soff, doff;
	uint32_t slen, dlen;
	uint32_t dlim0, dlim;

	/* destination mirrors the source's offset and length */
	sbuf = kern_packet_get_next_buflet(sph, NULL);
	saddr = kern_buflet_get_data_address(sbuf);
	doff = soff = kern_buflet_get_data_offset(sbuf);
	dlen = slen = kern_buflet_get_data_length(sbuf);

	/* packet clone is only supported for single-buflet */
	ASSERT(kern_packet_get_buflet_count(sph) == 1);
	ASSERT(soff == kern_packet_get_headroom(sph));
	ASSERT(slen == kern_packet_get_data_length(sph));

	dph0 = *pdph;
	if (dph0 == 0) {
		/* no prior packet: force a fresh allocation below */
		dlim0 = 0;
	} else {
		/*
		 * dlim0 = bytes left in dph0's buffer object past its
		 * current data limit (i.e. free tail space).
		 */
		dbuf0 = kern_packet_get_next_buflet(dph0, NULL);
		ASSERT(kern_buflet_get_object_limit(dbuf0) ==
		    PP_BUF_OBJ_SIZE_DEF(pp));
		ASSERT(kern_buflet_get_data_limit(dbuf0) % 16 == 0);
		dlim0 = ((uintptr_t)kern_buflet_get_object_address(dbuf0) +
		    kern_buflet_get_object_limit(dbuf0)) -
		    ((uintptr_t)kern_buflet_get_data_address(dbuf0) +
		    kern_buflet_get_data_limit(dbuf0));
	}

	if (doff + dlen > dlim0) {
		/* frame doesn't fit in the leftover space: new buffer */
		err = kern_pbufpool_alloc_nosleep(pp, 1, &dph);
		if (err != 0) {
			STATS_INC(dif->iff_nifs, NETIF_STATS_DROP);
			STATS_INC(dif->iff_nifs, NETIF_STATS_DROP_NOMEM_PKT);
			return err;
		}
		dbuf = kern_packet_get_next_buflet(dph, NULL);
		ASSERT(kern_buflet_get_data_address(dbuf) ==
		    kern_buflet_get_object_address(dbuf));
		daddr = kern_buflet_get_data_address(dbuf);
		dlim = kern_buflet_get_object_limit(dbuf);
		ASSERT(dlim == PP_BUF_OBJ_SIZE_DEF(pp));
	} else {
		/* reuse dph0's buffer: clone and write past its limit */
		err = kern_packet_clone_nosleep(dph0, &dph, KPKT_COPY_LIGHT);
		if (err != 0) {
			printf("%s: packet clone err %d\n", __func__, err);
			return err;
		}
		dbuf = kern_packet_get_next_buflet(dph, NULL);
		ASSERT(kern_buflet_get_object_address(dbuf) ==
		    kern_buflet_get_object_address(dbuf0));
		daddr = (void *)((uintptr_t)kern_buflet_get_data_address(dbuf0) +
		    kern_buflet_get_data_limit(dbuf0));
		dlim = dlim0;
	}

	ASSERT(doff + dlen <= dlim);

	/* 16-byte alignment guaranteed by the data-limit rounding below */
	ASSERT((uintptr_t)daddr % 16 == 0);

	bcopy((const void *)((uintptr_t)saddr + soff),
	    (void *)((uintptr_t)daddr + doff), slen);

	/* cap the data limit at the copied frame, rounded up to 16 bytes */
	dlim = MIN(dlim, P2ROUNDUP(doff + dlen, 16));
	err = kern_buflet_set_data_address(dbuf, daddr);
	VERIFY(err == 0);
	err = kern_buflet_set_data_limit(dbuf, dlim);
	VERIFY(err == 0);
	err = kern_buflet_set_data_length(dbuf, dlen);
	VERIFY(err == 0);
	err = kern_buflet_set_data_offset(dbuf, doff);
	VERIFY(err == 0);
	err = kern_packet_set_headroom(dph, doff);
	VERIFY(err == 0);
	/* carry over link-header length and service class from the source */
	err = kern_packet_set_link_header_length(dph,
	    kern_packet_get_link_header_length(sph));
	VERIFY(err == 0);
	err = kern_packet_set_service_class(dph,
	    kern_packet_get_service_class(sph));
	VERIFY(err == 0);
	err = kern_packet_finalize(dph);
	VERIFY(err == 0);
	*pdph = dph;

	return err;
}
1060
1061 static inline void
feth_copy_buflet(kern_buflet_t sbuf,kern_buflet_t dbuf)1062 feth_copy_buflet(kern_buflet_t sbuf, kern_buflet_t dbuf)
1063 {
1064 errno_t err;
1065 uint16_t off, len;
1066 uint8_t *saddr, *daddr;
1067
1068 saddr = kern_buflet_get_data_address(sbuf);
1069 off = kern_buflet_get_data_offset(sbuf);
1070 len = kern_buflet_get_data_length(sbuf);
1071 daddr = kern_buflet_get_data_address(dbuf);
1072 bcopy((saddr + off), (daddr + off), len);
1073 err = kern_buflet_set_data_offset(dbuf, off);
1074 VERIFY(err == 0);
1075 err = kern_buflet_set_data_length(dbuf, len);
1076 VERIFY(err == 0);
1077 }
1078
1079 static int
feth_add_packet_trailer(kern_packet_t ph,void * trailer,size_t trailer_len)1080 feth_add_packet_trailer(kern_packet_t ph, void *trailer, size_t trailer_len)
1081 {
1082 errno_t err = 0;
1083
1084 ASSERT(trailer_len <= FETH_TRAILER_LENGTH_MAX);
1085
1086 kern_buflet_t buf = NULL, iter = NULL;
1087 while ((iter = kern_packet_get_next_buflet(ph, iter)) != NULL) {
1088 buf = iter;
1089 }
1090 ASSERT(buf != NULL);
1091
1092 uint16_t dlim = kern_buflet_get_data_limit(buf);
1093 uint16_t doff = kern_buflet_get_data_offset(buf);
1094 uint16_t dlen = kern_buflet_get_data_length(buf);
1095
1096 size_t trailer_room = dlim - doff - dlen;
1097
1098 if (trailer_room < trailer_len) {
1099 printf("not enough room");
1100 return ERANGE;
1101 }
1102
1103 void *data = (void *)((uintptr_t)kern_buflet_get_data_address(buf) + doff + dlen);
1104 memcpy(data, trailer, trailer_len);
1105
1106 err = kern_buflet_set_data_length(buf, dlen + trailer_len);
1107 VERIFY(err == 0);
1108
1109 err = kern_packet_finalize(ph);
1110 VERIFY(err == 0);
1111
1112 FETH_DPRINTF("%s %zuB trailer added\n", __func__, trailer_len);
1113
1114 return 0;
1115 }
1116
1117 static int
feth_add_packet_fcs(kern_packet_t ph)1118 feth_add_packet_fcs(kern_packet_t ph)
1119 {
1120 uint32_t crc = 0;
1121 int err;
1122
1123 ASSERT(sizeof(crc) == ETHER_CRC_LEN);
1124
1125 kern_buflet_t buf = NULL;
1126 while ((buf = kern_packet_get_next_buflet(ph, buf)) != NULL) {
1127 uint16_t doff = kern_buflet_get_data_offset(buf);
1128 uint16_t dlen = kern_buflet_get_data_length(buf);
1129 void *data = (void *)((uintptr_t)kern_buflet_get_data_address(buf) + doff);
1130 crc = crc32(crc, data, dlen);
1131 }
1132
1133 err = feth_add_packet_trailer(ph, &crc, ETHER_CRC_LEN);
1134 if (!err) {
1135 return err;
1136 }
1137
1138 err = kern_packet_set_link_ethfcs(ph);
1139 VERIFY(err == 0);
1140
1141 return 0;
1142 }
1143
/*
 * feth_copy_packet
 * - deep-copy packet `sph` into a new packet allocated from `dif`'s RX
 *   pool (used in IFF_PP_MODE_PRIVATE mode)
 * - the first buflet comes pre-attached to the allocated packet; when the
 *   interface is configured for multi-buflet operation, additional
 *   buflets are allocated and chained one at a time
 * - on a mid-chain allocation failure, the partially built packet is
 *   sanity-walked and freed
 * - on success *pdph holds the finalized copy; on failure it is 0
 */
static errno_t
feth_copy_packet(if_fake_ref dif, kern_packet_t sph, kern_packet_t *pdph)
{
	errno_t err = 0;
	uint16_t i, bufcnt;
	mach_vm_address_t baddr;
	kern_buflet_t sbuf = NULL, dbuf = NULL;
	kern_pbufpool_t pp = dif->iff_rx_pp;
	kern_packet_t dph;
	boolean_t multi_buflet = feth_using_multibuflets(dif);

	bufcnt = kern_packet_get_buflet_count(sph);
	ASSERT((bufcnt == 1) || multi_buflet);
	*pdph = 0;

	err = kern_pbufpool_alloc_nosleep(pp, 1, &dph);
	if (err != 0) {
		STATS_INC(dif->iff_nifs, NETIF_STATS_DROP);
		STATS_INC(dif->iff_nifs, NETIF_STATS_DROP_NOMEM_PKT);
		return err;
	}

	/* pre-constructed single buflet packet copy */
	sbuf = kern_packet_get_next_buflet(sph, NULL);
	dbuf = kern_packet_get_next_buflet(dph, NULL);
	feth_copy_buflet(sbuf, dbuf);

	if (!multi_buflet) {
		goto done;
	}

	/* un-constructed multi-buflet packet copy */
	for (i = 1; i < bufcnt; i++) {
		kern_buflet_t dbuf_next = NULL;

		sbuf = kern_packet_get_next_buflet(sph, sbuf);
		VERIFY(sbuf != NULL);
		err = kern_pbufpool_alloc_buflet_nosleep(pp, &dbuf_next, true);
		if (err != 0) {
			STATS_INC(dif->iff_nifs, NETIF_STATS_DROP);
			STATS_INC(dif->iff_nifs, NETIF_STATS_DROP_NOMEM_BUF);
			break;
		}
		ASSERT(dbuf_next != NULL);
		feth_copy_buflet(sbuf, dbuf_next);
		err = kern_packet_add_buflet(dph, dbuf, dbuf_next);
		VERIFY(err == 0);
		dbuf = dbuf_next;
	}
	if (__improbable(err != 0)) {
		/*
		 * Mid-chain failure: walk the buflets that did get
		 * attached (each must still have a data buffer), then free
		 * the whole partial packet.
		 */
		dbuf = NULL;
		while (i-- != 0) {
			dbuf = kern_packet_get_next_buflet(dph, dbuf);
			VERIFY(dbuf != NULL);
			baddr = (mach_vm_address_t)
			    kern_buflet_get_data_address(dbuf);
			VERIFY(baddr != 0);
		}
		kern_pbufpool_free(pp, dph);
		dph = 0;
	}

done:
	if (__probable(err == 0)) {
		/* mirror the source's metadata, then finalize the copy */
		err = kern_packet_set_headroom(dph,
		    kern_packet_get_headroom(sph));
		VERIFY(err == 0);
		err = kern_packet_set_link_header_length(dph,
		    kern_packet_get_link_header_length(sph));
		VERIFY(err == 0);
		err = kern_packet_set_service_class(dph,
		    kern_packet_get_service_class(sph));
		VERIFY(err == 0);
		err = kern_packet_finalize(dph);
		VERIFY(err == 0);
		VERIFY(bufcnt == kern_packet_get_buflet_count(dph));
		*pdph = dph;
	}
	return err;
}
1224
1225 static inline void
feth_update_pkt_tso_metadata_for_rx(kern_packet_t ph)1226 feth_update_pkt_tso_metadata_for_rx(kern_packet_t ph)
1227 {
1228 /*
1229 * Nothing to do if not a TSO offloaded packet.
1230 */
1231 uint16_t seg_sz = 0;
1232 (void) kern_packet_get_protocol_segment_size(ph, &seg_sz);
1233 if (seg_sz == 0) {
1234 return;
1235 }
1236 /*
1237 * For RX, make the packet appear as a fully validated LRO packet.
1238 */
1239 packet_csum_flags_t csum_flags = PACKET_CSUM_IP_CHECKED |
1240 PACKET_CSUM_IP_VALID | PACKET_CSUM_DATA_VALID |
1241 PACKET_CSUM_PSEUDO_HDR;
1242 (void) kern_packet_set_inet_checksum(ph, csum_flags, 0, 0xFFFF, FALSE);
1243 return;
1244 }
1245
/*
 * feth_rx_submit
 * - deliver a batch of packets transmitted by `sif` into the default RX
 *   ring of its peer `dif`
 * - per the peer's pool mode, each source packet is either handed over
 *   as-is (global pool; the caller's slot in sphs[] is zeroed so its
 *   TX-complete pass skips it), deep-copied, or clone-packed
 * - appends the configured trailer and/or Ethernet FCS when enabled on
 *   the sender, taps BPF input, and attaches each packet to successive
 *   RX slots; stops early when the RX ring runs out of slots
 */
static void
feth_rx_submit(if_fake_ref sif, if_fake_ref dif, kern_packet_t sphs[],
    uint32_t n_pkts)
{
	errno_t err = 0;
	struct kern_channel_ring_stat_increment stats;
	kern_channel_ring_t rx_ring = NULL;
	kern_channel_slot_t rx_slot = NULL, last_rx_slot = NULL;
	kern_packet_t sph = 0, dph = 0;

	memset(&stats, 0, sizeof(stats));

	rx_ring = dif->iff_rx_ring[0];
	if (rx_ring == NULL) {
		/* peer has no RX ring bound; drop the batch */
		return;
	}

	kr_enter(rx_ring, TRUE);
	kern_channel_reclaim(rx_ring);
	rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);

	for (uint32_t i = 0; i < n_pkts && rx_slot != NULL; i++) {
		sph = sphs[i];

		switch (dif->iff_pp_mode) {
		case IFF_PP_MODE_GLOBAL:
			/* shared pool: transfer packet ownership directly */
			sphs[i] = 0;
			dph = sph;
			feth_update_pkt_tso_metadata_for_rx(dph);
			err = kern_packet_finalize(dph);
			VERIFY(err == 0);
			break;
		case IFF_PP_MODE_PRIVATE:
			err = feth_copy_packet(dif, sph, &dph);
			break;
		case IFF_PP_MODE_PRIVATE_SPLIT:
			err = feth_clone_packet(dif, sph, &dph);
			break;
		default:
			VERIFY(0);
			__builtin_unreachable();
		}
		if (__improbable(err != 0)) {
			/* copy/clone failed (stats already counted) */
			continue;
		}

		if (sif->iff_trailer_length != 0) {
			feth_add_packet_trailer(dph, feth_trailer,
			    sif->iff_trailer_length);
		}
		if (sif->iff_fcs != 0) {
			feth_add_packet_fcs(dph);
		}
		feth_packet_set_trace_tag(dph, IFF_TT_INPUT);
		bpf_tap_packet_in(dif->iff_ifp, DLT_EN10MB, dph, NULL, 0);
		stats.kcrsi_slots_transferred++;
		stats.kcrsi_bytes_transferred
		        += kern_packet_get_data_length(dph);

		/* attach the packet to the RX ring */
		err = kern_channel_slot_attach_packet(rx_ring, rx_slot, dph);
		VERIFY(err == 0);
		last_rx_slot = rx_slot;
		rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
	}

	if (last_rx_slot != NULL) {
		/* publish the attached packets and account ring stats */
		kern_channel_advance_slot(rx_ring, last_rx_slot);
		kern_channel_increment_ring_net_stats(rx_ring, dif->iff_ifp,
		    &stats);
	}

	if (rx_ring != NULL) {
		kr_exit(rx_ring);
		kern_channel_notify(rx_ring, 0);
	}
}
1323
/*
 * feth_rx_queue_submit
 * - logical-link variant of feth_rx_submit(): deliver a batch of packets
 *   from `sif` into the default RX queue of the addressed queue set
 *   (llink_idx/qset_idx) on peer `dif`
 * - validates the indices against the peer's configuration and drops the
 *   whole batch on any mismatch or missing queue
 * - per-packet pool-mode handling, trailer/FCS, trace tag and BPF tap
 *   mirror feth_rx_submit(); the last packet of the batch carries the
 *   FLUSH flag so the queue delivers the accumulated batch
 */
static void
feth_rx_queue_submit(if_fake_ref sif, if_fake_ref dif, uint32_t llink_idx,
    uint32_t qset_idx, kern_packet_t sphs[], uint32_t n_pkts)
{
	errno_t err = 0;
	kern_netif_queue_t queue;
	kern_packet_t sph = 0, dph = 0;
	fake_llink *llink;
	fake_qset *qset;

	if (llink_idx >= dif->iff_llink_cnt) {
		printf("%s: invalid llink_idx idx %d (max %d) on peer %s\n",
		    __func__, llink_idx, dif->iff_llink_cnt, dif->iff_name);
		return;
	}
	llink = &dif->iff_llink[llink_idx];
	if (qset_idx >= llink->fl_qset_cnt) {
		printf("%s: invalid qset_idx %d (max %d) on peer %s\n",
		    __func__, qset_idx, llink->fl_qset_cnt, dif->iff_name);
		return;
	}
	qset = &dif->iff_llink[llink_idx].fl_qset[qset_idx];
	queue = qset->fqs_rx_queue[0].fq_queue;
	if (queue == NULL) {
		printf("%s: NULL default queue (llink_idx %d, qset_idx %d) "
		    "on peer %s\n", __func__, llink_idx, qset_idx,
		    dif->iff_name);
		return;
	}
	for (uint32_t i = 0; i < n_pkts; i++) {
		uint32_t flags;

		sph = sphs[i];

		switch (dif->iff_pp_mode) {
		case IFF_PP_MODE_GLOBAL:
			/* shared pool: transfer packet ownership directly */
			sphs[i] = 0;
			dph = sph;
			feth_update_pkt_tso_metadata_for_rx(dph);
			break;
		case IFF_PP_MODE_PRIVATE:
			err = feth_copy_packet(dif, sph, &dph);
			break;
		case IFF_PP_MODE_PRIVATE_SPLIT:
			err = feth_clone_packet(dif, sph, &dph);
			break;
		default:
			VERIFY(0);
			__builtin_unreachable();
		}
		if (__improbable(err != 0)) {
			/* copy/clone failed (stats already counted) */
			continue;
		}

		if (sif->iff_trailer_length != 0) {
			feth_add_packet_trailer(dph, feth_trailer,
			    sif->iff_trailer_length);
		}
		if (sif->iff_fcs != 0) {
			feth_add_packet_fcs(dph);
		}
		feth_packet_set_trace_tag(dph, IFF_TT_INPUT);
		bpf_tap_packet_in(dif->iff_ifp, DLT_EN10MB, dph, NULL, 0);

		/* flush on the final packet to push the whole batch */
		flags = (i == n_pkts - 1) ?
		    KERN_NETIF_QUEUE_RX_ENQUEUE_FLAG_FLUSH : 0;
		kern_netif_queue_rx_enqueue(queue, dph, 1, flags);
	}
}
1393
1394 static void
feth_tx_complete(if_fake_ref fakeif,kern_packet_t phs[],uint32_t nphs)1395 feth_tx_complete(if_fake_ref fakeif, kern_packet_t phs[], uint32_t nphs)
1396 {
1397 for (uint32_t i = 0; i < nphs; i++) {
1398 kern_packet_t ph = phs[i];
1399 if (ph == 0) {
1400 continue;
1401 }
1402 int err = kern_packet_set_tx_completion_status(ph, 0);
1403 VERIFY(err == 0);
1404 kern_packet_tx_completion(ph, fakeif->iff_ifp);
1405 kern_pbufpool_free(fakeif->iff_tx_pp, phs[i]);
1406 phs[i] = 0;
1407 }
1408 }
1409
/* returns true if the packet is selected for TX error & dropped */
static bool
feth_tx_complete_error(if_fake_ref fakeif, kern_packet_t *ph,
    struct netif_stats *nifs)
{
	int err;

	/*
	 * Simulate a drop only when a drop rate is configured and the
	 * running TX packet counter (incremented by the caller) has reached
	 * exactly that rate; the counter is reset below after each drop.
	 */
	if (fakeif->iff_tx_drop_rate == 0 ||
	    fakeif->iff_tx_pkts_count != fakeif->iff_tx_drop_rate) {
		return false;
	}
	/* simulate TX completion error on the packet */
	if (fakeif->iff_tx_completion_mode == IFF_TX_COMPL_MODE_SYNC) {
		/* synchronous mode: report via the completion callback */
		err = kern_packet_set_tx_completion_status(*ph,
		    CHANNEL_EVENT_PKT_TRANSMIT_STATUS_ERR_RETRY_FAILED);
		VERIFY(err == 0);
		kern_packet_tx_completion(*ph, fakeif->iff_ifp);
	} else {
		/* async mode: raise a channel transmit-status event */
		uint32_t nx_port_id = 0;
		os_channel_event_packet_transmit_status_t pkt_tx_status = {0};

		pkt_tx_status.packet_status =
		    CHANNEL_EVENT_PKT_TRANSMIT_STATUS_ERR_RETRY_FAILED;
		do {
			err = kern_packet_get_packetid(*ph,
			    &pkt_tx_status.packet_id);
			if (err != 0) {
				break;
			}
			err = kern_packet_get_tx_nexus_port_id(*ph,
			    &nx_port_id);
			if (err != 0) {
				break;
			}
			err = kern_channel_event_transmit_status(
				fakeif->iff_ifp, &pkt_tx_status, nx_port_id);
		} while (0);
		if (err != 0) {
			FETH_DPRINTF("err %d, nx_port_id: 0x%x\n",
			    err, nx_port_id);
		}
	}
	fakeif->iff_tx_pkts_count = 0;
	/* free the dropped packet and zero the caller's handle */
	kern_pbufpool_free(fakeif->iff_tx_pp, *ph);
	*ph = 0;
	STATS_INC(nifs, NETIF_STATS_DROP);
	return true;
}
1458
/*
 * feth_if_adv
 * - interface-advisory thread call: periodically reports synthetic TX
 *   capacity data (fixed 1 Gbps figures; byte count from ifnet stats)
 *   through the registered advisory notify callback
 * - re-arms itself every iff_adv_interval milliseconds for as long as the
 *   interface is attached and its channel is connected
 */
static void
feth_if_adv(thread_call_param_t arg0, thread_call_param_t arg1)
{
#pragma unused(arg1)
	errno_t error;
	if_fake_ref fakeif = (if_fake_ref)arg0;
	struct ifnet_interface_advisory if_adv;
	struct ifnet_stats_param if_stat;

	/* bail early if the interface is going away or disconnected */
	feth_lock();
	if (feth_is_detaching(fakeif) || !fakeif->iff_channel_connected) {
		feth_unlock();
		return;
	}
	feth_unlock();

	/*
	 * NOTE(review): iff_intf_adv_enabled is read after dropping the
	 * lock — presumably fine if it only changes under configuration
	 * paths; confirm against its setters.
	 */
	if (!fakeif->iff_intf_adv_enabled) {
		goto done;
	}

	error = ifnet_stat(fakeif->iff_ifp, &if_stat);
	if (error != 0) {
		FETH_DPRINTF("%s: ifnet_stat() failed %d\n",
		    fakeif->iff_name, error);
		goto done;
	}
	/* fabricate a plausible Wi-Fi-style TX capacity report */
	if_adv.header.version = IF_INTERFACE_ADVISORY_VERSION_CURRENT;
	if_adv.header.direction = IF_INTERFACE_ADVISORY_DIRECTION_TX;
	if_adv.header.interface_type =
	    IF_INTERFACE_ADVISORY_INTERFACE_TYPE_WIFI;
	if_adv.capacity.timestamp = mach_absolute_time();
	if_adv.capacity.rate_trend_suggestion =
	    IF_INTERFACE_ADVISORY_RATE_SUGGESTION_RAMP_NEUTRAL;
	if_adv.capacity.max_bandwidth = 1000 * 1000 * 1000; /* 1Gbps */
	if_adv.capacity.total_byte_count = if_stat.packets_out;
	if_adv.capacity.average_throughput = 1000 * 1000 * 1000; /* 1Gbps */
	if_adv.capacity.flushable_queue_size = UINT32_MAX;
	if_adv.capacity.non_flushable_queue_size = UINT32_MAX;
	if_adv.capacity.average_delay = 1; /* ms */

	error = fakeif->iff_intf_adv_notify(fakeif->iff_intf_adv_kern_ctx,
	    &if_adv);
	if (error != 0) {
		FETH_DPRINTF("%s: interface advisory report failed %d\n",
		    fakeif->iff_name, error);
	}

done:
	/* re-arm the thread call unless the interface went away */
	feth_lock();
	if (!feth_is_detaching(fakeif) && fakeif->iff_channel_connected) {
		uint64_t deadline;
		clock_interval_to_deadline(fakeif->iff_adv_interval,
		    NSEC_PER_MSEC, &deadline);
		thread_call_enter_delayed(fakeif->iff_if_adv_tcall, deadline);
	}
	feth_unlock();
}
1516
1517 static int
feth_if_adv_tcall_create(if_fake_ref fakeif)1518 feth_if_adv_tcall_create(if_fake_ref fakeif)
1519 {
1520 uint64_t deadline;
1521
1522 feth_lock();
1523 ASSERT(fakeif->iff_if_adv_tcall == NULL);
1524 ASSERT(fakeif->iff_adv_interval > 0);
1525 ASSERT(fakeif->iff_channel_connected);
1526 fakeif->iff_if_adv_tcall =
1527 thread_call_allocate_with_options(feth_if_adv,
1528 (thread_call_param_t)fakeif, THREAD_CALL_PRIORITY_KERNEL,
1529 THREAD_CALL_OPTIONS_ONCE);
1530 if (fakeif->iff_if_adv_tcall == NULL) {
1531 printf("%s: %s if_adv tcall alloc failed\n", __func__,
1532 fakeif->iff_name);
1533 return ENXIO;
1534 }
1535 /* retain for the interface advisory thread call */
1536 feth_retain(fakeif);
1537 clock_interval_to_deadline(fakeif->iff_adv_interval,
1538 NSEC_PER_MSEC, &deadline);
1539 thread_call_enter_delayed(fakeif->iff_if_adv_tcall, deadline);
1540 feth_unlock();
1541 return 0;
1542 }
1543
/*
 * feth_if_adv_tcall_destroy
 * - cancel, wait out, and free the interface-advisory thread call, then
 *   release the reference taken in feth_if_adv_tcall_create()
 * - thread_call_free() can fail if the call re-armed itself between the
 *   cancel and the free (feth_if_adv re-schedules itself); in that case
 *   cancel/free are retried once and the second free must succeed
 */
static void
feth_if_adv_tcall_destroy(if_fake_ref fakeif)
{
	thread_call_t tcall;

	feth_lock();
	ASSERT(fakeif->iff_if_adv_tcall != NULL);
	tcall = fakeif->iff_if_adv_tcall;
	feth_unlock();
	/* cancel outside the lock; the call itself takes feth_lock */
	(void) thread_call_cancel_wait(tcall);
	if (!thread_call_free(tcall)) {
		boolean_t freed;
		/* the call re-armed itself; cancel and free again */
		(void) thread_call_cancel_wait(tcall);
		freed = thread_call_free(tcall);
		VERIFY(freed);
	}
	feth_lock();
	fakeif->iff_if_adv_tcall = NULL;
	feth_unlock();
	/* release for the interface advisory thread call */
	feth_release(fakeif);
}
1566
1567
1568 /**
1569 ** nexus netif domain provider
1570 **/
1571 static errno_t
feth_nxdp_init(kern_nexus_domain_provider_t domprov)1572 feth_nxdp_init(kern_nexus_domain_provider_t domprov)
1573 {
1574 #pragma unused(domprov)
1575 return 0;
1576 }
1577
1578 static void
feth_nxdp_fini(kern_nexus_domain_provider_t domprov)1579 feth_nxdp_fini(kern_nexus_domain_provider_t domprov)
1580 {
1581 #pragma unused(domprov)
1582 }
1583
1584 static uuid_t feth_nx_dom_prov;
1585
1586 static errno_t
feth_register_nexus_domain_provider(void)1587 feth_register_nexus_domain_provider(void)
1588 {
1589 const struct kern_nexus_domain_provider_init dp_init = {
1590 .nxdpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
1591 .nxdpi_flags = 0,
1592 .nxdpi_init = feth_nxdp_init,
1593 .nxdpi_fini = feth_nxdp_fini
1594 };
1595 errno_t err = 0;
1596
1597 /* feth_nxdp_init() is called before this function returns */
1598 err = kern_nexus_register_domain_provider(NEXUS_TYPE_NET_IF,
1599 (const uint8_t *)
1600 "com.apple.feth",
1601 &dp_init, sizeof(dp_init),
1602 &feth_nx_dom_prov);
1603 if (err != 0) {
1604 printf("%s: failed to register domain provider\n", __func__);
1605 return err;
1606 }
1607 return 0;
1608 }
1609
1610 /**
1611 ** netif nexus routines
1612 **/
1613 static if_fake_ref
feth_nexus_context(kern_nexus_t nexus)1614 feth_nexus_context(kern_nexus_t nexus)
1615 {
1616 if_fake_ref fakeif;
1617
1618 fakeif = (if_fake_ref)kern_nexus_get_context(nexus);
1619 assert(fakeif != NULL);
1620 return fakeif;
1621 }
1622
1623 static uint8_t
feth_find_tx_ring_by_svc(kern_packet_svc_class_t svc_class)1624 feth_find_tx_ring_by_svc(kern_packet_svc_class_t svc_class)
1625 {
1626 switch (svc_class) {
1627 case KPKT_SC_VO:
1628 return 0;
1629 case KPKT_SC_VI:
1630 return 1;
1631 case KPKT_SC_BE:
1632 return 2;
1633 case KPKT_SC_BK:
1634 return 3;
1635 default:
1636 VERIFY(0);
1637 return 0;
1638 }
1639 }
1640
/*
 * feth_nx_ring_init
 * - nexus callback: record a newly created channel ring in the
 *   interface's ring table
 * - TX rings in WMM mode are indexed by service class (via
 *   feth_find_tx_ring_by_svc); otherwise slot 0 is used for both TX and
 *   the single RX ring
 * - also caches a pointer to the netif stats block for this nexus
 */
static errno_t
feth_nx_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_t channel, kern_channel_ring_t ring, boolean_t is_tx_ring,
    void **ring_ctx)
{
	if_fake_ref fakeif;
	int err;
#pragma unused(nxprov, channel, ring_ctx)
	feth_lock();
	fakeif = feth_nexus_context(nexus);
	if (feth_is_detaching(fakeif)) {
		/* interface going away: silently ignore the ring */
		feth_unlock();
		return 0;
	}
	if (is_tx_ring) {
		if (feth_in_wmm_mode(fakeif)) {
			/* one TX ring per service class in WMM mode */
			kern_packet_svc_class_t svc_class;
			uint8_t ring_idx;

			err = kern_channel_get_service_class(ring, &svc_class);
			VERIFY(err == 0);
			ring_idx = feth_find_tx_ring_by_svc(svc_class);
			VERIFY(ring_idx < IFF_NUM_TX_RINGS_WMM_MODE);
			VERIFY(fakeif->iff_tx_ring[ring_idx] == NULL);
			fakeif->iff_tx_ring[ring_idx] = ring;
		} else {
			VERIFY(fakeif->iff_tx_ring[0] == NULL);
			fakeif->iff_tx_ring[0] = ring;
		}
	} else {
		VERIFY(fakeif->iff_rx_ring[0] == NULL);
		fakeif->iff_rx_ring[0] = ring;
	}
	fakeif->iff_nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
	feth_unlock();
	FETH_DPRINTF("%s: %s ring init\n",
	    fakeif->iff_name, is_tx_ring ? "TX" : "RX");
	return 0;
}
1680
/*
 * feth_nx_ring_fini
 * - nexus callback: remove a ring from the interface's ring table
 * - when the last TX ring goes away, the async doorbell thread call is
 *   taken down: cancel it, and if the call is mid-execution, sleep until
 *   feth_async_doorbell() signals completion before freeing it
 * - drops the reference taken when the doorbell tcall was allocated
 */
static void
feth_nx_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t ring)
{
#pragma unused(nxprov, ring)
	if_fake_ref fakeif;
	thread_call_t tcall = NULL;

	feth_lock();
	fakeif = feth_nexus_context(nexus);
	if (fakeif->iff_rx_ring[0] == ring) {
		fakeif->iff_rx_ring[0] = NULL;
		FETH_DPRINTF("%s: RX ring fini\n", fakeif->iff_name);
	} else if (feth_in_wmm_mode(fakeif)) {
		int i;
		/* clear this ring's slot */
		for (i = 0; i < IFF_MAX_TX_RINGS; i++) {
			if (fakeif->iff_tx_ring[i] == ring) {
				fakeif->iff_tx_ring[i] = NULL;
				break;
			}
		}
		/* tear down the doorbell only if no TX rings remain */
		for (i = 0; i < IFF_MAX_TX_RINGS; i++) {
			if (fakeif->iff_tx_ring[i] != NULL) {
				break;
			}
		}
		if (i == IFF_MAX_TX_RINGS) {
			tcall = fakeif->iff_doorbell_tcall;
			fakeif->iff_doorbell_tcall = NULL;
		}
		FETH_DPRINTF("%s: TX ring fini\n", fakeif->iff_name);
	} else if (fakeif->iff_tx_ring[0] == ring) {
		tcall = fakeif->iff_doorbell_tcall;
		fakeif->iff_doorbell_tcall = NULL;
		fakeif->iff_tx_ring[0] = NULL;
	}
	fakeif->iff_nifs = NULL;
	feth_unlock();
	if (tcall != NULL) {
		boolean_t success;

		success = thread_call_cancel_wait(tcall);
		FETH_DPRINTF("%s: thread_call_cancel %s\n",
		    fakeif->iff_name,
		    success ? "SUCCESS" : "FAILURE");
		if (!success) {
			/*
			 * The call is currently executing; wait for
			 * feth_async_doorbell() to clear its active flag
			 * and wake us.
			 */
			feth_lock();
			if (fakeif->iff_doorbell_tcall_active) {
				fakeif->iff_waiting_for_tcall = TRUE;
				FETH_DPRINTF("%s: *waiting for threadcall\n",
				    fakeif->iff_name);
				do {
					msleep(fakeif, &feth_lck_mtx,
					    PZERO, "feth threadcall", 0);
				} while (fakeif->iff_doorbell_tcall_active);
				FETH_DPRINTF("%s: ^threadcall done\n",
				    fakeif->iff_name);
				fakeif->iff_waiting_for_tcall = FALSE;
			}
			feth_unlock();
		}
		success = thread_call_free(tcall);
		FETH_DPRINTF("%s: thread_call_free %s\n",
		    fakeif->iff_name,
		    success ? "SUCCESS" : "FAILURE");
		/* release the reference held by the doorbell tcall */
		feth_release(fakeif);
		VERIFY(success == TRUE);
	}
}
1750
1751 static errno_t
feth_nx_pre_connect(kern_nexus_provider_t nxprov,proc_t proc,kern_nexus_t nexus,nexus_port_t port,kern_channel_t channel,void ** channel_context)1752 feth_nx_pre_connect(kern_nexus_provider_t nxprov,
1753 proc_t proc, kern_nexus_t nexus, nexus_port_t port, kern_channel_t channel,
1754 void **channel_context)
1755 {
1756 #pragma unused(nxprov, proc, nexus, port, channel, channel_context)
1757 return 0;
1758 }
1759
/*
 * feth_nx_connected
 * - nexus callback: a channel has connected; take a reference on the
 *   interface (released in feth_nx_disconnected) and mark it connected
 * - starts the interface-advisory thread call if advisory reporting is
 *   configured
 */
static errno_t
feth_nx_connected(kern_nexus_provider_t nxprov,
    kern_nexus_t nexus, kern_channel_t channel)
{
#pragma unused(nxprov, channel)
	int err;
	if_fake_ref fakeif;

	fakeif = feth_nexus_context(nexus);
	feth_lock();
	if (feth_is_detaching(fakeif)) {
		feth_unlock();
		return EBUSY;
	}
	feth_retain(fakeif);
	fakeif->iff_channel_connected = TRUE;
	feth_unlock();
	if (feth_has_intf_advisory_configured(fakeif)) {
		err = feth_if_adv_tcall_create(fakeif);
		if (err != 0) {
			/*
			 * NOTE(review): on this failure the retain and
			 * iff_channel_connected = TRUE above are left in
			 * place — presumably undone by the subsequent
			 * disconnect path; confirm against the caller.
			 */
			return err;
		}
	}
	FETH_DPRINTF("%s: connected channel %p\n",
	    fakeif->iff_name, channel);
	return 0;
}
1787
/*
 * feth_nx_pre_disconnect
 * - nexus callback: a channel is about to disconnect; bring the
 *   interface down, mark the channel disconnected, and stop the
 *   interface-advisory thread call if one is running
 */
static void
feth_nx_pre_disconnect(kern_nexus_provider_t nxprov,
    kern_nexus_t nexus, kern_channel_t channel)
{
#pragma unused(nxprov, channel)
	if_fake_ref fakeif;

	fakeif = feth_nexus_context(nexus);
	FETH_DPRINTF("%s: pre-disconnect channel %p\n",
	    fakeif->iff_name, channel);
	/* Quiesce the interface and flush any pending outbound packets. */
	if_down(fakeif->iff_ifp);
	feth_lock();
	fakeif->iff_channel_connected = FALSE;
	feth_unlock();
	/*
	 * NOTE(review): iff_if_adv_tcall is read without the lock here —
	 * presumably safe because create/destroy happen on the
	 * connect/disconnect path; confirm no concurrent mutation.
	 */
	if (fakeif->iff_if_adv_tcall != NULL) {
		feth_if_adv_tcall_destroy(fakeif);
	}
}
1807
1808 static void
feth_nx_disconnected(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel)1809 feth_nx_disconnected(kern_nexus_provider_t nxprov,
1810 kern_nexus_t nexus, kern_channel_t channel)
1811 {
1812 #pragma unused(nxprov, channel)
1813 if_fake_ref fakeif;
1814
1815 fakeif = feth_nexus_context(nexus);
1816 FETH_DPRINTF("%s: disconnected channel %p\n",
1817 fakeif->iff_name, channel);
1818 feth_release(fakeif);
1819 }
1820
1821 static errno_t
feth_nx_slot_init(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring,kern_channel_slot_t slot,uint32_t slot_index,struct kern_slot_prop ** slot_prop_addr,void ** slot_context)1822 feth_nx_slot_init(kern_nexus_provider_t nxprov,
1823 kern_nexus_t nexus, kern_channel_ring_t ring, kern_channel_slot_t slot,
1824 uint32_t slot_index, struct kern_slot_prop **slot_prop_addr,
1825 void **slot_context)
1826 {
1827 #pragma unused(nxprov, nexus, ring, slot, slot_index, slot_prop_addr, slot_context)
1828 return 0;
1829 }
1830
1831 static void
feth_nx_slot_fini(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring,kern_channel_slot_t slot,uint32_t slot_index)1832 feth_nx_slot_fini(kern_nexus_provider_t nxprov,
1833 kern_nexus_t nexus, kern_channel_ring_t ring, kern_channel_slot_t slot,
1834 uint32_t slot_index)
1835 {
1836 #pragma unused(nxprov, nexus, ring, slot, slot_index)
1837 }
1838
/*
 * feth_nx_sync_tx
 * - TX sync callback: this is where the "crossed-over" forwarding
 *   happens — packets detached from this interface's TX ring are
 *   submitted as RX to the peer interface, then completed and freed
 * - packets are batched up to IFF_MAX_BATCH_SIZE before each
 *   feth_rx_submit()/feth_tx_complete() pass
 * - bails out (consuming nothing) if either endpoint is detaching or
 *   its channel is not connected
 * - the feth lock is held across the whole transfer
 */
static errno_t
feth_nx_sync_tx(kern_nexus_provider_t nxprov,
    kern_nexus_t nexus, kern_channel_ring_t tx_ring, uint32_t flags)
{
#pragma unused(nxprov)
	if_fake_ref fakeif;
	ifnet_t ifp;
	kern_channel_slot_t last_tx_slot = NULL;
	ifnet_t peer_ifp;
	if_fake_ref peer_fakeif = NULL;
	struct kern_channel_ring_stat_increment stats;
	kern_channel_slot_t tx_slot;
	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
	kern_packet_t pkts[IFF_MAX_BATCH_SIZE];
	uint32_t n_pkts = 0;

	memset(&stats, 0, sizeof(stats));

	STATS_INC(nifs, NETIF_STATS_TX_SYNC);
	fakeif = feth_nexus_context(nexus);
	FETH_DPRINTF("%s ring %d flags 0x%x\n", fakeif->iff_name,
	    tx_ring->ckr_ring_id, flags);

	feth_lock();
	if (feth_is_detaching(fakeif) || !fakeif->iff_channel_connected) {
		feth_unlock();
		return 0;
	}
	/* locate the peer and make sure it can accept traffic */
	ifp = fakeif->iff_ifp;
	peer_ifp = fakeif->iff_peer;
	if (peer_ifp != NULL) {
		peer_fakeif = ifnet_get_if_fake(peer_ifp);
		if (peer_fakeif != NULL) {
			if (feth_is_detaching(peer_fakeif) ||
			    !peer_fakeif->iff_channel_connected) {
				goto done;
			}
		} else {
			goto done;
		}
	} else {
		goto done;
	}
	tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
	while (tx_slot != NULL) {
		uint16_t off;
		kern_packet_t sph;

		/* detach the packet from the TX ring */
		sph = kern_channel_slot_get_packet(tx_ring, tx_slot);
		VERIFY(sph != 0);
		kern_channel_slot_detach_packet(tx_ring, tx_slot, sph);

		/* bpf tap output */
		off = kern_packet_get_headroom(sph);
		VERIFY(off >= fakeif->iff_tx_headroom);
		kern_packet_set_link_header_length(sph, ETHER_HDR_LEN);
		feth_packet_set_trace_tag(sph, IFF_TT_OUTPUT);
		bpf_tap_packet_out(ifp, DLT_EN10MB, sph, NULL, 0);

		/* drop packets, if requested */
		fakeif->iff_tx_pkts_count++;
		if (feth_tx_complete_error(fakeif, &sph, nifs)) {
			goto next_tx_slot;
		}
		ASSERT(sph != 0);
		STATS_INC(nifs, NETIF_STATS_TX_COPY_DIRECT);
		STATS_INC(nifs, NETIF_STATS_TX_PACKETS);

		stats.kcrsi_slots_transferred++;
		stats.kcrsi_bytes_transferred
		        += kern_packet_get_data_length(sph);

		/* prepare batch for receiver */
		pkts[n_pkts++] = sph;
		if (n_pkts == IFF_MAX_BATCH_SIZE) {
			feth_rx_submit(fakeif, peer_fakeif, pkts, n_pkts);
			feth_tx_complete(fakeif, pkts, n_pkts);
			n_pkts = 0;
		}

next_tx_slot:
		last_tx_slot = tx_slot;
		tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
	}

	/* catch last batch for receiver */
	if (n_pkts != 0) {
		feth_rx_submit(fakeif, peer_fakeif, pkts, n_pkts);
		feth_tx_complete(fakeif, pkts, n_pkts);
		n_pkts = 0;
	}

	if (last_tx_slot != NULL) {
		/* consume the processed TX slots and account stats */
		kern_channel_advance_slot(tx_ring, last_tx_slot);
		kern_channel_increment_ring_net_stats(tx_ring, ifp, &stats);
	}
done:
	feth_unlock();
	return 0;
}
1940
1941 static errno_t
feth_nx_sync_rx(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring,uint32_t flags)1942 feth_nx_sync_rx(kern_nexus_provider_t nxprov,
1943 kern_nexus_t nexus, kern_channel_ring_t ring, uint32_t flags)
1944 {
1945 #pragma unused(nxprov, ring, flags)
1946 if_fake_ref fakeif;
1947 struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
1948
1949 STATS_INC(nifs, NETIF_STATS_RX_SYNC);
1950 fakeif = feth_nexus_context(nexus);
1951 FETH_DPRINTF("%s:\n", fakeif->iff_name);
1952 return 0;
1953 }
1954
1955 static errno_t
feth_nx_tx_dequeue_driver_managed(if_fake_ref fakeif,boolean_t doorbell_ctxt)1956 feth_nx_tx_dequeue_driver_managed(if_fake_ref fakeif, boolean_t doorbell_ctxt)
1957 {
1958 int i;
1959 errno_t error = 0;
1960 boolean_t more;
1961
1962 for (i = 0; i < IFF_NUM_TX_RINGS_WMM_MODE; i++) {
1963 kern_channel_ring_t ring = fakeif->iff_tx_ring[i];
1964 if (ring != NULL) {
1965 error = kern_channel_tx_refill(ring, UINT32_MAX,
1966 UINT32_MAX, doorbell_ctxt, &more);
1967 }
1968 if (error != 0) {
1969 FETH_DPRINTF("%s: TX refill ring %d (%s) %d\n",
1970 fakeif->iff_name, ring->ckr_ring_id,
1971 doorbell_ctxt ? "sync" : "async", error);
1972 if (!((error == EAGAIN) || (error == EBUSY))) {
1973 break;
1974 }
1975 } else {
1976 FETH_DPRINTF("%s: TX refilled ring %d (%s)\n",
1977 fakeif->iff_name, ring->ckr_ring_id,
1978 doorbell_ctxt ? "sync" : "async");
1979 }
1980 }
1981 return error;
1982 }
1983
/*
 * feth_async_doorbell
 * - thread call servicing an asynchronous TX doorbell: refills the TX
 *   ring(s) outside the doorbell context
 * - marks iff_doorbell_tcall_active around the refill so that
 *   feth_nx_ring_fini() can msleep until it finishes; wakes any such
 *   waiter before returning
 * - note: the `done:` label is inside the locked region — the early goto
 *   takes it with feth_lock already held
 */
static void
feth_async_doorbell(thread_call_param_t arg0, thread_call_param_t arg1)
{
#pragma unused(arg1)
	errno_t error;
	if_fake_ref fakeif = (if_fake_ref)arg0;
	kern_channel_ring_t ring;
	boolean_t more;

	feth_lock();
	ring = fakeif->iff_tx_ring[0];
	if (feth_is_detaching(fakeif) ||
	    !fakeif->iff_channel_connected ||
	    ring == NULL) {
		goto done;
	}
	/* flag the call as running, then drop the lock for the refill */
	fakeif->iff_doorbell_tcall_active = TRUE;
	feth_unlock();
	if (feth_in_wmm_mode(fakeif)) {
		error = feth_nx_tx_dequeue_driver_managed(fakeif, FALSE);
	} else {
		error = kern_channel_tx_refill(ring, UINT32_MAX,
		    UINT32_MAX, FALSE, &more);
	}
	if (error != 0) {
		FETH_DPRINTF("%s: TX refill failed %d\n",
		    fakeif->iff_name, error);
	} else {
		FETH_DPRINTF("%s: TX refilled\n", fakeif->iff_name);
	}

	feth_lock();
done:
	fakeif->iff_doorbell_tcall_active = FALSE;
	if (fakeif->iff_waiting_for_tcall) {
		/* feth_nx_ring_fini() is sleeping on us; wake it */
		FETH_DPRINTF("%s: threadcall waking up waiter\n",
		    fakeif->iff_name);
		wakeup((caddr_t)fakeif);
	}
	feth_unlock();
}
2025
2026 static void
feth_schedule_async_doorbell(if_fake_ref fakeif)2027 feth_schedule_async_doorbell(if_fake_ref fakeif)
2028 {
2029 thread_call_t tcall;
2030
2031 feth_lock();
2032 if (feth_is_detaching(fakeif) || !fakeif->iff_channel_connected) {
2033 feth_unlock();
2034 return;
2035 }
2036 tcall = fakeif->iff_doorbell_tcall;
2037 if (tcall != NULL) {
2038 thread_call_enter(tcall);
2039 } else {
2040 tcall = thread_call_allocate_with_options(feth_async_doorbell,
2041 (thread_call_param_t)fakeif,
2042 THREAD_CALL_PRIORITY_KERNEL,
2043 THREAD_CALL_OPTIONS_ONCE);
2044 if (tcall == NULL) {
2045 printf("%s: %s tcall alloc failed\n",
2046 __func__, fakeif->iff_name);
2047 } else {
2048 fakeif->iff_doorbell_tcall = tcall;
2049 feth_retain(fakeif);
2050 thread_call_enter(tcall);
2051 }
2052 }
2053 feth_unlock();
2054 }
2055
2056 static errno_t
feth_nx_tx_doorbell(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring,uint32_t flags)2057 feth_nx_tx_doorbell(kern_nexus_provider_t nxprov,
2058 kern_nexus_t nexus, kern_channel_ring_t ring, uint32_t flags)
2059 {
2060 #pragma unused(nxprov, ring, flags)
2061 errno_t error;
2062 if_fake_ref fakeif;
2063
2064 fakeif = feth_nexus_context(nexus);
2065 FETH_DPRINTF("%s\n", fakeif->iff_name);
2066
2067 if ((flags & KERN_NEXUS_TXDOORBELLF_ASYNC_REFILL) == 0) {
2068 boolean_t more;
2069 /* synchronous tx refill */
2070 if (feth_in_wmm_mode(fakeif)) {
2071 error = feth_nx_tx_dequeue_driver_managed(fakeif, TRUE);
2072 } else {
2073 error = kern_channel_tx_refill(ring, UINT32_MAX,
2074 UINT32_MAX, TRUE, &more);
2075 }
2076 if (error != 0) {
2077 FETH_DPRINTF("%s: TX refill (sync) %d\n",
2078 fakeif->iff_name, error);
2079 } else {
2080 FETH_DPRINTF("%s: TX refilled (sync)\n",
2081 fakeif->iff_name);
2082 }
2083 } else {
2084 FETH_DPRINTF("%s: schedule async refill\n", fakeif->iff_name);
2085 feth_schedule_async_doorbell(fakeif);
2086 }
2087 return 0;
2088 }
2089
2090 static errno_t
feth_netif_prepare(kern_nexus_t nexus,ifnet_t ifp)2091 feth_netif_prepare(kern_nexus_t nexus, ifnet_t ifp)
2092 {
2093 if_fake_ref fakeif;
2094
2095 fakeif = (if_fake_ref)kern_nexus_get_context(nexus);
2096 feth_ifnet_set_attrs(fakeif, ifp);
2097 return 0;
2098 }
2099
2100 static errno_t
feth_nx_intf_adv_config(void * prov_ctx,bool enable)2101 feth_nx_intf_adv_config(void *prov_ctx, bool enable)
2102 {
2103 if_fake_ref fakeif = prov_ctx;
2104
2105 feth_lock();
2106 fakeif->iff_intf_adv_enabled = enable;
2107 feth_unlock();
2108 FETH_DPRINTF("%s enable %d\n", fakeif->iff_name, enable);
2109 return 0;
2110 }
2111
2112 static errno_t
fill_capab_interface_advisory(if_fake_ref fakeif,void * contents,uint32_t * len)2113 fill_capab_interface_advisory(if_fake_ref fakeif, void *contents, uint32_t *len)
2114 {
2115 struct kern_nexus_capab_interface_advisory *capab = contents;
2116
2117 if (*len != sizeof(*capab)) {
2118 return EINVAL;
2119 }
2120 if (capab->kncia_version !=
2121 KERN_NEXUS_CAPAB_INTERFACE_ADVISORY_VERSION_1) {
2122 return EINVAL;
2123 }
2124 if (!feth_has_intf_advisory_configured(fakeif)) {
2125 return ENOTSUP;
2126 }
2127 VERIFY(capab->kncia_notify != NULL);
2128 fakeif->iff_intf_adv_kern_ctx = capab->kncia_kern_context;
2129 fakeif->iff_intf_adv_notify = capab->kncia_notify;
2130 capab->kncia_provider_context = fakeif;
2131 capab->kncia_config = feth_nx_intf_adv_config;
2132 return 0;
2133 }
2134
2135 static errno_t
feth_notify_steering_info(void * prov_ctx,void * qset_ctx,struct ifnet_traffic_descriptor_common * td,bool add)2136 feth_notify_steering_info(void *prov_ctx, void *qset_ctx,
2137 struct ifnet_traffic_descriptor_common *td, bool add)
2138 {
2139 #pragma unused(td)
2140 if_fake_ref fakeif = prov_ctx;
2141 fake_qset *qset = qset_ctx;
2142
2143 FETH_DPRINTF("%s: notify_steering_info: qset_id 0x%llx, %s\n",
2144 fakeif->iff_name, qset->fqs_id, add ? "add" : "remove");
2145 return 0;
2146 }
2147
2148 static errno_t
fill_capab_qset_extensions(if_fake_ref fakeif,void * contents,uint32_t * len)2149 fill_capab_qset_extensions(if_fake_ref fakeif, void *contents, uint32_t *len)
2150 {
2151 struct kern_nexus_capab_qset_extensions *capab = contents;
2152
2153 if (*len != sizeof(*capab)) {
2154 return EINVAL;
2155 }
2156 if (capab->cqe_version !=
2157 KERN_NEXUS_CAPAB_QSET_EXTENSIONS_VERSION_1) {
2158 return EINVAL;
2159 }
2160 capab->cqe_prov_ctx = fakeif;
2161 capab->cqe_notify_steering_info = feth_notify_steering_info;
2162 return 0;
2163 }
2164
2165 static errno_t
feth_nx_capab_config(kern_nexus_provider_t nxprov,kern_nexus_t nx,kern_nexus_capab_t capab,void * contents,uint32_t * len)2166 feth_nx_capab_config(kern_nexus_provider_t nxprov, kern_nexus_t nx,
2167 kern_nexus_capab_t capab, void *contents, uint32_t *len)
2168 {
2169 #pragma unused(nxprov)
2170 errno_t error;
2171 if_fake_ref fakeif;
2172
2173 fakeif = feth_nexus_context(nx);
2174 FETH_DPRINTF("%s\n", fakeif->iff_name);
2175
2176 switch (capab) {
2177 case KERN_NEXUS_CAPAB_INTERFACE_ADVISORY:
2178 error = fill_capab_interface_advisory(fakeif, contents, len);
2179 break;
2180 case KERN_NEXUS_CAPAB_QSET_EXTENSIONS:
2181 error = fill_capab_qset_extensions(fakeif, contents, len);
2182 break;
2183 default:
2184 error = ENOTSUP;
2185 break;
2186 }
2187 return error;
2188 }
2189
2190 static int
feth_set_tso(ifnet_t ifp)2191 feth_set_tso(ifnet_t ifp)
2192 {
2193 ifnet_offload_t offload;
2194 uint32_t tso_v4_mtu, tso_v6_mtu;
2195 int error;
2196
2197 offload = IFNET_TSO_IPV4 | IFNET_TSO_IPV6;
2198 tso_v4_mtu = if_fake_tso_buffer_size;
2199 tso_v6_mtu = if_fake_tso_buffer_size;
2200 error = ifnet_set_offload(ifp, offload);
2201 if (error != 0) {
2202 printf("%s: set TSO offload failed on %s, err %d\n", __func__,
2203 if_name(ifp), error);
2204 return error;
2205 }
2206 error = ifnet_set_tso_mtu(ifp, AF_INET, tso_v4_mtu);
2207 if (error != 0) {
2208 printf("%s: set TSO MTU IPv4 failed on %s, err %d\n", __func__,
2209 if_name(ifp), error);
2210 return error;
2211 }
2212 error = ifnet_set_tso_mtu(ifp, AF_INET6, tso_v6_mtu);
2213 if (error != 0) {
2214 printf("%s: set TSO MTU IPv6 failed on %s, err %d\n", __func__,
2215 if_name(ifp), error);
2216 return error;
2217 }
2218 return 0;
2219 }
2220
/*
 * create_netif_provider_and_instance
 * - register a netif nexus provider for this feth interface and allocate
 *   a net provider instance for it, attaching the ifnet in the process
 * - on success, *ifp, *provider and *instance are filled in
 * - ring/slot callbacks in prov_init are used in the non-llink
 *   (ring-based) configuration
 */
static errno_t
create_netif_provider_and_instance(if_fake_ref fakeif,
    struct ifnet_init_eparams * init_params, ifnet_t *ifp,
    uuid_t * provider, uuid_t * instance)
{
	errno_t err;
	nexus_controller_t controller = kern_nexus_shared_controller();
	struct kern_nexus_net_init net_init;
	nexus_name_t provider_name;
	nexus_attr_t nexus_attr = NULL;
	struct kern_nexus_provider_init prov_init = {
		.nxpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
		.nxpi_flags = NXPIF_VIRTUAL_DEVICE,
		.nxpi_pre_connect = feth_nx_pre_connect,
		.nxpi_connected = feth_nx_connected,
		.nxpi_pre_disconnect = feth_nx_pre_disconnect,
		.nxpi_disconnected = feth_nx_disconnected,
		.nxpi_ring_init = feth_nx_ring_init,
		.nxpi_ring_fini = feth_nx_ring_fini,
		.nxpi_slot_init = feth_nx_slot_init,
		.nxpi_slot_fini = feth_nx_slot_fini,
		.nxpi_sync_tx = feth_nx_sync_tx,
		.nxpi_sync_rx = feth_nx_sync_rx,
		.nxpi_tx_doorbell = feth_nx_tx_doorbell,
		.nxpi_config_capab = feth_nx_capab_config,
	};

	_CASSERT(IFF_MAX_RX_RINGS == 1);
	err = kern_nexus_attr_create(&nexus_attr);
	if (err != 0) {
		printf("%s nexus attribute creation failed, error %d\n",
		    __func__, err);
		goto failed;
	}
	/* WMM mode uses multiple TX rings plus a WMM queue mapping */
	if (feth_in_wmm_mode(fakeif)) {
		err = kern_nexus_attr_set(nexus_attr, NEXUS_ATTR_TX_RINGS,
		    IFF_NUM_TX_RINGS_WMM_MODE);
		VERIFY(err == 0);
		err = kern_nexus_attr_set(nexus_attr, NEXUS_ATTR_RX_RINGS,
		    IFF_NUM_RX_RINGS_WMM_MODE);
		VERIFY(err == 0);
		err = kern_nexus_attr_set(nexus_attr, NEXUS_ATTR_QMAP,
		    NEXUS_QMAP_TYPE_WMM);
		VERIFY(err == 0);
	}

	err = kern_nexus_attr_set(nexus_attr, NEXUS_ATTR_ANONYMOUS, 1);
	VERIFY(err == 0);
	/* provider is named after the interface, e.g. com.apple.netif.feth0 */
	snprintf((char *)provider_name, sizeof(provider_name),
	    "com.apple.netif.%s", fakeif->iff_name);
	err = kern_nexus_controller_register_provider(controller,
	    feth_nx_dom_prov,
	    provider_name,
	    &prov_init,
	    sizeof(prov_init),
	    nexus_attr,
	    provider);
	if (err != 0) {
		printf("%s register provider failed, error %d\n",
		    __func__, err);
		goto failed;
	}
	bzero(&net_init, sizeof(net_init));
	net_init.nxneti_version = KERN_NEXUS_NET_CURRENT_VERSION;
	net_init.nxneti_flags = 0;
	net_init.nxneti_eparams = init_params;
	net_init.nxneti_lladdr = NULL;
	net_init.nxneti_prepare = feth_netif_prepare;
	net_init.nxneti_rx_pbufpool = fakeif->iff_rx_pp;
	net_init.nxneti_tx_pbufpool = fakeif->iff_tx_pp;
	err = kern_nexus_controller_alloc_net_provider_instance(controller,
	    *provider,
	    fakeif,
	    NULL,
	    instance,
	    &net_init,
	    ifp);
	if (err != 0) {
		/* undo the provider registration on failure */
		printf("%s alloc_net_provider_instance failed, %d\n",
		    __func__, err);
		kern_nexus_controller_deregister_provider(controller,
		    *provider);
		uuid_clear(*provider);
		goto failed;
	}
	if (feth_supports_tso(fakeif)) {
		/*
		 * NOTE(review): on TSO setup failure the provider
		 * instance allocated above is not freed here, unlike
		 * the alloc failure path — presumably the caller tears
		 * it down; verify against the caller's error handling.
		 */
		if ((err = feth_set_tso(*ifp)) != 0) {
			goto failed;
		}
	}

failed:
	/* attribute object is no longer needed regardless of outcome */
	if (nexus_attr != NULL) {
		kern_nexus_attr_destroy(nexus_attr);
	}
	return err;
}
2318
2319 /*
2320 * The nif_stats need to be referenced because we don't want it set
2321 * to NULL until the last llink is removed.
2322 */
2323 static void
get_nexus_stats(if_fake_ref fakeif,kern_nexus_t nexus)2324 get_nexus_stats(if_fake_ref fakeif, kern_nexus_t nexus)
2325 {
2326 if (++fakeif->iff_nifs_ref == 1) {
2327 ASSERT(fakeif->iff_nifs == NULL);
2328 fakeif->iff_nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
2329 }
2330 }
2331
2332 static void
clear_nexus_stats(if_fake_ref fakeif)2333 clear_nexus_stats(if_fake_ref fakeif)
2334 {
2335 if (--fakeif->iff_nifs_ref == 0) {
2336 ASSERT(fakeif->iff_nifs != NULL);
2337 fakeif->iff_nifs = NULL;
2338 }
2339 }
2340
2341 static errno_t
feth_nx_qset_init(kern_nexus_provider_t nxprov,kern_nexus_t nexus,void * llink_ctx,uint8_t qset_idx,uint64_t qset_id,kern_netif_qset_t qset,void ** qset_ctx)2342 feth_nx_qset_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
2343 void *llink_ctx, uint8_t qset_idx, uint64_t qset_id, kern_netif_qset_t qset,
2344 void **qset_ctx)
2345 {
2346 #pragma unused(nxprov)
2347 if_fake_ref fakeif;
2348 fake_llink *fl = llink_ctx;
2349 fake_qset *fqs;
2350
2351 feth_lock();
2352 fakeif = feth_nexus_context(nexus);
2353 if (feth_is_detaching(fakeif)) {
2354 feth_unlock();
2355 printf("%s: %s: detaching\n", __func__, fakeif->iff_name);
2356 return ENXIO;
2357 }
2358 if (qset_idx >= fl->fl_qset_cnt) {
2359 feth_unlock();
2360 printf("%s: %s: invalid qset_idx %d\n", __func__,
2361 fakeif->iff_name, qset_idx);
2362 return EINVAL;
2363 }
2364 fqs = &fl->fl_qset[qset_idx];
2365 ASSERT(fqs->fqs_qset == NULL);
2366 fqs->fqs_qset = qset;
2367 fqs->fqs_id = qset_id;
2368 *qset_ctx = fqs;
2369
2370 /* XXX This should really be done during registration */
2371 get_nexus_stats(fakeif, nexus);
2372 feth_unlock();
2373 return 0;
2374 }
2375
2376 static void
feth_nx_qset_fini(kern_nexus_provider_t nxprov,kern_nexus_t nexus,void * qset_ctx)2377 feth_nx_qset_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
2378 void *qset_ctx)
2379 {
2380 #pragma unused(nxprov)
2381 if_fake_ref fakeif;
2382 fake_qset *fqs = qset_ctx;
2383
2384 feth_lock();
2385 fakeif = feth_nexus_context(nexus);
2386 clear_nexus_stats(fakeif);
2387 ASSERT(fqs->fqs_qset != NULL);
2388 fqs->fqs_qset = NULL;
2389 fqs->fqs_id = 0;
2390 feth_unlock();
2391 }
2392
2393 static errno_t
feth_nx_queue_init(kern_nexus_provider_t nxprov,kern_nexus_t nexus,void * qset_ctx,uint8_t qidx,bool tx,kern_netif_queue_t queue,void ** queue_ctx)2394 feth_nx_queue_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
2395 void *qset_ctx, uint8_t qidx, bool tx, kern_netif_queue_t queue,
2396 void **queue_ctx)
2397 {
2398 #pragma unused(nxprov)
2399 if_fake_ref fakeif;
2400 fake_qset *fqs = qset_ctx;
2401 fake_queue *fq;
2402
2403 feth_lock();
2404 fakeif = feth_nexus_context(nexus);
2405 if (feth_is_detaching(fakeif)) {
2406 printf("%s: %s: detaching\n", __func__, fakeif->iff_name);
2407 feth_unlock();
2408 return ENXIO;
2409 }
2410 if (tx) {
2411 if (qidx >= fqs->fqs_tx_queue_cnt) {
2412 printf("%s: %s: invalid tx qidx %d\n", __func__,
2413 fakeif->iff_name, qidx);
2414 feth_unlock();
2415 return EINVAL;
2416 }
2417 fq = &fqs->fqs_tx_queue[qidx];
2418 } else {
2419 if (qidx >= fqs->fqs_rx_queue_cnt) {
2420 printf("%s: %s: invalid rx qidx %d\n", __func__,
2421 fakeif->iff_name, qidx);
2422 feth_unlock();
2423 return EINVAL;
2424 }
2425 fq = &fqs->fqs_rx_queue[qidx];
2426 }
2427 ASSERT(fq->fq_queue == NULL);
2428 fq->fq_queue = queue;
2429 *queue_ctx = fq;
2430 feth_unlock();
2431 return 0;
2432 }
2433
2434 static void
feth_nx_queue_fini(kern_nexus_provider_t nxprov,kern_nexus_t nexus,void * queue_ctx)2435 feth_nx_queue_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
2436 void *queue_ctx)
2437 {
2438 #pragma unused(nxprov, nexus)
2439 fake_queue *fq = queue_ctx;
2440
2441 feth_lock();
2442 ASSERT(fq->fq_queue != NULL);
2443 fq->fq_queue = NULL;
2444 feth_unlock();
2445 }
2446
/*
 * feth_nx_tx_queue_deliver_pkt_chain
 * - walk a chain of transmitted packets (linked via the packet "next"
 *   field), tap them for bpf, apply simulated TX drops, and deliver
 *   them in batches of up to IFF_MAX_BATCH_SIZE to the peer feth's
 *   RX queue identified by (llink_idx, qset_idx)
 * - each delivered batch is also TX-completed back to the sender
 */
static void
feth_nx_tx_queue_deliver_pkt_chain(if_fake_ref fakeif, kern_packet_t sph,
    struct netif_stats *nifs, if_fake_ref peer_fakeif,
    uint32_t llink_idx, uint32_t qset_idx)
{
	kern_packet_t pkts[IFF_MAX_BATCH_SIZE];
	uint32_t n_pkts = 0;

	while (sph != 0) {
		uint16_t off;
		kern_packet_t next;

		/* unlink the packet from the chain before handing it off */
		next = kern_packet_get_next(sph);
		kern_packet_set_next(sph, 0);

		/* bpf tap output */
		off = kern_packet_get_headroom(sph);
		VERIFY(off >= fakeif->iff_tx_headroom);
		kern_packet_set_link_header_length(sph, ETHER_HDR_LEN);
		feth_packet_set_trace_tag(sph, IFF_TT_OUTPUT);
		bpf_tap_packet_out(fakeif->iff_ifp, DLT_EN10MB, sph, NULL, 0);

		/* drop packets, if requested */
		fakeif->iff_tx_pkts_count++;
		if (feth_tx_complete_error(fakeif, &sph, nifs)) {
			/* packet was consumed as a simulated drop */
			goto next_pkt;
		}
		ASSERT(sph != 0);
		STATS_INC(nifs, NETIF_STATS_TX_COPY_DIRECT);
		STATS_INC(nifs, NETIF_STATS_TX_PACKETS);

		/* prepare batch for receiver */
		pkts[n_pkts++] = sph;
		if (n_pkts == IFF_MAX_BATCH_SIZE) {
			/* batch full: push to the peer and complete TX */
			feth_rx_queue_submit(fakeif, peer_fakeif, llink_idx,
			    qset_idx, pkts, n_pkts);
			feth_tx_complete(fakeif, pkts, n_pkts);
			n_pkts = 0;
		}
next_pkt:
		sph = next;
	}
	/* catch last batch for receiver */
	if (n_pkts != 0) {
		feth_rx_queue_submit(fakeif, peer_fakeif, llink_idx, qset_idx,
		    pkts, n_pkts);
		feth_tx_complete(fakeif, pkts, n_pkts);
		n_pkts = 0;
	}
}
2497
/*
 * feth_nx_tx_qset_notify
 * - TX notification callback for a logical-link qset (llink mode)
 * - verifies both this interface and its peer are attached/connected,
 *   then dequeues packet chains from each TX queue in the qset and
 *   delivers them to the peer's RX path
 * - always returns 0; dequeue errors other than EAGAIN are only logged
 */
static errno_t
feth_nx_tx_qset_notify(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    void *qset_ctx, uint32_t flags)
{
#pragma unused(nxprov)
	if_fake_ref fakeif;
	ifnet_t ifp;
	ifnet_t peer_ifp;
	if_fake_ref peer_fakeif = NULL;
	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
	fake_qset *qset = qset_ctx;
	boolean_t detaching, connected;
	uint32_t i;
	errno_t err;

	STATS_INC(nifs, NETIF_STATS_TX_SYNC);
	fakeif = feth_nexus_context(nexus);
	FETH_DPRINTF("%s qset %p, idx %d, flags 0x%x\n", fakeif->iff_name, qset,
	    qset->fqs_idx, flags);

	feth_lock();
	/* nothing to do if this side is going away or has no channel */
	detaching = feth_is_detaching(fakeif);
	connected = fakeif->iff_channel_connected;
	if (detaching || !connected) {
		FETH_DPRINTF("%s: %s: detaching %s, channel connected %s\n",
		    __func__, fakeif->iff_name,
		    (detaching ? "true" : "false"),
		    (connected ? "true" : "false"));
		feth_unlock();
		return 0;
	}
	ifp = fakeif->iff_ifp;
	peer_ifp = fakeif->iff_peer;
	/* the peer must exist and also be attached/connected to receive */
	if (peer_ifp != NULL) {
		peer_fakeif = ifnet_get_if_fake(peer_ifp);
		if (peer_fakeif != NULL) {
			detaching = feth_is_detaching(peer_fakeif);
			connected = peer_fakeif->iff_channel_connected;
			if (detaching || !connected) {
				FETH_DPRINTF("%s: peer %s: detaching %s, "
				    "channel connected %s\n",
				    __func__, peer_fakeif->iff_name,
				    (detaching ? "true" : "false"),
				    (connected ? "true" : "false"));
				goto done;
			}
		} else {
			FETH_DPRINTF("%s: peer_fakeif is NULL\n", __func__);
			goto done;
		}
	} else {
		printf("%s: peer_ifp is NULL\n", __func__);
		goto done;
	}

	/*
	 * Test knob: periodically flip the qset between combined and
	 * separate mode to exercise both paths in the netif layer.
	 */
	if (if_fake_switch_combined_mode &&
	    qset->fqs_dequeue_cnt >= if_fake_switch_mode_frequency) {
		if (qset->fqs_combined_mode) {
			kern_netif_set_qset_separate(qset->fqs_qset);
		} else {
			kern_netif_set_qset_combined(qset->fqs_qset);
		}
		qset->fqs_combined_mode = !qset->fqs_combined_mode;
		qset->fqs_dequeue_cnt = 0;
	}

	/* drain each TX queue and hand the chain to the peer's RX side */
	for (i = 0; i < qset->fqs_tx_queue_cnt; i++) {
		kern_packet_t sph = 0;
		kern_netif_queue_t queue = qset->fqs_tx_queue[i].fq_queue;
		boolean_t more = FALSE;

		err = kern_netif_queue_tx_dequeue(queue, UINT32_MAX, UINT32_MAX,
		    &more, &sph);
		if (err != 0 && err != EAGAIN) {
			FETH_DPRINTF("%s queue %p dequeue failed: err "
			    "%d\n", fakeif->iff_name, queue, err);
		}
		feth_nx_tx_queue_deliver_pkt_chain(fakeif, sph, nifs,
		    peer_fakeif, qset->fqs_llink_idx, qset->fqs_idx);
	}

done:
	feth_unlock();
	return 0;
}
2583
2584 static void
fill_qset_info_and_params(if_fake_ref fakeif,fake_llink * llink_info,uint32_t qset_idx,struct kern_nexus_netif_llink_qset_init * qset_init,bool is_def,bool is_low_latency)2585 fill_qset_info_and_params(if_fake_ref fakeif, fake_llink *llink_info,
2586 uint32_t qset_idx, struct kern_nexus_netif_llink_qset_init *qset_init,
2587 bool is_def, bool is_low_latency)
2588 {
2589 fake_qset *qset_info = &llink_info->fl_qset[qset_idx];
2590
2591 qset_init->nlqi_flags =
2592 (is_def ? KERN_NEXUS_NET_LLINK_QSET_DEFAULT : 0) |
2593 (is_low_latency ? KERN_NEXUS_NET_LLINK_QSET_LOW_LATENCY : 0) |
2594 KERN_NEXUS_NET_LLINK_QSET_AQM;
2595
2596 if (feth_in_wmm_mode(fakeif)) {
2597 qset_init->nlqi_flags |= KERN_NEXUS_NET_LLINK_QSET_WMM_MODE;
2598 qset_init->nlqi_num_txqs = IFF_NUM_TX_QUEUES_WMM_MODE;
2599 qset_init->nlqi_num_rxqs = IFF_NUM_RX_QUEUES_WMM_MODE;
2600 } else {
2601 qset_init->nlqi_num_txqs = 1;
2602 qset_init->nlqi_num_rxqs = 1;
2603 }
2604 qset_info->fqs_tx_queue_cnt = qset_init->nlqi_num_txqs;
2605 qset_info->fqs_rx_queue_cnt = qset_init->nlqi_num_rxqs;
2606
2607 /* These are needed for locating the peer qset */
2608 qset_info->fqs_llink_idx = llink_info->fl_idx;
2609 qset_info->fqs_idx = qset_idx;
2610 }
2611
2612 static void
fill_llink_info_and_params(if_fake_ref fakeif,uint32_t llink_idx,struct kern_nexus_netif_llink_init * llink_init,uint32_t llink_id,struct kern_nexus_netif_llink_qset_init * qset_init,uint32_t qset_cnt,uint32_t flags)2613 fill_llink_info_and_params(if_fake_ref fakeif, uint32_t llink_idx,
2614 struct kern_nexus_netif_llink_init *llink_init, uint32_t llink_id,
2615 struct kern_nexus_netif_llink_qset_init *qset_init, uint32_t qset_cnt,
2616 uint32_t flags)
2617 {
2618 fake_llink *llink_info = &fakeif->iff_llink[llink_idx];
2619 uint32_t i;
2620 bool create_ll_qset = if_fake_low_latency && (llink_idx != 0);
2621
2622 for (i = 0; i < qset_cnt; i++) {
2623 fill_qset_info_and_params(fakeif, llink_info, i,
2624 &qset_init[i], i == 0, create_ll_qset && i == 1);
2625 }
2626 llink_info->fl_idx = llink_idx;
2627
2628 /* This doesn't have to be the same as llink_idx */
2629 llink_info->fl_id = llink_id;
2630 llink_info->fl_qset_cnt = qset_cnt;
2631
2632 llink_init->nli_link_id = llink_id;
2633 llink_init->nli_num_qsets = qset_cnt;
2634 llink_init->nli_qsets = qset_init;
2635 llink_init->nli_flags = flags;
2636 llink_init->nli_ctx = llink_info;
2637 }
2638
/*
 * create_non_default_llinks
 * - add the configured number of additional (non-default) logical
 *   links to the netif nexus; the default llink (index 0) was created
 *   with the provider instance
 * - on failure, removes every llink added so far and resets the count
 */
static errno_t
create_non_default_llinks(if_fake_ref fakeif)
{
	struct kern_nexus *nx;
	fake_nx_t fnx = &fakeif->iff_nx;
	struct kern_nexus_netif_llink_init llink_init;
	struct kern_nexus_netif_llink_qset_init qset_init[FETH_MAX_QSETS];
	errno_t err;
	uint64_t llink_id;
	uint32_t i;

	nx = nx_find(fnx->fnx_instance, FALSE);
	if (nx == NULL) {
		printf("%s: %s: nx not found\n", __func__, fakeif->iff_name);
		return ENXIO;
	}
	/* Default llink starts at index 0 */
	for (i = 1; i < if_fake_llink_cnt; i++) {
		llink_id = (uint64_t)i;

		/*
		 * The llink_init and qset_init structures are reused for
		 * each llink creation.
		 */
		fill_llink_info_and_params(fakeif, i, &llink_init,
		    llink_id, qset_init, if_fake_qset_cnt, 0);
		err = kern_nexus_netif_llink_add(nx, &llink_init);
		if (err != 0) {
			printf("%s: %s: llink add failed, error %d\n",
			    __func__, fakeif->iff_name, err);
			goto fail;
		}
		fakeif->iff_llink_cnt++;
	}
	nx_release(nx);
	return 0;

fail:
	/*
	 * NOTE(review): this unwind loop starts at index 0, so it also
	 * removes the default llink (unlike remove_non_default_llinks,
	 * which starts at 1 and says index 0 "is freed separately") —
	 * confirm this is intended for the failure path.
	 */
	for (i = 0; i < fakeif->iff_llink_cnt; i++) {
		int e;

		e = kern_nexus_netif_llink_remove(nx, fakeif->
		    iff_llink[i].fl_id);
		if (e != 0) {
			printf("%s: %s: llink remove failed, llink_id 0x%llx, "
			    "error %d\n", __func__, fakeif->iff_name,
			    fakeif->iff_llink[i].fl_id, e);
		}
		fakeif->iff_llink[i].fl_id = 0;
	}
	fakeif->iff_llink_cnt = 0;
	nx_release(nx);
	return err;
}
2693
/*
 * create_netif_llink_provider_and_instance
 * - register a netif nexus provider using the logical-link (qset/queue)
 *   callback set and allocate the net provider instance with the
 *   default llink; any additional llinks are added afterwards
 * - on success, *ifp, *provider and *instance are filled in
 */
static errno_t
create_netif_llink_provider_and_instance(if_fake_ref fakeif,
    struct ifnet_init_eparams * init_params, ifnet_t *ifp,
    uuid_t * provider, uuid_t * instance)
{
	errno_t err;
	nexus_controller_t controller = kern_nexus_shared_controller();
	struct kern_nexus_net_init net_init;
	struct kern_nexus_netif_llink_init llink_init;
	struct kern_nexus_netif_llink_qset_init qsets[FETH_MAX_QSETS];

	nexus_name_t provider_name;
	nexus_attr_t nexus_attr = NULL;
	struct kern_nexus_netif_provider_init prov_init = {
		.nxnpi_version = KERN_NEXUS_DOMAIN_PROVIDER_NETIF,
		.nxnpi_flags = NXPIF_VIRTUAL_DEVICE,
		.nxnpi_pre_connect = feth_nx_pre_connect,
		.nxnpi_connected = feth_nx_connected,
		.nxnpi_pre_disconnect = feth_nx_pre_disconnect,
		.nxnpi_disconnected = feth_nx_disconnected,
		.nxnpi_qset_init = feth_nx_qset_init,
		.nxnpi_qset_fini = feth_nx_qset_fini,
		.nxnpi_queue_init = feth_nx_queue_init,
		.nxnpi_queue_fini = feth_nx_queue_fini,
		.nxnpi_tx_qset_notify = feth_nx_tx_qset_notify,
		.nxnpi_config_capab = feth_nx_capab_config,
	};

	err = kern_nexus_attr_create(&nexus_attr);
	if (err != 0) {
		printf("%s nexus attribute creation failed, error %d\n",
		    __func__, err);
		goto failed;
	}

	err = kern_nexus_attr_set(nexus_attr, NEXUS_ATTR_ANONYMOUS, 1);
	VERIFY(err == 0);

	/* provider is named after the interface, e.g. com.apple.netif.feth0 */
	snprintf((char *)provider_name, sizeof(provider_name),
	    "com.apple.netif.%s", fakeif->iff_name);
	/*
	 * NOTE(review): prov_init is a kern_nexus_netif_provider_init
	 * cast to kern_nexus_provider_init for registration — presumably
	 * the KPI dispatches on nxnpi_version; verify against the
	 * register_provider contract.
	 */
	err = kern_nexus_controller_register_provider(controller,
	    feth_nx_dom_prov,
	    provider_name,
	    (struct kern_nexus_provider_init *)&prov_init,
	    sizeof(prov_init),
	    nexus_attr,
	    provider);
	if (err != 0) {
		printf("%s register provider failed, error %d\n",
		    __func__, err);
		goto failed;
	}
	bzero(&net_init, sizeof(net_init));
	net_init.nxneti_version = KERN_NEXUS_NET_CURRENT_VERSION;
	net_init.nxneti_flags = 0;
	net_init.nxneti_eparams = init_params;
	net_init.nxneti_lladdr = NULL;
	net_init.nxneti_prepare = feth_netif_prepare;
	net_init.nxneti_rx_pbufpool = fakeif->iff_rx_pp;
	net_init.nxneti_tx_pbufpool = fakeif->iff_tx_pp;

	/*
	 * Assume llink id is same as the index for if_fake.
	 * This is not required for other drivers.
	 */
	_CASSERT(NETIF_LLINK_ID_DEFAULT == 0);
	fill_llink_info_and_params(fakeif, 0, &llink_init,
	    NETIF_LLINK_ID_DEFAULT, qsets, if_fake_qset_cnt,
	    KERN_NEXUS_NET_LLINK_DEFAULT);

	net_init.nxneti_llink = &llink_init;

	err = kern_nexus_controller_alloc_net_provider_instance(controller,
	    *provider, fakeif, NULL, instance, &net_init, ifp);
	if (err != 0) {
		/* undo the provider registration on failure */
		printf("%s alloc_net_provider_instance failed, %d\n",
		    __func__, err);
		kern_nexus_controller_deregister_provider(controller,
		    *provider);
		uuid_clear(*provider);
		goto failed;
	}
	/* the default llink now exists */
	fakeif->iff_llink_cnt++;

	if (if_fake_llink_cnt > 1) {
		err = create_non_default_llinks(fakeif);
		if (err != 0) {
			printf("%s create_non_default_llinks failed, %d\n",
			    __func__, err);
			/* tears down the instance/provider created above */
			feth_detach_netif_nexus(fakeif);
			goto failed;
		}
	}
	if (feth_supports_tso(fakeif)) {
		/*
		 * NOTE(review): on TSO setup failure the instance is not
		 * torn down here (unlike the llink failure just above) —
		 * presumably the caller cleans up; verify.
		 */
		if ((err = feth_set_tso(*ifp)) != 0) {
			goto failed;
		}
	}
failed:
	/* attribute object is no longer needed regardless of outcome */
	if (nexus_attr != NULL) {
		kern_nexus_attr_destroy(nexus_attr);
	}
	return err;
}
2798
2799 static errno_t
feth_attach_netif_nexus(if_fake_ref fakeif,struct ifnet_init_eparams * init_params,ifnet_t * ifp)2800 feth_attach_netif_nexus(if_fake_ref fakeif,
2801 struct ifnet_init_eparams * init_params, ifnet_t *ifp)
2802 {
2803 errno_t error;
2804 fake_nx_t nx = &fakeif->iff_nx;
2805
2806 error = feth_packet_pool_make(fakeif);
2807 if (error != 0) {
2808 return error;
2809 }
2810 if (if_fake_llink_cnt == 0) {
2811 return create_netif_provider_and_instance(fakeif, init_params,
2812 ifp, &nx->fnx_provider, &nx->fnx_instance);
2813 } else {
2814 return create_netif_llink_provider_and_instance(fakeif,
2815 init_params, ifp, &nx->fnx_provider,
2816 &nx->fnx_instance);
2817 }
2818 }
2819
2820 static void
remove_non_default_llinks(if_fake_ref fakeif)2821 remove_non_default_llinks(if_fake_ref fakeif)
2822 {
2823 struct kern_nexus *nx;
2824 fake_nx_t fnx = &fakeif->iff_nx;
2825 uint32_t i;
2826
2827 if (fakeif->iff_llink_cnt <= 1) {
2828 return;
2829 }
2830 nx = nx_find(fnx->fnx_instance, FALSE);
2831 if (nx == NULL) {
2832 printf("%s: %s: nx not found\n", __func__,
2833 fakeif->iff_name);
2834 return;
2835 }
2836 /* Default llink (at index 0) is freed separately */
2837 for (i = 1; i < fakeif->iff_llink_cnt; i++) {
2838 int err;
2839
2840 err = kern_nexus_netif_llink_remove(nx, fakeif->
2841 iff_llink[i].fl_id);
2842 if (err != 0) {
2843 printf("%s: %s: llink remove failed, llink_id 0x%llx, "
2844 "error %d\n", __func__, fakeif->iff_name,
2845 fakeif->iff_llink[i].fl_id, err);
2846 }
2847 fakeif->iff_llink[i].fl_id = 0;
2848 }
2849 fakeif->iff_llink_cnt = 0;
2850 nx_release(nx);
2851 }
2852
2853 static void
detach_provider_and_instance(uuid_t provider,uuid_t instance)2854 detach_provider_and_instance(uuid_t provider, uuid_t instance)
2855 {
2856 nexus_controller_t controller = kern_nexus_shared_controller();
2857 errno_t err;
2858
2859 if (!uuid_is_null(instance)) {
2860 err = kern_nexus_controller_free_provider_instance(controller,
2861 instance);
2862 if (err != 0) {
2863 printf("%s free_provider_instance failed %d\n",
2864 __func__, err);
2865 }
2866 uuid_clear(instance);
2867 }
2868 if (!uuid_is_null(provider)) {
2869 err = kern_nexus_controller_deregister_provider(controller,
2870 provider);
2871 if (err != 0) {
2872 printf("%s deregister_provider %d\n", __func__, err);
2873 }
2874 uuid_clear(provider);
2875 }
2876 return;
2877 }
2878
/*
 * Tear down the netif nexus for this interface: remove any non-default
 * logical links first, then free the provider instance and deregister
 * the provider.
 */
static void
feth_detach_netif_nexus(if_fake_ref fakeif)
{
	fake_nx_t fnx = &fakeif->iff_nx;

	remove_non_default_llinks(fakeif);
	detach_provider_and_instance(fnx->fnx_provider, fnx->fnx_instance);
}
2887
2888 #endif /* SKYWALK */
2889
2890 /**
2891 ** feth interface routines
2892 **/
2893 static void
feth_ifnet_set_attrs(if_fake_ref fakeif,ifnet_t ifp)2894 feth_ifnet_set_attrs(if_fake_ref fakeif, ifnet_t ifp)
2895 {
2896 (void)ifnet_set_capabilities_enabled(ifp, 0, -1);
2897 ifnet_set_addrlen(ifp, ETHER_ADDR_LEN);
2898 ifnet_set_baudrate(ifp, 0);
2899 ifnet_set_mtu(ifp, ETHERMTU);
2900 ifnet_set_flags(ifp,
2901 IFF_BROADCAST | IFF_MULTICAST | IFF_SIMPLEX,
2902 0xffff);
2903 ifnet_set_hdrlen(ifp, sizeof(struct ether_header));
2904 if ((fakeif->iff_flags & IFF_FLAGS_HWCSUM) != 0) {
2905 ifnet_set_offload(ifp,
2906 IFNET_CSUM_IP | IFNET_CSUM_TCP | IFNET_CSUM_UDP |
2907 IFNET_CSUM_TCPIPV6 | IFNET_CSUM_UDPIPV6);
2908 } else {
2909 ifnet_set_offload(ifp, 0);
2910 }
2911 }
2912
2913 static void
interface_link_event(ifnet_t ifp,u_int32_t event_code)2914 interface_link_event(ifnet_t ifp, u_int32_t event_code)
2915 {
2916 struct event {
2917 u_int32_t ifnet_family;
2918 u_int32_t unit;
2919 char if_name[IFNAMSIZ];
2920 };
2921 _Alignas(struct kern_event_msg) char message[sizeof(struct kern_event_msg) + sizeof(struct event)] = { 0 };
2922 struct kern_event_msg *header = (struct kern_event_msg*)message;
2923 struct event *data = (struct event *)(header + 1);
2924
2925 header->total_size = sizeof(message);
2926 header->vendor_code = KEV_VENDOR_APPLE;
2927 header->kev_class = KEV_NETWORK_CLASS;
2928 header->kev_subclass = KEV_DL_SUBCLASS;
2929 header->event_code = event_code;
2930 data->ifnet_family = ifnet_family(ifp);
2931 data->unit = (u_int32_t)ifnet_unit(ifp);
2932 strlcpy(data->if_name, ifnet_name(ifp), IFNAMSIZ);
2933 ifnet_event(ifp, header);
2934 }
2935
/*
 * Return the if_fake instance stored in the ifnet's softc pointer.
 */
static if_fake_ref
ifnet_get_if_fake(ifnet_t ifp)
{
	return (if_fake_ref)ifnet_softc(ifp);
}
2941
2942 static int
feth_clone_create(struct if_clone * ifc,u_int32_t unit,__unused void * params)2943 feth_clone_create(struct if_clone *ifc, u_int32_t unit, __unused void *params)
2944 {
2945 int error;
2946 if_fake_ref fakeif;
2947 struct ifnet_init_eparams feth_init;
2948 ifnet_t ifp;
2949 uint8_t mac_address[ETHER_ADDR_LEN];
2950 fake_llink *iff_llink;
2951
2952 iff_llink = kalloc_type(fake_llink, FETH_MAX_LLINKS, Z_WAITOK_ZERO);
2953 if (iff_llink == NULL) {
2954 return ENOBUFS;
2955 }
2956 fakeif = kalloc_type(struct if_fake, Z_WAITOK_ZERO_NOFAIL);
2957 fakeif->iff_llink = iff_llink;
2958 fakeif->iff_retain_count = 1;
2959 #define FAKE_ETHER_NAME_LEN (sizeof(FAKE_ETHER_NAME) - 1)
2960 _CASSERT(FAKE_ETHER_NAME_LEN == 4);
2961 bcopy(FAKE_ETHER_NAME, mac_address, FAKE_ETHER_NAME_LEN);
2962 mac_address[ETHER_ADDR_LEN - 2] = (unit & 0xff00) >> 8;
2963 mac_address[ETHER_ADDR_LEN - 1] = unit & 0xff;
2964 if (if_fake_bsd_mode != 0) {
2965 fakeif->iff_flags |= IFF_FLAGS_BSD_MODE;
2966 }
2967 if (if_fake_hwcsum != 0) {
2968 fakeif->iff_flags |= IFF_FLAGS_HWCSUM;
2969 }
2970 fakeif->iff_max_mtu = get_max_mtu(if_fake_bsd_mode, if_fake_max_mtu);
2971 fakeif->iff_fcs = if_fake_fcs;
2972 fakeif->iff_trailer_length = if_fake_trailer_length;
2973
2974 /* use the interface name as the unique id for ifp recycle */
2975 if ((unsigned int)
2976 snprintf(fakeif->iff_name, sizeof(fakeif->iff_name), "%s%d",
2977 ifc->ifc_name, unit) >= sizeof(fakeif->iff_name)) {
2978 feth_release(fakeif);
2979 return EINVAL;
2980 }
2981 bzero(&feth_init, sizeof(feth_init));
2982 feth_init.ver = IFNET_INIT_CURRENT_VERSION;
2983 feth_init.len = sizeof(feth_init);
2984 if (feth_in_bsd_mode(fakeif)) {
2985 if (if_fake_txstart != 0) {
2986 feth_init.start = feth_start;
2987 } else {
2988 feth_init.flags |= IFNET_INIT_LEGACY;
2989 feth_init.output = feth_output;
2990 }
2991 }
2992 #if SKYWALK
2993 else {
2994 feth_init.flags |= IFNET_INIT_SKYWALK_NATIVE;
2995 /*
2996 * Currently we support WMM mode only for Skywalk native
2997 * interface.
2998 */
2999 if (if_fake_wmm_mode != 0) {
3000 fakeif->iff_flags |= IFF_FLAGS_WMM_MODE;
3001 }
3002
3003 if (if_fake_multibuflet != 0) {
3004 fakeif->iff_flags |= IFF_FLAGS_MULTIBUFLETS;
3005 }
3006
3007 if (if_fake_multibuflet != 0 &&
3008 if_fake_pktpool_mode == IFF_PP_MODE_PRIVATE_SPLIT) {
3009 printf("%s: multi-buflet not supported for split rx &"
3010 " tx pool", __func__);
3011 feth_release(fakeif);
3012 return EINVAL;
3013 }
3014
3015 fakeif->iff_pp_mode = if_fake_pktpool_mode;
3016 if (if_fake_tso_support != 0) {
3017 if (fakeif->iff_pp_mode != IFF_PP_MODE_GLOBAL) {
3018 printf("%s: TSO mode requires global packet"
3019 " pool mode\n", __func__);
3020 return EINVAL;
3021 }
3022 fakeif->iff_flags |= IFF_FLAGS_TSO_SUPPORT;
3023 }
3024
3025 fakeif->iff_tx_headroom = if_fake_tx_headroom;
3026 fakeif->iff_adv_interval = if_fake_if_adv_interval;
3027 if (fakeif->iff_adv_interval > 0) {
3028 feth_init.flags |= IFNET_INIT_IF_ADV;
3029 }
3030 fakeif->iff_tx_drop_rate = if_fake_tx_drops;
3031 fakeif->iff_tx_completion_mode = if_tx_completion_mode;
3032 }
3033 feth_init.tx_headroom = fakeif->iff_tx_headroom;
3034 #endif /* SKYWALK */
3035 if (if_fake_nxattach == 0) {
3036 feth_init.flags |= IFNET_INIT_NX_NOAUTO;
3037 }
3038 feth_init.uniqueid = fakeif->iff_name;
3039 feth_init.uniqueid_len = strlen(fakeif->iff_name);
3040 feth_init.name = ifc->ifc_name;
3041 feth_init.unit = unit;
3042 feth_init.family = IFNET_FAMILY_ETHERNET;
3043 feth_init.type = IFT_ETHER;
3044 feth_init.demux = ether_demux;
3045 feth_init.add_proto = ether_add_proto;
3046 feth_init.del_proto = ether_del_proto;
3047 feth_init.check_multi = ether_check_multi;
3048 feth_init.framer_extended = ether_frameout_extended;
3049 feth_init.softc = fakeif;
3050 feth_init.ioctl = feth_ioctl;
3051 feth_init.set_bpf_tap = NULL;
3052 feth_init.detach = feth_if_free;
3053 feth_init.broadcast_addr = etherbroadcastaddr;
3054 feth_init.broadcast_len = ETHER_ADDR_LEN;
3055 if (feth_in_bsd_mode(fakeif)) {
3056 error = ifnet_allocate_extended(&feth_init, &ifp);
3057 if (error) {
3058 feth_release(fakeif);
3059 return error;
3060 }
3061 feth_ifnet_set_attrs(fakeif, ifp);
3062 }
3063 #if SKYWALK
3064 else {
3065 if (feth_in_wmm_mode(fakeif)) {
3066 feth_init.output_sched_model =
3067 IFNET_SCHED_MODEL_DRIVER_MANAGED;
3068 }
3069 error = feth_attach_netif_nexus(fakeif, &feth_init, &ifp);
3070 if (error != 0) {
3071 feth_release(fakeif);
3072 return error;
3073 }
3074 /* take an additional reference to ensure that it doesn't go away */
3075 feth_retain(fakeif);
3076 fakeif->iff_ifp = ifp;
3077 }
3078 #endif /* SKYWALK */
3079 fakeif->iff_media_count = MIN(default_media_words_count, IF_FAKE_MEDIA_LIST_MAX);
3080 bcopy(default_media_words, fakeif->iff_media_list,
3081 fakeif->iff_media_count * sizeof(fakeif->iff_media_list[0]));
3082 if (feth_in_bsd_mode(fakeif)) {
3083 error = ifnet_attach(ifp, NULL);
3084 if (error) {
3085 ifnet_release(ifp);
3086 feth_release(fakeif);
3087 return error;
3088 }
3089 fakeif->iff_ifp = ifp;
3090 }
3091
3092 ifnet_set_lladdr(ifp, mac_address, sizeof(mac_address));
3093
3094 /* attach as ethernet */
3095 bpfattach(ifp, DLT_EN10MB, sizeof(struct ether_header));
3096 return 0;
3097 }
3098
/*
 * feth_clone_destroy
 * - handle "ifconfig fethN destroy": mark the softc as detaching under
 *   the feth lock, tear down the netif nexus if one was attached,
 *   break the peer connection, and detach the ifnet
 */
static int
feth_clone_destroy(ifnet_t ifp)
{
	if_fake_ref fakeif;
#if SKYWALK
	boolean_t nx_attached = FALSE;
#endif /* SKYWALK */

	feth_lock();
	fakeif = ifnet_get_if_fake(ifp);
	if (fakeif == NULL || feth_is_detaching(fakeif)) {
		/* already destroyed, or a concurrent destroy won the race */
		feth_unlock();
		return 0;
	}
	/* claim the detach so any later destroy attempt is a no-op */
	feth_set_detaching(fakeif);
#if SKYWALK
	nx_attached = !feth_in_bsd_mode(fakeif);
#endif /* SKYWALK */
	feth_unlock();

#if SKYWALK
	if (nx_attached) {
		feth_detach_netif_nexus(fakeif);
		/* drop the extra reference taken when the nexus attached */
		feth_release(fakeif);
	}
#endif /* SKYWALK */
	/* disconnect from the peer, if any */
	feth_config(ifp, NULL);
	ifnet_detach(ifp);
	return 0;
}
3129
3130 static void
feth_enqueue_input(ifnet_t ifp,struct mbuf * m)3131 feth_enqueue_input(ifnet_t ifp, struct mbuf * m)
3132 {
3133 struct ifnet_stat_increment_param stats = {};
3134
3135 stats.packets_in = 1;
3136 stats.bytes_in = (uint32_t)mbuf_pkthdr_len(m) + ETHER_HDR_LEN;
3137 ifnet_input(ifp, m, &stats);
3138 }
3139
3140 static struct mbuf *
copy_mbuf(struct mbuf * m)3141 copy_mbuf(struct mbuf *m)
3142 {
3143 struct mbuf * copy_m;
3144 uint32_t pkt_len;
3145 uint32_t offset;
3146
3147 if ((m->m_flags & M_PKTHDR) == 0) {
3148 return NULL;
3149 }
3150 pkt_len = m->m_pkthdr.len;
3151 MGETHDR(copy_m, M_DONTWAIT, MT_DATA);
3152 if (copy_m == NULL) {
3153 goto failed;
3154 }
3155 if (pkt_len > MHLEN) {
3156 if (pkt_len <= MCLBYTES) {
3157 MCLGET(copy_m, M_DONTWAIT);
3158 } else if (pkt_len <= MBIGCLBYTES) {
3159 copy_m = m_mbigget(copy_m, M_DONTWAIT);
3160 } else if (pkt_len <= M16KCLBYTES && njcl > 0) {
3161 copy_m = m_m16kget(copy_m, M_DONTWAIT);
3162 } else {
3163 printf("if_fake: copy_mbuf(): packet too large %d\n",
3164 pkt_len);
3165 goto failed;
3166 }
3167 if (copy_m == NULL || (copy_m->m_flags & M_EXT) == 0) {
3168 goto failed;
3169 }
3170 }
3171 mbuf_setlen(copy_m, pkt_len);
3172 copy_m->m_pkthdr.len = pkt_len;
3173 copy_m->m_pkthdr.pkt_svc = m->m_pkthdr.pkt_svc;
3174 offset = 0;
3175 while (m != NULL && offset < pkt_len) {
3176 uint32_t frag_len;
3177
3178 frag_len = m->m_len;
3179 if (frag_len > (pkt_len - offset)) {
3180 printf("if_fake_: Large mbuf fragment %d > %d\n",
3181 frag_len, (pkt_len - offset));
3182 goto failed;
3183 }
3184 m_copydata(m, 0, frag_len, mtodo(copy_m, offset));
3185 offset += frag_len;
3186 m = m->m_next;
3187 }
3188 return copy_m;
3189
3190 failed:
3191 if (copy_m != NULL) {
3192 m_freem(copy_m);
3193 }
3194 return NULL;
3195 }
3196
3197 static int
feth_add_mbuf_trailer(struct mbuf * m,void * trailer,size_t trailer_len)3198 feth_add_mbuf_trailer(struct mbuf *m, void *trailer, size_t trailer_len)
3199 {
3200 int ret;
3201 ASSERT(trailer_len <= FETH_TRAILER_LENGTH_MAX);
3202
3203 ret = m_append(m, trailer_len, (caddr_t)trailer);
3204 if (ret == 1) {
3205 FETH_DPRINTF("%s %zuB trailer added\n", __func__, trailer_len);
3206 return 0;
3207 }
3208 printf("%s m_append failed\n", __func__);
3209 return ENOTSUP;
3210 }
3211
3212 static int
feth_add_mbuf_fcs(struct mbuf * m)3213 feth_add_mbuf_fcs(struct mbuf *m)
3214 {
3215 uint32_t pkt_len, offset = 0;
3216 uint32_t crc = 0;
3217 int err = 0;
3218
3219 ASSERT(sizeof(crc) == ETHER_CRC_LEN);
3220
3221 pkt_len = m->m_pkthdr.len;
3222 struct mbuf *iter = m;
3223 while (iter != NULL && offset < pkt_len) {
3224 uint32_t frag_len = iter->m_len;
3225 ASSERT(frag_len <= (pkt_len - offset));
3226 crc = crc32(crc, mtod(iter, void *), frag_len);
3227 offset += frag_len;
3228 iter = m->m_next;
3229 }
3230
3231 err = feth_add_mbuf_trailer(m, &crc, ETHER_CRC_LEN);
3232 if (err != 0) {
3233 return err;
3234 }
3235
3236 m->m_flags |= M_HASFCS;
3237
3238 return 0;
3239 }
3240
/*
 * feth_output_common
 * - deliver an outbound packet from `ifp' directly into the input path
 *   of its `peer' interface (the "crossed-over" data path)
 * - optionally fakes hardware checksum offload and appends a trailer
 *   and/or ethernet FCS before hand-off
 * - consumes `m': it is enqueued onto `peer' via feth_enqueue_input
 */
static void
feth_output_common(ifnet_t ifp, struct mbuf * m, ifnet_t peer,
    iff_flags_t flags, bool fcs, void *trailer, size_t trailer_len)
{
	void * frame_header;

	frame_header = mbuf_data(m);
	if ((flags & IFF_FLAGS_HWCSUM) != 0) {
		/* pretend the "hardware" verified all checksums */
		m->m_pkthdr.csum_data = 0xffff;
		m->m_pkthdr.csum_flags =
		    CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
		    CSUM_IP_CHECKED | CSUM_IP_VALID;
	}

	(void)ifnet_stat_increment_out(ifp, 1, m->m_pkthdr.len, 0);
	bpf_tap_out(ifp, DLT_EN10MB, m, NULL, 0);

	if (trailer != 0) {
		feth_add_mbuf_trailer(m, trailer, trailer_len);
	}
	if (fcs) {
		feth_add_mbuf_fcs(m);
	}

	/* retarget the packet at the peer and strip the ethernet header */
	(void)mbuf_pkthdr_setrcvif(m, peer);
	mbuf_pkthdr_setheader(m, frame_header);
	mbuf_pkthdr_adjustlen(m, -ETHER_HDR_LEN);
	(void)mbuf_setdata(m, (char *)mbuf_data(m) + ETHER_HDR_LEN,
	    mbuf_len(m) - ETHER_HDR_LEN);
	bpf_tap_in(peer, DLT_EN10MB, m, frame_header,
	    sizeof(struct ether_header));
	feth_enqueue_input(peer, m);
}
3274
/*
 * feth_start
 * - ifnet start callback (TXSTART model): dequeue packets from the
 *   interface output queue and deliver copies to the peer's input path
 * - packets are dropped when there is no peer
 * - the feth lock is dropped while dequeueing/transmitting; the
 *   iff_start_busy flag prevents re-entry and the softc is re-looked-up
 *   after re-acquiring the lock since it may have detached meanwhile
 */
static void
feth_start(ifnet_t ifp)
{
	struct mbuf * copy_m = NULL;
	if_fake_ref fakeif;
	iff_flags_t flags = 0;
	bool fcs;
	size_t trailer_len;
	ifnet_t peer = NULL;
	struct mbuf * m;
	struct mbuf * save_m;

	feth_lock();
	fakeif = ifnet_get_if_fake(ifp);
	if (fakeif == NULL) {
		feth_unlock();
		return;
	}

	if (fakeif->iff_start_busy) {
		/* another thread is already running the start routine */
		feth_unlock();
		printf("if_fake: start is busy\n");
		return;
	}

	/* snapshot the configuration while the lock is held */
	peer = fakeif->iff_peer;
	flags = fakeif->iff_flags;
	fcs = fakeif->iff_fcs;
	trailer_len = fakeif->iff_trailer_length;

	/* check for pending TX */
	m = fakeif->iff_pending_tx_packet;
	if (m != NULL) {
		if (peer != NULL) {
			copy_m = copy_mbuf(m);
			if (copy_m == NULL) {
				/* still can't copy it; retry next start */
				feth_unlock();
				return;
			}
		}
		fakeif->iff_pending_tx_packet = NULL;
		m_freem(m);
		m = NULL;
	}
	fakeif->iff_start_busy = TRUE;
	feth_unlock();
	save_m = NULL;
	for (;;) {
		/* transmit the copy made on the previous iteration */
		if (copy_m != NULL) {
			VERIFY(peer != NULL);
			feth_output_common(ifp, copy_m, peer, flags, fcs,
			    feth_trailer, trailer_len);
			copy_m = NULL;
		}
		if (ifnet_dequeue(ifp, &m) != 0) {
			break;
		}
		if (peer == NULL) {
			m_freem(m);
		} else {
			copy_m = copy_mbuf(m);
			if (copy_m == NULL) {
				/* copy failed; stash the original for later */
				save_m = m;
				break;
			}
			m_freem(m);
		}
	}
	peer = NULL;
	feth_lock();
	/* re-look-up: the softc may have detached while unlocked */
	fakeif = ifnet_get_if_fake(ifp);
	if (fakeif != NULL) {
		fakeif->iff_start_busy = FALSE;
		if (save_m != NULL && fakeif->iff_peer != NULL) {
			/* save it for next time */
			fakeif->iff_pending_tx_packet = save_m;
			save_m = NULL;
		}
	}
	feth_unlock();
	if (save_m != NULL) {
		/* didn't save packet, so free it */
		m_freem(save_m);
	}
}
3360
3361 static int
feth_output(ifnet_t ifp,struct mbuf * m)3362 feth_output(ifnet_t ifp, struct mbuf * m)
3363 {
3364 struct mbuf * copy_m;
3365 if_fake_ref fakeif;
3366 iff_flags_t flags;
3367 bool fcs;
3368 size_t trailer_len;
3369 ifnet_t peer = NULL;
3370
3371 if (m == NULL) {
3372 return 0;
3373 }
3374 copy_m = copy_mbuf(m);
3375 m_freem(m);
3376 m = NULL;
3377 if (copy_m == NULL) {
3378 /* count this as an output error */
3379 ifnet_stat_increment_out(ifp, 0, 0, 1);
3380 return 0;
3381 }
3382 feth_lock();
3383 fakeif = ifnet_get_if_fake(ifp);
3384 if (fakeif != NULL) {
3385 peer = fakeif->iff_peer;
3386 flags = fakeif->iff_flags;
3387 fcs = fakeif->iff_fcs;
3388 trailer_len = fakeif->iff_trailer_length;
3389 }
3390 feth_unlock();
3391 if (peer == NULL) {
3392 m_freem(copy_m);
3393 ifnet_stat_increment_out(ifp, 0, 0, 1);
3394 return 0;
3395 }
3396 feth_output_common(ifp, copy_m, peer, flags, fcs, feth_trailer,
3397 trailer_len);
3398 return 0;
3399 }
3400
/*
 * feth_config
 * - `peer' non-NULL: cross-connect `ifp' and `peer' so that output on
 *   one becomes input on the other; fails with EBUSY if either end is
 *   already connected or detaching
 * - `peer' NULL: break any existing connection
 * - link on/off kernel events are generated after the lock is dropped
 */
static int
feth_config(ifnet_t ifp, ifnet_t peer)
{
	int connected = FALSE;
	int disconnected = FALSE;
	int error = 0;
	if_fake_ref fakeif = NULL;

	feth_lock();
	fakeif = ifnet_get_if_fake(ifp);
	if (fakeif == NULL) {
		error = EINVAL;
		goto done;
	}
	if (peer != NULL) {
		/* connect to peer */
		if_fake_ref peer_fakeif;

		peer_fakeif = ifnet_get_if_fake(peer);
		if (peer_fakeif == NULL) {
			error = EINVAL;
			goto done;
		}
		/* both ends must be free and not tearing down */
		if (feth_is_detaching(fakeif) ||
		    feth_is_detaching(peer_fakeif) ||
		    peer_fakeif->iff_peer != NULL ||
		    fakeif->iff_peer != NULL) {
			error = EBUSY;
			goto done;
		}
#if SKYWALK
		/* both ends must use the same packet pool mode */
		if (fakeif->iff_pp_mode !=
		    peer_fakeif->iff_pp_mode) {
			error = EINVAL;
			goto done;
		}
#endif /* SKYWALK */
		fakeif->iff_peer = peer;
		peer_fakeif->iff_peer = ifp;
		connected = TRUE;
	} else if (fakeif->iff_peer != NULL) {
		/* disconnect from peer */
		if_fake_ref peer_fakeif;

		peer = fakeif->iff_peer;
		peer_fakeif = ifnet_get_if_fake(peer);
		if (peer_fakeif == NULL) {
			/* should not happen */
			error = EINVAL;
			goto done;
		}
		fakeif->iff_peer = NULL;
		peer_fakeif->iff_peer = NULL;
		disconnected = TRUE;
	}

done:
	feth_unlock();

	/* generate link status event if we connect or disconnect */
	if (connected) {
		interface_link_event(ifp, KEV_DL_LINK_ON);
		interface_link_event(peer, KEV_DL_LINK_ON);
	} else if (disconnected) {
		interface_link_event(ifp, KEV_DL_LINK_OFF);
		interface_link_event(peer, KEV_DL_LINK_OFF);
	}
	return error;
}
3470
3471 static int
feth_set_media(ifnet_t ifp,struct if_fake_request * iffr)3472 feth_set_media(ifnet_t ifp, struct if_fake_request * iffr)
3473 {
3474 if_fake_ref fakeif;
3475 int error;
3476
3477 if (iffr->iffr_media.iffm_count > IF_FAKE_MEDIA_LIST_MAX) {
3478 /* list is too long */
3479 return EINVAL;
3480 }
3481 feth_lock();
3482 fakeif = ifnet_get_if_fake(ifp);
3483 if (fakeif == NULL) {
3484 error = EINVAL;
3485 goto done;
3486 }
3487 fakeif->iff_media_count = iffr->iffr_media.iffm_count;
3488 bcopy(iffr->iffr_media.iffm_list, fakeif->iff_media_list,
3489 iffr->iffr_media.iffm_count * sizeof(fakeif->iff_media_list[0]));
3490 #if 0
3491 /* XXX: "auto-negotiate" active with peer? */
3492 /* generate link status event? */
3493 fakeif->iff_media_current = iffr->iffr_media.iffm_current;
3494 #endif
3495 error = 0;
3496 done:
3497 feth_unlock();
3498 return error;
3499 }
3500
3501 static int
if_fake_request_copyin(user_addr_t user_addr,struct if_fake_request * iffr,u_int32_t len)3502 if_fake_request_copyin(user_addr_t user_addr,
3503 struct if_fake_request *iffr, u_int32_t len)
3504 {
3505 int error;
3506
3507 if (user_addr == USER_ADDR_NULL || len < sizeof(*iffr)) {
3508 error = EINVAL;
3509 goto done;
3510 }
3511 error = copyin(user_addr, iffr, sizeof(*iffr));
3512 if (error != 0) {
3513 goto done;
3514 }
3515 if (iffr->iffr_reserved[0] != 0 || iffr->iffr_reserved[1] != 0 ||
3516 iffr->iffr_reserved[2] != 0 || iffr->iffr_reserved[3] != 0) {
3517 error = EINVAL;
3518 goto done;
3519 }
3520 done:
3521 return error;
3522 }
3523
3524 static int
feth_set_drvspec(ifnet_t ifp,uint32_t cmd,u_int32_t len,user_addr_t user_addr)3525 feth_set_drvspec(ifnet_t ifp, uint32_t cmd, u_int32_t len,
3526 user_addr_t user_addr)
3527 {
3528 int error;
3529 struct if_fake_request iffr;
3530 ifnet_t peer;
3531
3532 switch (cmd) {
3533 case IF_FAKE_S_CMD_SET_PEER:
3534 error = if_fake_request_copyin(user_addr, &iffr, len);
3535 if (error != 0) {
3536 break;
3537 }
3538 if (iffr.iffr_peer_name[0] == '\0') {
3539 error = feth_config(ifp, NULL);
3540 break;
3541 }
3542
3543 /* ensure nul termination */
3544 iffr.iffr_peer_name[IFNAMSIZ - 1] = '\0';
3545 peer = ifunit(iffr.iffr_peer_name);
3546 if (peer == NULL) {
3547 error = ENXIO;
3548 break;
3549 }
3550 if (ifnet_type(peer) != IFT_ETHER) {
3551 error = EINVAL;
3552 break;
3553 }
3554 if (strcmp(ifnet_name(peer), FAKE_ETHER_NAME) != 0) {
3555 error = EINVAL;
3556 break;
3557 }
3558 error = feth_config(ifp, peer);
3559 break;
3560 case IF_FAKE_S_CMD_SET_MEDIA:
3561 error = if_fake_request_copyin(user_addr, &iffr, len);
3562 if (error != 0) {
3563 break;
3564 }
3565 error = feth_set_media(ifp, &iffr);
3566 break;
3567 case IF_FAKE_S_CMD_SET_DEQUEUE_STALL:
3568 error = if_fake_request_copyin(user_addr, &iffr, len);
3569 if (error != 0) {
3570 break;
3571 }
3572 error = feth_enable_dequeue_stall(ifp,
3573 iffr.iffr_dequeue_stall);
3574 break;
3575 default:
3576 error = EOPNOTSUPP;
3577 break;
3578 }
3579 return error;
3580 }
3581
3582 static int
feth_get_drvspec(ifnet_t ifp,u_int32_t cmd,u_int32_t len,user_addr_t user_addr)3583 feth_get_drvspec(ifnet_t ifp, u_int32_t cmd, u_int32_t len,
3584 user_addr_t user_addr)
3585 {
3586 int error = EOPNOTSUPP;
3587 if_fake_ref fakeif;
3588 struct if_fake_request iffr;
3589 ifnet_t peer;
3590
3591 switch (cmd) {
3592 case IF_FAKE_G_CMD_GET_PEER:
3593 if (len < sizeof(iffr)) {
3594 error = EINVAL;
3595 break;
3596 }
3597 feth_lock();
3598 fakeif = ifnet_get_if_fake(ifp);
3599 if (fakeif == NULL) {
3600 feth_unlock();
3601 error = EOPNOTSUPP;
3602 break;
3603 }
3604 peer = fakeif->iff_peer;
3605 feth_unlock();
3606 bzero(&iffr, sizeof(iffr));
3607 if (peer != NULL) {
3608 strlcpy(iffr.iffr_peer_name,
3609 if_name(peer),
3610 sizeof(iffr.iffr_peer_name));
3611 }
3612 error = copyout(&iffr, user_addr, sizeof(iffr));
3613 break;
3614 default:
3615 break;
3616 }
3617 return error;
3618 }
3619
/*
 * union ifdrvu
 * - convenience view of a SIOC[SG]DRVSPEC argument as either its
 *   32-bit or 64-bit user-space layout
 */
union ifdrvu {
	struct ifdrv32 *ifdrvu_32;
	struct ifdrv64 *ifdrvu_64;
	void *ifdrvu_p;
};
3625
/*
 * feth_ioctl
 * - interface ioctl handler: media queries, device MTU, driver-private
 *   (IF_FAKE_*) commands, link-layer address, and interface flags
 * - returns 0 on success, an errno otherwise
 */
static int
feth_ioctl(ifnet_t ifp, u_long cmd, void * data)
{
	unsigned int count;
	struct ifdevmtu * devmtu_p;
	union ifdrvu drv;
	uint32_t drv_cmd;
	uint32_t drv_len;
	boolean_t drv_set_command = FALSE;
	int error = 0;
	struct ifmediareq * ifmr;
	struct ifreq * ifr;
	if_fake_ref fakeif;
	int status;
	user_addr_t user_addr;

	ifr = (struct ifreq *)data;
	switch (cmd) {
	case SIOCSIFADDR:
		ifnet_set_flags(ifp, IFF_UP, IFF_UP);
		break;

	case SIOCGIFMEDIA32:
	case SIOCGIFMEDIA64:
		feth_lock();
		fakeif = ifnet_get_if_fake(ifp);
		if (fakeif == NULL) {
			feth_unlock();
			return EOPNOTSUPP;
		}
		/* link is "active" only while connected to a peer */
		status = (fakeif->iff_peer != NULL)
		    ? (IFM_AVALID | IFM_ACTIVE) : IFM_AVALID;
		ifmr = (struct ifmediareq *)data;
		user_addr = (cmd == SIOCGIFMEDIA64) ?
		    ((struct ifmediareq64 *)ifmr)->ifmu_ulist :
		    CAST_USER_ADDR_T(((struct ifmediareq32 *)ifmr)->ifmu_ulist);
		count = ifmr->ifm_count;
		ifmr->ifm_active = IFM_ETHER;
		ifmr->ifm_current = IFM_ETHER;
		ifmr->ifm_mask = 0;
		ifmr->ifm_status = status;
		if (user_addr == USER_ADDR_NULL) {
			/* sizing probe: report how many media words exist */
			ifmr->ifm_count = fakeif->iff_media_count;
		} else if (count > 0) {
			/* copy out at most what the caller asked for */
			if (count > fakeif->iff_media_count) {
				count = fakeif->iff_media_count;
			}
			ifmr->ifm_count = count;
			error = copyout(&fakeif->iff_media_list, user_addr,
			    count * sizeof(int));
		}
		feth_unlock();
		break;

	case SIOCGIFDEVMTU:
		devmtu_p = &ifr->ifr_devmtu;
		devmtu_p->ifdm_current = ifnet_mtu(ifp);
		devmtu_p->ifdm_max = feth_max_mtu(ifp);
		devmtu_p->ifdm_min = IF_MINMTU;
		break;

	case SIOCSIFMTU:
		if ((unsigned int)ifr->ifr_mtu > feth_max_mtu(ifp) ||
		    ifr->ifr_mtu < IF_MINMTU) {
			error = EINVAL;
		} else {
			error = ifnet_set_mtu(ifp, ifr->ifr_mtu);
		}
		break;

	case SIOCSDRVSPEC32:
	case SIOCSDRVSPEC64:
		/* set commands are privileged */
		error = proc_suser(current_proc());
		if (error != 0) {
			break;
		}
		drv_set_command = TRUE;
		OS_FALLTHROUGH;
	case SIOCGDRVSPEC32:
	case SIOCGDRVSPEC64:
		/* unpack the driver command from the 32/64-bit layout */
		drv.ifdrvu_p = data;
		if (cmd == SIOCGDRVSPEC32 || cmd == SIOCSDRVSPEC32) {
			drv_cmd = drv.ifdrvu_32->ifd_cmd;
			drv_len = drv.ifdrvu_32->ifd_len;
			user_addr = CAST_USER_ADDR_T(drv.ifdrvu_32->ifd_data);
		} else {
			drv_cmd = drv.ifdrvu_64->ifd_cmd;
			drv_len = drv.ifdrvu_64->ifd_len;
			user_addr = drv.ifdrvu_64->ifd_data;
		}
		if (drv_set_command) {
			error = feth_set_drvspec(ifp, drv_cmd, drv_len,
			    user_addr);
		} else {
			error = feth_get_drvspec(ifp, drv_cmd, drv_len,
			    user_addr);
		}
		break;

	case SIOCSIFLLADDR:
		error = ifnet_set_lladdr(ifp, ifr->ifr_addr.sa_data,
		    ifr->ifr_addr.sa_len);
		break;

	case SIOCSIFFLAGS:
		/* keep IFF_RUNNING in sync with IFF_UP */
		if ((ifp->if_flags & IFF_UP) != 0) {
			/* marked up, set running if not already set */
			if ((ifp->if_flags & IFF_RUNNING) == 0) {
				/* set running */
				error = ifnet_set_flags(ifp, IFF_RUNNING,
				    IFF_RUNNING);
			}
		} else if ((ifp->if_flags & IFF_RUNNING) != 0) {
			/* marked down, clear running */
			error = ifnet_set_flags(ifp, 0, IFF_RUNNING);
		}
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		/* multicast filtering is irrelevant for a fake interface */
		error = 0;
		break;
	default:
		error = EOPNOTSUPP;
		break;
	}
	return error;
}
3754
/*
 * feth_if_free
 * - ifnet detach-complete callback: clear the ifnet's softc pointer
 *   and drop the softc and ifnet references taken at create time
 */
static void
feth_if_free(ifnet_t ifp)
{
	if_fake_ref fakeif;

	if (ifp == NULL) {
		return;
	}
	feth_lock();
	fakeif = ifnet_get_if_fake(ifp);
	if (fakeif == NULL) {
		feth_unlock();
		return;
	}
	/* unhook the softc so further lookups return NULL */
	ifp->if_softc = NULL;
#if SKYWALK
	/* the doorbell thread call must already have been torn down */
	VERIFY(fakeif->iff_doorbell_tcall == NULL);
#endif /* SKYWALK */
	feth_unlock();
	feth_release(fakeif);
	ifnet_release(ifp);
	return;
}
3778
3779 __private_extern__ void
if_fake_init(void)3780 if_fake_init(void)
3781 {
3782 int error;
3783
3784 #if SKYWALK
3785 (void)feth_register_nexus_domain_provider();
3786 #endif /* SKYWALK */
3787 error = if_clone_attach(&feth_cloner);
3788 if (error != 0) {
3789 return;
3790 }
3791 return;
3792 }
3793