1 /*
2 * Copyright (c) 2015-2022 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 /*
30 * if_fake.c
31 * - fake network interface used for testing
32 * - "feth" (e.g. "feth0", "feth1") is a virtual ethernet interface that allows
33 * two instances to have their output/input paths "crossed-over" so that
34 * output on one is input on the other
35 */
36
37 /*
38 * Modification History:
39 *
40 * September 9, 2015 Dieter Siegmund ([email protected])
41 * - created
42 */
43
44 #include <sys/param.h>
45 #include <sys/kernel.h>
46 #include <sys/malloc.h>
47 #include <sys/mbuf.h>
48 #include <sys/queue.h>
49 #include <sys/socket.h>
50 #include <sys/sockio.h>
51 #include <sys/sysctl.h>
52 #include <sys/systm.h>
53 #include <sys/kern_event.h>
54 #include <sys/mcache.h>
55 #include <sys/syslog.h>
56
57 #include <net/bpf.h>
58 #include <net/ethernet.h>
59 #include <net/if.h>
60 #include <net/if_vlan_var.h>
61 #include <net/if_fake_var.h>
62 #include <net/if_arp.h>
63 #include <net/if_dl.h>
64 #include <net/if_ether.h>
65 #include <net/if_types.h>
66 #include <libkern/OSAtomic.h>
67
68 #include <net/dlil.h>
69
70 #include <net/kpi_interface.h>
71 #include <net/kpi_protocol.h>
72
73 #include <kern/locks.h>
74 #include <kern/zalloc.h>
75
76 #ifdef INET
77 #include <netinet/in.h>
78 #include <netinet/if_ether.h>
79 #endif
80
81 #include <net/if_media.h>
82 #include <net/ether_if_module.h>
83 #if SKYWALK
84 #include <skywalk/os_skywalk_private.h>
85 #include <skywalk/nexus/netif/nx_netif.h>
86 #include <skywalk/channel/channel_var.h>
87 #endif /* SKYWALK */
88
89 static boolean_t
is_power_of_two(unsigned int val)90 is_power_of_two(unsigned int val)
91 {
92 return (val & (val - 1)) == 0;
93 }
94
#define FAKE_ETHER_NAME         "feth"

/* Parent node: every feth tunable in this file lives under net.link.fake. */
SYSCTL_DECL(_net_link);
SYSCTL_NODE(_net_link, OID_AUTO, fake, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
    "Fake interface");

/* net.link.fake.txstart: attach using the TXSTART (start-callback) model */
static int if_fake_txstart = 1;
SYSCTL_INT(_net_link_fake, OID_AUTO, txstart, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_txstart, 0, "Fake interface TXSTART mode");

/* net.link.fake.hwcsum: simulate hardware checksum offload */
static int if_fake_hwcsum = 0;
SYSCTL_INT(_net_link_fake, OID_AUTO, hwcsum, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_hwcsum, 0, "Fake interface simulate hardware checksum");

/* net.link.fake.nxattach: automatically attach a nexus to new interfaces */
static int if_fake_nxattach = 0;
SYSCTL_INT(_net_link_fake, OID_AUTO, nxattach, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_nxattach, 0, "Fake interface auto-attach nexus");

/* net.link.fake.bsd_mode: create as a classic BSD (mbuf-based) interface */
static int if_fake_bsd_mode = 1;
SYSCTL_INT(_net_link_fake, OID_AUTO, bsd_mode, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_bsd_mode, 0, "Fake interface attach as BSD interface");

/* net.link.fake.debug: enable FETH_DPRINTF debug logging */
static int if_fake_debug = 0;
SYSCTL_INT(_net_link_fake, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_debug, 0, "Fake interface debug logs");

/* net.link.fake.wmm_mode: use the 802.11 WMM multi-queue layout */
static int if_fake_wmm_mode = 0;
SYSCTL_INT(_net_link_fake, OID_AUTO, wmm_mode, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_wmm_mode, 0, "Fake interface in 802.11 WMM mode");

/* net.link.fake.multibuflet: allow packets to span multiple buflets */
static int if_fake_multibuflet = 0;
SYSCTL_INT(_net_link_fake, OID_AUTO, multibuflet, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_multibuflet, 0, "Fake interface using multi-buflet packets");

/* net.link.fake.low_latency: create a low-latency queue set */
static int if_fake_low_latency = 0;
SYSCTL_INT(_net_link_fake, OID_AUTO, low_latency, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_low_latency, 0, "Fake interface with a low latency qset");

/* net.link.fake.switch_combined_mode: flip qset mode while dequeueing */
static int if_fake_switch_combined_mode = 0;
SYSCTL_INT(_net_link_fake, OID_AUTO, switch_combined_mode,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_fake_switch_combined_mode, 0,
    "Switch a qset between combined and separate mode during dequeues");

/* net.link.fake.switch_mode_frequency: dequeues between mode switches */
static int if_fake_switch_mode_frequency = 10;
SYSCTL_INT(_net_link_fake, OID_AUTO, switch_mode_frequency,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_fake_switch_mode_frequency, 0,
    "The number of dequeues before we switch between the combined and separated mode");

/* net.link.fake.tso_support: advertise TSO offload support */
static int if_fake_tso_support = 0;
SYSCTL_INT(_net_link_fake, OID_AUTO, tso_support, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_tso_support, 0, "Fake interface with support for TSO offload");
146
147 typedef enum {
148 IFF_PP_MODE_GLOBAL = 0, /* share a global pool */
149 IFF_PP_MODE_PRIVATE = 1, /* creates its own rx/tx pool */
150 IFF_PP_MODE_PRIVATE_SPLIT = 2, /* creates its own split rx & tx pool */
151 } iff_pktpool_mode_t;
152 static iff_pktpool_mode_t if_fake_pktpool_mode = IFF_PP_MODE_GLOBAL;
153 SYSCTL_INT(_net_link_fake, OID_AUTO, pktpool_mode, CTLFLAG_RW | CTLFLAG_LOCKED,
154 &if_fake_pktpool_mode, IFF_PP_MODE_GLOBAL,
155 "Fake interface packet pool mode (0 global, 1 private, 2 private split");
156
157 #define FETH_LINK_LAYER_AGGRETATION_FACTOR_MAX 512
158 #define FETH_LINK_LAYER_AGGRETATION_FACTOR_DEF 96
159 static int if_fake_link_layer_aggregation_factor =
160 FETH_LINK_LAYER_AGGRETATION_FACTOR_DEF;
161 static int
162 feth_link_layer_aggregation_factor_sysctl SYSCTL_HANDLER_ARGS
163 {
164 #pragma unused(oidp, arg1, arg2)
165 unsigned int new_value;
166 int changed;
167 int error;
168
169 error = sysctl_io_number(req, if_fake_link_layer_aggregation_factor,
170 sizeof(if_fake_link_layer_aggregation_factor), &new_value,
171 &changed);
172 if (error == 0 && changed != 0) {
173 if (new_value <= 0 ||
174 new_value > FETH_LINK_LAYER_AGGRETATION_FACTOR_MAX) {
175 return EINVAL;
176 }
177 if_fake_link_layer_aggregation_factor = new_value;
178 }
179 return error;
180 }
181
182 SYSCTL_PROC(_net_link_fake, OID_AUTO, link_layer_aggregation_factor,
183 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
184 0, 0, feth_link_layer_aggregation_factor_sysctl, "IU",
185 "Fake interface link layer aggregation factor");
186
187 #define FETH_TX_HEADROOM_MAX 32
188 static unsigned int if_fake_tx_headroom = FETH_TX_HEADROOM_MAX;
189 static int
190 feth_tx_headroom_sysctl SYSCTL_HANDLER_ARGS
191 {
192 #pragma unused(oidp, arg1, arg2)
193 unsigned int new_value;
194 int changed;
195 int error;
196
197 error = sysctl_io_number(req, if_fake_tx_headroom,
198 sizeof(if_fake_tx_headroom), &new_value, &changed);
199 if (error == 0 && changed != 0) {
200 if (new_value > FETH_TX_HEADROOM_MAX ||
201 (new_value % 8) != 0) {
202 return EINVAL;
203 }
204 if_fake_tx_headroom = new_value;
205 }
206 return 0;
207 }
208
209 SYSCTL_PROC(_net_link_fake, OID_AUTO, tx_headroom,
210 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
211 0, 0, feth_tx_headroom_sysctl, "IU", "Fake ethernet Tx headroom");
212
213 static int if_fake_fcs = 0;
214 SYSCTL_INT(_net_link_fake, OID_AUTO, fcs, CTLFLAG_RW | CTLFLAG_LOCKED,
215 &if_fake_fcs, 0, "Fake interface using frame check sequence");
216
217 #define FETH_TRAILER_LENGTH_MAX 28
218 char feth_trailer[FETH_TRAILER_LENGTH_MAX + 1] = "trailertrailertrailertrailer";
219 static unsigned int if_fake_trailer_length = 0;
220 static int
221 feth_trailer_length_sysctl SYSCTL_HANDLER_ARGS
222 {
223 #pragma unused(oidp, arg1, arg2)
224 unsigned int new_value;
225 int changed;
226 int error;
227
228 error = sysctl_io_number(req, if_fake_trailer_length,
229 sizeof(if_fake_trailer_length), &new_value, &changed);
230 if (error == 0 && changed != 0) {
231 if (new_value > FETH_TRAILER_LENGTH_MAX) {
232 return EINVAL;
233 }
234 if_fake_trailer_length = new_value;
235 }
236 return 0;
237 }
238
239 SYSCTL_PROC(_net_link_fake, OID_AUTO, trailer_length,
240 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
241 feth_trailer_length_sysctl, "IU", "Fake interface frame trailer length");
242
243 /* sysctl net.link.fake.max_mtu */
244 #define FETH_MAX_MTU_DEFAULT 2048
245 #define FETH_MAX_MTU_MAX ((16 * 1024) - ETHER_HDR_LEN)
246
247 static unsigned int if_fake_max_mtu = FETH_MAX_MTU_DEFAULT;
248
249 /* sysctl net.link.fake.buflet_size */
250 #define FETH_BUFLET_SIZE_MIN 512
251 #define FETH_BUFLET_SIZE_MAX (32 * 1024)
252 #define FETH_TSO_BUFLET_SIZE (16 * 1024)
253
254 static unsigned int if_fake_buflet_size = FETH_BUFLET_SIZE_MIN;
255 static unsigned int if_fake_tso_buffer_size = FETH_TSO_BUFLET_SIZE;
256
257 static int
258 feth_tso_buffer_size_sysctl SYSCTL_HANDLER_ARGS
259 {
260 #pragma unused(oidp, arg1, arg2)
261 unsigned int new_value;
262 int changed;
263 int error;
264
265 error = sysctl_io_number(req, if_fake_tso_buffer_size,
266 sizeof(if_fake_tso_buffer_size), &new_value, &changed);
267 if (error == 0 && changed != 0) {
268 /* must be a power of 2 between min and max */
269 if (new_value > FETH_BUFLET_SIZE_MAX ||
270 new_value < FETH_BUFLET_SIZE_MIN ||
271 !is_power_of_two(new_value)) {
272 return EINVAL;
273 }
274 if_fake_tso_buffer_size = new_value;
275 }
276 return 0;
277 }
278
279 SYSCTL_PROC(_net_link_fake, OID_AUTO, tso_buf_size,
280 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
281 0, 0, feth_tso_buffer_size_sysctl, "IU", "Fake interface TSO buffer size");
282
283 static int
284 feth_max_mtu_sysctl SYSCTL_HANDLER_ARGS
285 {
286 #pragma unused(oidp, arg1, arg2)
287 unsigned int new_value;
288 int changed;
289 int error;
290
291 error = sysctl_io_number(req, if_fake_max_mtu,
292 sizeof(if_fake_max_mtu), &new_value, &changed);
293 if (error == 0 && changed != 0) {
294 if (new_value > FETH_MAX_MTU_MAX ||
295 new_value < ETHERMTU ||
296 new_value <= if_fake_buflet_size) {
297 return EINVAL;
298 }
299 if_fake_max_mtu = new_value;
300 }
301 return 0;
302 }
303
304 SYSCTL_PROC(_net_link_fake, OID_AUTO, max_mtu,
305 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
306 0, 0, feth_max_mtu_sysctl, "IU", "Fake interface maximum MTU");
307
308 static int
309 feth_buflet_size_sysctl SYSCTL_HANDLER_ARGS
310 {
311 #pragma unused(oidp, arg1, arg2)
312 unsigned int new_value;
313 int changed;
314 int error;
315
316 error = sysctl_io_number(req, if_fake_buflet_size,
317 sizeof(if_fake_buflet_size), &new_value, &changed);
318 if (error == 0 && changed != 0) {
319 /* must be a power of 2 between min and max */
320 if (new_value > FETH_BUFLET_SIZE_MAX ||
321 new_value < FETH_BUFLET_SIZE_MIN ||
322 !is_power_of_two(new_value) ||
323 new_value >= if_fake_max_mtu) {
324 return EINVAL;
325 }
326 if_fake_buflet_size = new_value;
327 }
328 return 0;
329 }
330
331 SYSCTL_PROC(_net_link_fake, OID_AUTO, buflet_size,
332 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
333 0, 0, feth_buflet_size_sysctl, "IU", "Fake interface buflet size");
334
335 static unsigned int if_fake_user_access = 0;
336
337 static int
338 feth_user_access_sysctl SYSCTL_HANDLER_ARGS
339 {
340 #pragma unused(oidp, arg1, arg2)
341 unsigned int new_value;
342 int changed;
343 int error;
344
345 error = sysctl_io_number(req, if_fake_user_access,
346 sizeof(if_fake_user_access), &new_value, &changed);
347 if (error == 0 && changed != 0) {
348 if (new_value != 0) {
349 if (new_value != 1) {
350 return EINVAL;
351 }
352 }
353 if_fake_user_access = new_value;
354 }
355 return 0;
356 }
357
358 SYSCTL_PROC(_net_link_fake, OID_AUTO, user_access,
359 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
360 0, 0, feth_user_access_sysctl, "IU", "Fake interface user access");
361
362 /* sysctl net.link.fake.if_adv_intvl (unit: millisecond) */
363 #define FETH_IF_ADV_INTVL_MIN 10
364 #define FETH_IF_ADV_INTVL_MAX INT_MAX
365
366 static int if_fake_if_adv_interval = 0; /* no interface advisory */
367 static int
368 feth_if_adv_interval_sysctl SYSCTL_HANDLER_ARGS
369 {
370 #pragma unused(oidp, arg1, arg2)
371 unsigned int new_value;
372 int changed;
373 int error;
374
375 error = sysctl_io_number(req, if_fake_if_adv_interval,
376 sizeof(if_fake_if_adv_interval), &new_value, &changed);
377 if (error == 0 && changed != 0) {
378 if ((new_value != 0) && (new_value > FETH_IF_ADV_INTVL_MAX ||
379 new_value < FETH_IF_ADV_INTVL_MIN)) {
380 return EINVAL;
381 }
382 if_fake_if_adv_interval = new_value;
383 }
384 return 0;
385 }
386
387 SYSCTL_PROC(_net_link_fake, OID_AUTO, if_adv_intvl,
388 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
389 feth_if_adv_interval_sysctl, "IU",
390 "Fake interface will generate interface advisories reports at the specified interval in ms");
391
392 /* sysctl net.link.fake.tx_drops */
393 /*
394 * Fake ethernet will drop packet on the transmit path at the specified
395 * rate, i.e drop one in every if_fake_tx_drops number of packets.
396 */
397 #define FETH_TX_DROPS_MIN 0
398 #define FETH_TX_DROPS_MAX INT_MAX
399 static int if_fake_tx_drops = 0; /* no packets are dropped */
400 static int
401 feth_fake_tx_drops_sysctl SYSCTL_HANDLER_ARGS
402 {
403 #pragma unused(oidp, arg1, arg2)
404 unsigned int new_value;
405 int changed;
406 int error;
407
408 error = sysctl_io_number(req, if_fake_tx_drops,
409 sizeof(if_fake_tx_drops), &new_value, &changed);
410 if (error == 0 && changed != 0) {
411 if (new_value > FETH_TX_DROPS_MAX ||
412 new_value < FETH_TX_DROPS_MIN) {
413 return EINVAL;
414 }
415 if_fake_tx_drops = new_value;
416 }
417 return 0;
418 }
419
420 SYSCTL_PROC(_net_link_fake, OID_AUTO, tx_drops,
421 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
422 feth_fake_tx_drops_sysctl, "IU",
423 "Fake interface will intermittently drop packets on Tx path");
424
425 /* sysctl net.link.fake.tx_completion_mode */
426 typedef enum {
427 IFF_TX_COMPL_MODE_SYNC = 0,
428 IFF_TX_COMPL_MODE_ASYNC = 1,
429 } iff_tx_completion_mode_t;
430 static iff_tx_completion_mode_t if_tx_completion_mode = IFF_TX_COMPL_MODE_SYNC;
431 static int
432 feth_fake_tx_completion_mode_sysctl SYSCTL_HANDLER_ARGS
433 {
434 #pragma unused(oidp, arg1, arg2)
435 unsigned int new_value;
436 int changed;
437 int error;
438
439 error = sysctl_io_number(req, if_tx_completion_mode,
440 sizeof(if_tx_completion_mode), &new_value, &changed);
441 if (error == 0 && changed != 0) {
442 if (new_value > IFF_TX_COMPL_MODE_ASYNC ||
443 new_value < IFF_TX_COMPL_MODE_SYNC) {
444 return EINVAL;
445 }
446 if_tx_completion_mode = new_value;
447 }
448 return 0;
449 }
450 SYSCTL_PROC(_net_link_fake, OID_AUTO, tx_completion_mode,
451 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
452 feth_fake_tx_completion_mode_sysctl, "IU",
453 "Fake interface tx completion mode (0 synchronous, 1 asynchronous)");
454
455 /* sysctl net.link.fake.llink_cnt */
456
457 /* The maximum number of logical links (including default link) */
458 #define FETH_MAX_LLINKS 16
459 /*
460 * The default number of logical links (including default link).
461 * Zero means logical link mode is disabled.
462 */
463 #define FETH_DEF_LLINKS 0
464
465 static uint32_t if_fake_llink_cnt = FETH_DEF_LLINKS;
466 static int
467 feth_fake_llink_cnt_sysctl SYSCTL_HANDLER_ARGS
468 {
469 #pragma unused(oidp, arg1, arg2)
470 unsigned int new_value;
471 int changed;
472 int error;
473
474 error = sysctl_io_number(req, if_fake_llink_cnt,
475 sizeof(if_fake_llink_cnt), &new_value, &changed);
476 if (error == 0 && changed != 0) {
477 if (new_value > FETH_MAX_LLINKS) {
478 return EINVAL;
479 }
480 if_fake_llink_cnt = new_value;
481 }
482 return 0;
483 }
484
485 SYSCTL_PROC(_net_link_fake, OID_AUTO, llink_cnt,
486 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
487 feth_fake_llink_cnt_sysctl, "IU",
488 "Fake interface logical link count");
489
490 /* sysctl net.link.fake.qset_cnt */
491
492 /* The maximum number of qsets for each logical link */
493 #define FETH_MAX_QSETS 16
494 /* The default number of qsets for each logical link */
495 #define FETH_DEF_QSETS 4
496
497 static uint32_t if_fake_qset_cnt = FETH_DEF_QSETS;
498 static int
499 feth_fake_qset_cnt_sysctl SYSCTL_HANDLER_ARGS
500 {
501 #pragma unused(oidp, arg1, arg2)
502 unsigned int new_value;
503 int changed;
504 int error;
505
506 error = sysctl_io_number(req, if_fake_qset_cnt,
507 sizeof(if_fake_qset_cnt), &new_value, &changed);
508 if (error == 0 && changed != 0) {
509 if (new_value == 0 ||
510 new_value > FETH_MAX_QSETS) {
511 return EINVAL;
512 }
513 if_fake_qset_cnt = new_value;
514 }
515 return 0;
516 }
517
518 SYSCTL_PROC(_net_link_fake, OID_AUTO, qset_cnt,
519 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
520 feth_fake_qset_cnt_sysctl, "IU",
521 "Fake interface queue set count");
522
523 /**
524 ** virtual ethernet structures, types
525 **/
526
527 #define IFF_NUM_TX_RINGS_WMM_MODE 4
528 #define IFF_NUM_RX_RINGS_WMM_MODE 1
529 #define IFF_MAX_TX_RINGS IFF_NUM_TX_RINGS_WMM_MODE
530 #define IFF_MAX_RX_RINGS IFF_NUM_RX_RINGS_WMM_MODE
531 #define IFF_NUM_TX_QUEUES_WMM_MODE 4
532 #define IFF_NUM_RX_QUEUES_WMM_MODE 1
533 #define IFF_MAX_TX_QUEUES IFF_NUM_TX_QUEUES_WMM_MODE
534 #define IFF_MAX_RX_QUEUES IFF_NUM_RX_QUEUES_WMM_MODE
535
536 #define IFF_MAX_BATCH_SIZE 32
537
538 typedef uint16_t iff_flags_t;
539 #define IFF_FLAGS_HWCSUM 0x0001
540 #define IFF_FLAGS_BSD_MODE 0x0002
541 #define IFF_FLAGS_DETACHING 0x0004
542 #define IFF_FLAGS_WMM_MODE 0x0008
543 #define IFF_FLAGS_MULTIBUFLETS 0x0010
544 #define IFF_FLAGS_TSO_SUPPORT 0x0020
545
546 #if SKYWALK
547
548 typedef struct {
549 uuid_t fnx_provider;
550 uuid_t fnx_instance;
551 } fake_nx, *fake_nx_t;
552
553 typedef struct {
554 kern_netif_queue_t fq_queue;
555 } fake_queue;
556
557 typedef struct {
558 kern_netif_qset_t fqs_qset; /* provided by xnu */
559 fake_queue fqs_rx_queue[IFF_MAX_RX_QUEUES];
560 fake_queue fqs_tx_queue[IFF_MAX_TX_QUEUES];
561 uint32_t fqs_rx_queue_cnt;
562 uint32_t fqs_tx_queue_cnt;
563 uint32_t fqs_llink_idx;
564 uint32_t fqs_idx;
565 uint32_t fqs_dequeue_cnt;
566 uint64_t fqs_id;
567 boolean_t fqs_combined_mode;
568 } fake_qset;
569
570 typedef struct {
571 uint64_t fl_id;
572 uint32_t fl_idx;
573 fake_qset fl_qset[FETH_MAX_QSETS];
574 uint32_t fl_qset_cnt;
575 } fake_llink;
576
577 static kern_pbufpool_t S_pp;
578
579 #define IFF_TT_OUTPUT 0x01 /* generate trace_tag on output */
580 #define IFF_TT_INPUT 0x02 /* generate trace_tag on input */
581 static int if_fake_trace_tag_flags = 0;
582 SYSCTL_INT(_net_link_fake, OID_AUTO, trace_tag, CTLFLAG_RW | CTLFLAG_LOCKED,
583 &if_fake_trace_tag_flags, 0, "Fake interface generate trace_tag");
584 static packet_trace_tag_t if_fake_trace_tag_current = 1;
585
586 #endif /* SKYWALK */
587
/*
 * Per-interface soft state for one feth instance.  Reference counted via
 * iff_retain_count (feth_retain/feth_release); torn down by feth_free.
 */
struct if_fake {
	char            iff_name[IFNAMSIZ]; /* our unique id */
	ifnet_t         iff_ifp;
	iff_flags_t     iff_flags;      /* IFF_FLAGS_* bits */
	uint32_t        iff_retain_count;
	ifnet_t         iff_peer;       /* the other end */
	int             iff_media_current;
	int             iff_media_active;
	uint32_t        iff_media_count;
	int             iff_media_list[IF_FAKE_MEDIA_LIST_MAX];
	struct mbuf *   iff_pending_tx_packet;  /* freed in feth_free */
	boolean_t       iff_start_busy;
	unsigned int    iff_max_mtu;
	uint32_t        iff_fcs;        /* frame check sequence setting */
	uint32_t        iff_trailer_length; /* trailer bytes per frame */
#if SKYWALK
	fake_nx         iff_nx;         /* netif nexus provider/instance */
	struct netif_stats *iff_nifs;   /* netif stats (STATS_INC targets) */
	uint32_t        iff_nifs_ref;
	kern_channel_ring_t iff_rx_ring[IFF_MAX_RX_RINGS];
	kern_channel_ring_t iff_tx_ring[IFF_MAX_TX_RINGS];
	fake_llink      iff_llink[FETH_MAX_LLINKS]; /* logical links */
	uint32_t        iff_llink_cnt;
	thread_call_t   iff_doorbell_tcall;
	thread_call_t   iff_if_adv_tcall; /* interface-advisory timer */
	boolean_t       iff_doorbell_tcall_active;
	boolean_t       iff_waiting_for_tcall;
	boolean_t       iff_channel_connected;
	iff_pktpool_mode_t iff_pp_mode; /* packet pool sharing mode */
	kern_pbufpool_t iff_rx_pp;      /* rx pool (may alias iff_tx_pp) */
	kern_pbufpool_t iff_tx_pp;      /* tx pool */
	uint32_t        iff_tx_headroom;
	unsigned int    iff_adv_interval; /* ms; 0 disables advisories */
	uint32_t        iff_tx_drop_rate; /* drop 1-in-N on tx; 0 = none */
	uint32_t        iff_tx_pkts_count;
	iff_tx_completion_mode_t iff_tx_completion_mode;
	bool            iff_intf_adv_enabled;
	void            *iff_intf_adv_kern_ctx;
	kern_nexus_capab_interface_advisory_notify_fn_t iff_intf_adv_notify;
#endif /* SKYWALK */
};

typedef struct if_fake * if_fake_ref;

/* resolve an ifnet back to its feth soft state; callers check for NULL */
static if_fake_ref
ifnet_get_if_fake(ifnet_t ifp);
634
/*
 * Debug logging, enabled at runtime via net.link.fake.debug.
 * Wrapped in do { } while (0) so that "FETH_DPRINTF(...);" behaves as a
 * single statement in if/else bodies (the old brace-block form expanded
 * to "{...};" and broke dangling-else constructs).
 */
#define FETH_DPRINTF(fmt, ...)						\
	do {								\
		if (if_fake_debug != 0) {				\
			printf("%s " fmt, __func__, ## __VA_ARGS__);	\
		}							\
	} while (0)
637
638 static inline boolean_t
feth_in_bsd_mode(if_fake_ref fakeif)639 feth_in_bsd_mode(if_fake_ref fakeif)
640 {
641 return (fakeif->iff_flags & IFF_FLAGS_BSD_MODE) != 0;
642 }
643
/* Mark the interface as going away; checked via feth_is_detaching(). */
static inline void
feth_set_detaching(if_fake_ref fakeif)
{
	fakeif->iff_flags |= IFF_FLAGS_DETACHING;
}
649
650 static inline boolean_t
feth_is_detaching(if_fake_ref fakeif)651 feth_is_detaching(if_fake_ref fakeif)
652 {
653 return (fakeif->iff_flags & IFF_FLAGS_DETACHING) != 0;
654 }
655
656 static int
feth_enable_dequeue_stall(ifnet_t ifp,uint32_t enable)657 feth_enable_dequeue_stall(ifnet_t ifp, uint32_t enable)
658 {
659 int error;
660
661 if (enable != 0) {
662 error = ifnet_disable_output(ifp);
663 } else {
664 error = ifnet_enable_output(ifp);
665 }
666
667 return error;
668 }
669
#if SKYWALK
/* TRUE when the interface uses the 802.11 WMM multi-queue layout */
static inline boolean_t
feth_in_wmm_mode(if_fake_ref fakeif)
{
	return (fakeif->iff_flags & IFF_FLAGS_WMM_MODE) != 0;
}

/* TRUE when packets on this interface may span multiple buflets */
static inline boolean_t
feth_using_multibuflets(if_fake_ref fakeif)
{
	return (fakeif->iff_flags & IFF_FLAGS_MULTIBUFLETS) != 0;
}
static void feth_detach_netif_nexus(if_fake_ref fakeif);

/* TRUE when a non-zero interface-advisory interval was configured */
static inline boolean_t
feth_has_intf_advisory_configured(if_fake_ref fakeif)
{
	return fakeif->iff_adv_interval > 0;
}

/* TRUE when TSO offload support was requested for this interface */
static inline bool
feth_supports_tso(if_fake_ref fakeif)
{
	return (fakeif->iff_flags & IFF_FLAGS_TSO_SUPPORT) != 0;
}
#endif /* SKYWALK */
696
#define FETH_MAXUNIT            IF_MAXUNIT
#define FETH_ZONE_MAX_ELEM      MIN(IFNETS_MAX, FETH_MAXUNIT)

/* forward declarations for the clone/ifnet callbacks defined below */
static int feth_clone_create(struct if_clone *, u_int32_t, void *);
static int feth_clone_destroy(ifnet_t);
static int feth_output(ifnet_t ifp, struct mbuf *m);
static void feth_start(ifnet_t ifp);
static int feth_ioctl(ifnet_t ifp, u_long cmd, void * addr);
static int feth_config(ifnet_t ifp, ifnet_t peer);
static void feth_if_free(ifnet_t ifp);
static void feth_ifnet_set_attrs(if_fake_ref fakeif, ifnet_t ifp);
static void feth_free(if_fake_ref fakeif);

/* the "feth" interface cloner: backs feth<N> create/destroy requests */
static struct if_clone
    feth_cloner = IF_CLONE_INITIALIZER(FAKE_ETHER_NAME,
    feth_clone_create,
    feth_clone_destroy,
    0,
    FETH_MAXUNIT,
    FETH_ZONE_MAX_ELEM,
    sizeof(struct if_fake));
static void interface_link_event(ifnet_t ifp, u_int32_t event_code);
719
/* some media words to pretend to be ethernet */
static int default_media_words[] = {
	IFM_MAKEWORD(IFM_ETHER, 0, 0, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_10G_T, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_2500_T, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_5000_T, IFM_FDX, 0),

	IFM_MAKEWORD(IFM_ETHER, IFM_10G_KX4, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_20G_KR2, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_2500_SX, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_25G_KR, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_40G_SR4, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_50G_CR2, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_56G_R4, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_100G_CR4, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_400G_AUI8, IFM_FDX, 0),
};
/* number of entries in default_media_words */
#define default_media_words_count (sizeof(default_media_words) \
    / sizeof (default_media_words[0]))
739
740 /**
741 ** veth locks
742 **/
743
/*
 * Single global feth mutex; in this file it is taken around S_pp
 * lifetime management and ifnet -> if_fake lookups.
 */
static LCK_GRP_DECLARE(feth_lck_grp, "fake");
static LCK_MTX_DECLARE(feth_lck_mtx, &feth_lck_grp);

static inline void
feth_lock(void)
{
	lck_mtx_lock(&feth_lck_mtx);
}

static inline void
feth_unlock(void)
{
	lck_mtx_unlock(&feth_lck_mtx);
}
758
759 static inline int
get_max_mtu(int bsd_mode,unsigned int max_mtu)760 get_max_mtu(int bsd_mode, unsigned int max_mtu)
761 {
762 unsigned int mtu;
763
764 if (bsd_mode != 0) {
765 mtu = (njcl > 0) ? (M16KCLBYTES - ETHER_HDR_LEN)
766 : MBIGCLBYTES - ETHER_HDR_LEN;
767 if (mtu > max_mtu) {
768 mtu = max_mtu;
769 }
770 } else {
771 mtu = max_mtu;
772 }
773 return mtu;
774 }
775
/*
 * Return the configured maximum MTU for ifp, or ETHERMTU when ifp has no
 * feth soft state attached.  The global feth lock guards the lookup.
 */
static inline unsigned int
feth_max_mtu(ifnet_t ifp)
{
	if_fake_ref     fakeif;
	unsigned int    max_mtu = ETHERMTU;

	feth_lock();
	fakeif = ifnet_get_if_fake(ifp);
	if (fakeif != NULL) {
		max_mtu = fakeif->iff_max_mtu;
	}
	feth_unlock();
	return max_mtu;
}
790
/*
 * Final teardown of a feth soft state.  Called from feth_release once the
 * last reference is dropped (iff_retain_count must already be zero).
 * Releases any parked tx packet (BSD mode) or the packet pool references
 * (skywalk mode), then frees the softc.
 */
static void
feth_free(if_fake_ref fakeif)
{
	VERIFY(fakeif->iff_retain_count == 0);
	if (feth_in_bsd_mode(fakeif)) {
		/* BSD mode: free any packet the start path left pending */
		if (fakeif->iff_pending_tx_packet) {
			m_freem(fakeif->iff_pending_tx_packet);
		}
	}
#if SKYWALK
	else {
		if (fakeif->iff_pp_mode == IFF_PP_MODE_GLOBAL) {
			/* rx and tx each hold a reference on the shared pool */
			VERIFY(fakeif->iff_rx_pp == S_pp);
			VERIFY(fakeif->iff_tx_pp == S_pp);
			pp_release(fakeif->iff_rx_pp);
			fakeif->iff_rx_pp = NULL;
			pp_release(fakeif->iff_tx_pp);
			fakeif->iff_tx_pp = NULL;
			feth_lock();
			/*
			 * Only the creation reference left: drop it so the
			 * next instance recreates the shared pool.
			 */
			if (S_pp->pp_refcnt == 1) {
				pp_release(S_pp);
				S_pp = NULL;
			}
			feth_unlock();
		} else {
			/*
			 * Private pools: rx and tx may point at the same
			 * pool, but each pointer holds its own reference.
			 */
			if (fakeif->iff_rx_pp != NULL) {
				pp_release(fakeif->iff_rx_pp);
				fakeif->iff_rx_pp = NULL;
			}
			if (fakeif->iff_tx_pp != NULL) {
				pp_release(fakeif->iff_tx_pp);
				fakeif->iff_tx_pp = NULL;
			}
		}
	}
#endif /* SKYWALK */

	FETH_DPRINTF("%s\n", fakeif->iff_name);
	if_clone_softc_deallocate(&feth_cloner, fakeif);
}
831
832 static void
feth_release(if_fake_ref fakeif)833 feth_release(if_fake_ref fakeif)
834 {
835 u_int32_t old_retain_count;
836
837 old_retain_count = OSDecrementAtomic(&fakeif->iff_retain_count);
838 switch (old_retain_count) {
839 case 0:
840 VERIFY(old_retain_count != 0);
841 break;
842 case 1:
843 feth_free(fakeif);
844 break;
845 default:
846 break;
847 }
848 return;
849 }
850
#if SKYWALK

/* Take an additional reference on the soft state (pairs with feth_release). */
static void
feth_retain(if_fake_ref fakeif)
{
	OSIncrementAtomic(&fakeif->iff_retain_count);
}
858
/*
 * Fill in a kern_pbufpool_init for this interface's packet pool:
 * a virtual device pool of 1024 packets whose buflet size and fragment
 * count are derived from the configured buflet size, the TSO setting
 * and the multi-buflet mode.
 */
static void
feth_packet_pool_init_prepare(if_fake_ref fakeif,
    struct kern_pbufpool_init *pp_init)
{
	uint32_t max_mtu = fakeif->iff_max_mtu;
	uint32_t buflet_size = if_fake_buflet_size;

	bzero(pp_init, sizeof(*pp_init));
	pp_init->kbi_version = KERN_PBUFPOOL_CURRENT_VERSION;
	pp_init->kbi_flags |= KBIF_VIRTUAL_DEVICE;
	pp_init->kbi_packets = 1024;            /* TBD configurable */
	if (feth_supports_tso(fakeif)) {
		/* TSO uses the (larger) TSO buffer size */
		buflet_size = if_fake_tso_buffer_size;
	}
	if (feth_using_multibuflets(fakeif)) {
		/* enough fragments so a packet can reach max_mtu */
		pp_init->kbi_bufsize = buflet_size;
		pp_init->kbi_max_frags = howmany(max_mtu, buflet_size);
		pp_init->kbi_buflets = pp_init->kbi_packets *
		    pp_init->kbi_max_frags;
		pp_init->kbi_flags |= KBIF_BUFFER_ON_DEMAND;
	} else {
		/* single buflet must hold an entire max-MTU packet */
		pp_init->kbi_bufsize = max(max_mtu, buflet_size);
		pp_init->kbi_max_frags = 1;
		pp_init->kbi_buflets = pp_init->kbi_packets;
	}
	pp_init->kbi_buf_seg_size = skmem_usr_buf_seg_size;
	if (if_fake_user_access != 0) {
		/* allow user-space mapping of the pool's buffers */
		pp_init->kbi_flags |= KBIF_USER_ACCESS;
	}
	pp_init->kbi_ctx = NULL;
	pp_init->kbi_ctx_retain = NULL;
	pp_init->kbi_ctx_release = NULL;
}
892
893 static errno_t
feth_packet_pool_make(if_fake_ref fakeif)894 feth_packet_pool_make(if_fake_ref fakeif)
895 {
896 struct kern_pbufpool_init pp_init;
897 errno_t err;
898
899 feth_packet_pool_init_prepare(fakeif, &pp_init);
900
901 switch (fakeif->iff_pp_mode) {
902 case IFF_PP_MODE_GLOBAL:
903 feth_lock();
904 if (S_pp == NULL) {
905 (void)snprintf((char *)pp_init.kbi_name,
906 sizeof(pp_init.kbi_name), "%s", "feth shared pp");
907 err = kern_pbufpool_create(&pp_init, &S_pp, NULL);
908 }
909 pp_retain(S_pp);
910 feth_unlock();
911 fakeif->iff_rx_pp = S_pp;
912 pp_retain(S_pp);
913 fakeif->iff_tx_pp = S_pp;
914 break;
915 case IFF_PP_MODE_PRIVATE:
916 (void)snprintf((char *)pp_init.kbi_name,
917 sizeof(pp_init.kbi_name), "%s pp", fakeif->iff_name);
918 err = kern_pbufpool_create(&pp_init, &fakeif->iff_rx_pp, NULL);
919 pp_retain(fakeif->iff_rx_pp);
920 fakeif->iff_tx_pp = fakeif->iff_rx_pp;
921 break;
922 case IFF_PP_MODE_PRIVATE_SPLIT:
923 (void)snprintf((char *)pp_init.kbi_name,
924 sizeof(pp_init.kbi_name), "%s rx pp", fakeif->iff_name);
925 pp_init.kbi_flags &= ~(KBIF_IODIR_IN | KBIF_IODIR_OUT |
926 KBIF_BUFFER_ON_DEMAND | KBIF_KERNEL_READONLY);
927 pp_init.kbi_flags |= (KBIF_IODIR_IN | KBIF_BUFFER_ON_DEMAND);
928 pp_init.kbi_packets = 1024;
929 pp_init.kbi_bufsize = if_fake_link_layer_aggregation_factor * 1024;
930 err = kern_pbufpool_create(&pp_init, &fakeif->iff_rx_pp, NULL);
931 if (err != 0) {
932 printf("%s: rx pp create failed %d\n", __func__, err);
933 return err;
934 }
935 pp_init.kbi_flags &= ~(KBIF_IODIR_IN | KBIF_IODIR_OUT |
936 KBIF_BUFFER_ON_DEMAND | KBIF_KERNEL_READONLY);
937 pp_init.kbi_flags |= KBIF_IODIR_OUT;
938 pp_init.kbi_packets = 1024; /* TBD configurable */
939 pp_init.kbi_bufsize = fakeif->iff_max_mtu;
940 (void)snprintf((char *)pp_init.kbi_name,
941 sizeof(pp_init.kbi_name), "%s tx pp", fakeif->iff_name);
942 err = kern_pbufpool_create(&pp_init, &fakeif->iff_tx_pp, NULL);
943 if (err != 0) {
944 printf("%s: tx pp create failed %d\n", __func__, err);
945 pp_release(fakeif->iff_rx_pp);
946 return err;
947 }
948 break;
949 default:
950 VERIFY(0);
951 __builtin_unreachable();
952 }
953
954 return 0;
955 }
956
957 static void
feth_packet_set_trace_tag(kern_packet_t ph,int flag)958 feth_packet_set_trace_tag(kern_packet_t ph, int flag)
959 {
960 if (if_fake_trace_tag_flags & flag) {
961 if (++if_fake_trace_tag_current == 0) {
962 if_fake_trace_tag_current = 1;
963 }
964 kern_packet_set_trace_tag(ph, if_fake_trace_tag_current);
965 }
966 }
967
/*
 * Copy single-buflet packet sph into a new packet from dif's rx pool,
 * returning it via *pdph.
 *
 * To aggregate small frames, a previously produced packet (*pdph on
 * entry, or 0 for none) may donate the unused tail of its buffer: when
 * the new payload fits after that packet's data limit, the new packet is
 * a light clone sharing the same buffer at a higher data address;
 * otherwise a fresh buffer is allocated.  Data limits are kept rounded
 * to 16 bytes so each pack point stays 16-byte aligned.
 */
static errno_t
feth_clone_packet(if_fake_ref dif, kern_packet_t sph, kern_packet_t *pdph)
{
	errno_t err = 0;
	kern_pbufpool_t pp = dif->iff_rx_pp;
	kern_packet_t dph = 0, dph0 = 0;
	kern_buflet_t sbuf, dbuf0 = NULL, dbuf;
	void *saddr, *daddr;
	uint32_t soff, doff;
	uint32_t slen, dlen;
	uint32_t dlim0, dlim;

	/* destination mirrors the source's data offset and length */
	sbuf = kern_packet_get_next_buflet(sph, NULL);
	saddr = kern_buflet_get_data_address(sbuf);
	doff = soff = kern_buflet_get_data_offset(sbuf);
	dlen = slen = kern_buflet_get_data_length(sbuf);

	/* packet clone is only supported for single-buflet */
	ASSERT(kern_packet_get_buflet_count(sph) == 1);
	ASSERT(soff == kern_packet_get_headroom(sph));
	ASSERT(slen == kern_packet_get_data_length(sph));

	dph0 = *pdph;
	if (dph0 == 0) {
		/* no previous packet whose buffer could be shared */
		dlim0 = 0;
	} else {
		/* bytes left between dph0's data limit and its buffer end */
		dbuf0 = kern_packet_get_next_buflet(dph0, NULL);
		ASSERT(kern_buflet_get_object_limit(dbuf0) ==
		    PP_BUF_OBJ_SIZE_DEF(pp));
		ASSERT(kern_buflet_get_data_limit(dbuf0) % 16 == 0);
		dlim0 = ((uintptr_t)kern_buflet_get_object_address(dbuf0) +
		    kern_buflet_get_object_limit(dbuf0)) -
		    ((uintptr_t)kern_buflet_get_data_address(dbuf0) +
		    kern_buflet_get_data_limit(dbuf0));
	}

	if (doff + dlen > dlim0) {
		/* doesn't fit in the leftover space: take a fresh buffer */
		err = kern_pbufpool_alloc_nosleep(pp, 1, &dph);
		if (err != 0) {
			STATS_INC(dif->iff_nifs, NETIF_STATS_DROP);
			STATS_INC(dif->iff_nifs, NETIF_STATS_DROP_NOMEM_PKT);
			return err;
		}
		dbuf = kern_packet_get_next_buflet(dph, NULL);
		ASSERT(kern_buflet_get_data_address(dbuf) ==
		    kern_buflet_get_object_address(dbuf));
		daddr = kern_buflet_get_data_address(dbuf);
		dlim = kern_buflet_get_object_limit(dbuf);
		ASSERT(dlim == PP_BUF_OBJ_SIZE_DEF(pp));
	} else {
		/* fits: light-clone dph0 and pack after its data limit */
		err = kern_packet_clone_nosleep(dph0, &dph, KPKT_COPY_LIGHT);
		if (err != 0) {
			printf("%s: packet clone err %d\n", __func__, err);
			return err;
		}
		dbuf = kern_packet_get_next_buflet(dph, NULL);
		ASSERT(kern_buflet_get_object_address(dbuf) ==
		    kern_buflet_get_object_address(dbuf0));
		daddr = (void *)((uintptr_t)kern_buflet_get_data_address(dbuf0) +
		    kern_buflet_get_data_limit(dbuf0));
		dlim = dlim0;
	}

	ASSERT(doff + dlen <= dlim);

	/* pack points stay 16-byte aligned (limits rounded below) */
	ASSERT((uintptr_t)daddr % 16 == 0);

	bcopy((const void *)((uintptr_t)saddr + soff),
	    (void *)((uintptr_t)daddr + doff), slen);

	/* trim the limit so the next clone packs right after this payload */
	dlim = MIN(dlim, P2ROUNDUP(doff + dlen, 16));
	err = kern_buflet_set_data_address(dbuf, daddr);
	VERIFY(err == 0);
	err = kern_buflet_set_data_limit(dbuf, dlim);
	VERIFY(err == 0);
	err = kern_buflet_set_data_length(dbuf, dlen);
	VERIFY(err == 0);
	err = kern_buflet_set_data_offset(dbuf, doff);
	VERIFY(err == 0);
	err = kern_packet_set_headroom(dph, doff);
	VERIFY(err == 0);
	/* carry over link-header length and service class from the source */
	err = kern_packet_set_link_header_length(dph,
	    kern_packet_get_link_header_length(sph));
	VERIFY(err == 0);
	err = kern_packet_set_service_class(dph,
	    kern_packet_get_service_class(sph));
	VERIFY(err == 0);
	err = kern_packet_finalize(dph);
	VERIFY(err == 0);
	*pdph = dph;

	return err;
}
1061
1062 static inline void
feth_copy_buflet(kern_buflet_t sbuf,kern_buflet_t dbuf)1063 feth_copy_buflet(kern_buflet_t sbuf, kern_buflet_t dbuf)
1064 {
1065 errno_t err;
1066 uint16_t off, len;
1067 uint8_t *saddr, *daddr;
1068
1069 saddr = kern_buflet_get_data_address(sbuf);
1070 off = kern_buflet_get_data_offset(sbuf);
1071 len = kern_buflet_get_data_length(sbuf);
1072 daddr = kern_buflet_get_data_address(dbuf);
1073 bcopy((saddr + off), (daddr + off), len);
1074 err = kern_buflet_set_data_offset(dbuf, off);
1075 VERIFY(err == 0);
1076 err = kern_buflet_set_data_length(dbuf, len);
1077 VERIFY(err == 0);
1078 }
1079
1080 static int
feth_add_packet_trailer(kern_packet_t ph,void * trailer,size_t trailer_len)1081 feth_add_packet_trailer(kern_packet_t ph, void *trailer, size_t trailer_len)
1082 {
1083 errno_t err = 0;
1084
1085 ASSERT(trailer_len <= FETH_TRAILER_LENGTH_MAX);
1086
1087 kern_buflet_t buf = NULL, iter = NULL;
1088 while ((iter = kern_packet_get_next_buflet(ph, iter)) != NULL) {
1089 buf = iter;
1090 }
1091 ASSERT(buf != NULL);
1092
1093 uint16_t dlim = kern_buflet_get_data_limit(buf);
1094 uint16_t doff = kern_buflet_get_data_offset(buf);
1095 uint16_t dlen = kern_buflet_get_data_length(buf);
1096
1097 size_t trailer_room = dlim - doff - dlen;
1098
1099 if (trailer_room < trailer_len) {
1100 printf("not enough room");
1101 return ERANGE;
1102 }
1103
1104 void *data = (void *)((uintptr_t)kern_buflet_get_data_address(buf) + doff + dlen);
1105 memcpy(data, trailer, trailer_len);
1106
1107 err = kern_buflet_set_data_length(buf, dlen + trailer_len);
1108 VERIFY(err == 0);
1109
1110 err = kern_packet_finalize(ph);
1111 VERIFY(err == 0);
1112
1113 FETH_DPRINTF("%s %zuB trailer added\n", __func__, trailer_len);
1114
1115 return 0;
1116 }
1117
1118 static int
feth_add_packet_fcs(kern_packet_t ph)1119 feth_add_packet_fcs(kern_packet_t ph)
1120 {
1121 uint32_t crc = 0;
1122 int err;
1123
1124 ASSERT(sizeof(crc) == ETHER_CRC_LEN);
1125
1126 kern_buflet_t buf = NULL;
1127 while ((buf = kern_packet_get_next_buflet(ph, buf)) != NULL) {
1128 uint16_t doff = kern_buflet_get_data_offset(buf);
1129 uint16_t dlen = kern_buflet_get_data_length(buf);
1130 void *data = (void *)((uintptr_t)kern_buflet_get_data_address(buf) + doff);
1131 crc = crc32(crc, data, dlen);
1132 }
1133
1134 err = feth_add_packet_trailer(ph, &crc, ETHER_CRC_LEN);
1135 if (!err) {
1136 return err;
1137 }
1138
1139 err = kern_packet_set_link_ethfcs(ph);
1140 VERIFY(err == 0);
1141
1142 return 0;
1143 }
1144
/*
 * feth_copy_packet
 * - deep-copy packet sph into a new packet allocated from the receiver
 *   dif's RX pool; supports multi-buflet sources when the interface is
 *   configured for multi-buflet operation
 * - on success stores the finalized copy in *pdph and returns 0; on
 *   allocation failure bumps drop stats and returns the error
 */
static errno_t
feth_copy_packet(if_fake_ref dif, kern_packet_t sph, kern_packet_t *pdph)
{
	errno_t err = 0;
	uint16_t i, bufcnt;
	mach_vm_address_t baddr;
	kern_buflet_t sbuf = NULL, dbuf = NULL;
	kern_pbufpool_t pp = dif->iff_rx_pp;
	kern_packet_t dph;
	boolean_t multi_buflet = feth_using_multibuflets(dif);

	bufcnt = kern_packet_get_buflet_count(sph);
	ASSERT((bufcnt == 1) || multi_buflet);
	*pdph = 0;

	/* allocate the destination packet from the receiver's pool */
	err = kern_pbufpool_alloc_nosleep(pp, 1, &dph);
	if (err != 0) {
		STATS_INC(dif->iff_nifs, NETIF_STATS_DROP);
		STATS_INC(dif->iff_nifs, NETIF_STATS_DROP_NOMEM_PKT);
		return err;
	}

	/* pre-constructed single buflet packet copy */
	sbuf = kern_packet_get_next_buflet(sph, NULL);
	dbuf = kern_packet_get_next_buflet(dph, NULL);
	feth_copy_buflet(sbuf, dbuf);

	if (!multi_buflet) {
		goto done;
	}

	/* un-constructed multi-buflet packet copy */
	for (i = 1; i < bufcnt; i++) {
		kern_buflet_t dbuf_next = NULL;

		sbuf = kern_packet_get_next_buflet(sph, sbuf);
		VERIFY(sbuf != NULL);
		err = kern_pbufpool_alloc_buflet_nosleep(pp, &dbuf_next, true);
		if (err != 0) {
			STATS_INC(dif->iff_nifs, NETIF_STATS_DROP);
			STATS_INC(dif->iff_nifs, NETIF_STATS_DROP_NOMEM_BUF);
			break;
		}
		ASSERT(dbuf_next != NULL);
		feth_copy_buflet(sbuf, dbuf_next);
		err = kern_packet_add_buflet(dph, dbuf, dbuf_next);
		VERIFY(err == 0);
		dbuf = dbuf_next;
	}
	if (__improbable(err != 0)) {
		/*
		 * Mid-copy allocation failure: sanity-walk the buflets
		 * attached so far (each must have a valid buffer), then
		 * free the partially built packet in one shot.
		 */
		dbuf = NULL;
		while (i-- != 0) {
			dbuf = kern_packet_get_next_buflet(dph, dbuf);
			VERIFY(dbuf != NULL);
			baddr = (mach_vm_address_t)
			    kern_buflet_get_data_address(dbuf);
			VERIFY(baddr != 0);
		}
		kern_pbufpool_free(pp, dph);
		dph = 0;
	}

done:
	if (__probable(err == 0)) {
		/* copy the metadata the RX path depends on, then finalize */
		err = kern_packet_set_headroom(dph,
		    kern_packet_get_headroom(sph));
		VERIFY(err == 0);
		err = kern_packet_set_link_header_length(dph,
		    kern_packet_get_link_header_length(sph));
		VERIFY(err == 0);
		err = kern_packet_set_service_class(dph,
		    kern_packet_get_service_class(sph));
		VERIFY(err == 0);
		err = kern_packet_finalize(dph);
		VERIFY(err == 0);
		VERIFY(bufcnt == kern_packet_get_buflet_count(dph));
		*pdph = dph;
	}
	return err;
}
1225
1226 static inline void
feth_update_pkt_tso_metadata_for_rx(kern_packet_t ph)1227 feth_update_pkt_tso_metadata_for_rx(kern_packet_t ph)
1228 {
1229 /*
1230 * Nothing to do if not a TSO offloaded packet.
1231 */
1232 uint16_t seg_sz = 0;
1233 (void) kern_packet_get_protocol_segment_size(ph, &seg_sz);
1234 if (seg_sz == 0) {
1235 return;
1236 }
1237 /*
1238 * For RX, make the packet appear as a fully validated LRO packet.
1239 */
1240 packet_csum_flags_t csum_flags = PACKET_CSUM_IP_CHECKED |
1241 PACKET_CSUM_IP_VALID | PACKET_CSUM_DATA_VALID |
1242 PACKET_CSUM_PSEUDO_HDR;
1243 (void) kern_packet_set_inet_checksum(ph, csum_flags, 0, 0xFFFF, FALSE);
1244 return;
1245 }
1246
1247 static void
feth_rx_submit(if_fake_ref sif,if_fake_ref dif,kern_packet_t sphs[],uint32_t n_pkts)1248 feth_rx_submit(if_fake_ref sif, if_fake_ref dif, kern_packet_t sphs[],
1249 uint32_t n_pkts)
1250 {
1251 errno_t err = 0;
1252 struct kern_channel_ring_stat_increment stats;
1253 kern_channel_ring_t rx_ring = NULL;
1254 kern_channel_slot_t rx_slot = NULL, last_rx_slot = NULL;
1255 kern_packet_t sph = 0, dph = 0;
1256
1257 memset(&stats, 0, sizeof(stats));
1258
1259 rx_ring = dif->iff_rx_ring[0];
1260 if (rx_ring == NULL) {
1261 return;
1262 }
1263
1264 kr_enter(rx_ring, TRUE);
1265 kern_channel_reclaim(rx_ring);
1266 rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);
1267
1268 for (uint32_t i = 0; i < n_pkts && rx_slot != NULL; i++) {
1269 sph = sphs[i];
1270
1271 switch (dif->iff_pp_mode) {
1272 case IFF_PP_MODE_GLOBAL:
1273 sphs[i] = 0;
1274 dph = sph;
1275 feth_update_pkt_tso_metadata_for_rx(dph);
1276 err = kern_packet_finalize(dph);
1277 VERIFY(err == 0);
1278 break;
1279 case IFF_PP_MODE_PRIVATE:
1280 err = feth_copy_packet(dif, sph, &dph);
1281 break;
1282 case IFF_PP_MODE_PRIVATE_SPLIT:
1283 err = feth_clone_packet(dif, sph, &dph);
1284 break;
1285 default:
1286 VERIFY(0);
1287 __builtin_unreachable();
1288 }
1289 if (__improbable(err != 0)) {
1290 continue;
1291 }
1292
1293 if (sif->iff_trailer_length != 0) {
1294 feth_add_packet_trailer(dph, feth_trailer,
1295 sif->iff_trailer_length);
1296 }
1297 if (sif->iff_fcs != 0) {
1298 feth_add_packet_fcs(dph);
1299 }
1300 feth_packet_set_trace_tag(dph, IFF_TT_INPUT);
1301 bpf_tap_packet_in(dif->iff_ifp, DLT_EN10MB, dph, NULL, 0);
1302 stats.kcrsi_slots_transferred++;
1303 stats.kcrsi_bytes_transferred
1304 += kern_packet_get_data_length(dph);
1305
1306 /* attach the packet to the RX ring */
1307 err = kern_channel_slot_attach_packet(rx_ring, rx_slot, dph);
1308 VERIFY(err == 0);
1309 last_rx_slot = rx_slot;
1310 rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
1311 }
1312
1313 if (last_rx_slot != NULL) {
1314 kern_channel_advance_slot(rx_ring, last_rx_slot);
1315 kern_channel_increment_ring_net_stats(rx_ring, dif->iff_ifp,
1316 &stats);
1317 }
1318
1319 if (rx_ring != NULL) {
1320 kr_exit(rx_ring);
1321 kern_channel_notify(rx_ring, 0);
1322 }
1323 }
1324
/*
 * feth_rx_queue_submit
 * - logical-link variant of feth_rx_submit(): deliver a batch of packets
 *   from sif into the default RX queue of dif's (llink_idx, qset_idx)
 *   queue set; indices are validated against dif's configuration
 */
static void
feth_rx_queue_submit(if_fake_ref sif, if_fake_ref dif, uint32_t llink_idx,
    uint32_t qset_idx, kern_packet_t sphs[], uint32_t n_pkts)
{
	errno_t err = 0;
	kern_netif_queue_t queue;
	kern_packet_t sph = 0, dph = 0;
	fake_llink *llink;
	fake_qset *qset;

	/* validate the logical link and queue-set indices */
	if (llink_idx >= dif->iff_llink_cnt) {
		printf("%s: invalid llink_idx idx %d (max %d) on peer %s\n",
		    __func__, llink_idx, dif->iff_llink_cnt, dif->iff_name);
		return;
	}
	llink = &dif->iff_llink[llink_idx];
	if (qset_idx >= llink->fl_qset_cnt) {
		printf("%s: invalid qset_idx %d (max %d) on peer %s\n",
		    __func__, qset_idx, llink->fl_qset_cnt, dif->iff_name);
		return;
	}
	qset = &dif->iff_llink[llink_idx].fl_qset[qset_idx];
	queue = qset->fqs_rx_queue[0].fq_queue;
	if (queue == NULL) {
		printf("%s: NULL default queue (llink_idx %d, qset_idx %d) "
		    "on peer %s\n", __func__, llink_idx, qset_idx,
		    dif->iff_name);
		return;
	}
	for (uint32_t i = 0; i < n_pkts; i++) {
		uint32_t flags;

		sph = sphs[i];

		switch (dif->iff_pp_mode) {
		case IFF_PP_MODE_GLOBAL:
			/*
			 * Shared pool: hand the packet over directly.
			 * NOTE(review): unlike feth_rx_submit(), the GLOBAL
			 * path here does not kern_packet_finalize() --
			 * presumably the netif queue handles this; verify.
			 */
			sphs[i] = 0;
			dph = sph;
			feth_update_pkt_tso_metadata_for_rx(dph);
			break;
		case IFF_PP_MODE_PRIVATE:
			err = feth_copy_packet(dif, sph, &dph);
			break;
		case IFF_PP_MODE_PRIVATE_SPLIT:
			err = feth_clone_packet(dif, sph, &dph);
			break;
		default:
			VERIFY(0);
			__builtin_unreachable();
		}
		if (__improbable(err != 0)) {
			/* copy/clone failed (stats already bumped); skip */
			continue;
		}

		/* optional test trailers configured on the sender */
		if (sif->iff_trailer_length != 0) {
			feth_add_packet_trailer(dph, feth_trailer,
			    sif->iff_trailer_length);
		}
		if (sif->iff_fcs != 0) {
			feth_add_packet_fcs(dph);
		}
		feth_packet_set_trace_tag(dph, IFF_TT_INPUT);
		bpf_tap_packet_in(dif->iff_ifp, DLT_EN10MB, dph, NULL, 0);

		/* flush the queue on the final packet of the batch */
		flags = (i == n_pkts - 1) ?
		    KERN_NETIF_QUEUE_RX_ENQUEUE_FLAG_FLUSH : 0;
		kern_netif_queue_rx_enqueue(queue, dph, 1, flags);
	}
}
1394
1395 static void
feth_tx_complete(if_fake_ref fakeif,kern_packet_t phs[],uint32_t nphs)1396 feth_tx_complete(if_fake_ref fakeif, kern_packet_t phs[], uint32_t nphs)
1397 {
1398 for (uint32_t i = 0; i < nphs; i++) {
1399 kern_packet_t ph = phs[i];
1400 if (ph == 0) {
1401 continue;
1402 }
1403 int err = kern_packet_set_tx_completion_status(ph, 0);
1404 VERIFY(err == 0);
1405 kern_packet_tx_completion(ph, fakeif->iff_ifp);
1406 kern_pbufpool_free(fakeif->iff_tx_pp, phs[i]);
1407 phs[i] = 0;
1408 }
1409 }
1410
/* returns true if the packet is selected for TX error & dropped */
static bool
feth_tx_complete_error(if_fake_ref fakeif, kern_packet_t *ph,
    struct netif_stats *nifs)
{
	int err;

	/*
	 * Only act when a drop rate is configured and the running packet
	 * counter (maintained by the caller) has reached it.
	 */
	if (fakeif->iff_tx_drop_rate == 0 ||
	    fakeif->iff_tx_pkts_count != fakeif->iff_tx_drop_rate) {
		return false;
	}
	/* simulate TX completion error on the packet */
	if (fakeif->iff_tx_completion_mode == IFF_TX_COMPL_MODE_SYNC) {
		/* synchronous mode: report the error on the packet itself */
		err = kern_packet_set_tx_completion_status(*ph,
		    CHANNEL_EVENT_PKT_TRANSMIT_STATUS_ERR_RETRY_FAILED);
		VERIFY(err == 0);
		kern_packet_tx_completion(*ph, fakeif->iff_ifp);
	} else {
		/* async mode: deliver a channel transmit-status event */
		uint32_t nx_port_id = 0;
		os_channel_event_packet_transmit_status_t pkt_tx_status = {0};

		pkt_tx_status.packet_status =
		    CHANNEL_EVENT_PKT_TRANSMIT_STATUS_ERR_RETRY_FAILED;
		do {
			err = kern_packet_get_packetid(*ph,
			    &pkt_tx_status.packet_id);
			if (err != 0) {
				break;
			}
			err = kern_packet_get_tx_nexus_port_id(*ph,
			    &nx_port_id);
			if (err != 0) {
				break;
			}
			err = kern_channel_event_transmit_status(
				fakeif->iff_ifp, &pkt_tx_status, nx_port_id);
		} while (0);
		if (err != 0) {
			FETH_DPRINTF("err %d, nx_port_id: 0x%x\n",
			    err, nx_port_id);
		}
	}
	/* restart the drop cycle, free the packet, and account the drop */
	fakeif->iff_tx_pkts_count = 0;
	kern_pbufpool_free(fakeif->iff_tx_pp, *ph);
	*ph = 0;
	STATS_INC(nifs, NETIF_STATS_DROP);
	return true;
}
1459
1460 static void
feth_if_adv(thread_call_param_t arg0,thread_call_param_t arg1)1461 feth_if_adv(thread_call_param_t arg0, thread_call_param_t arg1)
1462 {
1463 #pragma unused(arg1)
1464 errno_t error;
1465 if_fake_ref fakeif = (if_fake_ref)arg0;
1466 struct ifnet_interface_advisory if_adv;
1467 struct ifnet_stats_param if_stat;
1468
1469 feth_lock();
1470 if (feth_is_detaching(fakeif) || !fakeif->iff_channel_connected) {
1471 feth_unlock();
1472 return;
1473 }
1474 feth_unlock();
1475
1476 if (!fakeif->iff_intf_adv_enabled) {
1477 goto done;
1478 }
1479
1480 error = ifnet_stat(fakeif->iff_ifp, &if_stat);
1481 if (error != 0) {
1482 FETH_DPRINTF("%s: ifnet_stat() failed %d\n",
1483 fakeif->iff_name, error);
1484 goto done;
1485 }
1486 if_adv.header.version = IF_INTERFACE_ADVISORY_VERSION_CURRENT;
1487 if_adv.header.direction = IF_INTERFACE_ADVISORY_DIRECTION_TX;
1488 if_adv.header.interface_type =
1489 IF_INTERFACE_ADVISORY_INTERFACE_TYPE_WIFI;
1490 if_adv.capacity.timestamp = mach_absolute_time();
1491 if_adv.capacity.rate_trend_suggestion =
1492 IF_INTERFACE_ADVISORY_RATE_SUGGESTION_RAMP_NEUTRAL;
1493 if_adv.capacity.max_bandwidth = 1000 * 1000 * 1000; /* 1Gbps */
1494 if_adv.capacity.total_byte_count = if_stat.packets_out;
1495 if_adv.capacity.average_throughput = 1000 * 1000 * 1000; /* 1Gbps */
1496 if_adv.capacity.flushable_queue_size = UINT32_MAX;
1497 if_adv.capacity.non_flushable_queue_size = UINT32_MAX;
1498 if_adv.capacity.average_delay = 1; /* ms */
1499
1500 error = fakeif->iff_intf_adv_notify(fakeif->iff_intf_adv_kern_ctx,
1501 &if_adv);
1502 if (error != 0) {
1503 FETH_DPRINTF("%s: interface advisory report failed %d\n",
1504 fakeif->iff_name, error);
1505 }
1506
1507 done:
1508 feth_lock();
1509 if (!feth_is_detaching(fakeif) && fakeif->iff_channel_connected) {
1510 uint64_t deadline;
1511 clock_interval_to_deadline(fakeif->iff_adv_interval,
1512 NSEC_PER_MSEC, &deadline);
1513 thread_call_enter_delayed(fakeif->iff_if_adv_tcall, deadline);
1514 }
1515 feth_unlock();
1516 }
1517
1518 static int
feth_if_adv_tcall_create(if_fake_ref fakeif)1519 feth_if_adv_tcall_create(if_fake_ref fakeif)
1520 {
1521 uint64_t deadline;
1522
1523 feth_lock();
1524 ASSERT(fakeif->iff_if_adv_tcall == NULL);
1525 ASSERT(fakeif->iff_adv_interval > 0);
1526 ASSERT(fakeif->iff_channel_connected);
1527 fakeif->iff_if_adv_tcall =
1528 thread_call_allocate_with_options(feth_if_adv,
1529 (thread_call_param_t)fakeif, THREAD_CALL_PRIORITY_KERNEL,
1530 THREAD_CALL_OPTIONS_ONCE);
1531 if (fakeif->iff_if_adv_tcall == NULL) {
1532 printf("%s: %s if_adv tcall alloc failed\n", __func__,
1533 fakeif->iff_name);
1534 return ENXIO;
1535 }
1536 /* retain for the interface advisory thread call */
1537 feth_retain(fakeif);
1538 clock_interval_to_deadline(fakeif->iff_adv_interval,
1539 NSEC_PER_MSEC, &deadline);
1540 thread_call_enter_delayed(fakeif->iff_if_adv_tcall, deadline);
1541 feth_unlock();
1542 return 0;
1543 }
1544
1545 static void
feth_if_adv_tcall_destroy(if_fake_ref fakeif)1546 feth_if_adv_tcall_destroy(if_fake_ref fakeif)
1547 {
1548 thread_call_t tcall;
1549
1550 feth_lock();
1551 ASSERT(fakeif->iff_if_adv_tcall != NULL);
1552 tcall = fakeif->iff_if_adv_tcall;
1553 feth_unlock();
1554 (void) thread_call_cancel_wait(tcall);
1555 if (!thread_call_free(tcall)) {
1556 boolean_t freed;
1557 (void) thread_call_cancel_wait(tcall);
1558 freed = thread_call_free(tcall);
1559 VERIFY(freed);
1560 }
1561 feth_lock();
1562 fakeif->iff_if_adv_tcall = NULL;
1563 feth_unlock();
1564 /* release for the interface advisory thread call */
1565 feth_release(fakeif);
1566 }
1567
1568
1569 /**
1570 ** nexus netif domain provider
1571 **/
/* Domain-provider init callback: no per-provider state to set up. */
static errno_t
feth_nxdp_init(kern_nexus_domain_provider_t domprov)
{
#pragma unused(domprov)
	return 0;
}
1578
/* Domain-provider fini callback: nothing to tear down. */
static void
feth_nxdp_fini(kern_nexus_domain_provider_t domprov)
{
#pragma unused(domprov)
}
1584
1585 static uuid_t feth_nx_dom_prov;
1586
1587 static errno_t
feth_register_nexus_domain_provider(void)1588 feth_register_nexus_domain_provider(void)
1589 {
1590 const struct kern_nexus_domain_provider_init dp_init = {
1591 .nxdpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
1592 .nxdpi_flags = 0,
1593 .nxdpi_init = feth_nxdp_init,
1594 .nxdpi_fini = feth_nxdp_fini
1595 };
1596 errno_t err = 0;
1597
1598 /* feth_nxdp_init() is called before this function returns */
1599 err = kern_nexus_register_domain_provider(NEXUS_TYPE_NET_IF,
1600 (const uint8_t *)
1601 "com.apple.feth",
1602 &dp_init, sizeof(dp_init),
1603 &feth_nx_dom_prov);
1604 if (err != 0) {
1605 printf("%s: failed to register domain provider\n", __func__);
1606 return err;
1607 }
1608 return 0;
1609 }
1610
1611 /**
1612 ** netif nexus routines
1613 **/
1614 static if_fake_ref
feth_nexus_context(kern_nexus_t nexus)1615 feth_nexus_context(kern_nexus_t nexus)
1616 {
1617 if_fake_ref fakeif;
1618
1619 fakeif = (if_fake_ref)kern_nexus_get_context(nexus);
1620 assert(fakeif != NULL);
1621 return fakeif;
1622 }
1623
1624 static uint8_t
feth_find_tx_ring_by_svc(kern_packet_svc_class_t svc_class)1625 feth_find_tx_ring_by_svc(kern_packet_svc_class_t svc_class)
1626 {
1627 switch (svc_class) {
1628 case KPKT_SC_VO:
1629 return 0;
1630 case KPKT_SC_VI:
1631 return 1;
1632 case KPKT_SC_BE:
1633 return 2;
1634 case KPKT_SC_BK:
1635 return 3;
1636 default:
1637 VERIFY(0);
1638 return 0;
1639 }
1640 }
1641
/*
 * feth_nx_ring_init
 * - record a newly created channel ring in the per-interface ring
 *   table; TX rings are indexed by service class in WMM mode,
 *   otherwise a single TX and a single RX ring are supported
 */
static errno_t
feth_nx_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_t channel, kern_channel_ring_t ring, boolean_t is_tx_ring,
    void **ring_ctx)
{
	if_fake_ref fakeif;
	int err;
#pragma unused(nxprov, channel, ring_ctx)
	feth_lock();
	fakeif = feth_nexus_context(nexus);
	if (feth_is_detaching(fakeif)) {
		feth_unlock();
		return 0;
	}
	if (is_tx_ring) {
		if (feth_in_wmm_mode(fakeif)) {
			/* WMM mode: slot the ring by its service class */
			kern_packet_svc_class_t svc_class;
			uint8_t ring_idx;

			err = kern_channel_get_service_class(ring, &svc_class);
			VERIFY(err == 0);
			ring_idx = feth_find_tx_ring_by_svc(svc_class);
			VERIFY(ring_idx < IFF_NUM_TX_RINGS_WMM_MODE);
			VERIFY(fakeif->iff_tx_ring[ring_idx] == NULL);
			fakeif->iff_tx_ring[ring_idx] = ring;
		} else {
			/* non-WMM: a single TX ring */
			VERIFY(fakeif->iff_tx_ring[0] == NULL);
			fakeif->iff_tx_ring[0] = ring;
		}
	} else {
		/* a single RX ring in all modes */
		VERIFY(fakeif->iff_rx_ring[0] == NULL);
		fakeif->iff_rx_ring[0] = ring;
	}
	/* cache the netif stats pointer for the data paths */
	fakeif->iff_nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
	feth_unlock();
	FETH_DPRINTF("%s: %s ring init\n",
	    fakeif->iff_name, is_tx_ring ? "TX" : "RX");
	return 0;
}
1681
/*
 * feth_nx_ring_fini
 * - forget a ring being torn down; when the last TX ring goes away the
 *   async doorbell thread call is cancelled, waited out if currently
 *   running (handshake with feth_async_doorbell()), freed, and its
 *   reference on fakeif dropped
 */
static void
feth_nx_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t ring)
{
#pragma unused(nxprov, ring)
	if_fake_ref fakeif;
	thread_call_t tcall = NULL;

	feth_lock();
	fakeif = feth_nexus_context(nexus);
	if (fakeif->iff_rx_ring[0] == ring) {
		fakeif->iff_rx_ring[0] = NULL;
		FETH_DPRINTF("%s: RX ring fini\n", fakeif->iff_name);
	} else if (feth_in_wmm_mode(fakeif)) {
		int i;
		/* clear the matching TX ring entry */
		for (i = 0; i < IFF_MAX_TX_RINGS; i++) {
			if (fakeif->iff_tx_ring[i] == ring) {
				fakeif->iff_tx_ring[i] = NULL;
				break;
			}
		}
		/* if no TX ring remains, the doorbell tcall must go too */
		for (i = 0; i < IFF_MAX_TX_RINGS; i++) {
			if (fakeif->iff_tx_ring[i] != NULL) {
				break;
			}
		}
		if (i == IFF_MAX_TX_RINGS) {
			tcall = fakeif->iff_doorbell_tcall;
			fakeif->iff_doorbell_tcall = NULL;
		}
		FETH_DPRINTF("%s: TX ring fini\n", fakeif->iff_name);
	} else if (fakeif->iff_tx_ring[0] == ring) {
		tcall = fakeif->iff_doorbell_tcall;
		fakeif->iff_doorbell_tcall = NULL;
		fakeif->iff_tx_ring[0] = NULL;
	}
	fakeif->iff_nifs = NULL;
	feth_unlock();
	/* tear the doorbell thread call down outside the lock */
	if (tcall != NULL) {
		boolean_t success;

		success = thread_call_cancel_wait(tcall);
		FETH_DPRINTF("%s: thread_call_cancel %s\n",
		    fakeif->iff_name,
		    success ? "SUCCESS" : "FAILURE");
		if (!success) {
			/*
			 * Cancel failed: the tcall is running. Sleep until
			 * feth_async_doorbell() clears its active flag and
			 * wakes us.
			 */
			feth_lock();
			if (fakeif->iff_doorbell_tcall_active) {
				fakeif->iff_waiting_for_tcall = TRUE;
				FETH_DPRINTF("%s: *waiting for threadcall\n",
				    fakeif->iff_name);
				do {
					msleep(fakeif, &feth_lck_mtx,
					    PZERO, "feth threadcall", 0);
				} while (fakeif->iff_doorbell_tcall_active);
				FETH_DPRINTF("%s: ^threadcall done\n",
				    fakeif->iff_name);
				fakeif->iff_waiting_for_tcall = FALSE;
			}
			feth_unlock();
		}
		success = thread_call_free(tcall);
		FETH_DPRINTF("%s: thread_call_free %s\n",
		    fakeif->iff_name,
		    success ? "SUCCESS" : "FAILURE");
		/* drop the reference held by the doorbell thread call */
		feth_release(fakeif);
		VERIFY(success == TRUE);
	}
}
1751
/* Channel pre-connect callback: nothing to prepare; always succeeds. */
static errno_t
feth_nx_pre_connect(kern_nexus_provider_t nxprov,
    proc_t proc, kern_nexus_t nexus, nexus_port_t port, kern_channel_t channel,
    void **channel_context)
{
#pragma unused(nxprov, proc, nexus, port, channel, channel_context)
	return 0;
}
1760
/*
 * feth_nx_connected
 * - channel connect callback: mark the interface connected, take a
 *   reference for the channel, and start the interface-advisory thread
 *   call when advisory reporting is configured
 * - returns EBUSY when the interface is detaching
 */
static errno_t
feth_nx_connected(kern_nexus_provider_t nxprov,
    kern_nexus_t nexus, kern_channel_t channel)
{
#pragma unused(nxprov, channel)
	int err;
	if_fake_ref fakeif;

	fakeif = feth_nexus_context(nexus);
	feth_lock();
	if (feth_is_detaching(fakeif)) {
		feth_unlock();
		return EBUSY;
	}
	/* reference held for the connected channel; dropped on disconnect */
	feth_retain(fakeif);
	fakeif->iff_channel_connected = TRUE;
	feth_unlock();
	if (feth_has_intf_advisory_configured(fakeif)) {
		/*
		 * NOTE(review): on failure the channel remains marked
		 * connected and the reference above is not dropped here --
		 * presumably the disconnect path cleans up; verify.
		 */
		err = feth_if_adv_tcall_create(fakeif);
		if (err != 0) {
			return err;
		}
	}
	FETH_DPRINTF("%s: connected channel %p\n",
	    fakeif->iff_name, channel);
	return 0;
}
1788
/*
 * feth_nx_pre_disconnect
 * - channel pre-disconnect callback: bring the interface down, mark it
 *   disconnected, and stop the interface-advisory thread call
 */
static void
feth_nx_pre_disconnect(kern_nexus_provider_t nxprov,
    kern_nexus_t nexus, kern_channel_t channel)
{
#pragma unused(nxprov, channel)
	if_fake_ref fakeif;

	fakeif = feth_nexus_context(nexus);
	FETH_DPRINTF("%s: pre-disconnect channel %p\n",
	    fakeif->iff_name, channel);
	/* Quiesce the interface and flush any pending outbound packets. */
	if_down(fakeif->iff_ifp);
	feth_lock();
	fakeif->iff_channel_connected = FALSE;
	feth_unlock();
	/*
	 * NOTE(review): iff_if_adv_tcall is read without the feth lock --
	 * presumably safe because it is only created from the connect
	 * path; confirm.
	 */
	if (fakeif->iff_if_adv_tcall != NULL) {
		feth_if_adv_tcall_destroy(fakeif);
	}
}
1808
1809 static void
feth_nx_disconnected(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel)1810 feth_nx_disconnected(kern_nexus_provider_t nxprov,
1811 kern_nexus_t nexus, kern_channel_t channel)
1812 {
1813 #pragma unused(nxprov, channel)
1814 if_fake_ref fakeif;
1815
1816 fakeif = feth_nexus_context(nexus);
1817 FETH_DPRINTF("%s: disconnected channel %p\n",
1818 fakeif->iff_name, channel);
1819 feth_release(fakeif);
1820 }
1821
/* Slot init callback: no per-slot state or properties; always succeeds. */
static errno_t
feth_nx_slot_init(kern_nexus_provider_t nxprov,
    kern_nexus_t nexus, kern_channel_ring_t ring, kern_channel_slot_t slot,
    uint32_t slot_index, struct kern_slot_prop **slot_prop_addr,
    void **slot_context)
{
#pragma unused(nxprov, nexus, ring, slot, slot_index, slot_prop_addr, slot_context)
	return 0;
}
1831
/* Slot fini callback: nothing to tear down per slot. */
static void
feth_nx_slot_fini(kern_nexus_provider_t nxprov,
    kern_nexus_t nexus, kern_channel_ring_t ring, kern_channel_slot_t slot,
    uint32_t slot_index)
{
#pragma unused(nxprov, nexus, ring, slot, slot_index)
}
1839
/*
 * feth_nx_sync_tx
 * - TX sync callback: drain the TX ring and cross the packets over to
 *   the peer interface's RX path in batches of IFF_MAX_BATCH_SIZE
 * - each packet is detached, tapped for bpf, possibly dropped to
 *   simulate a TX error, then submitted to the peer and completed
 */
static errno_t
feth_nx_sync_tx(kern_nexus_provider_t nxprov,
    kern_nexus_t nexus, kern_channel_ring_t tx_ring, uint32_t flags)
{
#pragma unused(nxprov)
	if_fake_ref fakeif;
	ifnet_t ifp;
	kern_channel_slot_t last_tx_slot = NULL;
	ifnet_t peer_ifp;
	if_fake_ref peer_fakeif = NULL;
	struct kern_channel_ring_stat_increment stats;
	kern_channel_slot_t tx_slot;
	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
	kern_packet_t pkts[IFF_MAX_BATCH_SIZE];
	uint32_t n_pkts = 0;

	memset(&stats, 0, sizeof(stats));

	STATS_INC(nifs, NETIF_STATS_TX_SYNC);
	fakeif = feth_nexus_context(nexus);
	FETH_DPRINTF("%s ring %d flags 0x%x\n", fakeif->iff_name,
	    tx_ring->ckr_ring_id, flags);

	feth_lock();
	/* bail out unless both ends of the pair are alive and connected */
	if (feth_is_detaching(fakeif) || !fakeif->iff_channel_connected) {
		feth_unlock();
		return 0;
	}
	ifp = fakeif->iff_ifp;
	peer_ifp = fakeif->iff_peer;
	if (peer_ifp != NULL) {
		peer_fakeif = ifnet_get_if_fake(peer_ifp);
		if (peer_fakeif != NULL) {
			if (feth_is_detaching(peer_fakeif) ||
			    !peer_fakeif->iff_channel_connected) {
				goto done;
			}
		} else {
			goto done;
		}
	} else {
		goto done;
	}
	tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
	while (tx_slot != NULL) {
		uint16_t off;
		kern_packet_t sph;

		/* detach the packet from the TX ring */
		sph = kern_channel_slot_get_packet(tx_ring, tx_slot);
		VERIFY(sph != 0);
		kern_channel_slot_detach_packet(tx_ring, tx_slot, sph);

		/* bpf tap output */
		off = kern_packet_get_headroom(sph);
		VERIFY(off >= fakeif->iff_tx_headroom);
		kern_packet_set_link_header_length(sph, ETHER_HDR_LEN);
		feth_packet_set_trace_tag(sph, IFF_TT_OUTPUT);
		bpf_tap_packet_out(ifp, DLT_EN10MB, sph, NULL, 0);

		/* drop packets, if requested */
		fakeif->iff_tx_pkts_count++;
		if (feth_tx_complete_error(fakeif, &sph, nifs)) {
			/* packet was consumed as a simulated TX error */
			goto next_tx_slot;
		}
		ASSERT(sph != 0);
		STATS_INC(nifs, NETIF_STATS_TX_COPY_DIRECT);
		STATS_INC(nifs, NETIF_STATS_TX_PACKETS);

		stats.kcrsi_slots_transferred++;
		stats.kcrsi_bytes_transferred
		        += kern_packet_get_data_length(sph);

		/* prepare batch for receiver */
		pkts[n_pkts++] = sph;
		if (n_pkts == IFF_MAX_BATCH_SIZE) {
			feth_rx_submit(fakeif, peer_fakeif, pkts, n_pkts);
			feth_tx_complete(fakeif, pkts, n_pkts);
			n_pkts = 0;
		}

next_tx_slot:
		last_tx_slot = tx_slot;
		tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
	}

	/* catch last batch for receiver */
	if (n_pkts != 0) {
		feth_rx_submit(fakeif, peer_fakeif, pkts, n_pkts);
		feth_tx_complete(fakeif, pkts, n_pkts);
		n_pkts = 0;
	}

	/* advance past everything we consumed and account the transfer */
	if (last_tx_slot != NULL) {
		kern_channel_advance_slot(tx_ring, last_tx_slot);
		kern_channel_increment_ring_net_stats(tx_ring, ifp, &stats);
	}
done:
	feth_unlock();
	return 0;
}
1941
1942 static errno_t
feth_nx_sync_rx(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring,uint32_t flags)1943 feth_nx_sync_rx(kern_nexus_provider_t nxprov,
1944 kern_nexus_t nexus, kern_channel_ring_t ring, uint32_t flags)
1945 {
1946 #pragma unused(nxprov, ring, flags)
1947 if_fake_ref fakeif;
1948 struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
1949
1950 STATS_INC(nifs, NETIF_STATS_RX_SYNC);
1951 fakeif = feth_nexus_context(nexus);
1952 FETH_DPRINTF("%s:\n", fakeif->iff_name);
1953 return 0;
1954 }
1955
1956 static errno_t
feth_nx_tx_dequeue_driver_managed(if_fake_ref fakeif,boolean_t doorbell_ctxt)1957 feth_nx_tx_dequeue_driver_managed(if_fake_ref fakeif, boolean_t doorbell_ctxt)
1958 {
1959 int i;
1960 errno_t error = 0;
1961 boolean_t more;
1962
1963 for (i = 0; i < IFF_NUM_TX_RINGS_WMM_MODE; i++) {
1964 kern_channel_ring_t ring = fakeif->iff_tx_ring[i];
1965 if (ring != NULL) {
1966 error = kern_channel_tx_refill(ring, UINT32_MAX,
1967 UINT32_MAX, doorbell_ctxt, &more);
1968 }
1969 if (error != 0) {
1970 FETH_DPRINTF("%s: TX refill ring %d (%s) %d\n",
1971 fakeif->iff_name, ring->ckr_ring_id,
1972 doorbell_ctxt ? "sync" : "async", error);
1973 if (!((error == EAGAIN) || (error == EBUSY))) {
1974 break;
1975 }
1976 } else {
1977 FETH_DPRINTF("%s: TX refilled ring %d (%s)\n",
1978 fakeif->iff_name, ring->ckr_ring_id,
1979 doorbell_ctxt ? "sync" : "async");
1980 }
1981 }
1982 return error;
1983 }
1984
/*
 * feth_async_doorbell
 * - thread call that performs the asynchronous TX refill requested via
 *   feth_schedule_async_doorbell()
 * - sets iff_doorbell_tcall_active while refilling so that ring
 *   teardown (feth_nx_ring_fini()) can msleep until we finish; wakes
 *   any such waiter on exit
 */
static void
feth_async_doorbell(thread_call_param_t arg0, thread_call_param_t arg1)
{
#pragma unused(arg1)
	errno_t error;
	if_fake_ref fakeif = (if_fake_ref)arg0;
	kern_channel_ring_t ring;
	boolean_t more;

	feth_lock();
	ring = fakeif->iff_tx_ring[0];
	/*
	 * NB: the goto below skips the re-lock before done:, so the feth
	 * lock is held at the done: label on both paths.
	 */
	if (feth_is_detaching(fakeif) ||
	    !fakeif->iff_channel_connected ||
	    ring == NULL) {
		goto done;
	}
	fakeif->iff_doorbell_tcall_active = TRUE;
	feth_unlock();
	/* perform the refill outside the lock */
	if (feth_in_wmm_mode(fakeif)) {
		error = feth_nx_tx_dequeue_driver_managed(fakeif, FALSE);
	} else {
		error = kern_channel_tx_refill(ring, UINT32_MAX,
		    UINT32_MAX, FALSE, &more);
	}
	if (error != 0) {
		FETH_DPRINTF("%s: TX refill failed %d\n",
		    fakeif->iff_name, error);
	} else {
		FETH_DPRINTF("%s: TX refilled\n", fakeif->iff_name);
	}

	feth_lock();
done:
	fakeif->iff_doorbell_tcall_active = FALSE;
	if (fakeif->iff_waiting_for_tcall) {
		FETH_DPRINTF("%s: threadcall waking up waiter\n",
		    fakeif->iff_name);
		wakeup((caddr_t)fakeif);
	}
	feth_unlock();
}
2026
2027 static void
feth_schedule_async_doorbell(if_fake_ref fakeif)2028 feth_schedule_async_doorbell(if_fake_ref fakeif)
2029 {
2030 thread_call_t tcall;
2031
2032 feth_lock();
2033 if (feth_is_detaching(fakeif) || !fakeif->iff_channel_connected) {
2034 feth_unlock();
2035 return;
2036 }
2037 tcall = fakeif->iff_doorbell_tcall;
2038 if (tcall != NULL) {
2039 thread_call_enter(tcall);
2040 } else {
2041 tcall = thread_call_allocate_with_options(feth_async_doorbell,
2042 (thread_call_param_t)fakeif,
2043 THREAD_CALL_PRIORITY_KERNEL,
2044 THREAD_CALL_OPTIONS_ONCE);
2045 if (tcall == NULL) {
2046 printf("%s: %s tcall alloc failed\n",
2047 __func__, fakeif->iff_name);
2048 } else {
2049 fakeif->iff_doorbell_tcall = tcall;
2050 feth_retain(fakeif);
2051 thread_call_enter(tcall);
2052 }
2053 }
2054 feth_unlock();
2055 }
2056
2057 static errno_t
feth_nx_tx_doorbell(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring,uint32_t flags)2058 feth_nx_tx_doorbell(kern_nexus_provider_t nxprov,
2059 kern_nexus_t nexus, kern_channel_ring_t ring, uint32_t flags)
2060 {
2061 #pragma unused(nxprov, ring, flags)
2062 errno_t error;
2063 if_fake_ref fakeif;
2064
2065 fakeif = feth_nexus_context(nexus);
2066 FETH_DPRINTF("%s\n", fakeif->iff_name);
2067
2068 if ((flags & KERN_NEXUS_TXDOORBELLF_ASYNC_REFILL) == 0) {
2069 boolean_t more;
2070 /* synchronous tx refill */
2071 if (feth_in_wmm_mode(fakeif)) {
2072 error = feth_nx_tx_dequeue_driver_managed(fakeif, TRUE);
2073 } else {
2074 error = kern_channel_tx_refill(ring, UINT32_MAX,
2075 UINT32_MAX, TRUE, &more);
2076 }
2077 if (error != 0) {
2078 FETH_DPRINTF("%s: TX refill (sync) %d\n",
2079 fakeif->iff_name, error);
2080 } else {
2081 FETH_DPRINTF("%s: TX refilled (sync)\n",
2082 fakeif->iff_name);
2083 }
2084 } else {
2085 FETH_DPRINTF("%s: schedule async refill\n", fakeif->iff_name);
2086 feth_schedule_async_doorbell(fakeif);
2087 }
2088 return 0;
2089 }
2090
2091 static errno_t
feth_netif_prepare(kern_nexus_t nexus,ifnet_t ifp)2092 feth_netif_prepare(kern_nexus_t nexus, ifnet_t ifp)
2093 {
2094 if_fake_ref fakeif;
2095
2096 fakeif = (if_fake_ref)kern_nexus_get_context(nexus);
2097 feth_ifnet_set_attrs(fakeif, ifp);
2098 return 0;
2099 }
2100
2101 static errno_t
feth_nx_intf_adv_config(void * prov_ctx,bool enable)2102 feth_nx_intf_adv_config(void *prov_ctx, bool enable)
2103 {
2104 if_fake_ref fakeif = prov_ctx;
2105
2106 feth_lock();
2107 fakeif->iff_intf_adv_enabled = enable;
2108 feth_unlock();
2109 FETH_DPRINTF("%s enable %d\n", fakeif->iff_name, enable);
2110 return 0;
2111 }
2112
2113 static errno_t
fill_capab_interface_advisory(if_fake_ref fakeif,void * contents,uint32_t * len)2114 fill_capab_interface_advisory(if_fake_ref fakeif, void *contents, uint32_t *len)
2115 {
2116 struct kern_nexus_capab_interface_advisory *capab = contents;
2117
2118 if (*len != sizeof(*capab)) {
2119 return EINVAL;
2120 }
2121 if (capab->kncia_version !=
2122 KERN_NEXUS_CAPAB_INTERFACE_ADVISORY_VERSION_1) {
2123 return EINVAL;
2124 }
2125 if (!feth_has_intf_advisory_configured(fakeif)) {
2126 return ENOTSUP;
2127 }
2128 VERIFY(capab->kncia_notify != NULL);
2129 fakeif->iff_intf_adv_kern_ctx = capab->kncia_kern_context;
2130 fakeif->iff_intf_adv_notify = capab->kncia_notify;
2131 capab->kncia_provider_context = fakeif;
2132 capab->kncia_config = feth_nx_intf_adv_config;
2133 return 0;
2134 }
2135
2136 static errno_t
feth_notify_steering_info(void * prov_ctx,void * qset_ctx,struct ifnet_traffic_descriptor_common * td,bool add)2137 feth_notify_steering_info(void *prov_ctx, void *qset_ctx,
2138 struct ifnet_traffic_descriptor_common *td, bool add)
2139 {
2140 #pragma unused(td)
2141 if_fake_ref fakeif = prov_ctx;
2142 fake_qset *qset = qset_ctx;
2143
2144 FETH_DPRINTF("%s: notify_steering_info: qset_id 0x%llx, %s\n",
2145 fakeif->iff_name, qset->fqs_id, add ? "add" : "remove");
2146 return 0;
2147 }
2148
2149 static errno_t
fill_capab_qset_extensions(if_fake_ref fakeif,void * contents,uint32_t * len)2150 fill_capab_qset_extensions(if_fake_ref fakeif, void *contents, uint32_t *len)
2151 {
2152 struct kern_nexus_capab_qset_extensions *capab = contents;
2153
2154 if (*len != sizeof(*capab)) {
2155 return EINVAL;
2156 }
2157 if (capab->cqe_version !=
2158 KERN_NEXUS_CAPAB_QSET_EXTENSIONS_VERSION_1) {
2159 return EINVAL;
2160 }
2161 capab->cqe_prov_ctx = fakeif;
2162 capab->cqe_notify_steering_info = feth_notify_steering_info;
2163 return 0;
2164 }
2165
2166 static errno_t
feth_nx_capab_config(kern_nexus_provider_t nxprov,kern_nexus_t nx,kern_nexus_capab_t capab,void * contents,uint32_t * len)2167 feth_nx_capab_config(kern_nexus_provider_t nxprov, kern_nexus_t nx,
2168 kern_nexus_capab_t capab, void *contents, uint32_t *len)
2169 {
2170 #pragma unused(nxprov)
2171 errno_t error;
2172 if_fake_ref fakeif;
2173
2174 fakeif = feth_nexus_context(nx);
2175 FETH_DPRINTF("%s\n", fakeif->iff_name);
2176
2177 switch (capab) {
2178 case KERN_NEXUS_CAPAB_INTERFACE_ADVISORY:
2179 error = fill_capab_interface_advisory(fakeif, contents, len);
2180 break;
2181 case KERN_NEXUS_CAPAB_QSET_EXTENSIONS:
2182 error = fill_capab_qset_extensions(fakeif, contents, len);
2183 break;
2184 default:
2185 error = ENOTSUP;
2186 break;
2187 }
2188 return error;
2189 }
2190
2191 static int
feth_set_tso(ifnet_t ifp)2192 feth_set_tso(ifnet_t ifp)
2193 {
2194 ifnet_offload_t offload;
2195 uint32_t tso_v4_mtu, tso_v6_mtu;
2196 int error;
2197
2198 offload = IFNET_TSO_IPV4 | IFNET_TSO_IPV6;
2199 tso_v4_mtu = if_fake_tso_buffer_size;
2200 tso_v6_mtu = if_fake_tso_buffer_size;
2201 error = ifnet_set_offload(ifp, offload);
2202 if (error != 0) {
2203 printf("%s: set TSO offload failed on %s, err %d\n", __func__,
2204 if_name(ifp), error);
2205 return error;
2206 }
2207 error = ifnet_set_tso_mtu(ifp, AF_INET, tso_v4_mtu);
2208 if (error != 0) {
2209 printf("%s: set TSO MTU IPv4 failed on %s, err %d\n", __func__,
2210 if_name(ifp), error);
2211 return error;
2212 }
2213 error = ifnet_set_tso_mtu(ifp, AF_INET6, tso_v6_mtu);
2214 if (error != 0) {
2215 printf("%s: set TSO MTU IPv6 failed on %s, err %d\n", __func__,
2216 if_name(ifp), error);
2217 return error;
2218 }
2219 return 0;
2220 }
2221
/*
 * Create the (non-llink) netif nexus provider and its instance for the
 * interface, producing the attached ifnet in *ifp and the provider and
 * instance UUIDs in *provider / *instance.
 *
 * On any failure the partially-registered provider is torn down before
 * returning.  Note that the success path deliberately falls through to
 * the 'failed' label, which only releases the nexus attribute object
 * and returns the (then zero) error.
 */
static errno_t
create_netif_provider_and_instance(if_fake_ref fakeif,
    struct ifnet_init_eparams * init_params, ifnet_t *ifp,
    uuid_t * provider, uuid_t * instance)
{
	errno_t err;
	nexus_controller_t controller = kern_nexus_shared_controller();
	struct kern_nexus_net_init net_init;
	nexus_name_t provider_name;
	nexus_attr_t nexus_attr = NULL;
	/* callbacks the netif nexus invokes on this virtual device */
	struct kern_nexus_provider_init prov_init = {
		.nxpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
		.nxpi_flags = NXPIF_VIRTUAL_DEVICE,
		.nxpi_pre_connect = feth_nx_pre_connect,
		.nxpi_connected = feth_nx_connected,
		.nxpi_pre_disconnect = feth_nx_pre_disconnect,
		.nxpi_disconnected = feth_nx_disconnected,
		.nxpi_ring_init = feth_nx_ring_init,
		.nxpi_ring_fini = feth_nx_ring_fini,
		.nxpi_slot_init = feth_nx_slot_init,
		.nxpi_slot_fini = feth_nx_slot_fini,
		.nxpi_sync_tx = feth_nx_sync_tx,
		.nxpi_sync_rx = feth_nx_sync_rx,
		.nxpi_tx_doorbell = feth_nx_tx_doorbell,
		.nxpi_config_capab = feth_nx_capab_config,
	};

	_CASSERT(IFF_MAX_RX_RINGS == 1);
	err = kern_nexus_attr_create(&nexus_attr);
	if (err != 0) {
		printf("%s nexus attribute creation failed, error %d\n",
		    __func__, err);
		goto failed;
	}
	if (feth_in_wmm_mode(fakeif)) {
		/* WMM mode needs multiple TX rings plus a WMM queue map */
		err = kern_nexus_attr_set(nexus_attr, NEXUS_ATTR_TX_RINGS,
		    IFF_NUM_TX_RINGS_WMM_MODE);
		VERIFY(err == 0);
		err = kern_nexus_attr_set(nexus_attr, NEXUS_ATTR_RX_RINGS,
		    IFF_NUM_RX_RINGS_WMM_MODE);
		VERIFY(err == 0);
		err = kern_nexus_attr_set(nexus_attr, NEXUS_ATTR_QMAP,
		    NEXUS_QMAP_TYPE_WMM);
		VERIFY(err == 0);
	}

	err = kern_nexus_attr_set(nexus_attr, NEXUS_ATTR_ANONYMOUS, 1);
	VERIFY(err == 0);
	snprintf((char *)provider_name, sizeof(provider_name),
	    "com.apple.netif.%s", fakeif->iff_name);
	err = kern_nexus_controller_register_provider(controller,
	    feth_nx_dom_prov,
	    provider_name,
	    &prov_init,
	    sizeof(prov_init),
	    nexus_attr,
	    provider);
	if (err != 0) {
		printf("%s register provider failed, error %d\n",
		    __func__, err);
		goto failed;
	}
	bzero(&net_init, sizeof(net_init));
	net_init.nxneti_version = KERN_NEXUS_NET_CURRENT_VERSION;
	net_init.nxneti_flags = 0;
	net_init.nxneti_eparams = init_params;
	net_init.nxneti_lladdr = NULL;
	net_init.nxneti_prepare = feth_netif_prepare;
	net_init.nxneti_rx_pbufpool = fakeif->iff_rx_pp;
	net_init.nxneti_tx_pbufpool = fakeif->iff_tx_pp;
	err = kern_nexus_controller_alloc_net_provider_instance(controller,
	    *provider,
	    fakeif,
	    NULL,
	    instance,
	    &net_init,
	    ifp);
	if (err != 0) {
		printf("%s alloc_net_provider_instance failed, %d\n",
		    __func__, err);
		/* undo the provider registration on instance failure */
		kern_nexus_controller_deregister_provider(controller,
		    *provider);
		uuid_clear(*provider);
		goto failed;
	}
	if (feth_supports_tso(fakeif)) {
		if ((err = feth_set_tso(*ifp)) != 0) {
			goto failed;
		}
	}

	/* success also arrives here with err == 0 */
failed:
	if (nexus_attr != NULL) {
		kern_nexus_attr_destroy(nexus_attr);
	}
	return err;
}
2319
2320 /*
2321 * The nif_stats need to be referenced because we don't want it set
2322 * to NULL until the last llink is removed.
2323 */
2324 static void
get_nexus_stats(if_fake_ref fakeif,kern_nexus_t nexus)2325 get_nexus_stats(if_fake_ref fakeif, kern_nexus_t nexus)
2326 {
2327 if (++fakeif->iff_nifs_ref == 1) {
2328 ASSERT(fakeif->iff_nifs == NULL);
2329 fakeif->iff_nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
2330 }
2331 }
2332
2333 static void
clear_nexus_stats(if_fake_ref fakeif)2334 clear_nexus_stats(if_fake_ref fakeif)
2335 {
2336 if (--fakeif->iff_nifs_ref == 0) {
2337 ASSERT(fakeif->iff_nifs != NULL);
2338 fakeif->iff_nifs = NULL;
2339 }
2340 }
2341
/*
 * Qset init callback for the llink-capable netif provider.
 *
 * Validates the qset index against the owning llink, records the
 * kernel qset handle and id in our per-qset state, and hands that
 * state back via *qset_ctx.  All bookkeeping is done under the feth
 * lock.
 *
 * Returns 0, ENXIO when the interface is detaching, or EINVAL for an
 * out-of-range qset index.
 */
static errno_t
feth_nx_qset_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    void *llink_ctx, uint8_t qset_idx, uint64_t qset_id, kern_netif_qset_t qset,
    void **qset_ctx)
{
#pragma unused(nxprov)
	if_fake_ref fakeif;
	fake_llink *fl = llink_ctx;
	fake_qset *fqs;

	feth_lock();
	fakeif = feth_nexus_context(nexus);
	if (feth_is_detaching(fakeif)) {
		feth_unlock();
		printf("%s: %s: detaching\n", __func__, fakeif->iff_name);
		return ENXIO;
	}
	if (qset_idx >= fl->fl_qset_cnt) {
		feth_unlock();
		printf("%s: %s: invalid qset_idx %d\n", __func__,
		    fakeif->iff_name, qset_idx);
		return EINVAL;
	}
	fqs = &fl->fl_qset[qset_idx];
	/* each qset slot must be initialized exactly once */
	ASSERT(fqs->fqs_qset == NULL);
	fqs->fqs_qset = qset;
	fqs->fqs_id = qset_id;
	*qset_ctx = fqs;

	/* XXX This should really be done during registration */
	get_nexus_stats(fakeif, nexus);
	feth_unlock();
	return 0;
}
2376
2377 static void
feth_nx_qset_fini(kern_nexus_provider_t nxprov,kern_nexus_t nexus,void * qset_ctx)2378 feth_nx_qset_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
2379 void *qset_ctx)
2380 {
2381 #pragma unused(nxprov)
2382 if_fake_ref fakeif;
2383 fake_qset *fqs = qset_ctx;
2384
2385 feth_lock();
2386 fakeif = feth_nexus_context(nexus);
2387 clear_nexus_stats(fakeif);
2388 ASSERT(fqs->fqs_qset != NULL);
2389 fqs->fqs_qset = NULL;
2390 fqs->fqs_id = 0;
2391 feth_unlock();
2392 }
2393
2394 static errno_t
feth_nx_queue_init(kern_nexus_provider_t nxprov,kern_nexus_t nexus,void * qset_ctx,uint8_t qidx,bool tx,kern_netif_queue_t queue,void ** queue_ctx)2395 feth_nx_queue_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
2396 void *qset_ctx, uint8_t qidx, bool tx, kern_netif_queue_t queue,
2397 void **queue_ctx)
2398 {
2399 #pragma unused(nxprov)
2400 if_fake_ref fakeif;
2401 fake_qset *fqs = qset_ctx;
2402 fake_queue *fq;
2403
2404 feth_lock();
2405 fakeif = feth_nexus_context(nexus);
2406 if (feth_is_detaching(fakeif)) {
2407 printf("%s: %s: detaching\n", __func__, fakeif->iff_name);
2408 feth_unlock();
2409 return ENXIO;
2410 }
2411 if (tx) {
2412 if (qidx >= fqs->fqs_tx_queue_cnt) {
2413 printf("%s: %s: invalid tx qidx %d\n", __func__,
2414 fakeif->iff_name, qidx);
2415 feth_unlock();
2416 return EINVAL;
2417 }
2418 fq = &fqs->fqs_tx_queue[qidx];
2419 } else {
2420 if (qidx >= fqs->fqs_rx_queue_cnt) {
2421 printf("%s: %s: invalid rx qidx %d\n", __func__,
2422 fakeif->iff_name, qidx);
2423 feth_unlock();
2424 return EINVAL;
2425 }
2426 fq = &fqs->fqs_rx_queue[qidx];
2427 }
2428 ASSERT(fq->fq_queue == NULL);
2429 fq->fq_queue = queue;
2430 *queue_ctx = fq;
2431 feth_unlock();
2432 return 0;
2433 }
2434
2435 static void
feth_nx_queue_fini(kern_nexus_provider_t nxprov,kern_nexus_t nexus,void * queue_ctx)2436 feth_nx_queue_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
2437 void *queue_ctx)
2438 {
2439 #pragma unused(nxprov, nexus)
2440 fake_queue *fq = queue_ctx;
2441
2442 feth_lock();
2443 ASSERT(fq->fq_queue != NULL);
2444 fq->fq_queue = NULL;
2445 feth_unlock();
2446 }
2447
/*
 * Walk a dequeued TX packet chain, tap each packet for bpf, apply the
 * configured drop policy, and deliver the survivors to the peer's RX
 * queue in batches of up to IFF_MAX_BATCH_SIZE.
 *
 * 'sph' is the head of a singly-linked kern_packet_t chain; links are
 * severed as we go so each packet is handed off individually.
 * Delivered packets are TX-completed back to this interface after each
 * batch submit.
 */
static void
feth_nx_tx_queue_deliver_pkt_chain(if_fake_ref fakeif, kern_packet_t sph,
    struct netif_stats *nifs, if_fake_ref peer_fakeif,
    uint32_t llink_idx, uint32_t qset_idx)
{
	kern_packet_t pkts[IFF_MAX_BATCH_SIZE];
	uint32_t n_pkts = 0;

	while (sph != 0) {
		uint16_t off;
		kern_packet_t next;

		/* detach the current packet from the chain */
		next = kern_packet_get_next(sph);
		kern_packet_set_next(sph, 0);

		/* bpf tap output */
		off = kern_packet_get_headroom(sph);
		VERIFY(off >= fakeif->iff_tx_headroom);
		kern_packet_set_link_header_length(sph, ETHER_HDR_LEN);
		feth_packet_set_trace_tag(sph, IFF_TT_OUTPUT);
		bpf_tap_packet_out(fakeif->iff_ifp, DLT_EN10MB, sph, NULL, 0);

		/* drop packets, if requested */
		fakeif->iff_tx_pkts_count++;
		if (feth_tx_complete_error(fakeif, &sph, nifs)) {
			/* packet consumed by the drop path */
			goto next_pkt;
		}
		ASSERT(sph != 0);
		STATS_INC(nifs, NETIF_STATS_TX_COPY_DIRECT);
		STATS_INC(nifs, NETIF_STATS_TX_PACKETS);

		/* prepare batch for receiver */
		pkts[n_pkts++] = sph;
		if (n_pkts == IFF_MAX_BATCH_SIZE) {
			/* batch is full: hand it to the peer, then complete */
			feth_rx_queue_submit(fakeif, peer_fakeif, llink_idx,
			    qset_idx, pkts, n_pkts);
			feth_tx_complete(fakeif, pkts, n_pkts);
			n_pkts = 0;
		}
next_pkt:
		sph = next;
	}
	/* catch last batch for receiver */
	if (n_pkts != 0) {
		feth_rx_queue_submit(fakeif, peer_fakeif, llink_idx, qset_idx,
		    pkts, n_pkts);
		feth_tx_complete(fakeif, pkts, n_pkts);
		n_pkts = 0;
	}
}
2498
2499 static errno_t
feth_nx_tx_qset_notify(kern_nexus_provider_t nxprov,kern_nexus_t nexus,void * qset_ctx,uint32_t flags)2500 feth_nx_tx_qset_notify(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
2501 void *qset_ctx, uint32_t flags)
2502 {
2503 #pragma unused(nxprov)
2504 if_fake_ref fakeif;
2505 ifnet_t ifp;
2506 ifnet_t peer_ifp;
2507 if_fake_ref peer_fakeif = NULL;
2508 struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
2509 fake_qset *qset = qset_ctx;
2510 boolean_t detaching, connected;
2511 uint32_t i;
2512 errno_t err;
2513
2514 STATS_INC(nifs, NETIF_STATS_TX_SYNC);
2515 fakeif = feth_nexus_context(nexus);
2516 FETH_DPRINTF("%s qset %p, idx %d, flags 0x%x\n", fakeif->iff_name, qset,
2517 qset->fqs_idx, flags);
2518
2519 feth_lock();
2520 detaching = feth_is_detaching(fakeif);
2521 connected = fakeif->iff_channel_connected;
2522 if (detaching || !connected) {
2523 FETH_DPRINTF("%s: %s: detaching %s, channel connected %s\n",
2524 __func__, fakeif->iff_name,
2525 (detaching ? "true" : "false"),
2526 (connected ? "true" : "false"));
2527 feth_unlock();
2528 return 0;
2529 }
2530 ifp = fakeif->iff_ifp;
2531 peer_ifp = fakeif->iff_peer;
2532 if (peer_ifp != NULL) {
2533 peer_fakeif = ifnet_get_if_fake(peer_ifp);
2534 if (peer_fakeif != NULL) {
2535 detaching = feth_is_detaching(peer_fakeif);
2536 connected = peer_fakeif->iff_channel_connected;
2537 if (detaching || !connected) {
2538 FETH_DPRINTF("%s: peer %s: detaching %s, "
2539 "channel connected %s\n",
2540 __func__, peer_fakeif->iff_name,
2541 (detaching ? "true" : "false"),
2542 (connected ? "true" : "false"));
2543 goto done;
2544 }
2545 } else {
2546 FETH_DPRINTF("%s: peer_fakeif is NULL\n", __func__);
2547 goto done;
2548 }
2549 } else {
2550 printf("%s: peer_ifp is NULL\n", __func__);
2551 goto done;
2552 }
2553
2554 if (if_fake_switch_combined_mode &&
2555 qset->fqs_dequeue_cnt >= if_fake_switch_mode_frequency) {
2556 if (qset->fqs_combined_mode) {
2557 kern_netif_set_qset_separate(qset->fqs_qset);
2558 } else {
2559 kern_netif_set_qset_combined(qset->fqs_qset);
2560 }
2561 qset->fqs_combined_mode = !qset->fqs_combined_mode;
2562 qset->fqs_dequeue_cnt = 0;
2563 }
2564
2565 for (i = 0; i < qset->fqs_tx_queue_cnt; i++) {
2566 kern_packet_t sph = 0;
2567 kern_netif_queue_t queue = qset->fqs_tx_queue[i].fq_queue;
2568 boolean_t more = FALSE;
2569
2570 err = kern_netif_queue_tx_dequeue(queue, UINT32_MAX, UINT32_MAX,
2571 &more, &sph);
2572 if (err != 0 && err != EAGAIN) {
2573 FETH_DPRINTF("%s queue %p dequeue failed: err "
2574 "%d\n", fakeif->iff_name, queue, err);
2575 }
2576 feth_nx_tx_queue_deliver_pkt_chain(fakeif, sph, nifs,
2577 peer_fakeif, qset->fqs_llink_idx, qset->fqs_idx);
2578 }
2579
2580 done:
2581 feth_unlock();
2582 return 0;
2583 }
2584
2585 static void
fill_qset_info_and_params(if_fake_ref fakeif,fake_llink * llink_info,uint32_t qset_idx,struct kern_nexus_netif_llink_qset_init * qset_init,bool is_def,bool is_low_latency)2586 fill_qset_info_and_params(if_fake_ref fakeif, fake_llink *llink_info,
2587 uint32_t qset_idx, struct kern_nexus_netif_llink_qset_init *qset_init,
2588 bool is_def, bool is_low_latency)
2589 {
2590 fake_qset *qset_info = &llink_info->fl_qset[qset_idx];
2591
2592 qset_init->nlqi_flags =
2593 (is_def ? KERN_NEXUS_NET_LLINK_QSET_DEFAULT : 0) |
2594 (is_low_latency ? KERN_NEXUS_NET_LLINK_QSET_LOW_LATENCY : 0) |
2595 KERN_NEXUS_NET_LLINK_QSET_AQM;
2596
2597 if (feth_in_wmm_mode(fakeif)) {
2598 qset_init->nlqi_flags |= KERN_NEXUS_NET_LLINK_QSET_WMM_MODE;
2599 qset_init->nlqi_num_txqs = IFF_NUM_TX_QUEUES_WMM_MODE;
2600 qset_init->nlqi_num_rxqs = IFF_NUM_RX_QUEUES_WMM_MODE;
2601 } else {
2602 qset_init->nlqi_num_txqs = 1;
2603 qset_init->nlqi_num_rxqs = 1;
2604 }
2605 qset_info->fqs_tx_queue_cnt = qset_init->nlqi_num_txqs;
2606 qset_info->fqs_rx_queue_cnt = qset_init->nlqi_num_rxqs;
2607
2608 /* These are needed for locating the peer qset */
2609 qset_info->fqs_llink_idx = llink_info->fl_idx;
2610 qset_info->fqs_idx = qset_idx;
2611 }
2612
2613 static void
fill_llink_info_and_params(if_fake_ref fakeif,uint32_t llink_idx,struct kern_nexus_netif_llink_init * llink_init,uint32_t llink_id,struct kern_nexus_netif_llink_qset_init * qset_init,uint32_t qset_cnt,uint32_t flags)2614 fill_llink_info_and_params(if_fake_ref fakeif, uint32_t llink_idx,
2615 struct kern_nexus_netif_llink_init *llink_init, uint32_t llink_id,
2616 struct kern_nexus_netif_llink_qset_init *qset_init, uint32_t qset_cnt,
2617 uint32_t flags)
2618 {
2619 fake_llink *llink_info = &fakeif->iff_llink[llink_idx];
2620 uint32_t i;
2621 bool create_ll_qset = if_fake_low_latency && (llink_idx != 0);
2622
2623 for (i = 0; i < qset_cnt; i++) {
2624 fill_qset_info_and_params(fakeif, llink_info, i,
2625 &qset_init[i], i == 0, create_ll_qset && i == 1);
2626 }
2627 llink_info->fl_idx = llink_idx;
2628
2629 /* This doesn't have to be the same as llink_idx */
2630 llink_info->fl_id = llink_id;
2631 llink_info->fl_qset_cnt = qset_cnt;
2632
2633 llink_init->nli_link_id = llink_id;
2634 llink_init->nli_num_qsets = qset_cnt;
2635 llink_init->nli_qsets = qset_init;
2636 llink_init->nli_flags = flags;
2637 llink_init->nli_ctx = llink_info;
2638 }
2639
/*
 * Add the non-default logical links (indices 1..if_fake_llink_cnt-1)
 * to the interface's netif nexus.  Called after the default llink has
 * been created (and iff_llink_cnt set to 1) by
 * create_netif_llink_provider_and_instance.
 *
 * On failure, every llink counted by iff_llink_cnt is removed and the
 * count is reset to 0 before the error is returned.
 */
static errno_t
create_non_default_llinks(if_fake_ref fakeif)
{
	struct kern_nexus *nx;
	fake_nx_t fnx = &fakeif->iff_nx;
	struct kern_nexus_netif_llink_init llink_init;
	struct kern_nexus_netif_llink_qset_init qset_init[FETH_MAX_QSETS];
	errno_t err;
	uint64_t llink_id;
	uint32_t i;

	/* look up the live nexus for our instance UUID */
	nx = nx_find(fnx->fnx_instance, FALSE);
	if (nx == NULL) {
		printf("%s: %s: nx not found\n", __func__, fakeif->iff_name);
		return ENXIO;
	}
	/* Default llink starts at index 0 */
	for (i = 1; i < if_fake_llink_cnt; i++) {
		llink_id = (uint64_t)i;

		/*
		 * The llink_init and qset_init structures are reused for
		 * each llink creation.
		 */
		fill_llink_info_and_params(fakeif, i, &llink_init,
		    llink_id, qset_init, if_fake_qset_cnt, 0);
		err = kern_nexus_netif_llink_add(nx, &llink_init);
		if (err != 0) {
			printf("%s: %s: llink add failed, error %d\n",
			    __func__, fakeif->iff_name, err);
			goto fail;
		}
		fakeif->iff_llink_cnt++;
	}
	nx_release(nx);
	return 0;

fail:
	/*
	 * NOTE(review): this cleanup loop starts at index 0, which
	 * includes the default llink, whereas remove_non_default_llinks
	 * deliberately starts at 1 ("freed separately").  Presumably
	 * intentional because the whole nexus is torn down on this
	 * failure path — confirm against the caller.
	 */
	for (i = 0; i < fakeif->iff_llink_cnt; i++) {
		int e;

		e = kern_nexus_netif_llink_remove(nx, fakeif->
		    iff_llink[i].fl_id);
		if (e != 0) {
			printf("%s: %s: llink remove failed, llink_id 0x%llx, "
			    "error %d\n", __func__, fakeif->iff_name,
			    fakeif->iff_llink[i].fl_id, e);
		}
		fakeif->iff_llink[i].fl_id = 0;
	}
	fakeif->iff_llink_cnt = 0;
	nx_release(nx);
	return err;
}
2694
/*
 * Create the llink-capable netif nexus provider and its instance,
 * producing the attached ifnet in *ifp and the provider and instance
 * UUIDs in *provider / *instance.
 *
 * The default llink (id NETIF_LLINK_ID_DEFAULT) is created as part of
 * the provider instance; additional llinks are added afterwards by
 * create_non_default_llinks() when if_fake_llink_cnt > 1.  As in
 * create_netif_provider_and_instance, the success path falls through
 * to 'failed', which only destroys the nexus attribute object.
 */
static errno_t
create_netif_llink_provider_and_instance(if_fake_ref fakeif,
    struct ifnet_init_eparams * init_params, ifnet_t *ifp,
    uuid_t * provider, uuid_t * instance)
{
	errno_t err;
	nexus_controller_t controller = kern_nexus_shared_controller();
	struct kern_nexus_net_init net_init;
	struct kern_nexus_netif_llink_init llink_init;
	struct kern_nexus_netif_llink_qset_init qsets[FETH_MAX_QSETS];

	nexus_name_t provider_name;
	nexus_attr_t nexus_attr = NULL;
	/* llink-style callbacks: qset/queue init replace ring/slot init */
	struct kern_nexus_netif_provider_init prov_init = {
		.nxnpi_version = KERN_NEXUS_DOMAIN_PROVIDER_NETIF,
		.nxnpi_flags = NXPIF_VIRTUAL_DEVICE,
		.nxnpi_pre_connect = feth_nx_pre_connect,
		.nxnpi_connected = feth_nx_connected,
		.nxnpi_pre_disconnect = feth_nx_pre_disconnect,
		.nxnpi_disconnected = feth_nx_disconnected,
		.nxnpi_qset_init = feth_nx_qset_init,
		.nxnpi_qset_fini = feth_nx_qset_fini,
		.nxnpi_queue_init = feth_nx_queue_init,
		.nxnpi_queue_fini = feth_nx_queue_fini,
		.nxnpi_tx_qset_notify = feth_nx_tx_qset_notify,
		.nxnpi_config_capab = feth_nx_capab_config,
	};

	err = kern_nexus_attr_create(&nexus_attr);
	if (err != 0) {
		printf("%s nexus attribute creation failed, error %d\n",
		    __func__, err);
		goto failed;
	}

	err = kern_nexus_attr_set(nexus_attr, NEXUS_ATTR_ANONYMOUS, 1);
	VERIFY(err == 0);

	snprintf((char *)provider_name, sizeof(provider_name),
	    "com.apple.netif.%s", fakeif->iff_name);
	err = kern_nexus_controller_register_provider(controller,
	    feth_nx_dom_prov,
	    provider_name,
	    (struct kern_nexus_provider_init *)&prov_init,
	    sizeof(prov_init),
	    nexus_attr,
	    provider);
	if (err != 0) {
		printf("%s register provider failed, error %d\n",
		    __func__, err);
		goto failed;
	}
	bzero(&net_init, sizeof(net_init));
	net_init.nxneti_version = KERN_NEXUS_NET_CURRENT_VERSION;
	net_init.nxneti_flags = 0;
	net_init.nxneti_eparams = init_params;
	net_init.nxneti_lladdr = NULL;
	net_init.nxneti_prepare = feth_netif_prepare;
	net_init.nxneti_rx_pbufpool = fakeif->iff_rx_pp;
	net_init.nxneti_tx_pbufpool = fakeif->iff_tx_pp;

	/*
	 * Assume llink id is same as the index for if_fake.
	 * This is not required for other drivers.
	 */
	_CASSERT(NETIF_LLINK_ID_DEFAULT == 0);
	fill_llink_info_and_params(fakeif, 0, &llink_init,
	    NETIF_LLINK_ID_DEFAULT, qsets, if_fake_qset_cnt,
	    KERN_NEXUS_NET_LLINK_DEFAULT);

	net_init.nxneti_llink = &llink_init;

	err = kern_nexus_controller_alloc_net_provider_instance(controller,
	    *provider, fakeif, NULL, instance, &net_init, ifp);
	if (err != 0) {
		printf("%s alloc_net_provider_instance failed, %d\n",
		    __func__, err);
		/* undo the provider registration on instance failure */
		kern_nexus_controller_deregister_provider(controller,
		    *provider);
		uuid_clear(*provider);
		goto failed;
	}
	/* the default llink now exists */
	fakeif->iff_llink_cnt++;

	if (if_fake_llink_cnt > 1) {
		err = create_non_default_llinks(fakeif);
		if (err != 0) {
			printf("%s create_non_default_llinks failed, %d\n",
			    __func__, err);
			/* tear down everything created above */
			feth_detach_netif_nexus(fakeif);
			goto failed;
		}
	}
	if (feth_supports_tso(fakeif)) {
		if ((err = feth_set_tso(*ifp)) != 0) {
			goto failed;
		}
	}
failed:
	/* success also arrives here with err == 0 */
	if (nexus_attr != NULL) {
		kern_nexus_attr_destroy(nexus_attr);
	}
	return err;
}
2799
2800 static errno_t
feth_attach_netif_nexus(if_fake_ref fakeif,struct ifnet_init_eparams * init_params,ifnet_t * ifp)2801 feth_attach_netif_nexus(if_fake_ref fakeif,
2802 struct ifnet_init_eparams * init_params, ifnet_t *ifp)
2803 {
2804 errno_t error;
2805 fake_nx_t nx = &fakeif->iff_nx;
2806
2807 error = feth_packet_pool_make(fakeif);
2808 if (error != 0) {
2809 return error;
2810 }
2811 if (if_fake_llink_cnt == 0) {
2812 return create_netif_provider_and_instance(fakeif, init_params,
2813 ifp, &nx->fnx_provider, &nx->fnx_instance);
2814 } else {
2815 return create_netif_llink_provider_and_instance(fakeif,
2816 init_params, ifp, &nx->fnx_provider,
2817 &nx->fnx_instance);
2818 }
2819 }
2820
2821 static void
remove_non_default_llinks(if_fake_ref fakeif)2822 remove_non_default_llinks(if_fake_ref fakeif)
2823 {
2824 struct kern_nexus *nx;
2825 fake_nx_t fnx = &fakeif->iff_nx;
2826 uint32_t i;
2827
2828 if (fakeif->iff_llink_cnt <= 1) {
2829 return;
2830 }
2831 nx = nx_find(fnx->fnx_instance, FALSE);
2832 if (nx == NULL) {
2833 printf("%s: %s: nx not found\n", __func__,
2834 fakeif->iff_name);
2835 return;
2836 }
2837 /* Default llink (at index 0) is freed separately */
2838 for (i = 1; i < fakeif->iff_llink_cnt; i++) {
2839 int err;
2840
2841 err = kern_nexus_netif_llink_remove(nx, fakeif->
2842 iff_llink[i].fl_id);
2843 if (err != 0) {
2844 printf("%s: %s: llink remove failed, llink_id 0x%llx, "
2845 "error %d\n", __func__, fakeif->iff_name,
2846 fakeif->iff_llink[i].fl_id, err);
2847 }
2848 fakeif->iff_llink[i].fl_id = 0;
2849 }
2850 fakeif->iff_llink_cnt = 0;
2851 nx_release(nx);
2852 }
2853
2854 static void
detach_provider_and_instance(uuid_t provider,uuid_t instance)2855 detach_provider_and_instance(uuid_t provider, uuid_t instance)
2856 {
2857 nexus_controller_t controller = kern_nexus_shared_controller();
2858 errno_t err;
2859
2860 if (!uuid_is_null(instance)) {
2861 err = kern_nexus_controller_free_provider_instance(controller,
2862 instance);
2863 if (err != 0) {
2864 printf("%s free_provider_instance failed %d\n",
2865 __func__, err);
2866 }
2867 uuid_clear(instance);
2868 }
2869 if (!uuid_is_null(provider)) {
2870 err = kern_nexus_controller_deregister_provider(controller,
2871 provider);
2872 if (err != 0) {
2873 printf("%s deregister_provider %d\n", __func__, err);
2874 }
2875 uuid_clear(provider);
2876 }
2877 return;
2878 }
2879
2880 static void
feth_detach_netif_nexus(if_fake_ref fakeif)2881 feth_detach_netif_nexus(if_fake_ref fakeif)
2882 {
2883 fake_nx_t fnx = &fakeif->iff_nx;
2884
2885 remove_non_default_llinks(fakeif);
2886 detach_provider_and_instance(fnx->fnx_provider, fnx->fnx_instance);
2887 }
2888
2889 #endif /* SKYWALK */
2890
2891 /**
2892 ** feth interface routines
2893 **/
2894 static void
feth_ifnet_set_attrs(if_fake_ref fakeif,ifnet_t ifp)2895 feth_ifnet_set_attrs(if_fake_ref fakeif, ifnet_t ifp)
2896 {
2897 (void)ifnet_set_capabilities_enabled(ifp, 0, -1);
2898 ifnet_set_addrlen(ifp, ETHER_ADDR_LEN);
2899 ifnet_set_baudrate(ifp, 0);
2900 ifnet_set_mtu(ifp, ETHERMTU);
2901 ifnet_set_flags(ifp,
2902 IFF_BROADCAST | IFF_MULTICAST | IFF_SIMPLEX,
2903 0xffff);
2904 ifnet_set_hdrlen(ifp, sizeof(struct ether_header));
2905 if ((fakeif->iff_flags & IFF_FLAGS_HWCSUM) != 0) {
2906 ifnet_set_offload(ifp,
2907 IFNET_CSUM_IP | IFNET_CSUM_TCP | IFNET_CSUM_UDP |
2908 IFNET_CSUM_TCPIPV6 | IFNET_CSUM_UDPIPV6);
2909 } else {
2910 ifnet_set_offload(ifp, 0);
2911 }
2912 }
2913
2914 static void
interface_link_event(ifnet_t ifp,u_int32_t event_code)2915 interface_link_event(ifnet_t ifp, u_int32_t event_code)
2916 {
2917 struct event {
2918 u_int32_t ifnet_family;
2919 u_int32_t unit;
2920 char if_name[IFNAMSIZ];
2921 };
2922 _Alignas(struct kern_event_msg) char message[sizeof(struct kern_event_msg) + sizeof(struct event)] = { 0 };
2923 struct kern_event_msg *header = (struct kern_event_msg*)message;
2924 struct event *data = (struct event *)(header + 1);
2925
2926 header->total_size = sizeof(message);
2927 header->vendor_code = KEV_VENDOR_APPLE;
2928 header->kev_class = KEV_NETWORK_CLASS;
2929 header->kev_subclass = KEV_DL_SUBCLASS;
2930 header->event_code = event_code;
2931 data->ifnet_family = ifnet_family(ifp);
2932 data->unit = (u_int32_t)ifnet_unit(ifp);
2933 strlcpy(data->if_name, ifnet_name(ifp), IFNAMSIZ);
2934 ifnet_event(ifp, header);
2935 }
2936
2937 static if_fake_ref
ifnet_get_if_fake(ifnet_t ifp)2938 ifnet_get_if_fake(ifnet_t ifp)
2939 {
2940 return (if_fake_ref)ifnet_softc(ifp);
2941 }
2942
2943 static int
feth_clone_create(struct if_clone * ifc,u_int32_t unit,__unused void * params)2944 feth_clone_create(struct if_clone *ifc, u_int32_t unit, __unused void *params)
2945 {
2946 int error;
2947 if_fake_ref fakeif;
2948 struct ifnet_init_eparams feth_init;
2949 ifnet_t ifp;
2950 uint8_t mac_address[ETHER_ADDR_LEN];
2951
2952 fakeif = if_clone_softc_allocate(&feth_cloner);
2953 if (fakeif == NULL) {
2954 return ENOBUFS;
2955 }
2956 fakeif->iff_retain_count = 1;
2957 #define FAKE_ETHER_NAME_LEN (sizeof(FAKE_ETHER_NAME) - 1)
2958 _CASSERT(FAKE_ETHER_NAME_LEN == 4);
2959 bcopy(FAKE_ETHER_NAME, mac_address, FAKE_ETHER_NAME_LEN);
2960 mac_address[ETHER_ADDR_LEN - 2] = (unit & 0xff00) >> 8;
2961 mac_address[ETHER_ADDR_LEN - 1] = unit & 0xff;
2962 if (if_fake_bsd_mode != 0) {
2963 fakeif->iff_flags |= IFF_FLAGS_BSD_MODE;
2964 }
2965 if (if_fake_hwcsum != 0) {
2966 fakeif->iff_flags |= IFF_FLAGS_HWCSUM;
2967 }
2968 fakeif->iff_max_mtu = get_max_mtu(if_fake_bsd_mode, if_fake_max_mtu);
2969 fakeif->iff_fcs = if_fake_fcs;
2970 fakeif->iff_trailer_length = if_fake_trailer_length;
2971
2972 /* use the interface name as the unique id for ifp recycle */
2973 if ((unsigned int)
2974 snprintf(fakeif->iff_name, sizeof(fakeif->iff_name), "%s%d",
2975 ifc->ifc_name, unit) >= sizeof(fakeif->iff_name)) {
2976 feth_release(fakeif);
2977 return EINVAL;
2978 }
2979 bzero(&feth_init, sizeof(feth_init));
2980 feth_init.ver = IFNET_INIT_CURRENT_VERSION;
2981 feth_init.len = sizeof(feth_init);
2982 if (feth_in_bsd_mode(fakeif)) {
2983 if (if_fake_txstart != 0) {
2984 feth_init.start = feth_start;
2985 } else {
2986 feth_init.flags |= IFNET_INIT_LEGACY;
2987 feth_init.output = feth_output;
2988 }
2989 }
2990 #if SKYWALK
2991 else {
2992 feth_init.flags |= IFNET_INIT_SKYWALK_NATIVE;
2993 /*
2994 * Currently we support WMM mode only for Skywalk native
2995 * interface.
2996 */
2997 if (if_fake_wmm_mode != 0) {
2998 fakeif->iff_flags |= IFF_FLAGS_WMM_MODE;
2999 }
3000
3001 if (if_fake_multibuflet != 0) {
3002 fakeif->iff_flags |= IFF_FLAGS_MULTIBUFLETS;
3003 }
3004
3005 if (if_fake_multibuflet != 0 &&
3006 if_fake_pktpool_mode == IFF_PP_MODE_PRIVATE_SPLIT) {
3007 printf("%s: multi-buflet not supported for split rx &"
3008 " tx pool", __func__);
3009 feth_release(fakeif);
3010 return EINVAL;
3011 }
3012
3013 fakeif->iff_pp_mode = if_fake_pktpool_mode;
3014 if (if_fake_tso_support != 0) {
3015 if (fakeif->iff_pp_mode != IFF_PP_MODE_GLOBAL) {
3016 printf("%s: TSO mode requires global packet"
3017 " pool mode\n", __func__);
3018 return EINVAL;
3019 }
3020 fakeif->iff_flags |= IFF_FLAGS_TSO_SUPPORT;
3021 }
3022
3023 fakeif->iff_tx_headroom = if_fake_tx_headroom;
3024 fakeif->iff_adv_interval = if_fake_if_adv_interval;
3025 if (fakeif->iff_adv_interval > 0) {
3026 feth_init.flags |= IFNET_INIT_IF_ADV;
3027 }
3028 fakeif->iff_tx_drop_rate = if_fake_tx_drops;
3029 fakeif->iff_tx_completion_mode = if_tx_completion_mode;
3030 }
3031 feth_init.tx_headroom = fakeif->iff_tx_headroom;
3032 #endif /* SKYWALK */
3033 if (if_fake_nxattach == 0) {
3034 feth_init.flags |= IFNET_INIT_NX_NOAUTO;
3035 }
3036 feth_init.uniqueid = fakeif->iff_name;
3037 feth_init.uniqueid_len = strlen(fakeif->iff_name);
3038 feth_init.name = ifc->ifc_name;
3039 feth_init.unit = unit;
3040 feth_init.family = IFNET_FAMILY_ETHERNET;
3041 feth_init.type = IFT_ETHER;
3042 feth_init.demux = ether_demux;
3043 feth_init.add_proto = ether_add_proto;
3044 feth_init.del_proto = ether_del_proto;
3045 feth_init.check_multi = ether_check_multi;
3046 feth_init.framer_extended = ether_frameout_extended;
3047 feth_init.softc = fakeif;
3048 feth_init.ioctl = feth_ioctl;
3049 feth_init.set_bpf_tap = NULL;
3050 feth_init.detach = feth_if_free;
3051 feth_init.broadcast_addr = etherbroadcastaddr;
3052 feth_init.broadcast_len = ETHER_ADDR_LEN;
3053 if (feth_in_bsd_mode(fakeif)) {
3054 error = ifnet_allocate_extended(&feth_init, &ifp);
3055 if (error) {
3056 feth_release(fakeif);
3057 return error;
3058 }
3059 feth_ifnet_set_attrs(fakeif, ifp);
3060 }
3061 #if SKYWALK
3062 else {
3063 if (feth_in_wmm_mode(fakeif)) {
3064 feth_init.output_sched_model =
3065 IFNET_SCHED_MODEL_DRIVER_MANAGED;
3066 }
3067 error = feth_attach_netif_nexus(fakeif, &feth_init, &ifp);
3068 if (error != 0) {
3069 feth_release(fakeif);
3070 return error;
3071 }
3072 /* take an additional reference to ensure that it doesn't go away */
3073 feth_retain(fakeif);
3074 fakeif->iff_ifp = ifp;
3075 }
3076 #endif /* SKYWALK */
3077 fakeif->iff_media_count = MIN(default_media_words_count, IF_FAKE_MEDIA_LIST_MAX);
3078 bcopy(default_media_words, fakeif->iff_media_list,
3079 fakeif->iff_media_count * sizeof(fakeif->iff_media_list[0]));
3080 if (feth_in_bsd_mode(fakeif)) {
3081 error = ifnet_attach(ifp, NULL);
3082 if (error) {
3083 ifnet_release(ifp);
3084 feth_release(fakeif);
3085 return error;
3086 }
3087 fakeif->iff_ifp = ifp;
3088 }
3089
3090 ifnet_set_lladdr(ifp, mac_address, sizeof(mac_address));
3091
3092 /* attach as ethernet */
3093 bpfattach(ifp, DLT_EN10MB, sizeof(struct ether_header));
3094 return 0;
3095 }
3096
3097 static int
feth_clone_destroy(ifnet_t ifp)3098 feth_clone_destroy(ifnet_t ifp)
3099 {
3100 if_fake_ref fakeif;
3101 #if SKYWALK
3102 boolean_t nx_attached = FALSE;
3103 #endif /* SKYWALK */
3104
3105 feth_lock();
3106 fakeif = ifnet_get_if_fake(ifp);
3107 if (fakeif == NULL || feth_is_detaching(fakeif)) {
3108 feth_unlock();
3109 return 0;
3110 }
3111 feth_set_detaching(fakeif);
3112 #if SKYWALK
3113 nx_attached = !feth_in_bsd_mode(fakeif);
3114 #endif /* SKYWALK */
3115 feth_unlock();
3116
3117 #if SKYWALK
3118 if (nx_attached) {
3119 feth_detach_netif_nexus(fakeif);
3120 feth_release(fakeif);
3121 }
3122 #endif /* SKYWALK */
3123 feth_config(ifp, NULL);
3124 ifnet_detach(ifp);
3125 return 0;
3126 }
3127
3128 static void
feth_enqueue_input(ifnet_t ifp,struct mbuf * m)3129 feth_enqueue_input(ifnet_t ifp, struct mbuf * m)
3130 {
3131 struct ifnet_stat_increment_param stats = {};
3132
3133 stats.packets_in = 1;
3134 stats.bytes_in = (uint32_t)mbuf_pkthdr_len(m) + ETHER_HDR_LEN;
3135 ifnet_input(ifp, m, &stats);
3136 }
3137
/*
 * copy_mbuf
 * - duplicate a packet into a single contiguous mbuf (cluster-backed
 *   when the payload exceeds MHLEN), preserving the packet length and
 *   service class
 * - returns the copy, or NULL when m has no packet header, an
 *   allocation fails, the packet is too large for a 16K cluster, or
 *   the chain is inconsistent with its header length
 */
static struct mbuf *
copy_mbuf(struct mbuf *m)
{
	struct mbuf * copy_m;
	uint32_t pkt_len;
	uint32_t offset;

	if ((m->m_flags & M_PKTHDR) == 0) {
		return NULL;
	}
	pkt_len = m->m_pkthdr.len;
	MGETHDR(copy_m, M_DONTWAIT, MT_DATA);
	if (copy_m == NULL) {
		goto failed;
	}
	/* pick the smallest cluster size that holds the whole packet */
	if (pkt_len > MHLEN) {
		if (pkt_len <= MCLBYTES) {
			MCLGET(copy_m, M_DONTWAIT);
		} else if (pkt_len <= MBIGCLBYTES) {
			copy_m = m_mbigget(copy_m, M_DONTWAIT);
		} else if (pkt_len <= M16KCLBYTES && njcl > 0) {
			copy_m = m_m16kget(copy_m, M_DONTWAIT);
		} else {
			printf("if_fake: copy_mbuf(): packet too large %d\n",
			    pkt_len);
			goto failed;
		}
		/* the cluster getters may fail and leave no M_EXT buffer */
		if (copy_m == NULL || (copy_m->m_flags & M_EXT) == 0) {
			goto failed;
		}
	}
	mbuf_setlen(copy_m, pkt_len);
	copy_m->m_pkthdr.len = pkt_len;
	copy_m->m_pkthdr.pkt_svc = m->m_pkthdr.pkt_svc;
	offset = 0;
	/* flatten the source chain fragment by fragment into the copy */
	while (m != NULL && offset < pkt_len) {
		uint32_t frag_len;

		frag_len = m->m_len;
		if (frag_len > (pkt_len - offset)) {
			/* the chain holds more data than the header claims */
			printf("if_fake_: Large mbuf fragment %d > %d\n",
			    frag_len, (pkt_len - offset));
			goto failed;
		}
		m_copydata(m, 0, frag_len, mtodo(copy_m, offset));
		offset += frag_len;
		m = m->m_next;
	}
	return copy_m;

failed:
	/* copy_m is NULL only when MGETHDR itself failed */
	if (copy_m != NULL) {
		m_freem(copy_m);
	}
	return NULL;
}
3194
3195 static int
feth_add_mbuf_trailer(struct mbuf * m,void * trailer,size_t trailer_len)3196 feth_add_mbuf_trailer(struct mbuf *m, void *trailer, size_t trailer_len)
3197 {
3198 int ret;
3199 ASSERT(trailer_len <= FETH_TRAILER_LENGTH_MAX);
3200
3201 ret = m_append(m, trailer_len, (caddr_t)trailer);
3202 if (ret == 1) {
3203 FETH_DPRINTF("%s %zuB trailer added\n", __func__, trailer_len);
3204 return 0;
3205 }
3206 printf("%s m_append failed\n", __func__);
3207 return ENOTSUP;
3208 }
3209
3210 static int
feth_add_mbuf_fcs(struct mbuf * m)3211 feth_add_mbuf_fcs(struct mbuf *m)
3212 {
3213 uint32_t pkt_len, offset = 0;
3214 uint32_t crc = 0;
3215 int err = 0;
3216
3217 ASSERT(sizeof(crc) == ETHER_CRC_LEN);
3218
3219 pkt_len = m->m_pkthdr.len;
3220 struct mbuf *iter = m;
3221 while (iter != NULL && offset < pkt_len) {
3222 uint32_t frag_len = iter->m_len;
3223 ASSERT(frag_len <= (pkt_len - offset));
3224 crc = crc32(crc, mtod(iter, void *), frag_len);
3225 offset += frag_len;
3226 iter = m->m_next;
3227 }
3228
3229 err = feth_add_mbuf_trailer(m, &crc, ETHER_CRC_LEN);
3230 if (err != 0) {
3231 return err;
3232 }
3233
3234 m->m_flags |= M_HASFCS;
3235
3236 return 0;
3237 }
3238
3239 static void
feth_output_common(ifnet_t ifp,struct mbuf * m,ifnet_t peer,iff_flags_t flags,bool fcs,void * trailer,size_t trailer_len)3240 feth_output_common(ifnet_t ifp, struct mbuf * m, ifnet_t peer,
3241 iff_flags_t flags, bool fcs, void *trailer, size_t trailer_len)
3242 {
3243 void * frame_header;
3244
3245 frame_header = mbuf_data(m);
3246 if ((flags & IFF_FLAGS_HWCSUM) != 0) {
3247 m->m_pkthdr.csum_data = 0xffff;
3248 m->m_pkthdr.csum_flags =
3249 CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
3250 CSUM_IP_CHECKED | CSUM_IP_VALID;
3251 }
3252
3253 (void)ifnet_stat_increment_out(ifp, 1, m->m_pkthdr.len, 0);
3254 bpf_tap_out(ifp, DLT_EN10MB, m, NULL, 0);
3255
3256 if (trailer != 0) {
3257 feth_add_mbuf_trailer(m, trailer, trailer_len);
3258 }
3259 if (fcs) {
3260 feth_add_mbuf_fcs(m);
3261 }
3262
3263 (void)mbuf_pkthdr_setrcvif(m, peer);
3264 mbuf_pkthdr_setheader(m, frame_header);
3265 mbuf_pkthdr_adjustlen(m, -ETHER_HDR_LEN);
3266 (void)mbuf_setdata(m, (char *)mbuf_data(m) + ETHER_HDR_LEN,
3267 mbuf_len(m) - ETHER_HDR_LEN);
3268 bpf_tap_in(peer, DLT_EN10MB, m, frame_header,
3269 sizeof(struct ether_header));
3270 feth_enqueue_input(peer, m);
3271 }
3272
/*
 * feth_start
 * - TX start callback for the BSD-mode txstart output model
 * - dequeues packets from the interface output queue, duplicates each
 *   one and injects the copy into the peer's input path
 * - iff_start_busy serializes this routine; when copy_mbuf fails, the
 *   original packet is stashed in iff_pending_tx_packet and retried on
 *   the next start call
 */
static void
feth_start(ifnet_t ifp)
{
	struct mbuf * copy_m = NULL;
	if_fake_ref fakeif;
	iff_flags_t flags = 0;
	bool fcs;
	size_t trailer_len;
	ifnet_t peer = NULL;
	struct mbuf * m;
	struct mbuf * save_m;

	feth_lock();
	fakeif = ifnet_get_if_fake(ifp);
	if (fakeif == NULL) {
		feth_unlock();
		return;
	}

	/* another thread is already draining the queue */
	if (fakeif->iff_start_busy) {
		feth_unlock();
		printf("if_fake: start is busy\n");
		return;
	}

	/* snapshot the peer and output options while the lock is held */
	peer = fakeif->iff_peer;
	flags = fakeif->iff_flags;
	fcs = fakeif->iff_fcs;
	trailer_len = fakeif->iff_trailer_length;

	/* check for pending TX */
	m = fakeif->iff_pending_tx_packet;
	if (m != NULL) {
		if (peer != NULL) {
			/* retry the copy that failed on a previous pass */
			copy_m = copy_mbuf(m);
			if (copy_m == NULL) {
				/* still out of memory; try again later */
				feth_unlock();
				return;
			}
		}
		fakeif->iff_pending_tx_packet = NULL;
		m_freem(m);
		m = NULL;
	}
	fakeif->iff_start_busy = TRUE;
	feth_unlock();
	save_m = NULL;
	/* drain the output queue without holding the lock */
	for (;;) {
		if (copy_m != NULL) {
			VERIFY(peer != NULL);
			feth_output_common(ifp, copy_m, peer, flags, fcs,
			    feth_trailer, trailer_len);
			copy_m = NULL;
		}
		if (ifnet_dequeue(ifp, &m) != 0) {
			break;
		}
		if (peer == NULL) {
			/* no peer connected: drop outbound packets */
			m_freem(m);
		} else {
			copy_m = copy_mbuf(m);
			if (copy_m == NULL) {
				/* copy failed; keep the packet for later */
				save_m = m;
				break;
			}
			m_freem(m);
		}
	}
	peer = NULL;
	feth_lock();
	/* re-validate: the softc may have been detached while unlocked */
	fakeif = ifnet_get_if_fake(ifp);
	if (fakeif != NULL) {
		fakeif->iff_start_busy = FALSE;
		if (save_m != NULL && fakeif->iff_peer != NULL) {
			/* save it for next time */
			fakeif->iff_pending_tx_packet = save_m;
			save_m = NULL;
		}
	}
	feth_unlock();
	if (save_m != NULL) {
		/* didn't save packet, so free it */
		m_freem(save_m);
	}
}
3358
3359 static int
feth_output(ifnet_t ifp,struct mbuf * m)3360 feth_output(ifnet_t ifp, struct mbuf * m)
3361 {
3362 struct mbuf * copy_m;
3363 if_fake_ref fakeif;
3364 iff_flags_t flags;
3365 bool fcs;
3366 size_t trailer_len;
3367 ifnet_t peer = NULL;
3368
3369 if (m == NULL) {
3370 return 0;
3371 }
3372 copy_m = copy_mbuf(m);
3373 m_freem(m);
3374 m = NULL;
3375 if (copy_m == NULL) {
3376 /* count this as an output error */
3377 ifnet_stat_increment_out(ifp, 0, 0, 1);
3378 return 0;
3379 }
3380 feth_lock();
3381 fakeif = ifnet_get_if_fake(ifp);
3382 if (fakeif != NULL) {
3383 peer = fakeif->iff_peer;
3384 flags = fakeif->iff_flags;
3385 fcs = fakeif->iff_fcs;
3386 trailer_len = fakeif->iff_trailer_length;
3387 }
3388 feth_unlock();
3389 if (peer == NULL) {
3390 m_freem(copy_m);
3391 ifnet_stat_increment_out(ifp, 0, 0, 1);
3392 return 0;
3393 }
3394 feth_output_common(ifp, copy_m, peer, flags, fcs, feth_trailer,
3395 trailer_len);
3396 return 0;
3397 }
3398
/*
 * feth_config
 * - connect ifp to the given peer (peer != NULL) or disconnect it from
 *   its current peer (peer == NULL)
 * - a connection requires both endpoints to be feth interfaces that
 *   are not detaching and not already connected (and, under Skywalk,
 *   to share the same packet-pool mode)
 * - posts KEV_DL_LINK_ON/OFF on both interfaces after dropping the
 *   lock
 * - returns 0 on success, EINVAL or EBUSY on failure
 */
static int
feth_config(ifnet_t ifp, ifnet_t peer)
{
	int connected = FALSE;
	int disconnected = FALSE;
	int error = 0;
	if_fake_ref fakeif = NULL;

	feth_lock();
	fakeif = ifnet_get_if_fake(ifp);
	if (fakeif == NULL) {
		error = EINVAL;
		goto done;
	}
	if (peer != NULL) {
		/* connect to peer */
		if_fake_ref peer_fakeif;

		peer_fakeif = ifnet_get_if_fake(peer);
		if (peer_fakeif == NULL) {
			error = EINVAL;
			goto done;
		}
		/* both endpoints must be alive and unattached */
		if (feth_is_detaching(fakeif) ||
		    feth_is_detaching(peer_fakeif) ||
		    peer_fakeif->iff_peer != NULL ||
		    fakeif->iff_peer != NULL) {
			error = EBUSY;
		    goto done;
		}
#if SKYWALK
		/* packets flow between the two pools; modes must match */
		if (fakeif->iff_pp_mode !=
		    peer_fakeif->iff_pp_mode) {
			error = EINVAL;
			goto done;
		}
#endif /* SKYWALK */
		/* link the two interfaces symmetrically */
		fakeif->iff_peer = peer;
		peer_fakeif->iff_peer = ifp;
		connected = TRUE;
	} else if (fakeif->iff_peer != NULL) {
		/* disconnect from peer */
		if_fake_ref peer_fakeif;

		peer = fakeif->iff_peer;
		peer_fakeif = ifnet_get_if_fake(peer);
		if (peer_fakeif == NULL) {
			/* should not happen */
			error = EINVAL;
			goto done;
		}
		fakeif->iff_peer = NULL;
		peer_fakeif->iff_peer = NULL;
		disconnected = TRUE;
	}

done:
	feth_unlock();

	/* generate link status event if we connect or disconnect */
	if (connected) {
		interface_link_event(ifp, KEV_DL_LINK_ON);
		interface_link_event(peer, KEV_DL_LINK_ON);
	} else if (disconnected) {
		interface_link_event(ifp, KEV_DL_LINK_OFF);
		interface_link_event(peer, KEV_DL_LINK_OFF);
	}
	return error;
}
3468
3469 static int
feth_set_media(ifnet_t ifp,struct if_fake_request * iffr)3470 feth_set_media(ifnet_t ifp, struct if_fake_request * iffr)
3471 {
3472 if_fake_ref fakeif;
3473 int error;
3474
3475 if (iffr->iffr_media.iffm_count > IF_FAKE_MEDIA_LIST_MAX) {
3476 /* list is too long */
3477 return EINVAL;
3478 }
3479 feth_lock();
3480 fakeif = ifnet_get_if_fake(ifp);
3481 if (fakeif == NULL) {
3482 error = EINVAL;
3483 goto done;
3484 }
3485 fakeif->iff_media_count = iffr->iffr_media.iffm_count;
3486 bcopy(iffr->iffr_media.iffm_list, fakeif->iff_media_list,
3487 iffr->iffr_media.iffm_count * sizeof(fakeif->iff_media_list[0]));
3488 #if 0
3489 /* XXX: "auto-negotiate" active with peer? */
3490 /* generate link status event? */
3491 fakeif->iff_media_current = iffr->iffr_media.iffm_current;
3492 #endif
3493 error = 0;
3494 done:
3495 feth_unlock();
3496 return error;
3497 }
3498
3499 static int
if_fake_request_copyin(user_addr_t user_addr,struct if_fake_request * iffr,u_int32_t len)3500 if_fake_request_copyin(user_addr_t user_addr,
3501 struct if_fake_request *iffr, u_int32_t len)
3502 {
3503 int error;
3504
3505 if (user_addr == USER_ADDR_NULL || len < sizeof(*iffr)) {
3506 error = EINVAL;
3507 goto done;
3508 }
3509 error = copyin(user_addr, iffr, sizeof(*iffr));
3510 if (error != 0) {
3511 goto done;
3512 }
3513 if (iffr->iffr_reserved[0] != 0 || iffr->iffr_reserved[1] != 0 ||
3514 iffr->iffr_reserved[2] != 0 || iffr->iffr_reserved[3] != 0) {
3515 error = EINVAL;
3516 goto done;
3517 }
3518 done:
3519 return error;
3520 }
3521
3522 static int
feth_set_drvspec(ifnet_t ifp,uint32_t cmd,u_int32_t len,user_addr_t user_addr)3523 feth_set_drvspec(ifnet_t ifp, uint32_t cmd, u_int32_t len,
3524 user_addr_t user_addr)
3525 {
3526 int error;
3527 struct if_fake_request iffr;
3528 ifnet_t peer;
3529
3530 switch (cmd) {
3531 case IF_FAKE_S_CMD_SET_PEER:
3532 error = if_fake_request_copyin(user_addr, &iffr, len);
3533 if (error != 0) {
3534 break;
3535 }
3536 if (iffr.iffr_peer_name[0] == '\0') {
3537 error = feth_config(ifp, NULL);
3538 break;
3539 }
3540
3541 /* ensure nul termination */
3542 iffr.iffr_peer_name[IFNAMSIZ - 1] = '\0';
3543 peer = ifunit(iffr.iffr_peer_name);
3544 if (peer == NULL) {
3545 error = ENXIO;
3546 break;
3547 }
3548 if (ifnet_type(peer) != IFT_ETHER) {
3549 error = EINVAL;
3550 break;
3551 }
3552 if (strcmp(ifnet_name(peer), FAKE_ETHER_NAME) != 0) {
3553 error = EINVAL;
3554 break;
3555 }
3556 error = feth_config(ifp, peer);
3557 break;
3558 case IF_FAKE_S_CMD_SET_MEDIA:
3559 error = if_fake_request_copyin(user_addr, &iffr, len);
3560 if (error != 0) {
3561 break;
3562 }
3563 error = feth_set_media(ifp, &iffr);
3564 break;
3565 case IF_FAKE_S_CMD_SET_DEQUEUE_STALL:
3566 error = if_fake_request_copyin(user_addr, &iffr, len);
3567 if (error != 0) {
3568 break;
3569 }
3570 error = feth_enable_dequeue_stall(ifp,
3571 iffr.iffr_dequeue_stall);
3572 break;
3573 default:
3574 error = EOPNOTSUPP;
3575 break;
3576 }
3577 return error;
3578 }
3579
3580 static int
feth_get_drvspec(ifnet_t ifp,u_int32_t cmd,u_int32_t len,user_addr_t user_addr)3581 feth_get_drvspec(ifnet_t ifp, u_int32_t cmd, u_int32_t len,
3582 user_addr_t user_addr)
3583 {
3584 int error = EOPNOTSUPP;
3585 if_fake_ref fakeif;
3586 struct if_fake_request iffr;
3587 ifnet_t peer;
3588
3589 switch (cmd) {
3590 case IF_FAKE_G_CMD_GET_PEER:
3591 if (len < sizeof(iffr)) {
3592 error = EINVAL;
3593 break;
3594 }
3595 feth_lock();
3596 fakeif = ifnet_get_if_fake(ifp);
3597 if (fakeif == NULL) {
3598 feth_unlock();
3599 error = EOPNOTSUPP;
3600 break;
3601 }
3602 peer = fakeif->iff_peer;
3603 feth_unlock();
3604 bzero(&iffr, sizeof(iffr));
3605 if (peer != NULL) {
3606 strlcpy(iffr.iffr_peer_name,
3607 if_name(peer),
3608 sizeof(iffr.iffr_peer_name));
3609 }
3610 error = copyout(&iffr, user_addr, sizeof(iffr));
3611 break;
3612 default:
3613 break;
3614 }
3615 return error;
3616 }
3617
/*
 * Accessor union for the 32-bit and 64-bit layouts of the
 * SIOC[SG]DRVSPEC ioctl argument: the request pointer is stored once
 * via ifdrvu_p and then read through the view matching the command.
 */
union ifdrvu {
	struct ifdrv32 *ifdrvu_32;
	struct ifdrv64 *ifdrvu_64;
	void *ifdrvu_p;
};
3623
/*
 * feth_ioctl
 * - interface ioctl handler: address assignment, media queries, MTU
 *   get/set, driver-specific SIOC[SG]DRVSPEC commands, link-layer
 *   address and flag updates
 */
static int
feth_ioctl(ifnet_t ifp, u_long cmd, void * data)
{
	unsigned int count;
	struct ifdevmtu * devmtu_p;
	union ifdrvu drv;
	uint32_t drv_cmd;
	uint32_t drv_len;
	boolean_t drv_set_command = FALSE;
	int error = 0;
	struct ifmediareq * ifmr;
	struct ifreq * ifr;
	if_fake_ref fakeif;
	int status;
	user_addr_t user_addr;

	ifr = (struct ifreq *)data;
	switch (cmd) {
	case SIOCSIFADDR:
		/* mark the interface up when an address is assigned */
		ifnet_set_flags(ifp, IFF_UP, IFF_UP);
		break;

	case SIOCGIFMEDIA32:
	case SIOCGIFMEDIA64:
		feth_lock();
		fakeif = ifnet_get_if_fake(ifp);
		if (fakeif == NULL) {
			feth_unlock();
			return EOPNOTSUPP;
		}
		/* link is "active" only while connected to a peer */
		status = (fakeif->iff_peer != NULL)
		    ? (IFM_AVALID | IFM_ACTIVE) : IFM_AVALID;
		ifmr = (struct ifmediareq *)data;
		user_addr = (cmd == SIOCGIFMEDIA64) ?
		    ((struct ifmediareq64 *)ifmr)->ifmu_ulist :
		    CAST_USER_ADDR_T(((struct ifmediareq32 *)ifmr)->ifmu_ulist);
		count = ifmr->ifm_count;
		ifmr->ifm_active = IFM_ETHER;
		ifmr->ifm_current = IFM_ETHER;
		ifmr->ifm_mask = 0;
		ifmr->ifm_status = status;
		if (user_addr == USER_ADDR_NULL) {
			/* caller only asked for the list size */
			ifmr->ifm_count = fakeif->iff_media_count;
		} else if (count > 0) {
			if (count > fakeif->iff_media_count) {
				count = fakeif->iff_media_count;
			}
			ifmr->ifm_count = count;
			error = copyout(&fakeif->iff_media_list, user_addr,
			    count * sizeof(int));
		}
		feth_unlock();
		break;

	case SIOCGIFDEVMTU:
		/* report the current, max and min MTU */
		devmtu_p = &ifr->ifr_devmtu;
		devmtu_p->ifdm_current = ifnet_mtu(ifp);
		devmtu_p->ifdm_max = feth_max_mtu(ifp);
		devmtu_p->ifdm_min = IF_MINMTU;
		break;

	case SIOCSIFMTU:
		if ((unsigned int)ifr->ifr_mtu > feth_max_mtu(ifp) ||
		    ifr->ifr_mtu < IF_MINMTU) {
			error = EINVAL;
		} else {
			error = ifnet_set_mtu(ifp, ifr->ifr_mtu);
		}
		break;

	case SIOCSDRVSPEC32:
	case SIOCSDRVSPEC64:
		/* "set" commands require superuser privileges */
		error = proc_suser(current_proc());
		if (error != 0) {
			break;
		}
		drv_set_command = TRUE;
		OS_FALLTHROUGH;
	case SIOCGDRVSPEC32:
	case SIOCGDRVSPEC64:
		/* unpack the 32- or 64-bit request, then dispatch */
		drv.ifdrvu_p = data;
		if (cmd == SIOCGDRVSPEC32 || cmd == SIOCSDRVSPEC32) {
			drv_cmd = drv.ifdrvu_32->ifd_cmd;
			drv_len = drv.ifdrvu_32->ifd_len;
			user_addr = CAST_USER_ADDR_T(drv.ifdrvu_32->ifd_data);
		} else {
			drv_cmd = drv.ifdrvu_64->ifd_cmd;
			drv_len = drv.ifdrvu_64->ifd_len;
			user_addr = drv.ifdrvu_64->ifd_data;
		}
		if (drv_set_command) {
			error = feth_set_drvspec(ifp, drv_cmd, drv_len,
			    user_addr);
		} else {
			error = feth_get_drvspec(ifp, drv_cmd, drv_len,
			    user_addr);
		}
		break;

	case SIOCSIFLLADDR:
		error = ifnet_set_lladdr(ifp, ifr->ifr_addr.sa_data,
		    ifr->ifr_addr.sa_len);
		break;

	case SIOCSIFFLAGS:
		/* keep IFF_RUNNING in sync with IFF_UP */
		if ((ifp->if_flags & IFF_UP) != 0) {
			/* marked up, set running if not already set */
			if ((ifp->if_flags & IFF_RUNNING) == 0) {
				/* set running */
				error = ifnet_set_flags(ifp, IFF_RUNNING,
				    IFF_RUNNING);
			}
		} else if ((ifp->if_flags & IFF_RUNNING) != 0) {
			/* marked down, clear running */
			error = ifnet_set_flags(ifp, 0, IFF_RUNNING);
		}
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		/* multicast membership changes are accepted silently */
		error = 0;
		break;
	default:
		error = EOPNOTSUPP;
		break;
	}
	return error;
}
3752
3753 static void
feth_if_free(ifnet_t ifp)3754 feth_if_free(ifnet_t ifp)
3755 {
3756 if_fake_ref fakeif;
3757
3758 if (ifp == NULL) {
3759 return;
3760 }
3761 feth_lock();
3762 fakeif = ifnet_get_if_fake(ifp);
3763 if (fakeif == NULL) {
3764 feth_unlock();
3765 return;
3766 }
3767 ifp->if_softc = NULL;
3768 #if SKYWALK
3769 VERIFY(fakeif->iff_doorbell_tcall == NULL);
3770 #endif /* SKYWALK */
3771 feth_unlock();
3772 feth_release(fakeif);
3773 ifnet_release(ifp);
3774 return;
3775 }
3776
3777 __private_extern__ void
if_fake_init(void)3778 if_fake_init(void)
3779 {
3780 int error;
3781
3782 #if SKYWALK
3783 (void)feth_register_nexus_domain_provider();
3784 #endif /* SKYWALK */
3785 error = if_clone_attach(&feth_cloner);
3786 if (error != 0) {
3787 return;
3788 }
3789 return;
3790 }
3791