/* xref: /xnu-8796.101.5/bsd/net/if_fake.c (revision aca3beaa3dfbd42498b42c5e5ce20a938e6554e5) */
1 /*
2  * Copyright (c) 2015-2022 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 /*
30  * if_fake.c
31  * - fake network interface used for testing
32  * - "feth" (e.g. "feth0", "feth1") is a virtual ethernet interface that allows
33  *   two instances to have their output/input paths "crossed-over" so that
34  *   output on one is input on the other
35  */
36 
37 /*
38  * Modification History:
39  *
40  * September 9, 2015	Dieter Siegmund ([email protected])
41  * - created
42  */
43 
44 #include <sys/param.h>
45 #include <sys/kernel.h>
46 #include <sys/malloc.h>
47 #include <sys/mbuf.h>
48 #include <sys/queue.h>
49 #include <sys/socket.h>
50 #include <sys/sockio.h>
51 #include <sys/sysctl.h>
52 #include <sys/systm.h>
53 #include <sys/kern_event.h>
54 #include <sys/mcache.h>
55 #include <sys/syslog.h>
56 
57 #include <net/bpf.h>
58 #include <net/ethernet.h>
59 #include <net/if.h>
60 #include <net/if_vlan_var.h>
61 #include <net/if_fake_var.h>
62 #include <net/if_arp.h>
63 #include <net/if_dl.h>
64 #include <net/if_ether.h>
65 #include <net/if_types.h>
66 #include <libkern/OSAtomic.h>
67 
68 #include <net/dlil.h>
69 
70 #include <net/kpi_interface.h>
71 #include <net/kpi_protocol.h>
72 
73 #include <kern/locks.h>
74 #include <kern/zalloc.h>
75 
76 #ifdef INET
77 #include <netinet/in.h>
78 #include <netinet/if_ether.h>
79 #endif
80 
81 #include <net/if_media.h>
82 #include <net/ether_if_module.h>
83 #if SKYWALK
84 #include <skywalk/os_skywalk_private.h>
85 #include <skywalk/nexus/netif/nx_netif.h>
86 #include <skywalk/channel/channel_var.h>
87 #endif /* SKYWALK */
88 
89 static boolean_t
is_power_of_two(unsigned int val)90 is_power_of_two(unsigned int val)
91 {
92 	return (val & (val - 1)) == 0;
93 }
94 
95 #define FAKE_ETHER_NAME         "feth"
96 
97 SYSCTL_DECL(_net_link);
98 SYSCTL_NODE(_net_link, OID_AUTO, fake, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
99     "Fake interface");
100 
101 static int if_fake_txstart = 1;
102 SYSCTL_INT(_net_link_fake, OID_AUTO, txstart, CTLFLAG_RW | CTLFLAG_LOCKED,
103     &if_fake_txstart, 0, "Fake interface TXSTART mode");
104 
105 static int if_fake_hwcsum = 0;
106 SYSCTL_INT(_net_link_fake, OID_AUTO, hwcsum, CTLFLAG_RW | CTLFLAG_LOCKED,
107     &if_fake_hwcsum, 0, "Fake interface simulate hardware checksum");
108 
109 static int if_fake_nxattach = 0;
110 SYSCTL_INT(_net_link_fake, OID_AUTO, nxattach, CTLFLAG_RW | CTLFLAG_LOCKED,
111     &if_fake_nxattach, 0, "Fake interface auto-attach nexus");
112 
113 static int if_fake_bsd_mode = 1;
114 SYSCTL_INT(_net_link_fake, OID_AUTO, bsd_mode, CTLFLAG_RW | CTLFLAG_LOCKED,
115     &if_fake_bsd_mode, 0, "Fake interface attach as BSD interface");
116 
117 static int if_fake_debug = 0;
118 SYSCTL_INT(_net_link_fake, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
119     &if_fake_debug, 0, "Fake interface debug logs");
120 
121 static int if_fake_wmm_mode = 0;
122 SYSCTL_INT(_net_link_fake, OID_AUTO, wmm_mode, CTLFLAG_RW | CTLFLAG_LOCKED,
123     &if_fake_wmm_mode, 0, "Fake interface in 802.11 WMM mode");
124 
125 static int if_fake_multibuflet = 0;
126 SYSCTL_INT(_net_link_fake, OID_AUTO, multibuflet, CTLFLAG_RW | CTLFLAG_LOCKED,
127     &if_fake_multibuflet, 0, "Fake interface using multi-buflet packets");
128 
129 static int if_fake_low_latency = 0;
130 SYSCTL_INT(_net_link_fake, OID_AUTO, low_latency, CTLFLAG_RW | CTLFLAG_LOCKED,
131     &if_fake_low_latency, 0, "Fake interface with a low latency qset");
132 
133 static int if_fake_switch_combined_mode = 0;
134 SYSCTL_INT(_net_link_fake, OID_AUTO, switch_combined_mode,
135     CTLFLAG_RW | CTLFLAG_LOCKED, &if_fake_switch_combined_mode, 0,
136     "Switch a qset between combined and separate mode during dequeues");
137 
138 static int if_fake_switch_mode_frequency = 10;
139 SYSCTL_INT(_net_link_fake, OID_AUTO, switch_mode_frequency,
140     CTLFLAG_RW | CTLFLAG_LOCKED, &if_fake_switch_mode_frequency, 0,
141     "The number of dequeues before we switch between the combined and separated mode");
142 
143 static int if_fake_tso_support = 0;
144 SYSCTL_INT(_net_link_fake, OID_AUTO, tso_support, CTLFLAG_RW | CTLFLAG_LOCKED,
145     &if_fake_tso_support, 0, "Fake interface with support for TSO offload");
146 
147 typedef enum {
148 	IFF_PP_MODE_GLOBAL = 0,         /* share a global pool */
149 	IFF_PP_MODE_PRIVATE = 1,        /* creates its own rx/tx pool */
150 	IFF_PP_MODE_PRIVATE_SPLIT = 2,  /* creates its own split rx & tx pool */
151 } iff_pktpool_mode_t;
152 static iff_pktpool_mode_t if_fake_pktpool_mode = IFF_PP_MODE_GLOBAL;
153 SYSCTL_INT(_net_link_fake, OID_AUTO, pktpool_mode, CTLFLAG_RW | CTLFLAG_LOCKED,
154     &if_fake_pktpool_mode, IFF_PP_MODE_GLOBAL,
155     "Fake interface packet pool mode (0 global, 1 private, 2 private split");
156 
157 #define FETH_LINK_LAYER_AGGRETATION_FACTOR_MAX 512
158 #define FETH_LINK_LAYER_AGGRETATION_FACTOR_DEF 96
159 static int if_fake_link_layer_aggregation_factor =
160     FETH_LINK_LAYER_AGGRETATION_FACTOR_DEF;
161 static int
162 feth_link_layer_aggregation_factor_sysctl SYSCTL_HANDLER_ARGS
163 {
164 #pragma unused(oidp, arg1, arg2)
165 	unsigned int new_value;
166 	int changed;
167 	int error;
168 
169 	error = sysctl_io_number(req, if_fake_link_layer_aggregation_factor,
170 	    sizeof(if_fake_link_layer_aggregation_factor), &new_value,
171 	    &changed);
172 	if (error == 0 && changed != 0) {
173 		if (new_value <= 0 ||
174 		    new_value > FETH_LINK_LAYER_AGGRETATION_FACTOR_MAX) {
175 			return EINVAL;
176 		}
177 		if_fake_link_layer_aggregation_factor = new_value;
178 	}
179 	return error;
180 }
181 
182 SYSCTL_PROC(_net_link_fake, OID_AUTO, link_layer_aggregation_factor,
183     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
184     0, 0, feth_link_layer_aggregation_factor_sysctl, "IU",
185     "Fake interface link layer aggregation factor");
186 
187 #define FETH_TX_HEADROOM_MAX      32
188 static unsigned int if_fake_tx_headroom = FETH_TX_HEADROOM_MAX;
189 static int
190 feth_tx_headroom_sysctl SYSCTL_HANDLER_ARGS
191 {
192 #pragma unused(oidp, arg1, arg2)
193 	unsigned int new_value;
194 	int changed;
195 	int error;
196 
197 	error = sysctl_io_number(req, if_fake_tx_headroom,
198 	    sizeof(if_fake_tx_headroom), &new_value, &changed);
199 	if (error == 0 && changed != 0) {
200 		if (new_value > FETH_TX_HEADROOM_MAX ||
201 		    (new_value % 8) != 0) {
202 			return EINVAL;
203 		}
204 		if_fake_tx_headroom = new_value;
205 	}
206 	return 0;
207 }
208 
209 SYSCTL_PROC(_net_link_fake, OID_AUTO, tx_headroom,
210     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
211     0, 0, feth_tx_headroom_sysctl, "IU", "Fake ethernet Tx headroom");
212 
213 static int if_fake_fcs = 0;
214 SYSCTL_INT(_net_link_fake, OID_AUTO, fcs, CTLFLAG_RW | CTLFLAG_LOCKED,
215     &if_fake_fcs, 0, "Fake interface using frame check sequence");
216 
217 #define FETH_TRAILER_LENGTH_MAX 28
218 char feth_trailer[FETH_TRAILER_LENGTH_MAX + 1] = "trailertrailertrailertrailer";
219 static unsigned int if_fake_trailer_length = 0;
220 static int
221 feth_trailer_length_sysctl SYSCTL_HANDLER_ARGS
222 {
223 #pragma unused(oidp, arg1, arg2)
224 	unsigned int new_value;
225 	int changed;
226 	int error;
227 
228 	error = sysctl_io_number(req, if_fake_trailer_length,
229 	    sizeof(if_fake_trailer_length), &new_value, &changed);
230 	if (error == 0 && changed != 0) {
231 		if (new_value > FETH_TRAILER_LENGTH_MAX) {
232 			return EINVAL;
233 		}
234 		if_fake_trailer_length = new_value;
235 	}
236 	return 0;
237 }
238 
239 SYSCTL_PROC(_net_link_fake, OID_AUTO, trailer_length,
240     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
241     feth_trailer_length_sysctl, "IU", "Fake interface frame trailer length");
242 
243 /* sysctl net.link.fake.max_mtu */
244 #define FETH_MAX_MTU_DEFAULT    2048
245 #define FETH_MAX_MTU_MAX        ((16 * 1024) - ETHER_HDR_LEN)
246 
247 static unsigned int if_fake_max_mtu = FETH_MAX_MTU_DEFAULT;
248 
249 /* sysctl net.link.fake.buflet_size */
250 #define FETH_BUFLET_SIZE_MIN            512
251 #define FETH_BUFLET_SIZE_MAX            (32 * 1024)
252 #define FETH_TSO_BUFLET_SIZE            (16 * 1024)
253 
254 static unsigned int if_fake_buflet_size = FETH_BUFLET_SIZE_MIN;
255 static unsigned int if_fake_tso_buffer_size = FETH_TSO_BUFLET_SIZE;
256 
257 static int
258 feth_tso_buffer_size_sysctl SYSCTL_HANDLER_ARGS
259 {
260 #pragma unused(oidp, arg1, arg2)
261 	unsigned int new_value;
262 	int changed;
263 	int error;
264 
265 	error = sysctl_io_number(req, if_fake_tso_buffer_size,
266 	    sizeof(if_fake_tso_buffer_size), &new_value, &changed);
267 	if (error == 0 && changed != 0) {
268 		/* must be a power of 2 between min and max */
269 		if (new_value > FETH_BUFLET_SIZE_MAX ||
270 		    new_value < FETH_BUFLET_SIZE_MIN ||
271 		    !is_power_of_two(new_value)) {
272 			return EINVAL;
273 		}
274 		if_fake_tso_buffer_size = new_value;
275 	}
276 	return 0;
277 }
278 
279 SYSCTL_PROC(_net_link_fake, OID_AUTO, tso_buf_size,
280     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
281     0, 0, feth_tso_buffer_size_sysctl, "IU", "Fake interface TSO buffer size");
282 
283 static int
284 feth_max_mtu_sysctl SYSCTL_HANDLER_ARGS
285 {
286 #pragma unused(oidp, arg1, arg2)
287 	unsigned int new_value;
288 	int changed;
289 	int error;
290 
291 	error = sysctl_io_number(req, if_fake_max_mtu,
292 	    sizeof(if_fake_max_mtu), &new_value, &changed);
293 	if (error == 0 && changed != 0) {
294 		if (new_value > FETH_MAX_MTU_MAX ||
295 		    new_value < ETHERMTU ||
296 		    new_value <= if_fake_buflet_size) {
297 			return EINVAL;
298 		}
299 		if_fake_max_mtu = new_value;
300 	}
301 	return 0;
302 }
303 
304 SYSCTL_PROC(_net_link_fake, OID_AUTO, max_mtu,
305     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
306     0, 0, feth_max_mtu_sysctl, "IU", "Fake interface maximum MTU");
307 
308 static int
309 feth_buflet_size_sysctl SYSCTL_HANDLER_ARGS
310 {
311 #pragma unused(oidp, arg1, arg2)
312 	unsigned int new_value;
313 	int changed;
314 	int error;
315 
316 	error = sysctl_io_number(req, if_fake_buflet_size,
317 	    sizeof(if_fake_buflet_size), &new_value, &changed);
318 	if (error == 0 && changed != 0) {
319 		/* must be a power of 2 between min and max */
320 		if (new_value > FETH_BUFLET_SIZE_MAX ||
321 		    new_value < FETH_BUFLET_SIZE_MIN ||
322 		    !is_power_of_two(new_value) ||
323 		    new_value >= if_fake_max_mtu) {
324 			return EINVAL;
325 		}
326 		if_fake_buflet_size = new_value;
327 	}
328 	return 0;
329 }
330 
331 SYSCTL_PROC(_net_link_fake, OID_AUTO, buflet_size,
332     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
333     0, 0, feth_buflet_size_sysctl, "IU", "Fake interface buflet size");
334 
335 static unsigned int if_fake_user_access = 0;
336 
337 static int
338 feth_user_access_sysctl SYSCTL_HANDLER_ARGS
339 {
340 #pragma unused(oidp, arg1, arg2)
341 	unsigned int new_value;
342 	int changed;
343 	int error;
344 
345 	error = sysctl_io_number(req, if_fake_user_access,
346 	    sizeof(if_fake_user_access), &new_value, &changed);
347 	if (error == 0 && changed != 0) {
348 		if (new_value != 0) {
349 			if (new_value != 1) {
350 				return EINVAL;
351 			}
352 		}
353 		if_fake_user_access = new_value;
354 	}
355 	return 0;
356 }
357 
358 SYSCTL_PROC(_net_link_fake, OID_AUTO, user_access,
359     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
360     0, 0, feth_user_access_sysctl, "IU", "Fake interface user access");
361 
362 /* sysctl net.link.fake.if_adv_intvl (unit: millisecond) */
363 #define FETH_IF_ADV_INTVL_MIN            10
364 #define FETH_IF_ADV_INTVL_MAX            INT_MAX
365 
366 static int if_fake_if_adv_interval = 0; /* no interface advisory */
367 static int
368 feth_if_adv_interval_sysctl SYSCTL_HANDLER_ARGS
369 {
370 #pragma unused(oidp, arg1, arg2)
371 	unsigned int new_value;
372 	int changed;
373 	int error;
374 
375 	error = sysctl_io_number(req, if_fake_if_adv_interval,
376 	    sizeof(if_fake_if_adv_interval), &new_value, &changed);
377 	if (error == 0 && changed != 0) {
378 		if ((new_value != 0) && (new_value > FETH_IF_ADV_INTVL_MAX ||
379 		    new_value < FETH_IF_ADV_INTVL_MIN)) {
380 			return EINVAL;
381 		}
382 		if_fake_if_adv_interval = new_value;
383 	}
384 	return 0;
385 }
386 
387 SYSCTL_PROC(_net_link_fake, OID_AUTO, if_adv_intvl,
388     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
389     feth_if_adv_interval_sysctl, "IU",
390     "Fake interface will generate interface advisories reports at the specified interval in ms");
391 
392 /* sysctl net.link.fake.tx_drops */
393 /*
394  * Fake ethernet will drop packet on the transmit path at the specified
395  * rate, i.e drop one in every if_fake_tx_drops number of packets.
396  */
397 #define FETH_TX_DROPS_MIN            0
398 #define FETH_TX_DROPS_MAX            INT_MAX
399 static int if_fake_tx_drops = 0; /* no packets are dropped */
400 static int
401 feth_fake_tx_drops_sysctl SYSCTL_HANDLER_ARGS
402 {
403 #pragma unused(oidp, arg1, arg2)
404 	unsigned int new_value;
405 	int changed;
406 	int error;
407 
408 	error = sysctl_io_number(req, if_fake_tx_drops,
409 	    sizeof(if_fake_tx_drops), &new_value, &changed);
410 	if (error == 0 && changed != 0) {
411 		if (new_value > FETH_TX_DROPS_MAX ||
412 		    new_value < FETH_TX_DROPS_MIN) {
413 			return EINVAL;
414 		}
415 		if_fake_tx_drops = new_value;
416 	}
417 	return 0;
418 }
419 
420 SYSCTL_PROC(_net_link_fake, OID_AUTO, tx_drops,
421     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
422     feth_fake_tx_drops_sysctl, "IU",
423     "Fake interface will intermittently drop packets on Tx path");
424 
425 /* sysctl net.link.fake.tx_completion_mode */
426 typedef enum {
427 	IFF_TX_COMPL_MODE_SYNC = 0,
428 	IFF_TX_COMPL_MODE_ASYNC = 1,
429 } iff_tx_completion_mode_t;
430 static iff_tx_completion_mode_t if_tx_completion_mode = IFF_TX_COMPL_MODE_SYNC;
431 static int
432 feth_fake_tx_completion_mode_sysctl SYSCTL_HANDLER_ARGS
433 {
434 #pragma unused(oidp, arg1, arg2)
435 	unsigned int new_value;
436 	int changed;
437 	int error;
438 
439 	error = sysctl_io_number(req, if_tx_completion_mode,
440 	    sizeof(if_tx_completion_mode), &new_value, &changed);
441 	if (error == 0 && changed != 0) {
442 		if (new_value > IFF_TX_COMPL_MODE_ASYNC ||
443 		    new_value < IFF_TX_COMPL_MODE_SYNC) {
444 			return EINVAL;
445 		}
446 		if_tx_completion_mode = new_value;
447 	}
448 	return 0;
449 }
450 SYSCTL_PROC(_net_link_fake, OID_AUTO, tx_completion_mode,
451     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
452     feth_fake_tx_completion_mode_sysctl, "IU",
453     "Fake interface tx completion mode (0 synchronous, 1 asynchronous)");
454 
455 /* sysctl net.link.fake.llink_cnt */
456 
457 /* The maximum number of logical links (including default link) */
458 #define FETH_MAX_LLINKS 16
459 /*
460  * The default number of logical links (including default link).
461  * Zero means logical link mode is disabled.
462  */
463 #define FETH_DEF_LLINKS 0
464 
465 static uint32_t if_fake_llink_cnt = FETH_DEF_LLINKS;
466 static int
467 feth_fake_llink_cnt_sysctl SYSCTL_HANDLER_ARGS
468 {
469 #pragma unused(oidp, arg1, arg2)
470 	unsigned int new_value;
471 	int changed;
472 	int error;
473 
474 	error = sysctl_io_number(req, if_fake_llink_cnt,
475 	    sizeof(if_fake_llink_cnt), &new_value, &changed);
476 	if (error == 0 && changed != 0) {
477 		if (new_value > FETH_MAX_LLINKS) {
478 			return EINVAL;
479 		}
480 		if_fake_llink_cnt = new_value;
481 	}
482 	return 0;
483 }
484 
485 SYSCTL_PROC(_net_link_fake, OID_AUTO, llink_cnt,
486     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
487     feth_fake_llink_cnt_sysctl, "IU",
488     "Fake interface logical link count");
489 
490 /* sysctl net.link.fake.qset_cnt */
491 
492 /* The maximum number of qsets for each logical link */
493 #define FETH_MAX_QSETS  16
494 /* The default number of qsets for each logical link */
495 #define FETH_DEF_QSETS  4
496 
497 static uint32_t if_fake_qset_cnt = FETH_DEF_QSETS;
498 static int
499 feth_fake_qset_cnt_sysctl SYSCTL_HANDLER_ARGS
500 {
501 #pragma unused(oidp, arg1, arg2)
502 	unsigned int new_value;
503 	int changed;
504 	int error;
505 
506 	error = sysctl_io_number(req, if_fake_qset_cnt,
507 	    sizeof(if_fake_qset_cnt), &new_value, &changed);
508 	if (error == 0 && changed != 0) {
509 		if (new_value == 0 ||
510 		    new_value > FETH_MAX_QSETS) {
511 			return EINVAL;
512 		}
513 		if_fake_qset_cnt = new_value;
514 	}
515 	return 0;
516 }
517 
518 SYSCTL_PROC(_net_link_fake, OID_AUTO, qset_cnt,
519     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
520     feth_fake_qset_cnt_sysctl, "IU",
521     "Fake interface queue set count");
522 
523 /**
524 ** virtual ethernet structures, types
525 **/
526 
527 #define IFF_NUM_TX_RINGS_WMM_MODE       4
528 #define IFF_NUM_RX_RINGS_WMM_MODE       1
529 #define IFF_MAX_TX_RINGS        IFF_NUM_TX_RINGS_WMM_MODE
530 #define IFF_MAX_RX_RINGS        IFF_NUM_RX_RINGS_WMM_MODE
531 #define IFF_NUM_TX_QUEUES_WMM_MODE      4
532 #define IFF_NUM_RX_QUEUES_WMM_MODE      1
533 #define IFF_MAX_TX_QUEUES       IFF_NUM_TX_QUEUES_WMM_MODE
534 #define IFF_MAX_RX_QUEUES       IFF_NUM_RX_QUEUES_WMM_MODE
535 
536 #define IFF_MAX_BATCH_SIZE 32
537 
538 typedef uint16_t        iff_flags_t;
539 #define IFF_FLAGS_HWCSUM                0x0001
540 #define IFF_FLAGS_BSD_MODE              0x0002
541 #define IFF_FLAGS_DETACHING             0x0004
542 #define IFF_FLAGS_WMM_MODE              0x0008
543 #define IFF_FLAGS_MULTIBUFLETS          0x0010
544 #define IFF_FLAGS_TSO_SUPPORT           0x0020
545 
546 #if SKYWALK
547 
/* identity of the netif nexus backing a feth interface */
typedef struct {
	uuid_t                  fnx_provider;   /* nexus provider uuid */
	uuid_t                  fnx_instance;   /* nexus instance uuid */
} fake_nx, *fake_nx_t;

/* a single netif queue within a queue set */
typedef struct {
	kern_netif_queue_t      fq_queue;
} fake_queue;

/* a queue set: its rx/tx queues plus bookkeeping */
typedef struct {
	kern_netif_qset_t       fqs_qset; /* provided by xnu */
	fake_queue              fqs_rx_queue[IFF_MAX_RX_QUEUES];
	fake_queue              fqs_tx_queue[IFF_MAX_TX_QUEUES];
	uint32_t                fqs_rx_queue_cnt;
	uint32_t                fqs_tx_queue_cnt;
	uint32_t                fqs_llink_idx;  /* index of owning llink */
	uint32_t                fqs_idx;        /* index within the llink */
	uint32_t                fqs_dequeue_cnt;
	uint64_t                fqs_id;
	boolean_t               fqs_combined_mode;
} fake_qset;

/* a logical link and its queue sets */
typedef struct {
	uint64_t                fl_id;
	uint32_t                fl_idx;
	uint32_t                fl_qset_cnt;
	fake_qset               fl_qset[FETH_MAX_QSETS];
} fake_llink;
576 
577 static kern_pbufpool_t         S_pp;
578 
579 #define IFF_TT_OUTPUT   0x01 /* generate trace_tag on output */
580 #define IFF_TT_INPUT    0x02 /* generate trace_tag on input */
581 static int if_fake_trace_tag_flags = 0;
582 SYSCTL_INT(_net_link_fake, OID_AUTO, trace_tag, CTLFLAG_RW | CTLFLAG_LOCKED,
583     &if_fake_trace_tag_flags, 0, "Fake interface generate trace_tag");
584 static packet_trace_tag_t if_fake_trace_tag_current = 1;
585 
586 #endif /* SKYWALK */
587 
/*
 * struct if_fake
 * Per-interface state for a feth instance.  Reference-counted via
 * feth_retain()/feth_release(); freed by feth_free().
 */
struct if_fake {
	char                    iff_name[IFNAMSIZ]; /* our unique id */
	ifnet_t                 iff_ifp;
	iff_flags_t             iff_flags;      /* IFF_FLAGS_* bits */
	uint32_t                iff_retain_count;
	ifnet_t                 iff_peer;       /* the other end */
	int                     iff_media_current;
	int                     iff_media_active;
	uint32_t                iff_media_count;
	int                     iff_media_list[IF_FAKE_MEDIA_LIST_MAX];
	struct mbuf *           iff_pending_tx_packet; /* BSD mode only */
	boolean_t               iff_start_busy;
	unsigned int            iff_max_mtu;
	uint32_t                iff_fcs;        /* nonzero: frame check sequence in use */
	uint32_t                iff_trailer_length;
#if SKYWALK
	fake_nx                 iff_nx;         /* netif nexus identity */
	struct netif_stats      *iff_nifs;
	uint32_t                iff_nifs_ref;
	uint32_t                iff_llink_cnt;
	kern_channel_ring_t     iff_rx_ring[IFF_MAX_RX_RINGS];
	kern_channel_ring_t     iff_tx_ring[IFF_MAX_TX_RINGS];
	fake_llink             *iff_llink __counted_by(FETH_MAX_LLINKS);
	thread_call_t           iff_doorbell_tcall;
	thread_call_t           iff_if_adv_tcall; /* interface advisory timer */
	boolean_t               iff_doorbell_tcall_active;
	boolean_t               iff_waiting_for_tcall;
	boolean_t               iff_channel_connected;
	iff_pktpool_mode_t      iff_pp_mode;    /* global / private / split */
	kern_pbufpool_t         iff_rx_pp;
	kern_pbufpool_t         iff_tx_pp;
	uint32_t                iff_tx_headroom;
	unsigned int            iff_adv_interval; /* ms; 0 = no advisories */
	uint32_t                iff_tx_drop_rate; /* drop 1 in N on tx; 0 = none */
	uint32_t                iff_tx_pkts_count;
	iff_tx_completion_mode_t iff_tx_completion_mode;
	bool                    iff_intf_adv_enabled;
	void                    *iff_intf_adv_kern_ctx;
	kern_nexus_capab_interface_advisory_notify_fn_t iff_intf_adv_notify;
#endif /* SKYWALK */
};
629 
630 typedef struct if_fake * if_fake_ref;
631 
632 static if_fake_ref
633 ifnet_get_if_fake(ifnet_t ifp);
634 
/* debug logging, gated by the net.link.fake.debug sysctl */
#define FETH_DPRINTF(fmt, ...)                                  \
	{ if (if_fake_debug != 0) printf("%s " fmt, __func__, ## __VA_ARGS__); }
637 
638 static inline boolean_t
feth_in_bsd_mode(if_fake_ref fakeif)639 feth_in_bsd_mode(if_fake_ref fakeif)
640 {
641 	return (fakeif->iff_flags & IFF_FLAGS_BSD_MODE) != 0;
642 }
643 
/* mark the interface as detaching; tested via feth_is_detaching() */
static inline void
feth_set_detaching(if_fake_ref fakeif)
{
	fakeif->iff_flags |= IFF_FLAGS_DETACHING;
}
649 
650 static inline boolean_t
feth_is_detaching(if_fake_ref fakeif)651 feth_is_detaching(if_fake_ref fakeif)
652 {
653 	return (fakeif->iff_flags & IFF_FLAGS_DETACHING) != 0;
654 }
655 
656 static int
feth_enable_dequeue_stall(ifnet_t ifp,uint32_t enable)657 feth_enable_dequeue_stall(ifnet_t ifp, uint32_t enable)
658 {
659 	int error;
660 
661 	if (enable != 0) {
662 		error = ifnet_disable_output(ifp);
663 	} else {
664 		error = ifnet_enable_output(ifp);
665 	}
666 
667 	return error;
668 }
669 
670 #if SKYWALK
671 static inline boolean_t
feth_in_wmm_mode(if_fake_ref fakeif)672 feth_in_wmm_mode(if_fake_ref fakeif)
673 {
674 	return (fakeif->iff_flags & IFF_FLAGS_WMM_MODE) != 0;
675 }
676 
677 static inline boolean_t
feth_using_multibuflets(if_fake_ref fakeif)678 feth_using_multibuflets(if_fake_ref fakeif)
679 {
680 	return (fakeif->iff_flags & IFF_FLAGS_MULTIBUFLETS) != 0;
681 }
682 static void feth_detach_netif_nexus(if_fake_ref fakeif);
683 
684 static inline boolean_t
feth_has_intf_advisory_configured(if_fake_ref fakeif)685 feth_has_intf_advisory_configured(if_fake_ref fakeif)
686 {
687 	return fakeif->iff_adv_interval > 0;
688 }
689 
690 static inline bool
feth_supports_tso(if_fake_ref fakeif)691 feth_supports_tso(if_fake_ref fakeif)
692 {
693 	return (fakeif->iff_flags & IFF_FLAGS_TSO_SUPPORT) != 0;
694 }
695 #endif /* SKYWALK */
696 
697 #define FETH_MAXUNIT    IF_MAXUNIT
698 #define FETH_ZONE_MAX_ELEM      MIN(IFNETS_MAX, FETH_MAXUNIT)
699 
700 static  int feth_clone_create(struct if_clone *, u_int32_t, void *);
701 static  int feth_clone_destroy(ifnet_t);
702 static  int feth_output(ifnet_t ifp, struct mbuf *m);
703 static  void feth_start(ifnet_t ifp);
704 static  int feth_ioctl(ifnet_t ifp, u_long cmd, void * addr);
705 static  int feth_config(ifnet_t ifp, ifnet_t peer);
706 static  void feth_if_free(ifnet_t ifp);
707 static  void feth_ifnet_set_attrs(if_fake_ref fakeif, ifnet_t ifp);
708 static  void feth_free(if_fake_ref fakeif);
709 
/*
 * if_clone registration for "feth": routes creation/destruction of
 * feth<unit> interfaces (units 0..FETH_MAXUNIT) to the handlers above.
 */
static struct if_clone
    feth_cloner = IF_CLONE_INITIALIZER(FAKE_ETHER_NAME,
    feth_clone_create,
    feth_clone_destroy,
    0,
    FETH_MAXUNIT);
716 static  void interface_link_event(ifnet_t ifp, u_int32_t event_code);
717 
/* some media words to pretend to be ethernet */
static int default_media_words[] = {
	IFM_MAKEWORD(IFM_ETHER, 0, 0, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_10G_T, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_2500_T, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_5000_T, IFM_FDX, 0),

	IFM_MAKEWORD(IFM_ETHER, IFM_10G_KX4, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_20G_KR2, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_2500_SX, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_25G_KR, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_40G_SR4, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_50G_CR2, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_56G_R4, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_100G_CR4, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_400G_AUI8, IFM_FDX, 0),
};
/* element count; valid because default_media_words is a true array */
#define default_media_words_count (sizeof(default_media_words)          \
	                           / sizeof (default_media_words[0]))
737 
738 /**
739 ** veth locks
740 **/
741 
742 static LCK_GRP_DECLARE(feth_lck_grp, "fake");
743 static LCK_MTX_DECLARE(feth_lck_mtx, &feth_lck_grp);
744 
/* acquire the global feth mutex protecting shared feth state */
static inline void
feth_lock(void)
{
	lck_mtx_lock(&feth_lck_mtx);
}
750 
/* release the global feth mutex */
static inline void
feth_unlock(void)
{
	lck_mtx_unlock(&feth_lck_mtx);
}
756 
757 static inline int
get_max_mtu(int bsd_mode,unsigned int max_mtu)758 get_max_mtu(int bsd_mode, unsigned int max_mtu)
759 {
760 	unsigned int    mtu;
761 
762 	if (bsd_mode != 0) {
763 		mtu = (njcl > 0) ? (M16KCLBYTES - ETHER_HDR_LEN)
764 		    : MBIGCLBYTES - ETHER_HDR_LEN;
765 		if (mtu > max_mtu) {
766 			mtu = max_mtu;
767 		}
768 	} else {
769 		mtu = max_mtu;
770 	}
771 	return mtu;
772 }
773 
774 static inline unsigned int
feth_max_mtu(ifnet_t ifp)775 feth_max_mtu(ifnet_t ifp)
776 {
777 	if_fake_ref     fakeif;
778 	unsigned int    max_mtu = ETHERMTU;
779 
780 	feth_lock();
781 	fakeif = ifnet_get_if_fake(ifp);
782 	if (fakeif != NULL) {
783 		max_mtu = fakeif->iff_max_mtu;
784 	}
785 	feth_unlock();
786 	return max_mtu;
787 }
788 
/*
 * feth_free
 * Final teardown, called from feth_release() once the last reference
 * is dropped.  BSD mode: free any pending tx mbuf.  Skywalk mode:
 * release the rx/tx packet pool references, destroying the shared
 * global pool when this was its last user.  Finally free the llink
 * array and the if_fake itself.
 */
static void
feth_free(if_fake_ref fakeif)
{
	VERIFY(fakeif->iff_retain_count == 0);
	if (feth_in_bsd_mode(fakeif)) {
		if (fakeif->iff_pending_tx_packet) {
			m_freem(fakeif->iff_pending_tx_packet);
		}
	}
#if SKYWALK
	else {
		if (fakeif->iff_pp_mode == IFF_PP_MODE_GLOBAL) {
			/* rx and tx must both point at the shared pool */
			VERIFY(fakeif->iff_rx_pp == S_pp);
			VERIFY(fakeif->iff_tx_pp == S_pp);
			pp_release(fakeif->iff_rx_pp);
			fakeif->iff_rx_pp = NULL;
			pp_release(fakeif->iff_tx_pp);
			fakeif->iff_tx_pp = NULL;
			feth_lock();
			/* only the creation reference remains: destroy pool */
			if (S_pp->pp_refcnt == 1) {
				pp_release(S_pp);
				S_pp = NULL;
			}
			feth_unlock();
		} else {
			/* private pool(s); rx and tx may be the same pool */
			if (fakeif->iff_rx_pp != NULL) {
				pp_release(fakeif->iff_rx_pp);
				fakeif->iff_rx_pp = NULL;
			}
			if (fakeif->iff_tx_pp != NULL) {
				pp_release(fakeif->iff_tx_pp);
				fakeif->iff_tx_pp = NULL;
			}
		}
	}
#endif /* SKYWALK */

	FETH_DPRINTF("%s\n", fakeif->iff_name);
	kfree_type(fake_llink, FETH_MAX_LLINKS, fakeif->iff_llink);
	kfree_type(struct if_fake, fakeif);
}
830 
831 static void
feth_release(if_fake_ref fakeif)832 feth_release(if_fake_ref fakeif)
833 {
834 	u_int32_t               old_retain_count;
835 
836 	old_retain_count = OSDecrementAtomic(&fakeif->iff_retain_count);
837 	switch (old_retain_count) {
838 	case 0:
839 		VERIFY(old_retain_count != 0);
840 		break;
841 	case 1:
842 		feth_free(fakeif);
843 		break;
844 	default:
845 		break;
846 	}
847 	return;
848 }
849 
850 #if SKYWALK
851 
/* take a reference on the if_fake; paired with feth_release() */
static void
feth_retain(if_fake_ref fakeif)
{
	OSIncrementAtomic(&fakeif->iff_retain_count);
}
857 
/*
 * feth_packet_pool_init_prepare
 * Fill in a kern_pbufpool_init for this interface from its flags and
 * the buflet-size/TSO sysctls; the caller passes the result to
 * kern_pbufpool_create().
 */
static void
feth_packet_pool_init_prepare(if_fake_ref fakeif,
    struct kern_pbufpool_init *pp_init)
{
	uint32_t max_mtu = fakeif->iff_max_mtu;
	uint32_t buflet_size = if_fake_buflet_size;

	bzero(pp_init, sizeof(*pp_init));
	pp_init->kbi_version = KERN_PBUFPOOL_CURRENT_VERSION;
	pp_init->kbi_flags |= KBIF_VIRTUAL_DEVICE;
	pp_init->kbi_packets = 1024; /* TBD configurable */
	if (feth_supports_tso(fakeif)) {
		/* TSO uses the larger tso_buf_size sysctl value */
		buflet_size = if_fake_tso_buffer_size;
	}
	if (feth_using_multibuflets(fakeif)) {
		/* chain enough buflets to cover a max-MTU packet */
		pp_init->kbi_bufsize = buflet_size;
		pp_init->kbi_max_frags = howmany(max_mtu, buflet_size);
		pp_init->kbi_buflets = pp_init->kbi_packets *
		    pp_init->kbi_max_frags;
		pp_init->kbi_flags |= KBIF_BUFFER_ON_DEMAND;
	} else {
		/* one buflet sized to hold a max-MTU packet */
		pp_init->kbi_bufsize = max(max_mtu, buflet_size);
		pp_init->kbi_max_frags = 1;
		pp_init->kbi_buflets = pp_init->kbi_packets;
	}
	pp_init->kbi_buf_seg_size = skmem_usr_buf_seg_size;
	if (if_fake_user_access != 0) {
		pp_init->kbi_flags |= KBIF_USER_ACCESS;
	}
	pp_init->kbi_ctx = NULL;
	pp_init->kbi_ctx_retain = NULL;
	pp_init->kbi_ctx_release = NULL;
}
891 
892 static errno_t
feth_packet_pool_make(if_fake_ref fakeif)893 feth_packet_pool_make(if_fake_ref fakeif)
894 {
895 	struct kern_pbufpool_init pp_init;
896 	errno_t err;
897 
898 	feth_packet_pool_init_prepare(fakeif, &pp_init);
899 
900 	switch (fakeif->iff_pp_mode) {
901 	case IFF_PP_MODE_GLOBAL:
902 		feth_lock();
903 		if (S_pp == NULL) {
904 			(void)snprintf((char *)pp_init.kbi_name,
905 			    sizeof(pp_init.kbi_name), "%s", "feth shared pp");
906 			err = kern_pbufpool_create(&pp_init, &S_pp, NULL);
907 		}
908 		pp_retain(S_pp);
909 		feth_unlock();
910 		fakeif->iff_rx_pp = S_pp;
911 		pp_retain(S_pp);
912 		fakeif->iff_tx_pp = S_pp;
913 		break;
914 	case IFF_PP_MODE_PRIVATE:
915 		(void)snprintf((char *)pp_init.kbi_name,
916 		    sizeof(pp_init.kbi_name), "%s pp", fakeif->iff_name);
917 		err = kern_pbufpool_create(&pp_init, &fakeif->iff_rx_pp, NULL);
918 		pp_retain(fakeif->iff_rx_pp);
919 		fakeif->iff_tx_pp = fakeif->iff_rx_pp;
920 		break;
921 	case IFF_PP_MODE_PRIVATE_SPLIT:
922 		(void)snprintf((char *)pp_init.kbi_name,
923 		    sizeof(pp_init.kbi_name), "%s rx pp", fakeif->iff_name);
924 		pp_init.kbi_flags &= ~(KBIF_IODIR_IN | KBIF_IODIR_OUT |
925 		    KBIF_BUFFER_ON_DEMAND | KBIF_KERNEL_READONLY);
926 		pp_init.kbi_flags |= (KBIF_IODIR_IN | KBIF_BUFFER_ON_DEMAND);
927 		pp_init.kbi_packets = 1024;
928 		pp_init.kbi_bufsize = if_fake_link_layer_aggregation_factor * 1024;
929 		err = kern_pbufpool_create(&pp_init, &fakeif->iff_rx_pp, NULL);
930 		if (err != 0) {
931 			printf("%s: rx pp create failed %d\n", __func__, err);
932 			return err;
933 		}
934 		pp_init.kbi_flags &= ~(KBIF_IODIR_IN | KBIF_IODIR_OUT |
935 		    KBIF_BUFFER_ON_DEMAND | KBIF_KERNEL_READONLY);
936 		pp_init.kbi_flags |= KBIF_IODIR_OUT;
937 		pp_init.kbi_packets = 1024;            /* TBD configurable */
938 		pp_init.kbi_bufsize = fakeif->iff_max_mtu;
939 		(void)snprintf((char *)pp_init.kbi_name,
940 		    sizeof(pp_init.kbi_name), "%s tx pp", fakeif->iff_name);
941 		err = kern_pbufpool_create(&pp_init, &fakeif->iff_tx_pp, NULL);
942 		if (err != 0) {
943 			printf("%s: tx pp create failed %d\n", __func__, err);
944 			pp_release(fakeif->iff_rx_pp);
945 			return err;
946 		}
947 		break;
948 	default:
949 		VERIFY(0);
950 		__builtin_unreachable();
951 	}
952 
953 	return 0;
954 }
955 
956 static void
feth_packet_set_trace_tag(kern_packet_t ph,int flag)957 feth_packet_set_trace_tag(kern_packet_t ph, int flag)
958 {
959 	if (if_fake_trace_tag_flags & flag) {
960 		if (++if_fake_trace_tag_current == 0) {
961 			if_fake_trace_tag_current = 1;
962 		}
963 		kern_packet_set_trace_tag(ph, if_fake_trace_tag_current);
964 	}
965 }
966 
/*
 * Clone a single-buflet source packet (sph) into dif's RX pool
 * (PRIVATE_SPLIT mode).  *pdph is both input and output: on input it
 * holds a previously cloned packet (or 0); if that packet's backing
 * buffer still has room past its data limit, the new payload is
 * appended there via a light-weight clone, otherwise a fresh buffer
 * is allocated from the pool.  On success *pdph is set to the new,
 * finalized destination packet.
 *
 * Returns 0 on success, or the allocation/clone error (drop stats are
 * bumped on allocation failure).
 */
static errno_t
feth_clone_packet(if_fake_ref dif, kern_packet_t sph, kern_packet_t *pdph)
{
	errno_t err = 0;
	kern_pbufpool_t pp = dif->iff_rx_pp;
	kern_packet_t dph = 0, dph0 = 0;
	kern_buflet_t sbuf, dbuf0 = NULL, dbuf;
	void *saddr, *daddr;
	uint32_t soff, doff;
	uint32_t slen, dlen;
	uint32_t dlim0, dlim;

	/* destination mirrors the source's offset and length */
	sbuf = kern_packet_get_next_buflet(sph, NULL);
	saddr = kern_buflet_get_data_address(sbuf);
	doff = soff = kern_buflet_get_data_offset(sbuf);
	dlen = slen = kern_buflet_get_data_length(sbuf);

	/* packet clone is only supported for single-buflet */
	ASSERT(kern_packet_get_buflet_count(sph) == 1);
	ASSERT(soff == kern_packet_get_headroom(sph));
	ASSERT(slen == kern_packet_get_data_length(sph));

	dph0 = *pdph;
	if (dph0 == 0) {
		/* no prior clone: force a fresh allocation below */
		dlim0 = 0;
	} else {
		/*
		 * Space still free in the prior clone's backing object,
		 * i.e. from the end of its data limit to the end of the
		 * buffer object.
		 */
		dbuf0 = kern_packet_get_next_buflet(dph0, NULL);
		ASSERT(kern_buflet_get_object_limit(dbuf0) ==
		    PP_BUF_OBJ_SIZE_DEF(pp));
		ASSERT(kern_buflet_get_data_limit(dbuf0) % 16 == 0);
		dlim0 = ((uintptr_t)kern_buflet_get_object_address(dbuf0) +
		    kern_buflet_get_object_limit(dbuf0)) -
		    ((uintptr_t)kern_buflet_get_data_address(dbuf0) +
		    kern_buflet_get_data_limit(dbuf0));
	}

	if (doff + dlen > dlim0) {
		/* doesn't fit in the leftover space: allocate a new packet */
		err = kern_pbufpool_alloc_nosleep(pp, 1, &dph);
		if (err != 0) {
			STATS_INC(dif->iff_nifs, NETIF_STATS_DROP);
			STATS_INC(dif->iff_nifs, NETIF_STATS_DROP_NOMEM_PKT);
			return err;
		}
		dbuf = kern_packet_get_next_buflet(dph, NULL);
		ASSERT(kern_buflet_get_data_address(dbuf) ==
		    kern_buflet_get_object_address(dbuf));
		daddr = kern_buflet_get_data_address(dbuf);
		dlim = kern_buflet_get_object_limit(dbuf);
		ASSERT(dlim == PP_BUF_OBJ_SIZE_DEF(pp));
	} else {
		/*
		 * Fits after the prior clone's data: share its buffer via a
		 * light-weight clone and point the data address past the
		 * prior clone's data limit.
		 */
		err = kern_packet_clone_nosleep(dph0, &dph, KPKT_COPY_LIGHT);
		if (err != 0) {
			printf("%s: packet clone err %d\n", __func__, err);
			return err;
		}
		dbuf = kern_packet_get_next_buflet(dph, NULL);
		ASSERT(kern_buflet_get_object_address(dbuf) ==
		    kern_buflet_get_object_address(dbuf0));
		daddr = (void *)((uintptr_t)kern_buflet_get_data_address(dbuf0) +
		    kern_buflet_get_data_limit(dbuf0));
		dlim = dlim0;
	}

	ASSERT(doff + dlen <= dlim);

	/* 16-byte alignment is maintained by rounding the limit below */
	ASSERT((uintptr_t)daddr % 16 == 0);

	bcopy((const void *)((uintptr_t)saddr + soff),
	    (void *)((uintptr_t)daddr + doff), slen);

	/* cap the data limit (16-byte aligned) so the next clone can pack in */
	dlim = MIN(dlim, P2ROUNDUP(doff + dlen, 16));
	err = kern_buflet_set_data_address(dbuf, daddr);
	VERIFY(err == 0);
	err = kern_buflet_set_data_limit(dbuf, dlim);
	VERIFY(err == 0);
	err = kern_buflet_set_data_length(dbuf, dlen);
	VERIFY(err == 0);
	err = kern_buflet_set_data_offset(dbuf, doff);
	VERIFY(err == 0);
	err = kern_packet_set_headroom(dph, doff);
	VERIFY(err == 0);
	err = kern_packet_set_link_header_length(dph,
	    kern_packet_get_link_header_length(sph));
	VERIFY(err == 0);
	err = kern_packet_set_service_class(dph,
	    kern_packet_get_service_class(sph));
	VERIFY(err == 0);
	err = kern_packet_finalize(dph);
	VERIFY(err == 0);
	*pdph = dph;

	return err;
}
1060 
1061 static inline void
feth_copy_buflet(kern_buflet_t sbuf,kern_buflet_t dbuf)1062 feth_copy_buflet(kern_buflet_t sbuf, kern_buflet_t dbuf)
1063 {
1064 	errno_t err;
1065 	uint16_t off, len;
1066 	uint8_t *saddr, *daddr;
1067 
1068 	saddr = kern_buflet_get_data_address(sbuf);
1069 	off = kern_buflet_get_data_offset(sbuf);
1070 	len = kern_buflet_get_data_length(sbuf);
1071 	daddr = kern_buflet_get_data_address(dbuf);
1072 	bcopy((saddr + off), (daddr + off), len);
1073 	err = kern_buflet_set_data_offset(dbuf, off);
1074 	VERIFY(err == 0);
1075 	err = kern_buflet_set_data_length(dbuf, len);
1076 	VERIFY(err == 0);
1077 }
1078 
1079 static int
feth_add_packet_trailer(kern_packet_t ph,void * trailer,size_t trailer_len)1080 feth_add_packet_trailer(kern_packet_t ph, void *trailer, size_t trailer_len)
1081 {
1082 	errno_t err = 0;
1083 
1084 	ASSERT(trailer_len <= FETH_TRAILER_LENGTH_MAX);
1085 
1086 	kern_buflet_t buf = NULL, iter = NULL;
1087 	while ((iter = kern_packet_get_next_buflet(ph, iter)) != NULL) {
1088 		buf = iter;
1089 	}
1090 	ASSERT(buf != NULL);
1091 
1092 	uint16_t dlim = kern_buflet_get_data_limit(buf);
1093 	uint16_t doff = kern_buflet_get_data_offset(buf);
1094 	uint16_t dlen = kern_buflet_get_data_length(buf);
1095 
1096 	size_t trailer_room = dlim - doff - dlen;
1097 
1098 	if (trailer_room < trailer_len) {
1099 		printf("not enough room");
1100 		return ERANGE;
1101 	}
1102 
1103 	void *data = (void *)((uintptr_t)kern_buflet_get_data_address(buf) + doff + dlen);
1104 	memcpy(data, trailer, trailer_len);
1105 
1106 	err = kern_buflet_set_data_length(buf, dlen + trailer_len);
1107 	VERIFY(err == 0);
1108 
1109 	err = kern_packet_finalize(ph);
1110 	VERIFY(err == 0);
1111 
1112 	FETH_DPRINTF("%s %zuB trailer added\n", __func__, trailer_len);
1113 
1114 	return 0;
1115 }
1116 
1117 static int
feth_add_packet_fcs(kern_packet_t ph)1118 feth_add_packet_fcs(kern_packet_t ph)
1119 {
1120 	uint32_t crc = 0;
1121 	int err;
1122 
1123 	ASSERT(sizeof(crc) == ETHER_CRC_LEN);
1124 
1125 	kern_buflet_t buf = NULL;
1126 	while ((buf = kern_packet_get_next_buflet(ph, buf)) != NULL) {
1127 		uint16_t doff = kern_buflet_get_data_offset(buf);
1128 		uint16_t dlen = kern_buflet_get_data_length(buf);
1129 		void *data = (void *)((uintptr_t)kern_buflet_get_data_address(buf) + doff);
1130 		crc = crc32(crc, data, dlen);
1131 	}
1132 
1133 	err = feth_add_packet_trailer(ph, &crc, ETHER_CRC_LEN);
1134 	if (!err) {
1135 		return err;
1136 	}
1137 
1138 	err = kern_packet_set_link_ethfcs(ph);
1139 	VERIFY(err == 0);
1140 
1141 	return 0;
1142 }
1143 
/*
 * Copy packet sph into a freshly allocated packet from dif's RX pool
 * (PRIVATE pool mode).  The first buflet is copied into the new
 * packet's pre-attached buflet; for multi-buflet sources each extra
 * buflet is allocated and appended one at a time.  On partial
 * allocation failure the whole destination packet is freed and the
 * error returned; on success *pdph holds the finalized copy with the
 * source's headroom, link-header length and service class.
 */
static errno_t
feth_copy_packet(if_fake_ref dif, kern_packet_t sph, kern_packet_t *pdph)
{
	errno_t err = 0;
	uint16_t i, bufcnt;
	mach_vm_address_t baddr;
	kern_buflet_t sbuf = NULL, dbuf = NULL;
	kern_pbufpool_t pp = dif->iff_rx_pp;
	kern_packet_t dph;
	boolean_t multi_buflet = feth_using_multibuflets(dif);

	bufcnt = kern_packet_get_buflet_count(sph);
	ASSERT((bufcnt == 1) || multi_buflet);
	*pdph = 0;

	err = kern_pbufpool_alloc_nosleep(pp, 1, &dph);
	if (err != 0) {
		STATS_INC(dif->iff_nifs, NETIF_STATS_DROP);
		STATS_INC(dif->iff_nifs, NETIF_STATS_DROP_NOMEM_PKT);
		return err;
	}

	/* pre-constructed single buflet packet copy */
	sbuf = kern_packet_get_next_buflet(sph, NULL);
	dbuf = kern_packet_get_next_buflet(dph, NULL);
	feth_copy_buflet(sbuf, dbuf);

	if (!multi_buflet) {
		goto done;
	}

	/* un-constructed multi-buflet packet copy */
	for (i = 1; i < bufcnt; i++) {
		kern_buflet_t dbuf_next = NULL;

		sbuf = kern_packet_get_next_buflet(sph, sbuf);
		VERIFY(sbuf != NULL);
		err = kern_pbufpool_alloc_buflet_nosleep(pp, &dbuf_next, true);
		if (err != 0) {
			STATS_INC(dif->iff_nifs, NETIF_STATS_DROP);
			STATS_INC(dif->iff_nifs, NETIF_STATS_DROP_NOMEM_BUF);
			break;
		}
		ASSERT(dbuf_next != NULL);
		feth_copy_buflet(sbuf, dbuf_next);
		/* chain the new buflet after the previous one */
		err = kern_packet_add_buflet(dph, dbuf, dbuf_next);
		VERIFY(err == 0);
		dbuf = dbuf_next;
	}
	if (__improbable(err != 0)) {
		/*
		 * Mid-copy allocation failure: walk the buflets attached so
		 * far to sanity-check their backing buffers, then free the
		 * packet (which releases all attached buflets with it).
		 */
		dbuf = NULL;
		while (i-- != 0) {
			dbuf = kern_packet_get_next_buflet(dph, dbuf);
			VERIFY(dbuf != NULL);
			baddr = (mach_vm_address_t)
			    kern_buflet_get_data_address(dbuf);
			VERIFY(baddr != 0);
		}
		kern_pbufpool_free(pp, dph);
		dph = 0;
	}

done:
	if (__probable(err == 0)) {
		/* mirror the source packet's metadata, then finalize */
		err = kern_packet_set_headroom(dph,
		    kern_packet_get_headroom(sph));
		VERIFY(err == 0);
		err = kern_packet_set_link_header_length(dph,
		    kern_packet_get_link_header_length(sph));
		VERIFY(err == 0);
		err = kern_packet_set_service_class(dph,
		    kern_packet_get_service_class(sph));
		VERIFY(err == 0);
		err = kern_packet_finalize(dph);
		VERIFY(err == 0);
		VERIFY(bufcnt == kern_packet_get_buflet_count(dph));
		*pdph = dph;
	}
	return err;
}
1224 
1225 static inline void
feth_update_pkt_tso_metadata_for_rx(kern_packet_t ph)1226 feth_update_pkt_tso_metadata_for_rx(kern_packet_t ph)
1227 {
1228 	/*
1229 	 * Nothing to do if not a TSO offloaded packet.
1230 	 */
1231 	uint16_t seg_sz = 0;
1232 	(void) kern_packet_get_protocol_segment_size(ph, &seg_sz);
1233 	if (seg_sz == 0) {
1234 		return;
1235 	}
1236 	/*
1237 	 * For RX, make the packet appear as a fully validated LRO packet.
1238 	 */
1239 	packet_csum_flags_t csum_flags = PACKET_CSUM_IP_CHECKED |
1240 	    PACKET_CSUM_IP_VALID | PACKET_CSUM_DATA_VALID |
1241 	    PACKET_CSUM_PSEUDO_HDR;
1242 	(void) kern_packet_set_inet_checksum(ph, csum_flags, 0, 0xFFFF, FALSE);
1243 	return;
1244 }
1245 
/*
 * Deliver a batch of packets transmitted by sif into dif's default RX
 * ring (classic netif mode).  Depending on dif's pool mode each source
 * packet is handed over directly (GLOBAL: sphs[i] is consumed and
 * zeroed so the caller won't complete/free it), copied (PRIVATE), or
 * cloned (PRIVATE_SPLIT).  Optional trailer/FCS bytes emulate
 * link-layer framing; each delivered packet is bpf-tapped, attached to
 * the next free RX slot, and the ring is advanced and notified.
 * Packets that fail copy/clone are silently skipped (stats are bumped
 * by the helpers); packets beyond the ring's free slots are left to
 * the caller to complete.
 */
static void
feth_rx_submit(if_fake_ref sif, if_fake_ref dif, kern_packet_t sphs[],
    uint32_t n_pkts)
{
	errno_t err = 0;
	struct kern_channel_ring_stat_increment stats;
	kern_channel_ring_t rx_ring = NULL;
	kern_channel_slot_t rx_slot = NULL, last_rx_slot = NULL;
	kern_packet_t sph = 0, dph = 0;

	memset(&stats, 0, sizeof(stats));

	rx_ring = dif->iff_rx_ring[0];
	if (rx_ring == NULL) {
		return;
	}

	/* serialize against the channel's own sync, reclaim consumed slots */
	kr_enter(rx_ring, TRUE);
	kern_channel_reclaim(rx_ring);
	rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);

	for (uint32_t i = 0; i < n_pkts && rx_slot != NULL; i++) {
		sph = sphs[i];

		switch (dif->iff_pp_mode) {
		case IFF_PP_MODE_GLOBAL:
			/* shared pool: transfer ownership of the packet */
			sphs[i] = 0;
			dph = sph;
			feth_update_pkt_tso_metadata_for_rx(dph);
			err = kern_packet_finalize(dph);
			VERIFY(err == 0);
			break;
		case IFF_PP_MODE_PRIVATE:
			err = feth_copy_packet(dif, sph, &dph);
			break;
		case IFF_PP_MODE_PRIVATE_SPLIT:
			err = feth_clone_packet(dif, sph, &dph);
			break;
		default:
			VERIFY(0);
			__builtin_unreachable();
		}
		if (__improbable(err != 0)) {
			/* copy/clone failed; helper already counted the drop */
			continue;
		}

		/* emulate link-layer framing if configured on the sender */
		if (sif->iff_trailer_length != 0) {
			feth_add_packet_trailer(dph, feth_trailer,
			    sif->iff_trailer_length);
		}
		if (sif->iff_fcs != 0) {
			feth_add_packet_fcs(dph);
		}
		feth_packet_set_trace_tag(dph, IFF_TT_INPUT);
		bpf_tap_packet_in(dif->iff_ifp, DLT_EN10MB, dph, NULL, 0);
		stats.kcrsi_slots_transferred++;
		stats.kcrsi_bytes_transferred
		        += kern_packet_get_data_length(dph);

		/* attach the packet to the RX ring */
		err = kern_channel_slot_attach_packet(rx_ring, rx_slot, dph);
		VERIFY(err == 0);
		last_rx_slot = rx_slot;
		rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
	}

	if (last_rx_slot != NULL) {
		kern_channel_advance_slot(rx_ring, last_rx_slot);
		kern_channel_increment_ring_net_stats(rx_ring, dif->iff_ifp,
		    &stats);
	}

	/* NOTE(review): rx_ring is always non-NULL here (early return above) */
	if (rx_ring != NULL) {
		kr_exit(rx_ring);
		kern_channel_notify(rx_ring, 0);
	}
}
1323 
/*
 * Deliver a batch of packets transmitted by sif into dif's RX queue
 * for the given logical link / queue set (logical-link netif mode).
 * Per-packet handling mirrors feth_rx_submit(): direct hand-over
 * (GLOBAL, sphs[i] zeroed), copy (PRIVATE), or clone (PRIVATE_SPLIT),
 * followed by optional trailer/FCS framing and a bpf input tap.
 * Packets are enqueued on the qset's default RX queue; the final
 * enqueue carries the FLUSH flag so the queue is kicked once per
 * batch.  Invalid llink/qset indices or a missing default queue abort
 * the whole batch with a log message.
 */
static void
feth_rx_queue_submit(if_fake_ref sif, if_fake_ref dif, uint32_t llink_idx,
    uint32_t qset_idx, kern_packet_t sphs[], uint32_t n_pkts)
{
	errno_t err = 0;
	kern_netif_queue_t queue;
	kern_packet_t sph = 0, dph = 0;
	fake_llink *llink;
	fake_qset *qset;

	/* validate the destination logical link and queue set */
	if (llink_idx >= dif->iff_llink_cnt) {
		printf("%s: invalid llink_idx idx %d (max %d) on peer %s\n",
		    __func__, llink_idx, dif->iff_llink_cnt, dif->iff_name);
		return;
	}
	llink = &dif->iff_llink[llink_idx];
	if (qset_idx >= llink->fl_qset_cnt) {
		printf("%s: invalid qset_idx %d (max %d) on peer %s\n",
		    __func__, qset_idx, llink->fl_qset_cnt, dif->iff_name);
		return;
	}
	qset = &dif->iff_llink[llink_idx].fl_qset[qset_idx];
	queue = qset->fqs_rx_queue[0].fq_queue;
	if (queue == NULL) {
		printf("%s: NULL default queue (llink_idx %d, qset_idx %d) "
		    "on peer %s\n", __func__, llink_idx, qset_idx,
		    dif->iff_name);
		return;
	}
	for (uint32_t i = 0; i < n_pkts; i++) {
		uint32_t flags;

		sph = sphs[i];

		switch (dif->iff_pp_mode) {
		case IFF_PP_MODE_GLOBAL:
			/* shared pool: transfer ownership of the packet */
			sphs[i] = 0;
			dph = sph;
			feth_update_pkt_tso_metadata_for_rx(dph);
			break;
		case IFF_PP_MODE_PRIVATE:
			err = feth_copy_packet(dif, sph, &dph);
			break;
		case IFF_PP_MODE_PRIVATE_SPLIT:
			err = feth_clone_packet(dif, sph, &dph);
			break;
		default:
			VERIFY(0);
			__builtin_unreachable();
		}
		if (__improbable(err != 0)) {
			/* copy/clone failed; helper already counted the drop */
			continue;
		}

		/* emulate link-layer framing if configured on the sender */
		if (sif->iff_trailer_length != 0) {
			feth_add_packet_trailer(dph, feth_trailer,
			    sif->iff_trailer_length);
		}
		if (sif->iff_fcs != 0) {
			feth_add_packet_fcs(dph);
		}
		feth_packet_set_trace_tag(dph, IFF_TT_INPUT);
		bpf_tap_packet_in(dif->iff_ifp, DLT_EN10MB, dph, NULL, 0);

		/* flush the queue on the last packet of the batch */
		flags = (i == n_pkts - 1) ?
		    KERN_NETIF_QUEUE_RX_ENQUEUE_FLAG_FLUSH : 0;
		kern_netif_queue_rx_enqueue(queue, dph, 1, flags);
	}
}
1393 
1394 static void
feth_tx_complete(if_fake_ref fakeif,kern_packet_t phs[],uint32_t nphs)1395 feth_tx_complete(if_fake_ref fakeif, kern_packet_t phs[], uint32_t nphs)
1396 {
1397 	for (uint32_t i = 0; i < nphs; i++) {
1398 		kern_packet_t ph = phs[i];
1399 		if (ph == 0) {
1400 			continue;
1401 		}
1402 		int err = kern_packet_set_tx_completion_status(ph, 0);
1403 		VERIFY(err == 0);
1404 		kern_packet_tx_completion(ph, fakeif->iff_ifp);
1405 		kern_pbufpool_free(fakeif->iff_tx_pp, phs[i]);
1406 		phs[i] = 0;
1407 	}
1408 }
1409 
/*
 * Simulated TX failure: once every iff_tx_drop_rate packets (when the
 * feature is enabled), report a retry-failed transmit status for *ph
 * -- synchronously via the packet's completion callback, or
 * asynchronously as a channel event -- then free the packet, zero the
 * caller's handle, and count the drop.
 *
 * Returns true if the packet was selected for TX error & dropped.
 */
static bool
feth_tx_complete_error(if_fake_ref fakeif, kern_packet_t *ph,
    struct netif_stats *nifs)
{
	int err;

	/* drop only when enabled and exactly at the configured interval */
	if (fakeif->iff_tx_drop_rate == 0 ||
	    fakeif->iff_tx_pkts_count != fakeif->iff_tx_drop_rate) {
		return false;
	}
	/* simulate TX completion error on the packet */
	if (fakeif->iff_tx_completion_mode == IFF_TX_COMPL_MODE_SYNC) {
		err = kern_packet_set_tx_completion_status(*ph,
		    CHANNEL_EVENT_PKT_TRANSMIT_STATUS_ERR_RETRY_FAILED);
		VERIFY(err == 0);
		kern_packet_tx_completion(*ph, fakeif->iff_ifp);
	} else {
		/* async mode: deliver the status as a channel event */
		uint32_t nx_port_id = 0;
		os_channel_event_packet_transmit_status_t pkt_tx_status = {0};

		pkt_tx_status.packet_status =
		    CHANNEL_EVENT_PKT_TRANSMIT_STATUS_ERR_RETRY_FAILED;
		do {
			err = kern_packet_get_packetid(*ph,
			    &pkt_tx_status.packet_id);
			if (err != 0) {
				break;
			}
			err = kern_packet_get_tx_nexus_port_id(*ph,
			    &nx_port_id);
			if (err != 0) {
				break;
			}
			err = kern_channel_event_transmit_status(
				fakeif->iff_ifp, &pkt_tx_status, nx_port_id);
		} while (0);
		if (err != 0) {
			/* best-effort: event delivery failure is only logged */
			FETH_DPRINTF("err %d, nx_port_id: 0x%x\n",
			    err, nx_port_id);
		}
	}
	/* restart the interval counter and dispose of the packet */
	fakeif->iff_tx_pkts_count = 0;
	kern_pbufpool_free(fakeif->iff_tx_pp, *ph);
	*ph = 0;
	STATS_INC(nifs, NETIF_STATS_DROP);
	return true;
}
1458 
/*
 * Thread-call work function that periodically emits interface advisory
 * reports while the channel is connected.  Each pass it reads the
 * interface stats, fills a synthetic TX advisory (fixed 1 Gbps
 * figures), delivers it via the registered notify callback, and
 * re-arms the thread call for iff_adv_interval milliseconds later.
 * Exits without re-arming once the interface is detaching or the
 * channel disconnected.
 */
static void
feth_if_adv(thread_call_param_t arg0, thread_call_param_t arg1)
{
#pragma unused(arg1)
	errno_t                            error;
	if_fake_ref                        fakeif = (if_fake_ref)arg0;
	struct ifnet_interface_advisory    if_adv;
	struct ifnet_stats_param           if_stat;

	/* bail out early if we raced with detach/disconnect */
	feth_lock();
	if (feth_is_detaching(fakeif) || !fakeif->iff_channel_connected) {
		feth_unlock();
		return;
	}
	feth_unlock();

	if (!fakeif->iff_intf_adv_enabled) {
		goto done;
	}

	error = ifnet_stat(fakeif->iff_ifp, &if_stat);
	if (error != 0) {
		FETH_DPRINTF("%s: ifnet_stat() failed %d\n",
		    fakeif->iff_name, error);
		goto done;
	}
	/* synthetic advisory: pretend to be a 1 Gbps Wi-Fi link */
	if_adv.header.version = IF_INTERFACE_ADVISORY_VERSION_CURRENT;
	if_adv.header.direction = IF_INTERFACE_ADVISORY_DIRECTION_TX;
	if_adv.header.interface_type =
	    IF_INTERFACE_ADVISORY_INTERFACE_TYPE_WIFI;
	if_adv.capacity.timestamp = mach_absolute_time();
	if_adv.capacity.rate_trend_suggestion =
	    IF_INTERFACE_ADVISORY_RATE_SUGGESTION_RAMP_NEUTRAL;
	if_adv.capacity.max_bandwidth = 1000 * 1000 * 1000; /* 1Gbps */
	if_adv.capacity.total_byte_count = if_stat.packets_out;
	if_adv.capacity.average_throughput = 1000 * 1000 * 1000; /* 1Gbps */
	if_adv.capacity.flushable_queue_size = UINT32_MAX;
	if_adv.capacity.non_flushable_queue_size = UINT32_MAX;
	if_adv.capacity.average_delay = 1; /* ms */

	error = fakeif->iff_intf_adv_notify(fakeif->iff_intf_adv_kern_ctx,
	    &if_adv);
	if (error != 0) {
		FETH_DPRINTF("%s: interface advisory report failed %d\n",
		    fakeif->iff_name, error);
	}

done:
	/* re-arm only while still attached and connected */
	feth_lock();
	if (!feth_is_detaching(fakeif) && fakeif->iff_channel_connected) {
		uint64_t deadline;
		clock_interval_to_deadline(fakeif->iff_adv_interval,
		    NSEC_PER_MSEC, &deadline);
		thread_call_enter_delayed(fakeif->iff_if_adv_tcall, deadline);
	}
	feth_unlock();
}
1516 
1517 static int
feth_if_adv_tcall_create(if_fake_ref fakeif)1518 feth_if_adv_tcall_create(if_fake_ref fakeif)
1519 {
1520 	uint64_t deadline;
1521 
1522 	feth_lock();
1523 	ASSERT(fakeif->iff_if_adv_tcall == NULL);
1524 	ASSERT(fakeif->iff_adv_interval > 0);
1525 	ASSERT(fakeif->iff_channel_connected);
1526 	fakeif->iff_if_adv_tcall =
1527 	    thread_call_allocate_with_options(feth_if_adv,
1528 	    (thread_call_param_t)fakeif, THREAD_CALL_PRIORITY_KERNEL,
1529 	    THREAD_CALL_OPTIONS_ONCE);
1530 	if (fakeif->iff_if_adv_tcall == NULL) {
1531 		printf("%s: %s if_adv tcall alloc failed\n", __func__,
1532 		    fakeif->iff_name);
1533 		return ENXIO;
1534 	}
1535 	/* retain for the interface advisory thread call */
1536 	feth_retain(fakeif);
1537 	clock_interval_to_deadline(fakeif->iff_adv_interval,
1538 	    NSEC_PER_MSEC, &deadline);
1539 	thread_call_enter_delayed(fakeif->iff_if_adv_tcall, deadline);
1540 	feth_unlock();
1541 	return 0;
1542 }
1543 
/*
 * Cancel and free the interface-advisory thread call, then drop the
 * reference taken when it was created.  thread_call_free() fails while
 * the call is still executing, so on failure we cancel-wait again and
 * retry the free once, verifying it succeeds the second time.
 */
static void
feth_if_adv_tcall_destroy(if_fake_ref fakeif)
{
	thread_call_t tcall;

	/* snapshot the tcall under the lock; clear the field only at the end */
	feth_lock();
	ASSERT(fakeif->iff_if_adv_tcall != NULL);
	tcall = fakeif->iff_if_adv_tcall;
	feth_unlock();
	(void) thread_call_cancel_wait(tcall);
	if (!thread_call_free(tcall)) {
		/* call was mid-flight; wait it out and free again */
		boolean_t freed;
		(void) thread_call_cancel_wait(tcall);
		freed = thread_call_free(tcall);
		VERIFY(freed);
	}
	feth_lock();
	fakeif->iff_if_adv_tcall = NULL;
	feth_unlock();
	/* release for the interface advisory thread call */
	feth_release(fakeif);
}
1566 
1567 
1568 /**
1569 ** nexus netif domain provider
1570 **/
1571 static errno_t
feth_nxdp_init(kern_nexus_domain_provider_t domprov)1572 feth_nxdp_init(kern_nexus_domain_provider_t domprov)
1573 {
1574 #pragma unused(domprov)
1575 	return 0;
1576 }
1577 
1578 static void
feth_nxdp_fini(kern_nexus_domain_provider_t domprov)1579 feth_nxdp_fini(kern_nexus_domain_provider_t domprov)
1580 {
1581 #pragma unused(domprov)
1582 }
1583 
1584 static uuid_t                   feth_nx_dom_prov;
1585 
1586 static errno_t
feth_register_nexus_domain_provider(void)1587 feth_register_nexus_domain_provider(void)
1588 {
1589 	const struct kern_nexus_domain_provider_init dp_init = {
1590 		.nxdpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
1591 		.nxdpi_flags = 0,
1592 		.nxdpi_init = feth_nxdp_init,
1593 		.nxdpi_fini = feth_nxdp_fini
1594 	};
1595 	errno_t                         err = 0;
1596 
1597 	/* feth_nxdp_init() is called before this function returns */
1598 	err = kern_nexus_register_domain_provider(NEXUS_TYPE_NET_IF,
1599 	    (const uint8_t *)
1600 	    "com.apple.feth",
1601 	    &dp_init, sizeof(dp_init),
1602 	    &feth_nx_dom_prov);
1603 	if (err != 0) {
1604 		printf("%s: failed to register domain provider\n", __func__);
1605 		return err;
1606 	}
1607 	return 0;
1608 }
1609 
1610 /**
1611 ** netif nexus routines
1612 **/
1613 static if_fake_ref
feth_nexus_context(kern_nexus_t nexus)1614 feth_nexus_context(kern_nexus_t nexus)
1615 {
1616 	if_fake_ref fakeif;
1617 
1618 	fakeif = (if_fake_ref)kern_nexus_get_context(nexus);
1619 	assert(fakeif != NULL);
1620 	return fakeif;
1621 }
1622 
1623 static uint8_t
feth_find_tx_ring_by_svc(kern_packet_svc_class_t svc_class)1624 feth_find_tx_ring_by_svc(kern_packet_svc_class_t svc_class)
1625 {
1626 	switch (svc_class) {
1627 	case KPKT_SC_VO:
1628 		return 0;
1629 	case KPKT_SC_VI:
1630 		return 1;
1631 	case KPKT_SC_BE:
1632 		return 2;
1633 	case KPKT_SC_BK:
1634 		return 3;
1635 	default:
1636 		VERIFY(0);
1637 		return 0;
1638 	}
1639 }
1640 
/*
 * Ring init callback: record the newly created ring in the fake
 * interface's bookkeeping.  In WMM mode each TX ring is indexed by its
 * service class; otherwise a single TX ring and a single RX ring are
 * tracked at index 0.  Also caches the netif stats pointer.  A
 * detaching interface accepts the call but records nothing.
 */
static errno_t
feth_nx_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_t channel, kern_channel_ring_t ring, boolean_t is_tx_ring,
    void **ring_ctx)
{
	if_fake_ref     fakeif;
	int             err;
#pragma unused(nxprov, channel, ring_ctx)
	feth_lock();
	fakeif = feth_nexus_context(nexus);
	if (feth_is_detaching(fakeif)) {
		feth_unlock();
		return 0;
	}
	if (is_tx_ring) {
		if (feth_in_wmm_mode(fakeif)) {
			/* WMM: one TX ring per service class */
			kern_packet_svc_class_t svc_class;
			uint8_t ring_idx;

			err = kern_channel_get_service_class(ring, &svc_class);
			VERIFY(err == 0);
			ring_idx = feth_find_tx_ring_by_svc(svc_class);
			VERIFY(ring_idx < IFF_NUM_TX_RINGS_WMM_MODE);
			VERIFY(fakeif->iff_tx_ring[ring_idx] == NULL);
			fakeif->iff_tx_ring[ring_idx] = ring;
		} else {
			VERIFY(fakeif->iff_tx_ring[0] == NULL);
			fakeif->iff_tx_ring[0] = ring;
		}
	} else {
		VERIFY(fakeif->iff_rx_ring[0] == NULL);
		fakeif->iff_rx_ring[0] = ring;
	}
	/* cache the netif stats block for the fast paths */
	fakeif->iff_nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
	feth_unlock();
	FETH_DPRINTF("%s: %s ring init\n",
	    fakeif->iff_name, is_tx_ring ? "TX" : "RX");
	return 0;
}
1680 
/*
 * Ring teardown callback: clear the ring from the interface's
 * bookkeeping.  When the LAST TX ring goes away, also take ownership
 * of the doorbell thread call and destroy it outside the lock: cancel
 * it, and if it's mid-execution, sleep on the fakeif until the
 * doorbell call signals completion, then free the call and drop the
 * reference it held.
 */
static void
feth_nx_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t ring)
{
#pragma unused(nxprov, ring)
	if_fake_ref     fakeif;
	thread_call_t   tcall = NULL;

	feth_lock();
	fakeif = feth_nexus_context(nexus);
	if (fakeif->iff_rx_ring[0] == ring) {
		fakeif->iff_rx_ring[0] = NULL;
		FETH_DPRINTF("%s: RX ring fini\n", fakeif->iff_name);
	} else if (feth_in_wmm_mode(fakeif)) {
		int i;
		/* clear this ring's slot */
		for (i = 0; i < IFF_MAX_TX_RINGS; i++) {
			if (fakeif->iff_tx_ring[i] == ring) {
				fakeif->iff_tx_ring[i] = NULL;
				break;
			}
		}
		/* any TX rings still registered? */
		for (i = 0; i < IFF_MAX_TX_RINGS; i++) {
			if (fakeif->iff_tx_ring[i] != NULL) {
				break;
			}
		}
		if (i == IFF_MAX_TX_RINGS) {
			/* last TX ring gone: claim the doorbell thread call */
			tcall = fakeif->iff_doorbell_tcall;
			fakeif->iff_doorbell_tcall = NULL;
		}
		FETH_DPRINTF("%s: TX ring fini\n", fakeif->iff_name);
	} else if (fakeif->iff_tx_ring[0] == ring) {
		tcall = fakeif->iff_doorbell_tcall;
		fakeif->iff_doorbell_tcall = NULL;
		fakeif->iff_tx_ring[0] = NULL;
	}
	fakeif->iff_nifs = NULL;
	feth_unlock();
	if (tcall != NULL) {
		boolean_t       success;

		success = thread_call_cancel_wait(tcall);
		FETH_DPRINTF("%s: thread_call_cancel %s\n",
		    fakeif->iff_name,
		    success ? "SUCCESS" : "FAILURE");
		if (!success) {
			/*
			 * Cancel failed: the doorbell call is executing.
			 * Wait for it to clear iff_doorbell_tcall_active
			 * and wake us.
			 */
			feth_lock();
			if (fakeif->iff_doorbell_tcall_active) {
				fakeif->iff_waiting_for_tcall = TRUE;
				FETH_DPRINTF("%s: *waiting for threadcall\n",
				    fakeif->iff_name);
				do {
					msleep(fakeif, &feth_lck_mtx,
					    PZERO, "feth threadcall", 0);
				} while (fakeif->iff_doorbell_tcall_active);
				FETH_DPRINTF("%s: ^threadcall done\n",
				    fakeif->iff_name);
				fakeif->iff_waiting_for_tcall = FALSE;
			}
			feth_unlock();
		}
		success = thread_call_free(tcall);
		FETH_DPRINTF("%s: thread_call_free %s\n",
		    fakeif->iff_name,
		    success ? "SUCCESS" : "FAILURE");
		/* drop the reference held by the doorbell thread call */
		feth_release(fakeif);
		VERIFY(success == TRUE);
	}
}
1750 
1751 static errno_t
feth_nx_pre_connect(kern_nexus_provider_t nxprov,proc_t proc,kern_nexus_t nexus,nexus_port_t port,kern_channel_t channel,void ** channel_context)1752 feth_nx_pre_connect(kern_nexus_provider_t nxprov,
1753     proc_t proc, kern_nexus_t nexus, nexus_port_t port, kern_channel_t channel,
1754     void **channel_context)
1755 {
1756 #pragma unused(nxprov, proc, nexus, port, channel, channel_context)
1757 	return 0;
1758 }
1759 
/*
 * Channel connected callback: take a reference on the fake interface
 * (dropped in feth_nx_disconnected), mark the channel connected, and
 * start the interface-advisory thread call if advisory reporting is
 * configured.  Refuses (EBUSY) if the interface is already detaching.
 */
static errno_t
feth_nx_connected(kern_nexus_provider_t nxprov,
    kern_nexus_t nexus, kern_channel_t channel)
{
#pragma unused(nxprov, channel)
	int err;
	if_fake_ref fakeif;

	fakeif = feth_nexus_context(nexus);
	feth_lock();
	if (feth_is_detaching(fakeif)) {
		feth_unlock();
		return EBUSY;
	}
	/* reference held for the lifetime of the channel connection */
	feth_retain(fakeif);
	fakeif->iff_channel_connected = TRUE;
	feth_unlock();
	if (feth_has_intf_advisory_configured(fakeif)) {
		err = feth_if_adv_tcall_create(fakeif);
		if (err != 0) {
			return err;
		}
	}
	FETH_DPRINTF("%s: connected channel %p\n",
	    fakeif->iff_name, channel);
	return 0;
}
1787 
/*
 * Channel pre-disconnect callback: bring the interface down to flush
 * pending output, mark the channel disconnected under the lock, and
 * tear down the interface-advisory thread call if one is running.
 */
static void
feth_nx_pre_disconnect(kern_nexus_provider_t nxprov,
    kern_nexus_t nexus, kern_channel_t channel)
{
#pragma unused(nxprov, channel)
	if_fake_ref fakeif;

	fakeif = feth_nexus_context(nexus);
	FETH_DPRINTF("%s: pre-disconnect channel %p\n",
	    fakeif->iff_name, channel);
	/* Quiesce the interface and flush any pending outbound packets. */
	if_down(fakeif->iff_ifp);
	feth_lock();
	fakeif->iff_channel_connected = FALSE;
	feth_unlock();
	/* the advisory tcall must not outlive the connection */
	if (fakeif->iff_if_adv_tcall != NULL) {
		feth_if_adv_tcall_destroy(fakeif);
	}
}
1807 
1808 static void
feth_nx_disconnected(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel)1809 feth_nx_disconnected(kern_nexus_provider_t nxprov,
1810     kern_nexus_t nexus, kern_channel_t channel)
1811 {
1812 #pragma unused(nxprov, channel)
1813 	if_fake_ref fakeif;
1814 
1815 	fakeif = feth_nexus_context(nexus);
1816 	FETH_DPRINTF("%s: disconnected channel %p\n",
1817 	    fakeif->iff_name, channel);
1818 	feth_release(fakeif);
1819 }
1820 
1821 static errno_t
feth_nx_slot_init(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring,kern_channel_slot_t slot,uint32_t slot_index,struct kern_slot_prop ** slot_prop_addr,void ** slot_context)1822 feth_nx_slot_init(kern_nexus_provider_t nxprov,
1823     kern_nexus_t nexus, kern_channel_ring_t ring, kern_channel_slot_t slot,
1824     uint32_t slot_index, struct kern_slot_prop **slot_prop_addr,
1825     void **slot_context)
1826 {
1827 #pragma unused(nxprov, nexus, ring, slot, slot_index, slot_prop_addr, slot_context)
1828 	return 0;
1829 }
1830 
1831 static void
feth_nx_slot_fini(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring,kern_channel_slot_t slot,uint32_t slot_index)1832 feth_nx_slot_fini(kern_nexus_provider_t nxprov,
1833     kern_nexus_t nexus, kern_channel_ring_t ring, kern_channel_slot_t slot,
1834     uint32_t slot_index)
1835 {
1836 #pragma unused(nxprov, nexus, ring, slot, slot_index)
1837 }
1838 
/*
 * TX sync callback for the netif nexus: cross every packet queued on
 * this TX ring over to the peer fake interface as RX traffic.
 *
 * For each slot: detach the packet, bpf-tap it as output, optionally
 * drop it to simulate a TX completion error, and batch it (up to
 * IFF_MAX_BATCH_SIZE) for delivery via feth_rx_submit(); each
 * delivered batch is then completed and freed via feth_tx_complete().
 * The whole crossover runs under feth_lock().  Delivery is skipped
 * (but the ring still advances) when there is no peer, or the peer is
 * detaching or has no connected channel.
 */
static errno_t
feth_nx_sync_tx(kern_nexus_provider_t nxprov,
    kern_nexus_t nexus, kern_channel_ring_t tx_ring, uint32_t flags)
{
#pragma unused(nxprov)
	if_fake_ref             fakeif;
	ifnet_t                 ifp;
	kern_channel_slot_t     last_tx_slot = NULL;
	ifnet_t                 peer_ifp;
	if_fake_ref             peer_fakeif = NULL;
	struct kern_channel_ring_stat_increment stats;
	kern_channel_slot_t     tx_slot;
	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
	kern_packet_t           pkts[IFF_MAX_BATCH_SIZE];
	uint32_t                n_pkts = 0;

	memset(&stats, 0, sizeof(stats));

	STATS_INC(nifs, NETIF_STATS_TX_SYNC);
	fakeif = feth_nexus_context(nexus);
	FETH_DPRINTF("%s ring %d flags 0x%x\n", fakeif->iff_name,
	    tx_ring->ckr_ring_id, flags);

	feth_lock();
	if (feth_is_detaching(fakeif) || !fakeif->iff_channel_connected) {
		feth_unlock();
		return 0;
	}
	ifp = fakeif->iff_ifp;
	peer_ifp = fakeif->iff_peer;
	/* deliver only to a live, connected peer */
	if (peer_ifp != NULL) {
		peer_fakeif = ifnet_get_if_fake(peer_ifp);
		if (peer_fakeif != NULL) {
			if (feth_is_detaching(peer_fakeif) ||
			    !peer_fakeif->iff_channel_connected) {
				goto done;
			}
		} else {
			goto done;
		}
	} else {
		goto done;
	}
	tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
	while (tx_slot != NULL) {
		uint16_t off;
		kern_packet_t sph;

		/* detach the packet from the TX ring */
		sph = kern_channel_slot_get_packet(tx_ring, tx_slot);
		VERIFY(sph != 0);
		kern_channel_slot_detach_packet(tx_ring, tx_slot, sph);

		/* bpf tap output */
		off = kern_packet_get_headroom(sph);
		VERIFY(off >= fakeif->iff_tx_headroom);
		kern_packet_set_link_header_length(sph, ETHER_HDR_LEN);
		feth_packet_set_trace_tag(sph, IFF_TT_OUTPUT);
		bpf_tap_packet_out(ifp, DLT_EN10MB, sph, NULL, 0);

		/* drop packets, if requested */
		fakeif->iff_tx_pkts_count++;
		if (feth_tx_complete_error(fakeif, &sph, nifs)) {
			goto next_tx_slot;
		}
		ASSERT(sph != 0);
		STATS_INC(nifs, NETIF_STATS_TX_COPY_DIRECT);
		STATS_INC(nifs, NETIF_STATS_TX_PACKETS);

		stats.kcrsi_slots_transferred++;
		stats.kcrsi_bytes_transferred
		        += kern_packet_get_data_length(sph);

		/* prepare batch for receiver */
		pkts[n_pkts++] = sph;
		if (n_pkts == IFF_MAX_BATCH_SIZE) {
			/* full batch: deliver to the peer, then complete */
			feth_rx_submit(fakeif, peer_fakeif, pkts, n_pkts);
			feth_tx_complete(fakeif, pkts, n_pkts);
			n_pkts = 0;
		}

next_tx_slot:
		last_tx_slot = tx_slot;
		tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
	}

	/* catch last batch for receiver */
	if (n_pkts != 0) {
		feth_rx_submit(fakeif, peer_fakeif, pkts, n_pkts);
		feth_tx_complete(fakeif, pkts, n_pkts);
		n_pkts = 0;
	}

	if (last_tx_slot != NULL) {
		kern_channel_advance_slot(tx_ring, last_tx_slot);
		kern_channel_increment_ring_net_stats(tx_ring, ifp, &stats);
	}
done:
	feth_unlock();
	return 0;
}
1940 
1941 static errno_t
feth_nx_sync_rx(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring,uint32_t flags)1942 feth_nx_sync_rx(kern_nexus_provider_t nxprov,
1943     kern_nexus_t nexus, kern_channel_ring_t ring, uint32_t flags)
1944 {
1945 #pragma unused(nxprov, ring, flags)
1946 	if_fake_ref             fakeif;
1947 	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
1948 
1949 	STATS_INC(nifs, NETIF_STATS_RX_SYNC);
1950 	fakeif = feth_nexus_context(nexus);
1951 	FETH_DPRINTF("%s:\n", fakeif->iff_name);
1952 	return 0;
1953 }
1954 
1955 static errno_t
feth_nx_tx_dequeue_driver_managed(if_fake_ref fakeif,boolean_t doorbell_ctxt)1956 feth_nx_tx_dequeue_driver_managed(if_fake_ref fakeif, boolean_t doorbell_ctxt)
1957 {
1958 	int i;
1959 	errno_t error = 0;
1960 	boolean_t more;
1961 
1962 	for (i = 0; i < IFF_NUM_TX_RINGS_WMM_MODE; i++) {
1963 		kern_channel_ring_t ring = fakeif->iff_tx_ring[i];
1964 		if (ring != NULL) {
1965 			error = kern_channel_tx_refill(ring, UINT32_MAX,
1966 			    UINT32_MAX, doorbell_ctxt, &more);
1967 		}
1968 		if (error != 0) {
1969 			FETH_DPRINTF("%s: TX refill ring %d (%s) %d\n",
1970 			    fakeif->iff_name, ring->ckr_ring_id,
1971 			    doorbell_ctxt ? "sync" : "async", error);
1972 			if (!((error == EAGAIN) || (error == EBUSY))) {
1973 				break;
1974 			}
1975 		} else {
1976 			FETH_DPRINTF("%s: TX refilled ring %d (%s)\n",
1977 			    fakeif->iff_name, ring->ckr_ring_id,
1978 			    doorbell_ctxt ? "sync" : "async");
1979 		}
1980 	}
1981 	return error;
1982 }
1983 
/*
 * feth_async_doorbell
 * Thread-call target for asynchronous TX doorbells.  Performs the TX
 * refill outside the doorbell caller's context, then wakes any thread
 * sleeping on this fakeif waiting for the thread call to finish
 * (iff_waiting_for_tcall).
 */
static void
feth_async_doorbell(thread_call_param_t arg0, thread_call_param_t arg1)
{
#pragma unused(arg1)
	errno_t                 error;
	if_fake_ref             fakeif = (if_fake_ref)arg0;
	kern_channel_ring_t     ring;
	boolean_t               more;

	feth_lock();
	ring = fakeif->iff_tx_ring[0];
	/*
	 * NOTE: the 'done' label below is reached with the feth lock held
	 * on BOTH paths -- directly via this goto, or after re-acquiring
	 * the lock following the unlocked refill.
	 */
	if (feth_is_detaching(fakeif) ||
	    !fakeif->iff_channel_connected ||
	    ring == NULL) {
		goto done;
	}
	/* Mark the thread call active; the refill itself runs unlocked. */
	fakeif->iff_doorbell_tcall_active = TRUE;
	feth_unlock();
	if (feth_in_wmm_mode(fakeif)) {
		/* WMM mode: refill all driver-managed TX rings */
		error = feth_nx_tx_dequeue_driver_managed(fakeif, FALSE);
	} else {
		error = kern_channel_tx_refill(ring, UINT32_MAX,
		    UINT32_MAX, FALSE, &more);
	}
	if (error != 0) {
		FETH_DPRINTF("%s: TX refill failed %d\n",
		    fakeif->iff_name, error);
	} else {
		FETH_DPRINTF("%s: TX refilled\n", fakeif->iff_name);
	}

	feth_lock();
done:
	fakeif->iff_doorbell_tcall_active = FALSE;
	if (fakeif->iff_waiting_for_tcall) {
		FETH_DPRINTF("%s: threadcall waking up waiter\n",
		    fakeif->iff_name);
		wakeup((caddr_t)fakeif);
	}
	feth_unlock();
}
2025 
2026 static void
feth_schedule_async_doorbell(if_fake_ref fakeif)2027 feth_schedule_async_doorbell(if_fake_ref fakeif)
2028 {
2029 	thread_call_t   tcall;
2030 
2031 	feth_lock();
2032 	if (feth_is_detaching(fakeif) || !fakeif->iff_channel_connected) {
2033 		feth_unlock();
2034 		return;
2035 	}
2036 	tcall = fakeif->iff_doorbell_tcall;
2037 	if (tcall != NULL) {
2038 		thread_call_enter(tcall);
2039 	} else {
2040 		tcall = thread_call_allocate_with_options(feth_async_doorbell,
2041 		    (thread_call_param_t)fakeif,
2042 		    THREAD_CALL_PRIORITY_KERNEL,
2043 		    THREAD_CALL_OPTIONS_ONCE);
2044 		if (tcall == NULL) {
2045 			printf("%s: %s tcall alloc failed\n",
2046 			    __func__, fakeif->iff_name);
2047 		} else {
2048 			fakeif->iff_doorbell_tcall = tcall;
2049 			feth_retain(fakeif);
2050 			thread_call_enter(tcall);
2051 		}
2052 	}
2053 	feth_unlock();
2054 }
2055 
2056 static errno_t
feth_nx_tx_doorbell(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring,uint32_t flags)2057 feth_nx_tx_doorbell(kern_nexus_provider_t nxprov,
2058     kern_nexus_t nexus, kern_channel_ring_t ring, uint32_t flags)
2059 {
2060 #pragma unused(nxprov, ring, flags)
2061 	errno_t         error;
2062 	if_fake_ref     fakeif;
2063 
2064 	fakeif = feth_nexus_context(nexus);
2065 	FETH_DPRINTF("%s\n", fakeif->iff_name);
2066 
2067 	if ((flags & KERN_NEXUS_TXDOORBELLF_ASYNC_REFILL) == 0) {
2068 		boolean_t       more;
2069 		/* synchronous tx refill */
2070 		if (feth_in_wmm_mode(fakeif)) {
2071 			error = feth_nx_tx_dequeue_driver_managed(fakeif, TRUE);
2072 		} else {
2073 			error = kern_channel_tx_refill(ring, UINT32_MAX,
2074 			    UINT32_MAX, TRUE, &more);
2075 		}
2076 		if (error != 0) {
2077 			FETH_DPRINTF("%s: TX refill (sync) %d\n",
2078 			    fakeif->iff_name, error);
2079 		} else {
2080 			FETH_DPRINTF("%s: TX refilled (sync)\n",
2081 			    fakeif->iff_name);
2082 		}
2083 	} else {
2084 		FETH_DPRINTF("%s: schedule async refill\n", fakeif->iff_name);
2085 		feth_schedule_async_doorbell(fakeif);
2086 	}
2087 	return 0;
2088 }
2089 
2090 static errno_t
feth_netif_prepare(kern_nexus_t nexus,ifnet_t ifp)2091 feth_netif_prepare(kern_nexus_t nexus, ifnet_t ifp)
2092 {
2093 	if_fake_ref fakeif;
2094 
2095 	fakeif = (if_fake_ref)kern_nexus_get_context(nexus);
2096 	feth_ifnet_set_attrs(fakeif, ifp);
2097 	return 0;
2098 }
2099 
2100 static errno_t
feth_nx_intf_adv_config(void * prov_ctx,bool enable)2101 feth_nx_intf_adv_config(void *prov_ctx, bool enable)
2102 {
2103 	if_fake_ref fakeif = prov_ctx;
2104 
2105 	feth_lock();
2106 	fakeif->iff_intf_adv_enabled = enable;
2107 	feth_unlock();
2108 	FETH_DPRINTF("%s enable %d\n", fakeif->iff_name, enable);
2109 	return 0;
2110 }
2111 
2112 static errno_t
fill_capab_interface_advisory(if_fake_ref fakeif,void * contents,uint32_t * len)2113 fill_capab_interface_advisory(if_fake_ref fakeif, void *contents, uint32_t *len)
2114 {
2115 	struct kern_nexus_capab_interface_advisory *capab = contents;
2116 
2117 	if (*len != sizeof(*capab)) {
2118 		return EINVAL;
2119 	}
2120 	if (capab->kncia_version !=
2121 	    KERN_NEXUS_CAPAB_INTERFACE_ADVISORY_VERSION_1) {
2122 		return EINVAL;
2123 	}
2124 	if (!feth_has_intf_advisory_configured(fakeif)) {
2125 		return ENOTSUP;
2126 	}
2127 	VERIFY(capab->kncia_notify != NULL);
2128 	fakeif->iff_intf_adv_kern_ctx = capab->kncia_kern_context;
2129 	fakeif->iff_intf_adv_notify = capab->kncia_notify;
2130 	capab->kncia_provider_context = fakeif;
2131 	capab->kncia_config = feth_nx_intf_adv_config;
2132 	return 0;
2133 }
2134 
2135 static errno_t
feth_notify_steering_info(void * prov_ctx,void * qset_ctx,struct ifnet_traffic_descriptor_common * td,bool add)2136 feth_notify_steering_info(void *prov_ctx, void *qset_ctx,
2137     struct ifnet_traffic_descriptor_common *td, bool add)
2138 {
2139 #pragma unused(td)
2140 	if_fake_ref fakeif = prov_ctx;
2141 	fake_qset *qset = qset_ctx;
2142 
2143 	FETH_DPRINTF("%s: notify_steering_info: qset_id 0x%llx, %s\n",
2144 	    fakeif->iff_name, qset->fqs_id, add ? "add" : "remove");
2145 	return 0;
2146 }
2147 
2148 static errno_t
fill_capab_qset_extensions(if_fake_ref fakeif,void * contents,uint32_t * len)2149 fill_capab_qset_extensions(if_fake_ref fakeif, void *contents, uint32_t *len)
2150 {
2151 	struct kern_nexus_capab_qset_extensions *capab = contents;
2152 
2153 	if (*len != sizeof(*capab)) {
2154 		return EINVAL;
2155 	}
2156 	if (capab->cqe_version !=
2157 	    KERN_NEXUS_CAPAB_QSET_EXTENSIONS_VERSION_1) {
2158 		return EINVAL;
2159 	}
2160 	capab->cqe_prov_ctx = fakeif;
2161 	capab->cqe_notify_steering_info = feth_notify_steering_info;
2162 	return 0;
2163 }
2164 
2165 static errno_t
feth_nx_capab_config(kern_nexus_provider_t nxprov,kern_nexus_t nx,kern_nexus_capab_t capab,void * contents,uint32_t * len)2166 feth_nx_capab_config(kern_nexus_provider_t nxprov, kern_nexus_t nx,
2167     kern_nexus_capab_t capab, void *contents, uint32_t *len)
2168 {
2169 #pragma unused(nxprov)
2170 	errno_t error;
2171 	if_fake_ref fakeif;
2172 
2173 	fakeif = feth_nexus_context(nx);
2174 	FETH_DPRINTF("%s\n", fakeif->iff_name);
2175 
2176 	switch (capab) {
2177 	case KERN_NEXUS_CAPAB_INTERFACE_ADVISORY:
2178 		error = fill_capab_interface_advisory(fakeif, contents, len);
2179 		break;
2180 	case KERN_NEXUS_CAPAB_QSET_EXTENSIONS:
2181 		error = fill_capab_qset_extensions(fakeif, contents, len);
2182 		break;
2183 	default:
2184 		error = ENOTSUP;
2185 		break;
2186 	}
2187 	return error;
2188 }
2189 
2190 static int
feth_set_tso(ifnet_t ifp)2191 feth_set_tso(ifnet_t ifp)
2192 {
2193 	ifnet_offload_t offload;
2194 	uint32_t tso_v4_mtu, tso_v6_mtu;
2195 	int error;
2196 
2197 	offload = IFNET_TSO_IPV4 | IFNET_TSO_IPV6;
2198 	tso_v4_mtu = if_fake_tso_buffer_size;
2199 	tso_v6_mtu = if_fake_tso_buffer_size;
2200 	error = ifnet_set_offload(ifp, offload);
2201 	if (error != 0) {
2202 		printf("%s: set TSO offload failed on %s, err %d\n", __func__,
2203 		    if_name(ifp), error);
2204 		return error;
2205 	}
2206 	error = ifnet_set_tso_mtu(ifp, AF_INET, tso_v4_mtu);
2207 	if (error != 0) {
2208 		printf("%s: set TSO MTU IPv4 failed on %s, err %d\n", __func__,
2209 		    if_name(ifp), error);
2210 		return error;
2211 	}
2212 	error = ifnet_set_tso_mtu(ifp, AF_INET6, tso_v6_mtu);
2213 	if (error != 0) {
2214 		printf("%s: set TSO MTU IPv6 failed on %s, err %d\n", __func__,
2215 		    if_name(ifp), error);
2216 		return error;
2217 	}
2218 	return 0;
2219 }
2220 
/*
 * create_netif_provider_and_instance
 * Register a netif nexus provider for this fakeif (classic, non-llink
 * flavor) and allocate a provider instance, which also creates the
 * ifnet.  On success *provider, *instance and *ifp are filled in; on
 * failure any registered provider is deregistered and *provider
 * cleared.  The nexus attribute object is always destroyed before
 * returning -- note the success path intentionally falls through the
 * 'failed:' label with err == 0.
 */
static errno_t
create_netif_provider_and_instance(if_fake_ref fakeif,
    struct ifnet_init_eparams * init_params, ifnet_t *ifp,
    uuid_t * provider, uuid_t * instance)
{
	errno_t                 err;
	nexus_controller_t      controller = kern_nexus_shared_controller();
	struct kern_nexus_net_init net_init;
	nexus_name_t            provider_name;
	nexus_attr_t            nexus_attr = NULL;
	/* callback table for the classic (ring-based) netif provider */
	struct kern_nexus_provider_init prov_init = {
		.nxpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
		.nxpi_flags = NXPIF_VIRTUAL_DEVICE,
		.nxpi_pre_connect = feth_nx_pre_connect,
		.nxpi_connected = feth_nx_connected,
		.nxpi_pre_disconnect = feth_nx_pre_disconnect,
		.nxpi_disconnected = feth_nx_disconnected,
		.nxpi_ring_init = feth_nx_ring_init,
		.nxpi_ring_fini = feth_nx_ring_fini,
		.nxpi_slot_init = feth_nx_slot_init,
		.nxpi_slot_fini = feth_nx_slot_fini,
		.nxpi_sync_tx = feth_nx_sync_tx,
		.nxpi_sync_rx = feth_nx_sync_rx,
		.nxpi_tx_doorbell = feth_nx_tx_doorbell,
		.nxpi_config_capab = feth_nx_capab_config,
	};

	_CASSERT(IFF_MAX_RX_RINGS == 1);
	err = kern_nexus_attr_create(&nexus_attr);
	if (err != 0) {
		printf("%s nexus attribute creation failed, error %d\n",
		    __func__, err);
		goto failed;
	}
	/* WMM mode uses multiple TX/RX rings with a WMM queue mapping */
	if (feth_in_wmm_mode(fakeif)) {
		err = kern_nexus_attr_set(nexus_attr, NEXUS_ATTR_TX_RINGS,
		    IFF_NUM_TX_RINGS_WMM_MODE);
		VERIFY(err == 0);
		err = kern_nexus_attr_set(nexus_attr, NEXUS_ATTR_RX_RINGS,
		    IFF_NUM_RX_RINGS_WMM_MODE);
		VERIFY(err == 0);
		err = kern_nexus_attr_set(nexus_attr, NEXUS_ATTR_QMAP,
		    NEXUS_QMAP_TYPE_WMM);
		VERIFY(err == 0);
	}

	err = kern_nexus_attr_set(nexus_attr, NEXUS_ATTR_ANONYMOUS, 1);
	VERIFY(err == 0);
	snprintf((char *)provider_name, sizeof(provider_name),
	    "com.apple.netif.%s", fakeif->iff_name);
	err = kern_nexus_controller_register_provider(controller,
	    feth_nx_dom_prov,
	    provider_name,
	    &prov_init,
	    sizeof(prov_init),
	    nexus_attr,
	    provider);
	if (err != 0) {
		printf("%s register provider failed, error %d\n",
		    __func__, err);
		goto failed;
	}
	/* instance creation also creates and attaches the ifnet */
	bzero(&net_init, sizeof(net_init));
	net_init.nxneti_version = KERN_NEXUS_NET_CURRENT_VERSION;
	net_init.nxneti_flags = 0;
	net_init.nxneti_eparams = init_params;
	net_init.nxneti_lladdr = NULL;
	net_init.nxneti_prepare = feth_netif_prepare;
	net_init.nxneti_rx_pbufpool = fakeif->iff_rx_pp;
	net_init.nxneti_tx_pbufpool = fakeif->iff_tx_pp;
	err = kern_nexus_controller_alloc_net_provider_instance(controller,
	    *provider,
	    fakeif,
	    NULL,
	    instance,
	    &net_init,
	    ifp);
	if (err != 0) {
		printf("%s alloc_net_provider_instance failed, %d\n",
		    __func__, err);
		/* undo the provider registration done above */
		kern_nexus_controller_deregister_provider(controller,
		    *provider);
		uuid_clear(*provider);
		goto failed;
	}
	if (feth_supports_tso(fakeif)) {
		if ((err = feth_set_tso(*ifp)) != 0) {
			goto failed;
		}
	}

failed:
	/* reached on success too (err == 0): attr cleanup is unconditional */
	if (nexus_attr != NULL) {
		kern_nexus_attr_destroy(nexus_attr);
	}
	return err;
}
2318 
2319 /*
2320  * The nif_stats need to be referenced because we don't want it set
2321  * to NULL until the last llink is removed.
2322  */
2323 static void
get_nexus_stats(if_fake_ref fakeif,kern_nexus_t nexus)2324 get_nexus_stats(if_fake_ref fakeif, kern_nexus_t nexus)
2325 {
2326 	if (++fakeif->iff_nifs_ref == 1) {
2327 		ASSERT(fakeif->iff_nifs == NULL);
2328 		fakeif->iff_nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
2329 	}
2330 }
2331 
2332 static void
clear_nexus_stats(if_fake_ref fakeif)2333 clear_nexus_stats(if_fake_ref fakeif)
2334 {
2335 	if (--fakeif->iff_nifs_ref == 0) {
2336 		ASSERT(fakeif->iff_nifs != NULL);
2337 		fakeif->iff_nifs = NULL;
2338 	}
2339 }
2340 
2341 static errno_t
feth_nx_qset_init(kern_nexus_provider_t nxprov,kern_nexus_t nexus,void * llink_ctx,uint8_t qset_idx,uint64_t qset_id,kern_netif_qset_t qset,void ** qset_ctx)2342 feth_nx_qset_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
2343     void *llink_ctx, uint8_t qset_idx, uint64_t qset_id, kern_netif_qset_t qset,
2344     void **qset_ctx)
2345 {
2346 #pragma unused(nxprov)
2347 	if_fake_ref fakeif;
2348 	fake_llink *fl = llink_ctx;
2349 	fake_qset *fqs;
2350 
2351 	feth_lock();
2352 	fakeif = feth_nexus_context(nexus);
2353 	if (feth_is_detaching(fakeif)) {
2354 		feth_unlock();
2355 		printf("%s: %s: detaching\n", __func__, fakeif->iff_name);
2356 		return ENXIO;
2357 	}
2358 	if (qset_idx >= fl->fl_qset_cnt) {
2359 		feth_unlock();
2360 		printf("%s: %s: invalid qset_idx %d\n", __func__,
2361 		    fakeif->iff_name, qset_idx);
2362 		return EINVAL;
2363 	}
2364 	fqs = &fl->fl_qset[qset_idx];
2365 	ASSERT(fqs->fqs_qset == NULL);
2366 	fqs->fqs_qset = qset;
2367 	fqs->fqs_id = qset_id;
2368 	*qset_ctx = fqs;
2369 
2370 	/* XXX This should really be done during registration */
2371 	get_nexus_stats(fakeif, nexus);
2372 	feth_unlock();
2373 	return 0;
2374 }
2375 
2376 static void
feth_nx_qset_fini(kern_nexus_provider_t nxprov,kern_nexus_t nexus,void * qset_ctx)2377 feth_nx_qset_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
2378     void *qset_ctx)
2379 {
2380 #pragma unused(nxprov)
2381 	if_fake_ref fakeif;
2382 	fake_qset *fqs = qset_ctx;
2383 
2384 	feth_lock();
2385 	fakeif = feth_nexus_context(nexus);
2386 	clear_nexus_stats(fakeif);
2387 	ASSERT(fqs->fqs_qset != NULL);
2388 	fqs->fqs_qset = NULL;
2389 	fqs->fqs_id = 0;
2390 	feth_unlock();
2391 }
2392 
2393 static errno_t
feth_nx_queue_init(kern_nexus_provider_t nxprov,kern_nexus_t nexus,void * qset_ctx,uint8_t qidx,bool tx,kern_netif_queue_t queue,void ** queue_ctx)2394 feth_nx_queue_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
2395     void *qset_ctx, uint8_t qidx, bool tx, kern_netif_queue_t queue,
2396     void **queue_ctx)
2397 {
2398 #pragma unused(nxprov)
2399 	if_fake_ref fakeif;
2400 	fake_qset *fqs = qset_ctx;
2401 	fake_queue *fq;
2402 
2403 	feth_lock();
2404 	fakeif = feth_nexus_context(nexus);
2405 	if (feth_is_detaching(fakeif)) {
2406 		printf("%s: %s: detaching\n", __func__, fakeif->iff_name);
2407 		feth_unlock();
2408 		return ENXIO;
2409 	}
2410 	if (tx) {
2411 		if (qidx >= fqs->fqs_tx_queue_cnt) {
2412 			printf("%s: %s: invalid tx qidx %d\n", __func__,
2413 			    fakeif->iff_name, qidx);
2414 			feth_unlock();
2415 			return EINVAL;
2416 		}
2417 		fq = &fqs->fqs_tx_queue[qidx];
2418 	} else {
2419 		if (qidx >= fqs->fqs_rx_queue_cnt) {
2420 			printf("%s: %s: invalid rx qidx %d\n", __func__,
2421 			    fakeif->iff_name, qidx);
2422 			feth_unlock();
2423 			return EINVAL;
2424 		}
2425 		fq = &fqs->fqs_rx_queue[qidx];
2426 	}
2427 	ASSERT(fq->fq_queue == NULL);
2428 	fq->fq_queue = queue;
2429 	*queue_ctx = fq;
2430 	feth_unlock();
2431 	return 0;
2432 }
2433 
2434 static void
feth_nx_queue_fini(kern_nexus_provider_t nxprov,kern_nexus_t nexus,void * queue_ctx)2435 feth_nx_queue_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
2436     void *queue_ctx)
2437 {
2438 #pragma unused(nxprov, nexus)
2439 	fake_queue *fq = queue_ctx;
2440 
2441 	feth_lock();
2442 	ASSERT(fq->fq_queue != NULL);
2443 	fq->fq_queue = NULL;
2444 	feth_unlock();
2445 }
2446 
/*
 * feth_nx_tx_queue_deliver_pkt_chain
 * Walk a chain of TX packets (linked via kern_packet_get_next), tap
 * each one to bpf, optionally drop it as a simulated TX error, and
 * deliver the survivors to the peer's RX queue identified by
 * (llink_idx, qset_idx) in batches of up to IFF_MAX_BATCH_SIZE.
 * Each delivered batch is also TX-completed on this interface.
 */
static void
feth_nx_tx_queue_deliver_pkt_chain(if_fake_ref fakeif, kern_packet_t sph,
    struct netif_stats *nifs, if_fake_ref peer_fakeif,
    uint32_t llink_idx, uint32_t qset_idx)
{
	kern_packet_t pkts[IFF_MAX_BATCH_SIZE];
	uint32_t n_pkts = 0;

	while (sph != 0) {
		uint16_t off;
		kern_packet_t next;

		/* unlink the current packet from the chain */
		next = kern_packet_get_next(sph);
		kern_packet_set_next(sph, 0);

		/* bpf tap output */
		off = kern_packet_get_headroom(sph);
		VERIFY(off >= fakeif->iff_tx_headroom);
		kern_packet_set_link_header_length(sph, ETHER_HDR_LEN);
		feth_packet_set_trace_tag(sph, IFF_TT_OUTPUT);
		bpf_tap_packet_out(fakeif->iff_ifp, DLT_EN10MB, sph, NULL, 0);

		/* drop packets, if requested */
		fakeif->iff_tx_pkts_count++;
		if (feth_tx_complete_error(fakeif, &sph, nifs)) {
			/* packet consumed as a simulated TX error */
			goto next_pkt;
		}
		ASSERT(sph != 0);
		STATS_INC(nifs, NETIF_STATS_TX_COPY_DIRECT);
		STATS_INC(nifs, NETIF_STATS_TX_PACKETS);

		/* prepare batch for receiver */
		pkts[n_pkts++] = sph;
		if (n_pkts == IFF_MAX_BATCH_SIZE) {
			/* full batch: hand to peer RX, then complete TX */
			feth_rx_queue_submit(fakeif, peer_fakeif, llink_idx,
			    qset_idx, pkts, n_pkts);
			feth_tx_complete(fakeif, pkts, n_pkts);
			n_pkts = 0;
		}
next_pkt:
		sph = next;
	}
	/* catch last batch for receiver */
	if (n_pkts != 0) {
		feth_rx_queue_submit(fakeif, peer_fakeif, llink_idx, qset_idx,
		    pkts, n_pkts);
		feth_tx_complete(fakeif, pkts, n_pkts);
		n_pkts = 0;
	}
}
2497 
/*
 * feth_nx_tx_qset_notify
 * TX notify callback for the llink flavor: dequeue pending packets from
 * every TX queue of the qset and deliver them to the peer interface.
 * Optionally flips the qset between combined and separate mode every
 * if_fake_switch_mode_frequency dequeues when
 * if_fake_switch_combined_mode is set.  Always returns 0; a detaching
 * or unconnected endpoint (on either side) results in no work.
 */
static errno_t
feth_nx_tx_qset_notify(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    void *qset_ctx, uint32_t flags)
{
#pragma unused(nxprov)
	if_fake_ref             fakeif;
	ifnet_t                 ifp;
	ifnet_t                 peer_ifp;
	if_fake_ref             peer_fakeif = NULL;
	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
	fake_qset               *qset = qset_ctx;
	boolean_t               detaching, connected;
	uint32_t                i;
	errno_t                 err;

	STATS_INC(nifs, NETIF_STATS_TX_SYNC);
	fakeif = feth_nexus_context(nexus);
	FETH_DPRINTF("%s qset %p, idx %d, flags 0x%x\n", fakeif->iff_name, qset,
	    qset->fqs_idx, flags);

	feth_lock();
	/* Bail out if this side is going away or not channel-connected. */
	detaching = feth_is_detaching(fakeif);
	connected = fakeif->iff_channel_connected;
	if (detaching || !connected) {
		FETH_DPRINTF("%s: %s: detaching %s, channel connected %s\n",
		    __func__, fakeif->iff_name,
		    (detaching ? "true" : "false"),
		    (connected ? "true" : "false"));
		feth_unlock();
		return 0;
	}
	ifp = fakeif->iff_ifp;
	peer_ifp = fakeif->iff_peer;
	/* The peer must exist, be attached, and be channel-connected. */
	if (peer_ifp != NULL) {
		peer_fakeif = ifnet_get_if_fake(peer_ifp);
		if (peer_fakeif != NULL) {
			detaching = feth_is_detaching(peer_fakeif);
			connected = peer_fakeif->iff_channel_connected;
			if (detaching || !connected) {
				FETH_DPRINTF("%s: peer %s: detaching %s, "
				    "channel connected %s\n",
				    __func__, peer_fakeif->iff_name,
				    (detaching ? "true" : "false"),
				    (connected ? "true" : "false"));
				goto done;
			}
		} else {
			FETH_DPRINTF("%s: peer_fakeif is NULL\n", __func__);
			goto done;
		}
	} else {
		printf("%s: peer_ifp is NULL\n", __func__);
		goto done;
	}

	/*
	 * Test knob: periodically toggle the qset between combined and
	 * separate mode to exercise both paths.
	 */
	if (if_fake_switch_combined_mode &&
	    qset->fqs_dequeue_cnt >= if_fake_switch_mode_frequency) {
		if (qset->fqs_combined_mode) {
			kern_netif_set_qset_separate(qset->fqs_qset);
		} else {
			kern_netif_set_qset_combined(qset->fqs_qset);
		}
		qset->fqs_combined_mode = !qset->fqs_combined_mode;
		qset->fqs_dequeue_cnt = 0;
	}

	/* Drain each TX queue and deliver its chain to the peer. */
	for (i = 0; i < qset->fqs_tx_queue_cnt; i++) {
		kern_packet_t sph = 0;
		kern_netif_queue_t queue = qset->fqs_tx_queue[i].fq_queue;
		boolean_t more = FALSE;

		err = kern_netif_queue_tx_dequeue(queue, UINT32_MAX, UINT32_MAX,
		    &more, &sph);
		if (err != 0 && err != EAGAIN) {
			FETH_DPRINTF("%s queue %p dequeue failed: err "
			    "%d\n", fakeif->iff_name, queue, err);
		}
		/* sph may be 0 here; the delivery loop handles that. */
		feth_nx_tx_queue_deliver_pkt_chain(fakeif, sph, nifs,
		    peer_fakeif, qset->fqs_llink_idx, qset->fqs_idx);
	}

done:
	feth_unlock();
	return 0;
}
2583 
2584 static void
fill_qset_info_and_params(if_fake_ref fakeif,fake_llink * llink_info,uint32_t qset_idx,struct kern_nexus_netif_llink_qset_init * qset_init,bool is_def,bool is_low_latency)2585 fill_qset_info_and_params(if_fake_ref fakeif, fake_llink *llink_info,
2586     uint32_t qset_idx, struct kern_nexus_netif_llink_qset_init *qset_init,
2587     bool is_def, bool is_low_latency)
2588 {
2589 	fake_qset *qset_info = &llink_info->fl_qset[qset_idx];
2590 
2591 	qset_init->nlqi_flags =
2592 	    (is_def ? KERN_NEXUS_NET_LLINK_QSET_DEFAULT : 0) |
2593 	    (is_low_latency ? KERN_NEXUS_NET_LLINK_QSET_LOW_LATENCY : 0) |
2594 	    KERN_NEXUS_NET_LLINK_QSET_AQM;
2595 
2596 	if (feth_in_wmm_mode(fakeif)) {
2597 		qset_init->nlqi_flags |= KERN_NEXUS_NET_LLINK_QSET_WMM_MODE;
2598 		qset_init->nlqi_num_txqs = IFF_NUM_TX_QUEUES_WMM_MODE;
2599 		qset_init->nlqi_num_rxqs = IFF_NUM_RX_QUEUES_WMM_MODE;
2600 	} else {
2601 		qset_init->nlqi_num_txqs = 1;
2602 		qset_init->nlqi_num_rxqs = 1;
2603 	}
2604 	qset_info->fqs_tx_queue_cnt = qset_init->nlqi_num_txqs;
2605 	qset_info->fqs_rx_queue_cnt = qset_init->nlqi_num_rxqs;
2606 
2607 	/* These are needed for locating the peer qset */
2608 	qset_info->fqs_llink_idx = llink_info->fl_idx;
2609 	qset_info->fqs_idx = qset_idx;
2610 }
2611 
2612 static void
fill_llink_info_and_params(if_fake_ref fakeif,uint32_t llink_idx,struct kern_nexus_netif_llink_init * llink_init,uint32_t llink_id,struct kern_nexus_netif_llink_qset_init * qset_init,uint32_t qset_cnt,uint32_t flags)2613 fill_llink_info_and_params(if_fake_ref fakeif, uint32_t llink_idx,
2614     struct kern_nexus_netif_llink_init *llink_init, uint32_t llink_id,
2615     struct kern_nexus_netif_llink_qset_init *qset_init, uint32_t qset_cnt,
2616     uint32_t flags)
2617 {
2618 	fake_llink *llink_info = &fakeif->iff_llink[llink_idx];
2619 	uint32_t i;
2620 	bool create_ll_qset = if_fake_low_latency && (llink_idx != 0);
2621 
2622 	for (i = 0; i < qset_cnt; i++) {
2623 		fill_qset_info_and_params(fakeif, llink_info, i,
2624 		    &qset_init[i], i == 0, create_ll_qset && i == 1);
2625 	}
2626 	llink_info->fl_idx = llink_idx;
2627 
2628 	/* This doesn't have to be the same as llink_idx */
2629 	llink_info->fl_id = llink_id;
2630 	llink_info->fl_qset_cnt = qset_cnt;
2631 
2632 	llink_init->nli_link_id = llink_id;
2633 	llink_init->nli_num_qsets = qset_cnt;
2634 	llink_init->nli_qsets = qset_init;
2635 	llink_init->nli_flags = flags;
2636 	llink_init->nli_ctx = llink_info;
2637 }
2638 
/*
 * create_non_default_llinks
 * Add logical links 1..if_fake_llink_cnt-1 to the nexus (llink 0, the
 * default, is created with the provider instance).  On failure, every
 * llink added so far is removed and the count reset.
 */
static errno_t
create_non_default_llinks(if_fake_ref fakeif)
{
	struct kern_nexus *nx;
	fake_nx_t fnx = &fakeif->iff_nx;
	struct kern_nexus_netif_llink_init llink_init;
	struct kern_nexus_netif_llink_qset_init qset_init[FETH_MAX_QSETS];
	errno_t err;
	uint64_t llink_id;
	uint32_t i;

	nx = nx_find(fnx->fnx_instance, FALSE);
	if (nx == NULL) {
		printf("%s: %s: nx not found\n", __func__, fakeif->iff_name);
		return ENXIO;
	}
	/* Default llink starts at index 0 */
	for (i = 1; i < if_fake_llink_cnt; i++) {
		llink_id = (uint64_t)i;

		/*
		 * The llink_init and qset_init structures are reused for
		 * each llink creation.
		 */
		fill_llink_info_and_params(fakeif, i, &llink_init,
		    llink_id, qset_init, if_fake_qset_cnt, 0);
		err = kern_nexus_netif_llink_add(nx, &llink_init);
		if (err != 0) {
			printf("%s: %s: llink add failed, error %d\n",
			    __func__, fakeif->iff_name, err);
			goto fail;
		}
		fakeif->iff_llink_cnt++;
	}
	nx_release(nx);
	return 0;

fail:
	/*
	 * NOTE(review): this cleanup loop starts at index 0, so it also
	 * attempts to remove the default llink and zeroes its fl_id,
	 * whereas remove_non_default_llinks deliberately starts at
	 * index 1.  Confirm the asymmetry is intentional.
	 */
	for (i = 0; i < fakeif->iff_llink_cnt; i++) {
		int e;

		e = kern_nexus_netif_llink_remove(nx, fakeif->
		    iff_llink[i].fl_id);
		if (e != 0) {
			printf("%s: %s: llink remove failed, llink_id 0x%llx, "
			    "error %d\n", __func__, fakeif->iff_name,
			    fakeif->iff_llink[i].fl_id, e);
		}
		fakeif->iff_llink[i].fl_id = 0;
	}
	fakeif->iff_llink_cnt = 0;
	nx_release(nx);
	return err;
}
2693 
/*
 * create_netif_llink_provider_and_instance
 * Register a netif nexus provider for this fakeif using the
 * logical-link (qset/queue) callback table, allocate a provider
 * instance with a default llink, then add any additional llinks and
 * optionally enable TSO.  On success *provider, *instance and *ifp are
 * filled in.  The nexus attribute object is always destroyed before
 * returning -- note the success path intentionally falls through the
 * 'failed:' label with err == 0.
 */
static errno_t
create_netif_llink_provider_and_instance(if_fake_ref fakeif,
    struct ifnet_init_eparams * init_params, ifnet_t *ifp,
    uuid_t * provider, uuid_t * instance)
{
	errno_t                 err;
	nexus_controller_t      controller = kern_nexus_shared_controller();
	struct kern_nexus_net_init net_init;
	struct kern_nexus_netif_llink_init llink_init;
	struct kern_nexus_netif_llink_qset_init qsets[FETH_MAX_QSETS];

	nexus_name_t            provider_name;
	nexus_attr_t            nexus_attr = NULL;
	/* callback table for the llink (qset/queue-based) netif provider */
	struct kern_nexus_netif_provider_init prov_init = {
		.nxnpi_version = KERN_NEXUS_DOMAIN_PROVIDER_NETIF,
		.nxnpi_flags = NXPIF_VIRTUAL_DEVICE,
		.nxnpi_pre_connect = feth_nx_pre_connect,
		.nxnpi_connected = feth_nx_connected,
		.nxnpi_pre_disconnect = feth_nx_pre_disconnect,
		.nxnpi_disconnected = feth_nx_disconnected,
		.nxnpi_qset_init = feth_nx_qset_init,
		.nxnpi_qset_fini = feth_nx_qset_fini,
		.nxnpi_queue_init = feth_nx_queue_init,
		.nxnpi_queue_fini = feth_nx_queue_fini,
		.nxnpi_tx_qset_notify = feth_nx_tx_qset_notify,
		.nxnpi_config_capab = feth_nx_capab_config,
	};

	err = kern_nexus_attr_create(&nexus_attr);
	if (err != 0) {
		printf("%s nexus attribute creation failed, error %d\n",
		    __func__, err);
		goto failed;
	}

	err = kern_nexus_attr_set(nexus_attr, NEXUS_ATTR_ANONYMOUS, 1);
	VERIFY(err == 0);

	snprintf((char *)provider_name, sizeof(provider_name),
	    "com.apple.netif.%s", fakeif->iff_name);
	err = kern_nexus_controller_register_provider(controller,
	    feth_nx_dom_prov,
	    provider_name,
	    (struct kern_nexus_provider_init *)&prov_init,
	    sizeof(prov_init),
	    nexus_attr,
	    provider);
	if (err != 0) {
		printf("%s register provider failed, error %d\n",
		    __func__, err);
		goto failed;
	}
	bzero(&net_init, sizeof(net_init));
	net_init.nxneti_version = KERN_NEXUS_NET_CURRENT_VERSION;
	net_init.nxneti_flags = 0;
	net_init.nxneti_eparams = init_params;
	net_init.nxneti_lladdr = NULL;
	net_init.nxneti_prepare = feth_netif_prepare;
	net_init.nxneti_rx_pbufpool = fakeif->iff_rx_pp;
	net_init.nxneti_tx_pbufpool = fakeif->iff_tx_pp;

	/*
	 * Assume llink id is same as the index for if_fake.
	 * This is not required for other drivers.
	 */
	_CASSERT(NETIF_LLINK_ID_DEFAULT == 0);
	fill_llink_info_and_params(fakeif, 0, &llink_init,
	    NETIF_LLINK_ID_DEFAULT, qsets, if_fake_qset_cnt,
	    KERN_NEXUS_NET_LLINK_DEFAULT);

	net_init.nxneti_llink = &llink_init;

	err = kern_nexus_controller_alloc_net_provider_instance(controller,
	    *provider, fakeif, NULL, instance, &net_init, ifp);
	if (err != 0) {
		printf("%s alloc_net_provider_instance failed, %d\n",
		    __func__, err);
		/* undo the provider registration done above */
		kern_nexus_controller_deregister_provider(controller,
		    *provider);
		uuid_clear(*provider);
		goto failed;
	}
	/* the default llink now exists; count it */
	fakeif->iff_llink_cnt++;

	if (if_fake_llink_cnt > 1) {
		err = create_non_default_llinks(fakeif);
		if (err != 0) {
			printf("%s create_non_default_llinks failed, %d\n",
			    __func__, err);
			feth_detach_netif_nexus(fakeif);
			goto failed;
		}
	}
	if (feth_supports_tso(fakeif)) {
		if ((err = feth_set_tso(*ifp)) != 0) {
			goto failed;
		}
	}
failed:
	/* reached on success too (err == 0): attr cleanup is unconditional */
	if (nexus_attr != NULL) {
		kern_nexus_attr_destroy(nexus_attr);
	}
	return err;
}
2798 
2799 static errno_t
feth_attach_netif_nexus(if_fake_ref fakeif,struct ifnet_init_eparams * init_params,ifnet_t * ifp)2800 feth_attach_netif_nexus(if_fake_ref fakeif,
2801     struct ifnet_init_eparams * init_params, ifnet_t *ifp)
2802 {
2803 	errno_t                 error;
2804 	fake_nx_t               nx = &fakeif->iff_nx;
2805 
2806 	error = feth_packet_pool_make(fakeif);
2807 	if (error != 0) {
2808 		return error;
2809 	}
2810 	if (if_fake_llink_cnt == 0) {
2811 		return create_netif_provider_and_instance(fakeif, init_params,
2812 		           ifp, &nx->fnx_provider, &nx->fnx_instance);
2813 	} else {
2814 		return create_netif_llink_provider_and_instance(fakeif,
2815 		           init_params, ifp, &nx->fnx_provider,
2816 		           &nx->fnx_instance);
2817 	}
2818 }
2819 
2820 static void
remove_non_default_llinks(if_fake_ref fakeif)2821 remove_non_default_llinks(if_fake_ref fakeif)
2822 {
2823 	struct kern_nexus *nx;
2824 	fake_nx_t fnx = &fakeif->iff_nx;
2825 	uint32_t i;
2826 
2827 	if (fakeif->iff_llink_cnt <= 1) {
2828 		return;
2829 	}
2830 	nx = nx_find(fnx->fnx_instance, FALSE);
2831 	if (nx == NULL) {
2832 		printf("%s: %s: nx not found\n", __func__,
2833 		    fakeif->iff_name);
2834 		return;
2835 	}
2836 	/* Default llink (at index 0) is freed separately */
2837 	for (i = 1; i < fakeif->iff_llink_cnt; i++) {
2838 		int err;
2839 
2840 		err = kern_nexus_netif_llink_remove(nx, fakeif->
2841 		    iff_llink[i].fl_id);
2842 		if (err != 0) {
2843 			printf("%s: %s: llink remove failed, llink_id 0x%llx, "
2844 			    "error %d\n", __func__, fakeif->iff_name,
2845 			    fakeif->iff_llink[i].fl_id, err);
2846 		}
2847 		fakeif->iff_llink[i].fl_id = 0;
2848 	}
2849 	fakeif->iff_llink_cnt = 0;
2850 	nx_release(nx);
2851 }
2852 
2853 static void
detach_provider_and_instance(uuid_t provider,uuid_t instance)2854 detach_provider_and_instance(uuid_t provider, uuid_t instance)
2855 {
2856 	nexus_controller_t controller = kern_nexus_shared_controller();
2857 	errno_t err;
2858 
2859 	if (!uuid_is_null(instance)) {
2860 		err = kern_nexus_controller_free_provider_instance(controller,
2861 		    instance);
2862 		if (err != 0) {
2863 			printf("%s free_provider_instance failed %d\n",
2864 			    __func__, err);
2865 		}
2866 		uuid_clear(instance);
2867 	}
2868 	if (!uuid_is_null(provider)) {
2869 		err = kern_nexus_controller_deregister_provider(controller,
2870 		    provider);
2871 		if (err != 0) {
2872 			printf("%s deregister_provider %d\n", __func__, err);
2873 		}
2874 		uuid_clear(provider);
2875 	}
2876 	return;
2877 }
2878 
2879 static void
feth_detach_netif_nexus(if_fake_ref fakeif)2880 feth_detach_netif_nexus(if_fake_ref fakeif)
2881 {
2882 	fake_nx_t fnx = &fakeif->iff_nx;
2883 
2884 	remove_non_default_llinks(fakeif);
2885 	detach_provider_and_instance(fnx->fnx_provider, fnx->fnx_instance);
2886 }
2887 
2888 #endif /* SKYWALK */
2889 
2890 /**
2891 ** feth interface routines
2892 **/
2893 static void
feth_ifnet_set_attrs(if_fake_ref fakeif,ifnet_t ifp)2894 feth_ifnet_set_attrs(if_fake_ref fakeif, ifnet_t ifp)
2895 {
2896 	(void)ifnet_set_capabilities_enabled(ifp, 0, -1);
2897 	ifnet_set_addrlen(ifp, ETHER_ADDR_LEN);
2898 	ifnet_set_baudrate(ifp, 0);
2899 	ifnet_set_mtu(ifp, ETHERMTU);
2900 	ifnet_set_flags(ifp,
2901 	    IFF_BROADCAST | IFF_MULTICAST | IFF_SIMPLEX,
2902 	    0xffff);
2903 	ifnet_set_hdrlen(ifp, sizeof(struct ether_header));
2904 	if ((fakeif->iff_flags & IFF_FLAGS_HWCSUM) != 0) {
2905 		ifnet_set_offload(ifp,
2906 		    IFNET_CSUM_IP | IFNET_CSUM_TCP | IFNET_CSUM_UDP |
2907 		    IFNET_CSUM_TCPIPV6 | IFNET_CSUM_UDPIPV6);
2908 	} else {
2909 		ifnet_set_offload(ifp, 0);
2910 	}
2911 }
2912 
2913 static void
interface_link_event(ifnet_t ifp,u_int32_t event_code)2914 interface_link_event(ifnet_t ifp, u_int32_t event_code)
2915 {
2916 	struct event {
2917 		u_int32_t ifnet_family;
2918 		u_int32_t unit;
2919 		char if_name[IFNAMSIZ];
2920 	};
2921 	_Alignas(struct kern_event_msg) char message[sizeof(struct kern_event_msg) + sizeof(struct event)] = { 0 };
2922 	struct kern_event_msg *header = (struct kern_event_msg*)message;
2923 	struct event *data = (struct event *)(header + 1);
2924 
2925 	header->total_size   = sizeof(message);
2926 	header->vendor_code  = KEV_VENDOR_APPLE;
2927 	header->kev_class    = KEV_NETWORK_CLASS;
2928 	header->kev_subclass = KEV_DL_SUBCLASS;
2929 	header->event_code   = event_code;
2930 	data->ifnet_family   = ifnet_family(ifp);
2931 	data->unit           = (u_int32_t)ifnet_unit(ifp);
2932 	strlcpy(data->if_name, ifnet_name(ifp), IFNAMSIZ);
2933 	ifnet_event(ifp, header);
2934 }
2935 
2936 static if_fake_ref
ifnet_get_if_fake(ifnet_t ifp)2937 ifnet_get_if_fake(ifnet_t ifp)
2938 {
2939 	return (if_fake_ref)ifnet_softc(ifp);
2940 }
2941 
2942 static int
feth_clone_create(struct if_clone * ifc,u_int32_t unit,__unused void * params)2943 feth_clone_create(struct if_clone *ifc, u_int32_t unit, __unused void *params)
2944 {
2945 	int                             error;
2946 	if_fake_ref                     fakeif;
2947 	struct ifnet_init_eparams       feth_init;
2948 	ifnet_t                         ifp;
2949 	uint8_t                         mac_address[ETHER_ADDR_LEN];
2950 	fake_llink                     *iff_llink;
2951 
2952 	iff_llink = kalloc_type(fake_llink, FETH_MAX_LLINKS, Z_WAITOK_ZERO);
2953 	if (iff_llink == NULL) {
2954 		return ENOBUFS;
2955 	}
2956 	fakeif = kalloc_type(struct if_fake, Z_WAITOK_ZERO_NOFAIL);
2957 	fakeif->iff_llink = iff_llink;
2958 	fakeif->iff_retain_count = 1;
2959 #define FAKE_ETHER_NAME_LEN     (sizeof(FAKE_ETHER_NAME) - 1)
2960 	_CASSERT(FAKE_ETHER_NAME_LEN == 4);
2961 	bcopy(FAKE_ETHER_NAME, mac_address, FAKE_ETHER_NAME_LEN);
2962 	mac_address[ETHER_ADDR_LEN - 2] = (unit & 0xff00) >> 8;
2963 	mac_address[ETHER_ADDR_LEN - 1] = unit & 0xff;
2964 	if (if_fake_bsd_mode != 0) {
2965 		fakeif->iff_flags |= IFF_FLAGS_BSD_MODE;
2966 	}
2967 	if (if_fake_hwcsum != 0) {
2968 		fakeif->iff_flags |= IFF_FLAGS_HWCSUM;
2969 	}
2970 	fakeif->iff_max_mtu = get_max_mtu(if_fake_bsd_mode, if_fake_max_mtu);
2971 	fakeif->iff_fcs = if_fake_fcs;
2972 	fakeif->iff_trailer_length = if_fake_trailer_length;
2973 
2974 	/* use the interface name as the unique id for ifp recycle */
2975 	if ((unsigned int)
2976 	    snprintf(fakeif->iff_name, sizeof(fakeif->iff_name), "%s%d",
2977 	    ifc->ifc_name, unit) >= sizeof(fakeif->iff_name)) {
2978 		feth_release(fakeif);
2979 		return EINVAL;
2980 	}
2981 	bzero(&feth_init, sizeof(feth_init));
2982 	feth_init.ver = IFNET_INIT_CURRENT_VERSION;
2983 	feth_init.len = sizeof(feth_init);
2984 	if (feth_in_bsd_mode(fakeif)) {
2985 		if (if_fake_txstart != 0) {
2986 			feth_init.start = feth_start;
2987 		} else {
2988 			feth_init.flags |= IFNET_INIT_LEGACY;
2989 			feth_init.output = feth_output;
2990 		}
2991 	}
2992 #if SKYWALK
2993 	else {
2994 		feth_init.flags |= IFNET_INIT_SKYWALK_NATIVE;
2995 		/*
2996 		 * Currently we support WMM mode only for Skywalk native
2997 		 * interface.
2998 		 */
2999 		if (if_fake_wmm_mode != 0) {
3000 			fakeif->iff_flags |= IFF_FLAGS_WMM_MODE;
3001 		}
3002 
3003 		if (if_fake_multibuflet != 0) {
3004 			fakeif->iff_flags |= IFF_FLAGS_MULTIBUFLETS;
3005 		}
3006 
3007 		if (if_fake_multibuflet != 0 &&
3008 		    if_fake_pktpool_mode == IFF_PP_MODE_PRIVATE_SPLIT) {
3009 			printf("%s: multi-buflet not supported for split rx &"
3010 			    " tx pool", __func__);
3011 			feth_release(fakeif);
3012 			return EINVAL;
3013 		}
3014 
3015 		fakeif->iff_pp_mode = if_fake_pktpool_mode;
3016 		if (if_fake_tso_support != 0) {
3017 			if (fakeif->iff_pp_mode != IFF_PP_MODE_GLOBAL) {
3018 				printf("%s: TSO mode requires global packet"
3019 				    " pool mode\n", __func__);
3020 				return EINVAL;
3021 			}
3022 			fakeif->iff_flags |= IFF_FLAGS_TSO_SUPPORT;
3023 		}
3024 
3025 		fakeif->iff_tx_headroom = if_fake_tx_headroom;
3026 		fakeif->iff_adv_interval = if_fake_if_adv_interval;
3027 		if (fakeif->iff_adv_interval > 0) {
3028 			feth_init.flags |= IFNET_INIT_IF_ADV;
3029 		}
3030 		fakeif->iff_tx_drop_rate = if_fake_tx_drops;
3031 		fakeif->iff_tx_completion_mode = if_tx_completion_mode;
3032 	}
3033 	feth_init.tx_headroom = fakeif->iff_tx_headroom;
3034 #endif /* SKYWALK */
3035 	if (if_fake_nxattach == 0) {
3036 		feth_init.flags |= IFNET_INIT_NX_NOAUTO;
3037 	}
3038 	feth_init.uniqueid = fakeif->iff_name;
3039 	feth_init.uniqueid_len = strlen(fakeif->iff_name);
3040 	feth_init.name = ifc->ifc_name;
3041 	feth_init.unit = unit;
3042 	feth_init.family = IFNET_FAMILY_ETHERNET;
3043 	feth_init.type = IFT_ETHER;
3044 	feth_init.demux = ether_demux;
3045 	feth_init.add_proto = ether_add_proto;
3046 	feth_init.del_proto = ether_del_proto;
3047 	feth_init.check_multi = ether_check_multi;
3048 	feth_init.framer_extended = ether_frameout_extended;
3049 	feth_init.softc = fakeif;
3050 	feth_init.ioctl = feth_ioctl;
3051 	feth_init.set_bpf_tap = NULL;
3052 	feth_init.detach = feth_if_free;
3053 	feth_init.broadcast_addr = etherbroadcastaddr;
3054 	feth_init.broadcast_len = ETHER_ADDR_LEN;
3055 	if (feth_in_bsd_mode(fakeif)) {
3056 		error = ifnet_allocate_extended(&feth_init, &ifp);
3057 		if (error) {
3058 			feth_release(fakeif);
3059 			return error;
3060 		}
3061 		feth_ifnet_set_attrs(fakeif, ifp);
3062 	}
3063 #if SKYWALK
3064 	else {
3065 		if (feth_in_wmm_mode(fakeif)) {
3066 			feth_init.output_sched_model =
3067 			    IFNET_SCHED_MODEL_DRIVER_MANAGED;
3068 		}
3069 		error = feth_attach_netif_nexus(fakeif, &feth_init, &ifp);
3070 		if (error != 0) {
3071 			feth_release(fakeif);
3072 			return error;
3073 		}
3074 		/* take an additional reference to ensure that it doesn't go away */
3075 		feth_retain(fakeif);
3076 		fakeif->iff_ifp = ifp;
3077 	}
3078 #endif /* SKYWALK */
3079 	fakeif->iff_media_count = MIN(default_media_words_count, IF_FAKE_MEDIA_LIST_MAX);
3080 	bcopy(default_media_words, fakeif->iff_media_list,
3081 	    fakeif->iff_media_count * sizeof(fakeif->iff_media_list[0]));
3082 	if (feth_in_bsd_mode(fakeif)) {
3083 		error = ifnet_attach(ifp, NULL);
3084 		if (error) {
3085 			ifnet_release(ifp);
3086 			feth_release(fakeif);
3087 			return error;
3088 		}
3089 		fakeif->iff_ifp = ifp;
3090 	}
3091 
3092 	ifnet_set_lladdr(ifp, mac_address, sizeof(mac_address));
3093 
3094 	/* attach as ethernet */
3095 	bpfattach(ifp, DLT_EN10MB, sizeof(struct ether_header));
3096 	return 0;
3097 }
3098 
/*
 * feth_clone_destroy
 * - if_clone destroy callback for "feth<unit>"
 * - marks the softc as detaching under the lock (so a concurrent
 *   destroy becomes a no-op), tears down the Skywalk nexus if one was
 *   attached, disconnects any peer, and detaches the ifnet
 */
static int
feth_clone_destroy(ifnet_t ifp)
{
	if_fake_ref     fakeif;
#if SKYWALK
	boolean_t       nx_attached = FALSE;
#endif /* SKYWALK */

	feth_lock();
	fakeif = ifnet_get_if_fake(ifp);
	if (fakeif == NULL || feth_is_detaching(fakeif)) {
		/* already detached or another destroy is in progress */
		feth_unlock();
		return 0;
	}
	feth_set_detaching(fakeif);
#if SKYWALK
	/* non-BSD mode means a netif nexus was attached at create time */
	nx_attached = !feth_in_bsd_mode(fakeif);
#endif /* SKYWALK */
	feth_unlock();

#if SKYWALK
	if (nx_attached) {
		feth_detach_netif_nexus(fakeif);
		/* drop the extra reference taken in feth_clone_create() */
		feth_release(fakeif);
	}
#endif /* SKYWALK */
	/* disconnect from the peer, if any, then detach the ifnet */
	feth_config(ifp, NULL);
	ifnet_detach(ifp);
	return 0;
}
3129 
3130 static void
feth_enqueue_input(ifnet_t ifp,struct mbuf * m)3131 feth_enqueue_input(ifnet_t ifp, struct mbuf * m)
3132 {
3133 	struct ifnet_stat_increment_param stats = {};
3134 
3135 	stats.packets_in = 1;
3136 	stats.bytes_in = (uint32_t)mbuf_pkthdr_len(m) + ETHER_HDR_LEN;
3137 	ifnet_input(ifp, m, &stats);
3138 }
3139 
/*
 * copy_mbuf
 * - duplicate packet `m' into a single freshly allocated mbuf
 *   (attaching a regular/big/16K cluster as needed to fit the whole
 *   packet contiguously)
 * - returns the copy, or NULL on failure (not a packet header, packet
 *   too large, or allocation failure); the original is left untouched
 */
static struct mbuf *
copy_mbuf(struct mbuf *m)
{
	struct mbuf *   copy_m;
	uint32_t        pkt_len;
	uint32_t        offset;

	if ((m->m_flags & M_PKTHDR) == 0) {
		return NULL;
	}
	pkt_len = m->m_pkthdr.len;
	MGETHDR(copy_m, M_DONTWAIT, MT_DATA);
	if (copy_m == NULL) {
		goto failed;
	}
	if (pkt_len > MHLEN) {
		/* attach the smallest cluster that holds the whole packet */
		if (pkt_len <= MCLBYTES) {
			MCLGET(copy_m, M_DONTWAIT);
		} else if (pkt_len <= MBIGCLBYTES) {
			copy_m = m_mbigget(copy_m, M_DONTWAIT);
		} else if (pkt_len <= M16KCLBYTES && njcl > 0) {
			/* 16K clusters only exist when njcl is configured */
			copy_m = m_m16kget(copy_m, M_DONTWAIT);
		} else {
			printf("if_fake: copy_mbuf(): packet too large %d\n",
			    pkt_len);
			goto failed;
		}
		if (copy_m == NULL || (copy_m->m_flags & M_EXT) == 0) {
			goto failed;
		}
	}
	mbuf_setlen(copy_m, pkt_len);
	copy_m->m_pkthdr.len = pkt_len;
	/* preserve the service class for QoS-aware paths */
	copy_m->m_pkthdr.pkt_svc = m->m_pkthdr.pkt_svc;
	offset = 0;
	/* flatten the source chain fragment-by-fragment into copy_m */
	while (m != NULL && offset < pkt_len) {
		uint32_t        frag_len;

		frag_len = m->m_len;
		if (frag_len > (pkt_len - offset)) {
			/* chain is longer than the header claims; bail */
			printf("if_fake_: Large mbuf fragment %d > %d\n",
			    frag_len, (pkt_len - offset));
			goto failed;
		}
		m_copydata(m, 0, frag_len, mtodo(copy_m, offset));
		offset += frag_len;
		m = m->m_next;
	}
	return copy_m;

failed:
	if (copy_m != NULL) {
		m_freem(copy_m);
	}
	return NULL;
}
3196 
3197 static int
feth_add_mbuf_trailer(struct mbuf * m,void * trailer,size_t trailer_len)3198 feth_add_mbuf_trailer(struct mbuf *m, void *trailer, size_t trailer_len)
3199 {
3200 	int ret;
3201 	ASSERT(trailer_len <= FETH_TRAILER_LENGTH_MAX);
3202 
3203 	ret = m_append(m, trailer_len, (caddr_t)trailer);
3204 	if (ret == 1) {
3205 		FETH_DPRINTF("%s %zuB trailer added\n", __func__, trailer_len);
3206 		return 0;
3207 	}
3208 	printf("%s m_append failed\n", __func__);
3209 	return ENOTSUP;
3210 }
3211 
3212 static int
feth_add_mbuf_fcs(struct mbuf * m)3213 feth_add_mbuf_fcs(struct mbuf *m)
3214 {
3215 	uint32_t pkt_len, offset = 0;
3216 	uint32_t crc = 0;
3217 	int err = 0;
3218 
3219 	ASSERT(sizeof(crc) == ETHER_CRC_LEN);
3220 
3221 	pkt_len = m->m_pkthdr.len;
3222 	struct mbuf *iter = m;
3223 	while (iter != NULL && offset < pkt_len) {
3224 		uint32_t frag_len = iter->m_len;
3225 		ASSERT(frag_len <= (pkt_len - offset));
3226 		crc = crc32(crc, mtod(iter, void *), frag_len);
3227 		offset += frag_len;
3228 		iter = m->m_next;
3229 	}
3230 
3231 	err = feth_add_mbuf_trailer(m, &crc, ETHER_CRC_LEN);
3232 	if (err != 0) {
3233 		return err;
3234 	}
3235 
3236 	m->m_flags |= M_HASFCS;
3237 
3238 	return 0;
3239 }
3240 
/*
 * feth_output_common
 * - shared transmit path for both the legacy output and txstart models
 * - fakes checksum offload (when enabled), taps bpf on both sides,
 *   optionally appends a trailer and/or FCS, then strips the Ethernet
 *   header and hands the packet to `peer' as input
 * - consumes `m' (it is enqueued to the peer's input path)
 */
static void
feth_output_common(ifnet_t ifp, struct mbuf * m, ifnet_t peer,
    iff_flags_t flags, bool fcs, void *trailer, size_t trailer_len)
{
	void *          frame_header;

	frame_header = mbuf_data(m);
	if ((flags & IFF_FLAGS_HWCSUM) != 0) {
		/* pretend hardware validated the IP and L4 checksums */
		m->m_pkthdr.csum_data = 0xffff;
		m->m_pkthdr.csum_flags =
		    CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
		    CSUM_IP_CHECKED | CSUM_IP_VALID;
	}

	(void)ifnet_stat_increment_out(ifp, 1, m->m_pkthdr.len, 0);
	bpf_tap_out(ifp, DLT_EN10MB, m, NULL, 0);

	if (trailer != 0) {
		feth_add_mbuf_trailer(m, trailer, trailer_len);
	}
	if (fcs) {
		feth_add_mbuf_fcs(m);
	}

	/* re-home the packet on the peer and strip the Ethernet header */
	(void)mbuf_pkthdr_setrcvif(m, peer);
	mbuf_pkthdr_setheader(m, frame_header);
	mbuf_pkthdr_adjustlen(m, -ETHER_HDR_LEN);
	(void)mbuf_setdata(m, (char *)mbuf_data(m) + ETHER_HDR_LEN,
	    mbuf_len(m) - ETHER_HDR_LEN);
	bpf_tap_in(peer, DLT_EN10MB, m, frame_header,
	    sizeof(struct ether_header));
	feth_enqueue_input(peer, m);
}
3274 
/*
 * feth_start
 * - IFNET_INIT txstart callback: drain the interface send queue and
 *   deliver a copy of each packet to the peer interface
 * - iff_start_busy serializes concurrent starts; the copy of each
 *   packet is made so the original can be freed regardless of the
 *   peer's input path
 * - if copying fails mid-drain, the undelivered packet is parked in
 *   iff_pending_tx_packet and retried on the next start
 */
static void
feth_start(ifnet_t ifp)
{
	struct mbuf *   copy_m = NULL;
	if_fake_ref     fakeif;
	iff_flags_t     flags = 0;
	bool            fcs;
	size_t          trailer_len;
	ifnet_t         peer = NULL;
	struct mbuf *   m;
	struct mbuf *   save_m;

	feth_lock();
	fakeif = ifnet_get_if_fake(ifp);
	if (fakeif == NULL) {
		feth_unlock();
		return;
	}

	if (fakeif->iff_start_busy) {
		/* another thread is already draining; let it finish */
		feth_unlock();
		printf("if_fake: start is busy\n");
		return;
	}

	/* snapshot config under the lock; used after unlock below */
	peer = fakeif->iff_peer;
	flags = fakeif->iff_flags;
	fcs = fakeif->iff_fcs;
	trailer_len = fakeif->iff_trailer_length;

	/* check for pending TX */
	m = fakeif->iff_pending_tx_packet;
	if (m != NULL) {
		if (peer != NULL) {
			copy_m = copy_mbuf(m);
			if (copy_m == NULL) {
				/* still can't copy; retry next start */
				feth_unlock();
				return;
			}
		}
		fakeif->iff_pending_tx_packet = NULL;
		m_freem(m);
		m = NULL;
	}
	fakeif->iff_start_busy = TRUE;
	feth_unlock();
	save_m = NULL;
	/* drain loop runs unlocked; copy_m may carry the pending packet */
	for (;;) {
		if (copy_m != NULL) {
			VERIFY(peer != NULL);
			feth_output_common(ifp, copy_m, peer, flags, fcs,
			    feth_trailer, trailer_len);
			copy_m = NULL;
		}
		if (ifnet_dequeue(ifp, &m) != 0) {
			/* send queue empty */
			break;
		}
		if (peer == NULL) {
			m_freem(m);
		} else {
			copy_m = copy_mbuf(m);
			if (copy_m == NULL) {
				/* out of mbufs; park m for the next start */
				save_m = m;
				break;
			}
			m_freem(m);
		}
	}
	peer = NULL;
	feth_lock();
	fakeif = ifnet_get_if_fake(ifp);
	if (fakeif != NULL) {
		fakeif->iff_start_busy = FALSE;
		if (save_m != NULL && fakeif->iff_peer != NULL) {
			/* save it for next time */
			fakeif->iff_pending_tx_packet = save_m;
			save_m = NULL;
		}
	}
	feth_unlock();
	if (save_m != NULL) {
		/* didn't save packet, so free it */
		m_freem(save_m);
	}
}
3360 
3361 static int
feth_output(ifnet_t ifp,struct mbuf * m)3362 feth_output(ifnet_t ifp, struct mbuf * m)
3363 {
3364 	struct mbuf *           copy_m;
3365 	if_fake_ref             fakeif;
3366 	iff_flags_t             flags;
3367 	bool                    fcs;
3368 	size_t                  trailer_len;
3369 	ifnet_t                 peer = NULL;
3370 
3371 	if (m == NULL) {
3372 		return 0;
3373 	}
3374 	copy_m = copy_mbuf(m);
3375 	m_freem(m);
3376 	m = NULL;
3377 	if (copy_m == NULL) {
3378 		/* count this as an output error */
3379 		ifnet_stat_increment_out(ifp, 0, 0, 1);
3380 		return 0;
3381 	}
3382 	feth_lock();
3383 	fakeif = ifnet_get_if_fake(ifp);
3384 	if (fakeif != NULL) {
3385 		peer = fakeif->iff_peer;
3386 		flags = fakeif->iff_flags;
3387 		fcs = fakeif->iff_fcs;
3388 		trailer_len = fakeif->iff_trailer_length;
3389 	}
3390 	feth_unlock();
3391 	if (peer == NULL) {
3392 		m_freem(copy_m);
3393 		ifnet_stat_increment_out(ifp, 0, 0, 1);
3394 		return 0;
3395 	}
3396 	feth_output_common(ifp, copy_m, peer, flags, fcs, feth_trailer,
3397 	    trailer_len);
3398 	return 0;
3399 }
3400 
/*
 * feth_config
 * - connect `ifp' to `peer' (peer != NULL) or disconnect it from its
 *   current peer (peer == NULL)
 * - both softcs are updated symmetrically under the feth lock;
 *   KEV_DL_LINK_ON/OFF events are generated after the lock is dropped
 * - returns 0, EINVAL (bad/missing softc), or EBUSY (either side is
 *   detaching or already connected)
 */
static int
feth_config(ifnet_t ifp, ifnet_t peer)
{
	int             connected = FALSE;
	int             disconnected = FALSE;
	int             error = 0;
	if_fake_ref     fakeif = NULL;

	feth_lock();
	fakeif = ifnet_get_if_fake(ifp);
	if (fakeif == NULL) {
		error = EINVAL;
		goto done;
	}
	if (peer != NULL) {
		/* connect to peer */
		if_fake_ref     peer_fakeif;

		peer_fakeif = ifnet_get_if_fake(peer);
		if (peer_fakeif == NULL) {
			error = EINVAL;
			goto done;
		}
		if (feth_is_detaching(fakeif) ||
		    feth_is_detaching(peer_fakeif) ||
		    peer_fakeif->iff_peer != NULL ||
		    fakeif->iff_peer != NULL) {
			/* either side going away or already paired */
			error = EBUSY;
			goto done;
		}
#if SKYWALK
		/* both ends must use the same packet pool mode */
		if (fakeif->iff_pp_mode !=
		    peer_fakeif->iff_pp_mode) {
			error = EINVAL;
			goto done;
		}
#endif /* SKYWALK */
		fakeif->iff_peer = peer;
		peer_fakeif->iff_peer = ifp;
		connected = TRUE;
	} else if (fakeif->iff_peer != NULL) {
		/* disconnect from peer */
		if_fake_ref     peer_fakeif;

		peer = fakeif->iff_peer;
		peer_fakeif = ifnet_get_if_fake(peer);
		if (peer_fakeif == NULL) {
			/* should not happen */
			error = EINVAL;
			goto done;
		}
		fakeif->iff_peer = NULL;
		peer_fakeif->iff_peer = NULL;
		disconnected = TRUE;
	}

done:
	feth_unlock();

	/* generate link status event if we connect or disconnect */
	if (connected) {
		interface_link_event(ifp, KEV_DL_LINK_ON);
		interface_link_event(peer, KEV_DL_LINK_ON);
	} else if (disconnected) {
		interface_link_event(ifp, KEV_DL_LINK_OFF);
		interface_link_event(peer, KEV_DL_LINK_OFF);
	}
	return error;
}
3470 
3471 static int
feth_set_media(ifnet_t ifp,struct if_fake_request * iffr)3472 feth_set_media(ifnet_t ifp, struct if_fake_request * iffr)
3473 {
3474 	if_fake_ref     fakeif;
3475 	int             error;
3476 
3477 	if (iffr->iffr_media.iffm_count > IF_FAKE_MEDIA_LIST_MAX) {
3478 		/* list is too long */
3479 		return EINVAL;
3480 	}
3481 	feth_lock();
3482 	fakeif = ifnet_get_if_fake(ifp);
3483 	if (fakeif == NULL) {
3484 		error = EINVAL;
3485 		goto done;
3486 	}
3487 	fakeif->iff_media_count = iffr->iffr_media.iffm_count;
3488 	bcopy(iffr->iffr_media.iffm_list, fakeif->iff_media_list,
3489 	    iffr->iffr_media.iffm_count * sizeof(fakeif->iff_media_list[0]));
3490 #if 0
3491 	/* XXX: "auto-negotiate" active with peer? */
3492 	/* generate link status event? */
3493 	fakeif->iff_media_current = iffr->iffr_media.iffm_current;
3494 #endif
3495 	error = 0;
3496 done:
3497 	feth_unlock();
3498 	return error;
3499 }
3500 
3501 static int
if_fake_request_copyin(user_addr_t user_addr,struct if_fake_request * iffr,u_int32_t len)3502 if_fake_request_copyin(user_addr_t user_addr,
3503     struct if_fake_request *iffr, u_int32_t len)
3504 {
3505 	int     error;
3506 
3507 	if (user_addr == USER_ADDR_NULL || len < sizeof(*iffr)) {
3508 		error = EINVAL;
3509 		goto done;
3510 	}
3511 	error = copyin(user_addr, iffr, sizeof(*iffr));
3512 	if (error != 0) {
3513 		goto done;
3514 	}
3515 	if (iffr->iffr_reserved[0] != 0 || iffr->iffr_reserved[1] != 0 ||
3516 	    iffr->iffr_reserved[2] != 0 || iffr->iffr_reserved[3] != 0) {
3517 		error = EINVAL;
3518 		goto done;
3519 	}
3520 done:
3521 	return error;
3522 }
3523 
3524 static int
feth_set_drvspec(ifnet_t ifp,uint32_t cmd,u_int32_t len,user_addr_t user_addr)3525 feth_set_drvspec(ifnet_t ifp, uint32_t cmd, u_int32_t len,
3526     user_addr_t user_addr)
3527 {
3528 	int                     error;
3529 	struct if_fake_request  iffr;
3530 	ifnet_t                 peer;
3531 
3532 	switch (cmd) {
3533 	case IF_FAKE_S_CMD_SET_PEER:
3534 		error = if_fake_request_copyin(user_addr, &iffr, len);
3535 		if (error != 0) {
3536 			break;
3537 		}
3538 		if (iffr.iffr_peer_name[0] == '\0') {
3539 			error = feth_config(ifp, NULL);
3540 			break;
3541 		}
3542 
3543 		/* ensure nul termination */
3544 		iffr.iffr_peer_name[IFNAMSIZ - 1] = '\0';
3545 		peer = ifunit(iffr.iffr_peer_name);
3546 		if (peer == NULL) {
3547 			error = ENXIO;
3548 			break;
3549 		}
3550 		if (ifnet_type(peer) != IFT_ETHER) {
3551 			error = EINVAL;
3552 			break;
3553 		}
3554 		if (strcmp(ifnet_name(peer), FAKE_ETHER_NAME) != 0) {
3555 			error = EINVAL;
3556 			break;
3557 		}
3558 		error = feth_config(ifp, peer);
3559 		break;
3560 	case IF_FAKE_S_CMD_SET_MEDIA:
3561 		error = if_fake_request_copyin(user_addr, &iffr, len);
3562 		if (error != 0) {
3563 			break;
3564 		}
3565 		error = feth_set_media(ifp, &iffr);
3566 		break;
3567 	case IF_FAKE_S_CMD_SET_DEQUEUE_STALL:
3568 		error = if_fake_request_copyin(user_addr, &iffr, len);
3569 		if (error != 0) {
3570 			break;
3571 		}
3572 		error = feth_enable_dequeue_stall(ifp,
3573 		    iffr.iffr_dequeue_stall);
3574 		break;
3575 	default:
3576 		error = EOPNOTSUPP;
3577 		break;
3578 	}
3579 	return error;
3580 }
3581 
3582 static int
feth_get_drvspec(ifnet_t ifp,u_int32_t cmd,u_int32_t len,user_addr_t user_addr)3583 feth_get_drvspec(ifnet_t ifp, u_int32_t cmd, u_int32_t len,
3584     user_addr_t user_addr)
3585 {
3586 	int                     error = EOPNOTSUPP;
3587 	if_fake_ref             fakeif;
3588 	struct if_fake_request  iffr;
3589 	ifnet_t                 peer;
3590 
3591 	switch (cmd) {
3592 	case IF_FAKE_G_CMD_GET_PEER:
3593 		if (len < sizeof(iffr)) {
3594 			error = EINVAL;
3595 			break;
3596 		}
3597 		feth_lock();
3598 		fakeif = ifnet_get_if_fake(ifp);
3599 		if (fakeif == NULL) {
3600 			feth_unlock();
3601 			error = EOPNOTSUPP;
3602 			break;
3603 		}
3604 		peer = fakeif->iff_peer;
3605 		feth_unlock();
3606 		bzero(&iffr, sizeof(iffr));
3607 		if (peer != NULL) {
3608 			strlcpy(iffr.iffr_peer_name,
3609 			    if_name(peer),
3610 			    sizeof(iffr.iffr_peer_name));
3611 		}
3612 		error = copyout(&iffr, user_addr, sizeof(iffr));
3613 		break;
3614 	default:
3615 		break;
3616 	}
3617 	return error;
3618 }
3619 
/* view of the SIOC[SG]DRVSPEC argument for 32- and 64-bit processes */
union ifdrvu {
	struct ifdrv32  *ifdrvu_32;
	struct ifdrv64  *ifdrvu_64;
	void            *ifdrvu_p;
};
3625 
/*
 * feth_ioctl
 * - ifnet ioctl handler for feth interfaces
 * - supports address assignment, media queries, MTU get/set, the
 *   driver-specific SIOC[SG]DRVSPEC commands (32- and 64-bit layouts),
 *   link-layer address changes, and up/down flag transitions
 */
static int
feth_ioctl(ifnet_t ifp, u_long cmd, void * data)
{
	unsigned int            count;
	struct ifdevmtu *       devmtu_p;
	union ifdrvu            drv;
	uint32_t                drv_cmd;
	uint32_t                drv_len;
	boolean_t               drv_set_command = FALSE;
	int                     error = 0;
	struct ifmediareq *     ifmr;
	struct ifreq *          ifr;
	if_fake_ref             fakeif;
	int                     status;
	user_addr_t             user_addr;

	ifr = (struct ifreq *)data;
	switch (cmd) {
	case SIOCSIFADDR:
		ifnet_set_flags(ifp, IFF_UP, IFF_UP);
		break;

	case SIOCGIFMEDIA32:
	case SIOCGIFMEDIA64:
		feth_lock();
		fakeif = ifnet_get_if_fake(ifp);
		if (fakeif == NULL) {
			feth_unlock();
			return EOPNOTSUPP;
		}
		/* link is "active" only while connected to a peer */
		status = (fakeif->iff_peer != NULL)
		    ? (IFM_AVALID | IFM_ACTIVE) : IFM_AVALID;
		ifmr = (struct ifmediareq *)data;
		user_addr = (cmd == SIOCGIFMEDIA64) ?
		    ((struct ifmediareq64 *)ifmr)->ifmu_ulist :
		    CAST_USER_ADDR_T(((struct ifmediareq32 *)ifmr)->ifmu_ulist);
		count = ifmr->ifm_count;
		ifmr->ifm_active = IFM_ETHER;
		ifmr->ifm_current = IFM_ETHER;
		ifmr->ifm_mask = 0;
		ifmr->ifm_status = status;
		if (user_addr == USER_ADDR_NULL) {
			/* size-probe only: report how many media words */
			ifmr->ifm_count = fakeif->iff_media_count;
		} else if (count > 0) {
			if (count > fakeif->iff_media_count) {
				count = fakeif->iff_media_count;
			}
			ifmr->ifm_count = count;
			error = copyout(&fakeif->iff_media_list, user_addr,
			    count * sizeof(int));
		}
		feth_unlock();
		break;

	case SIOCGIFDEVMTU:
		devmtu_p = &ifr->ifr_devmtu;
		devmtu_p->ifdm_current = ifnet_mtu(ifp);
		devmtu_p->ifdm_max = feth_max_mtu(ifp);
		devmtu_p->ifdm_min = IF_MINMTU;
		break;

	case SIOCSIFMTU:
		if ((unsigned int)ifr->ifr_mtu > feth_max_mtu(ifp) ||
		    ifr->ifr_mtu < IF_MINMTU) {
			error = EINVAL;
		} else {
			error = ifnet_set_mtu(ifp, ifr->ifr_mtu);
		}
		break;

	case SIOCSDRVSPEC32:
	case SIOCSDRVSPEC64:
		/* set commands require superuser */
		error = proc_suser(current_proc());
		if (error != 0) {
			break;
		}
		drv_set_command = TRUE;
		OS_FALLTHROUGH;
	case SIOCGDRVSPEC32:
	case SIOCGDRVSPEC64:
		/* unpack the 32- or 64-bit ifdrv layout */
		drv.ifdrvu_p = data;
		if (cmd == SIOCGDRVSPEC32 || cmd == SIOCSDRVSPEC32) {
			drv_cmd = drv.ifdrvu_32->ifd_cmd;
			drv_len = drv.ifdrvu_32->ifd_len;
			user_addr = CAST_USER_ADDR_T(drv.ifdrvu_32->ifd_data);
		} else {
			drv_cmd = drv.ifdrvu_64->ifd_cmd;
			drv_len = drv.ifdrvu_64->ifd_len;
			user_addr = drv.ifdrvu_64->ifd_data;
		}
		if (drv_set_command) {
			error = feth_set_drvspec(ifp, drv_cmd, drv_len,
			    user_addr);
		} else {
			error = feth_get_drvspec(ifp, drv_cmd, drv_len,
			    user_addr);
		}
		break;

	case SIOCSIFLLADDR:
		error = ifnet_set_lladdr(ifp, ifr->ifr_addr.sa_data,
		    ifr->ifr_addr.sa_len);
		break;

	case SIOCSIFFLAGS:
		if ((ifp->if_flags & IFF_UP) != 0) {
			/* marked up, set running if not already set */
			if ((ifp->if_flags & IFF_RUNNING) == 0) {
				/* set running */
				error = ifnet_set_flags(ifp, IFF_RUNNING,
				    IFF_RUNNING);
			}
		} else if ((ifp->if_flags & IFF_RUNNING) != 0) {
			/* marked down, clear running */
			error = ifnet_set_flags(ifp, 0, IFF_RUNNING);
		}
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		/* multicast filtering is a no-op on a fake interface */
		error = 0;
		break;
	default:
		error = EOPNOTSUPP;
		break;
	}
	return error;
}
3754 
3755 static void
feth_if_free(ifnet_t ifp)3756 feth_if_free(ifnet_t ifp)
3757 {
3758 	if_fake_ref             fakeif;
3759 
3760 	if (ifp == NULL) {
3761 		return;
3762 	}
3763 	feth_lock();
3764 	fakeif = ifnet_get_if_fake(ifp);
3765 	if (fakeif == NULL) {
3766 		feth_unlock();
3767 		return;
3768 	}
3769 	ifp->if_softc = NULL;
3770 #if SKYWALK
3771 	VERIFY(fakeif->iff_doorbell_tcall == NULL);
3772 #endif /* SKYWALK */
3773 	feth_unlock();
3774 	feth_release(fakeif);
3775 	ifnet_release(ifp);
3776 	return;
3777 }
3778 
3779 __private_extern__ void
if_fake_init(void)3780 if_fake_init(void)
3781 {
3782 	int error;
3783 
3784 #if SKYWALK
3785 	(void)feth_register_nexus_domain_provider();
3786 #endif /* SKYWALK */
3787 	error = if_clone_attach(&feth_cloner);
3788 	if (error != 0) {
3789 		return;
3790 	}
3791 	return;
3792 }
3793