xref: /xnu-11215.61.5/bsd/net/if_fake.c (revision 4f1223e81cd707a65cc109d0b8ad6653699da3c4)
1 /*
2  * Copyright (c) 2015-2024 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 /*
30  * if_fake.c
31  * - fake network interface used for testing
32  * - "feth" (e.g. "feth0", "feth1") is a virtual ethernet interface that allows
33  *   two instances to have their output/input paths "crossed-over" so that
34  *   output on one is input on the other
35  */
36 
37 /*
38  * Modification History:
39  *
40  * September 9, 2015	Dieter Siegmund ([email protected])
41  * - created
42  */
43 
44 #include <sys/param.h>
45 #include <sys/kernel.h>
46 #include <sys/malloc.h>
47 #include <sys/mbuf.h>
48 #include <sys/queue.h>
49 #include <sys/socket.h>
50 #include <sys/sockio.h>
51 #include <sys/sysctl.h>
52 #include <sys/systm.h>
53 #include <sys/kern_event.h>
54 #include <sys/mcache.h>
55 #include <sys/syslog.h>
56 
57 #include <net/bpf.h>
58 #include <net/ethernet.h>
59 #include <net/if.h>
60 #include <net/if_vlan_var.h>
61 #include <net/if_fake_var.h>
62 #include <net/if_arp.h>
63 #include <net/if_dl.h>
64 #include <net/if_ether.h>
65 #include <net/if_types.h>
66 #include <libkern/OSAtomic.h>
67 
68 #include <net/dlil.h>
69 
70 #include <net/kpi_interface.h>
71 #include <net/kpi_protocol.h>
72 
73 #include <kern/locks.h>
74 #include <kern/zalloc.h>
75 
76 #include <mach/mach_time.h>
77 
78 #include <os/log.h>
79 
80 #ifdef INET
81 #include <netinet/in.h>
82 #include <netinet/if_ether.h>
83 #endif
84 
85 #include <net/if_media.h>
86 #include <net/ether_if_module.h>
87 #if SKYWALK
88 #include <skywalk/os_skywalk_private.h>
89 #include <skywalk/nexus/netif/nx_netif.h>
90 #include <skywalk/channel/channel_var.h>
91 #endif /* SKYWALK */
92 
93 /*
94  * if_fake_debug, FE_DBGF_*
95  * - 'if_fake_debug' is a bitmask of FE_DBGF_* flags that can be set
96  *   to enable additional logs for the corresponding fake function
97  * - "sysctl net.link.fake.debug" controls the value of
98  *   'if_fake_debug'
99  */
100 static uint32_t if_fake_debug = 0;
101 
102 #define FE_DBGF_LIFECYCLE               0x0001
103 #define FE_DBGF_INPUT                   0x0002
104 #define FE_DBGF_OUTPUT                  0x0004
105 #define FE_DBGF_CONTROL                 0x0008
106 #define FE_DBGF_MISC                    0x0010
107 
108 /*
109  * if_fake_log_level
110  * - 'if_fake_log_level' ensures that by default important logs are
111  *   logged regardless of if_fake_debug by comparing the log level
112  *   in FAKE_LOG to if_fake_log_level
 * - "sysctl net.link.fake.log_level" controls the value of
114  *   'if_fake_log_level'
115  * - the default value of 'if_fake_log_level' is LOG_NOTICE; important
116  *   logs must use LOG_NOTICE to ensure they appear by default
117  */
118 #define FAKE_DBGF_ENABLED(__flag)     ((if_fake_debug & __flag) != 0)
119 
120 /*
121  * FAKE_LOG
122  * - macro to generate the specified log conditionally based on
123  *   the specified log level and debug flags
124  */
125 #define FAKE_LOG(__level, __dbgf, __string, ...)              \
126 	do {                                                            \
127 	        if (__level <= if_fake_log_level ||                   \
128 	            FAKE_DBGF_ENABLED(__dbgf)) {                      \
129 	                os_log(OS_LOG_DEFAULT, "%s: " __string, \
130 	                       __func__, ## __VA_ARGS__);       \
131 	        }                                                       \
132 	} while (0)
133 
134 static boolean_t
is_power_of_two(unsigned int val)135 is_power_of_two(unsigned int val)
136 {
137 	return (val & (val - 1)) == 0;
138 }
139 
140 #define FAKE_ETHER_NAME         "feth"
141 
142 SYSCTL_DECL(_net_link);
143 SYSCTL_NODE(_net_link, OID_AUTO, fake, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
144     "Fake interface");
145 
146 static int if_fake_txstart = 1;
147 SYSCTL_INT(_net_link_fake, OID_AUTO, txstart, CTLFLAG_RW | CTLFLAG_LOCKED,
148     &if_fake_txstart, 0, "Fake interface TXSTART mode");
149 
150 static int if_fake_hwcsum = 0;
151 SYSCTL_INT(_net_link_fake, OID_AUTO, hwcsum, CTLFLAG_RW | CTLFLAG_LOCKED,
152     &if_fake_hwcsum, 0, "Fake interface simulate hardware checksum");
153 
154 static int if_fake_vlan_tagging = 1;
155 SYSCTL_INT(_net_link_fake, OID_AUTO, vlan_tagging, CTLFLAG_RW | CTLFLAG_LOCKED,
156     &if_fake_vlan_tagging, 0, "Fake interface VLAN tagging");
157 
158 static int if_fake_nxattach = 0;
159 SYSCTL_INT(_net_link_fake, OID_AUTO, nxattach, CTLFLAG_RW | CTLFLAG_LOCKED,
160     &if_fake_nxattach, 0, "Fake interface auto-attach nexus");
161 
162 static int if_fake_bsd_mode = 1;
163 SYSCTL_INT(_net_link_fake, OID_AUTO, bsd_mode, CTLFLAG_RW | CTLFLAG_LOCKED,
164     &if_fake_bsd_mode, 0, "Fake interface attach as BSD interface");
165 
166 static int if_fake_log_level = LOG_NOTICE;
167 SYSCTL_INT(_net_link_fake, OID_AUTO, log_level, CTLFLAG_RW | CTLFLAG_LOCKED,
168     &if_fake_log_level, 0, "Fake interface log level");
169 
170 SYSCTL_INT(_net_link_fake, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
171     &if_fake_debug, 0, "Fake interface debug flags");
172 
173 static int if_fake_wmm_mode = 0;
174 SYSCTL_INT(_net_link_fake, OID_AUTO, wmm_mode, CTLFLAG_RW | CTLFLAG_LOCKED,
175     &if_fake_wmm_mode, 0, "Fake interface in 802.11 WMM mode");
176 
177 static int if_fake_multibuflet = 0;
178 SYSCTL_INT(_net_link_fake, OID_AUTO, multibuflet, CTLFLAG_RW | CTLFLAG_LOCKED,
179     &if_fake_multibuflet, 0, "Fake interface using multi-buflet packets");
180 
181 static int if_fake_low_latency = 0;
182 SYSCTL_INT(_net_link_fake, OID_AUTO, low_latency, CTLFLAG_RW | CTLFLAG_LOCKED,
183     &if_fake_low_latency, 0, "Fake interface with a low latency qset");
184 
185 static int if_fake_switch_combined_mode = 0;
186 SYSCTL_INT(_net_link_fake, OID_AUTO, switch_combined_mode,
187     CTLFLAG_RW | CTLFLAG_LOCKED, &if_fake_switch_combined_mode, 0,
188     "Switch a qset between combined and separate mode during dequeues");
189 
190 static int if_fake_switch_mode_frequency = 10;
191 SYSCTL_INT(_net_link_fake, OID_AUTO, switch_mode_frequency,
192     CTLFLAG_RW | CTLFLAG_LOCKED, &if_fake_switch_mode_frequency, 0,
193     "The number of dequeues before we switch between the combined and separated mode");
194 
195 static int if_fake_tso_support = 0;
196 SYSCTL_INT(_net_link_fake, OID_AUTO, tso_support, CTLFLAG_RW | CTLFLAG_LOCKED,
197     &if_fake_tso_support, 0, "Fake interface with support for TSO offload");
198 
199 #define DEFAULT_EXPIRATION_THRESHOLD 500 /* usec */
200 static int if_fake_expiration_threshold_us = DEFAULT_EXPIRATION_THRESHOLD;
201 SYSCTL_INT(_net_link_fake, OID_AUTO, expiration_threshold, CTLFLAG_RW | CTLFLAG_LOCKED,
202     &if_fake_expiration_threshold_us, DEFAULT_EXPIRATION_THRESHOLD,
203     "Expiration threshold (usec) for expiration testing");
204 
205 static int if_fake_lro = 0;
206 SYSCTL_INT(_net_link_fake, OID_AUTO, lro, CTLFLAG_RW | CTLFLAG_LOCKED,
207     &if_fake_lro, 0, "Fake interface report LRO capability");
208 
209 static int if_fake_separate_frame_header = 0;
210 SYSCTL_INT(_net_link_fake, OID_AUTO, separate_frame_header,
211     CTLFLAG_RW | CTLFLAG_LOCKED,
212     &if_fake_separate_frame_header, 0, "Put frame header in separate mbuf");
213 
214 typedef enum {
215 	IFF_PP_MODE_GLOBAL = 0,         /* share a global pool */
216 	IFF_PP_MODE_PRIVATE = 1,        /* creates its own rx/tx pool */
217 	IFF_PP_MODE_PRIVATE_SPLIT = 2,  /* creates its own split rx & tx pool */
218 } iff_pktpool_mode_t;
219 static iff_pktpool_mode_t if_fake_pktpool_mode = IFF_PP_MODE_GLOBAL;
220 SYSCTL_INT(_net_link_fake, OID_AUTO, pktpool_mode, CTLFLAG_RW | CTLFLAG_LOCKED,
221     &if_fake_pktpool_mode, IFF_PP_MODE_GLOBAL,
222     "Fake interface packet pool mode (0 global, 1 private, 2 private split");
223 
224 #define FETH_LINK_LAYER_AGGRETATION_FACTOR_MAX 512
225 #define FETH_LINK_LAYER_AGGRETATION_FACTOR_DEF 96
226 static int if_fake_link_layer_aggregation_factor =
227     FETH_LINK_LAYER_AGGRETATION_FACTOR_DEF;
228 static int
229 feth_link_layer_aggregation_factor_sysctl SYSCTL_HANDLER_ARGS
230 {
231 #pragma unused(oidp, arg1, arg2)
232 	unsigned int new_value;
233 	int changed;
234 	int error;
235 
236 	error = sysctl_io_number(req, if_fake_link_layer_aggregation_factor,
237 	    sizeof(if_fake_link_layer_aggregation_factor), &new_value,
238 	    &changed);
239 	if (error == 0 && changed != 0) {
240 		if (new_value <= 0 ||
241 		    new_value > FETH_LINK_LAYER_AGGRETATION_FACTOR_MAX) {
242 			return EINVAL;
243 		}
244 		if_fake_link_layer_aggregation_factor = new_value;
245 	}
246 	return error;
247 }
248 
249 SYSCTL_PROC(_net_link_fake, OID_AUTO, link_layer_aggregation_factor,
250     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
251     0, 0, feth_link_layer_aggregation_factor_sysctl, "IU",
252     "Fake interface link layer aggregation factor");
253 
254 #define FETH_TX_HEADROOM_MAX      32
255 static unsigned int if_fake_tx_headroom = FETH_TX_HEADROOM_MAX;
256 static int
257 feth_tx_headroom_sysctl SYSCTL_HANDLER_ARGS
258 {
259 #pragma unused(oidp, arg1, arg2)
260 	unsigned int new_value;
261 	int changed;
262 	int error;
263 
264 	error = sysctl_io_number(req, if_fake_tx_headroom,
265 	    sizeof(if_fake_tx_headroom), &new_value, &changed);
266 	if (error == 0 && changed != 0) {
267 		if (new_value > FETH_TX_HEADROOM_MAX ||
268 		    (new_value % 8) != 0) {
269 			return EINVAL;
270 		}
271 		if_fake_tx_headroom = new_value;
272 	}
273 	return 0;
274 }
275 
276 SYSCTL_PROC(_net_link_fake, OID_AUTO, tx_headroom,
277     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
278     0, 0, feth_tx_headroom_sysctl, "IU", "Fake ethernet Tx headroom");
279 
280 static int if_fake_fcs = 0;
281 SYSCTL_INT(_net_link_fake, OID_AUTO, fcs, CTLFLAG_RW | CTLFLAG_LOCKED,
282     &if_fake_fcs, 0, "Fake interface using frame check sequence");
283 
284 #define FETH_TRAILER_LENGTH_MAX 28
285 char feth_trailer[FETH_TRAILER_LENGTH_MAX + 1] = "trailertrailertrailertrailer";
286 static unsigned int if_fake_trailer_length = 0;
287 static int
288 feth_trailer_length_sysctl SYSCTL_HANDLER_ARGS
289 {
290 #pragma unused(oidp, arg1, arg2)
291 	unsigned int new_value;
292 	int changed;
293 	int error;
294 
295 	error = sysctl_io_number(req, if_fake_trailer_length,
296 	    sizeof(if_fake_trailer_length), &new_value, &changed);
297 	if (error == 0 && changed != 0) {
298 		if (new_value > FETH_TRAILER_LENGTH_MAX) {
299 			return EINVAL;
300 		}
301 		if_fake_trailer_length = new_value;
302 	}
303 	return 0;
304 }
305 
306 SYSCTL_PROC(_net_link_fake, OID_AUTO, trailer_length,
307     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
308     feth_trailer_length_sysctl, "IU", "Fake interface frame trailer length");
309 
310 /* sysctl net.link.fake.max_mtu */
311 #define FETH_MAX_MTU_DEFAULT    2048
312 #define FETH_MAX_MTU_MAX        ((16 * 1024) - ETHER_HDR_LEN)
313 
314 static unsigned int if_fake_max_mtu = FETH_MAX_MTU_DEFAULT;
315 
316 /* sysctl net.link.fake.buflet_size */
317 #define FETH_BUFLET_SIZE_MIN            512
318 #define FETH_BUFLET_SIZE_MAX            (32 * 1024)
319 #define FETH_TSO_BUFLET_SIZE            (16 * 1024)
320 
321 static unsigned int if_fake_buflet_size = FETH_BUFLET_SIZE_MIN;
322 static unsigned int if_fake_tso_buffer_size = FETH_TSO_BUFLET_SIZE;
323 
324 static int
325 feth_tso_buffer_size_sysctl SYSCTL_HANDLER_ARGS
326 {
327 #pragma unused(oidp, arg1, arg2)
328 	unsigned int new_value;
329 	int changed;
330 	int error;
331 
332 	error = sysctl_io_number(req, if_fake_tso_buffer_size,
333 	    sizeof(if_fake_tso_buffer_size), &new_value, &changed);
334 	if (error == 0 && changed != 0) {
335 		/* must be a power of 2 between min and max */
336 		if (new_value > FETH_BUFLET_SIZE_MAX ||
337 		    new_value < FETH_BUFLET_SIZE_MIN ||
338 		    !is_power_of_two(new_value)) {
339 			return EINVAL;
340 		}
341 		if_fake_tso_buffer_size = new_value;
342 	}
343 	return 0;
344 }
345 
346 SYSCTL_PROC(_net_link_fake, OID_AUTO, tso_buf_size,
347     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
348     0, 0, feth_tso_buffer_size_sysctl, "IU", "Fake interface TSO buffer size");
349 
350 static int
351 feth_max_mtu_sysctl SYSCTL_HANDLER_ARGS
352 {
353 #pragma unused(oidp, arg1, arg2)
354 	unsigned int new_value;
355 	int changed;
356 	int error;
357 
358 	error = sysctl_io_number(req, if_fake_max_mtu,
359 	    sizeof(if_fake_max_mtu), &new_value, &changed);
360 	if (error == 0 && changed != 0) {
361 		if (new_value > FETH_MAX_MTU_MAX ||
362 		    new_value < ETHERMTU ||
363 		    new_value <= if_fake_buflet_size) {
364 			return EINVAL;
365 		}
366 		if_fake_max_mtu = new_value;
367 	}
368 	return 0;
369 }
370 
371 SYSCTL_PROC(_net_link_fake, OID_AUTO, max_mtu,
372     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
373     0, 0, feth_max_mtu_sysctl, "IU", "Fake interface maximum MTU");
374 
375 static int
376 feth_buflet_size_sysctl SYSCTL_HANDLER_ARGS
377 {
378 #pragma unused(oidp, arg1, arg2)
379 	unsigned int new_value;
380 	int changed;
381 	int error;
382 
383 	error = sysctl_io_number(req, if_fake_buflet_size,
384 	    sizeof(if_fake_buflet_size), &new_value, &changed);
385 	if (error == 0 && changed != 0) {
386 		/* must be a power of 2 between min and max */
387 		if (new_value > FETH_BUFLET_SIZE_MAX ||
388 		    new_value < FETH_BUFLET_SIZE_MIN ||
389 		    !is_power_of_two(new_value) ||
390 		    new_value >= if_fake_max_mtu) {
391 			return EINVAL;
392 		}
393 		if_fake_buflet_size = new_value;
394 	}
395 	return 0;
396 }
397 
398 SYSCTL_PROC(_net_link_fake, OID_AUTO, buflet_size,
399     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
400     0, 0, feth_buflet_size_sysctl, "IU", "Fake interface buflet size");
401 
402 static unsigned int if_fake_user_access = 0;
403 
404 static int
405 feth_user_access_sysctl SYSCTL_HANDLER_ARGS
406 {
407 #pragma unused(oidp, arg1, arg2)
408 	unsigned int new_value;
409 	int changed;
410 	int error;
411 
412 	error = sysctl_io_number(req, if_fake_user_access,
413 	    sizeof(if_fake_user_access), &new_value, &changed);
414 	if (error == 0 && changed != 0) {
415 		if (new_value != 0) {
416 			if (new_value != 1) {
417 				return EINVAL;
418 			}
419 		}
420 		if_fake_user_access = new_value;
421 	}
422 	return 0;
423 }
424 
425 SYSCTL_PROC(_net_link_fake, OID_AUTO, user_access,
426     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
427     0, 0, feth_user_access_sysctl, "IU", "Fake interface user access");
428 
429 /* sysctl net.link.fake.if_adv_intvl (unit: millisecond) */
430 #define FETH_IF_ADV_INTVL_MIN            10
431 #define FETH_IF_ADV_INTVL_MAX            INT_MAX
432 
433 static int if_fake_if_adv_interval = 0; /* no interface advisory */
434 static int
435 feth_if_adv_interval_sysctl SYSCTL_HANDLER_ARGS
436 {
437 #pragma unused(oidp, arg1, arg2)
438 	unsigned int new_value;
439 	int changed;
440 	int error;
441 
442 	error = sysctl_io_number(req, if_fake_if_adv_interval,
443 	    sizeof(if_fake_if_adv_interval), &new_value, &changed);
444 	if (error == 0 && changed != 0) {
445 		if ((new_value != 0) && (new_value > FETH_IF_ADV_INTVL_MAX ||
446 		    new_value < FETH_IF_ADV_INTVL_MIN)) {
447 			return EINVAL;
448 		}
449 		if_fake_if_adv_interval = new_value;
450 	}
451 	return 0;
452 }
453 
454 SYSCTL_PROC(_net_link_fake, OID_AUTO, if_adv_intvl,
455     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
456     feth_if_adv_interval_sysctl, "IU",
457     "Fake interface will generate interface advisories reports at the specified interval in ms");
458 
459 /* sysctl net.link.fake.tx_drops */
460 /*
461  * Fake ethernet will drop packet on the transmit path at the specified
462  * rate, i.e drop one in every if_fake_tx_drops number of packets.
463  */
464 #define FETH_TX_DROPS_MIN            0
465 #define FETH_TX_DROPS_MAX            INT_MAX
466 static int if_fake_tx_drops = 0; /* no packets are dropped */
467 static int
468 feth_fake_tx_drops_sysctl SYSCTL_HANDLER_ARGS
469 {
470 #pragma unused(oidp, arg1, arg2)
471 	unsigned int new_value;
472 	int changed;
473 	int error;
474 
475 	error = sysctl_io_number(req, if_fake_tx_drops,
476 	    sizeof(if_fake_tx_drops), &new_value, &changed);
477 	if (error == 0 && changed != 0) {
478 		if (new_value > FETH_TX_DROPS_MAX ||
479 		    new_value < FETH_TX_DROPS_MIN) {
480 			return EINVAL;
481 		}
482 		if_fake_tx_drops = new_value;
483 	}
484 	return 0;
485 }
486 
487 SYSCTL_PROC(_net_link_fake, OID_AUTO, tx_drops,
488     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
489     feth_fake_tx_drops_sysctl, "IU",
490     "Fake interface will intermittently drop packets on Tx path");
491 
492 /* sysctl.net.link.fake.tx_exp_policy */
493 
494 typedef enum {
495 	IFF_TX_EXP_POLICY_DISABLED = 0,          /* Expiry notification disabled */
496 	IFF_TX_EXP_POLICY_DROP_AND_NOTIFY = 1,   /* Expiry notification enabled; drop + notify mode */
497 	IFF_TX_EXP_POLICY_NOTIFY_ONLY = 2,       /* Expiry notification enabled; notify only mode */
498 	IFF_TX_EXP_POLICY_METADATA = 3,          /* Expiry notification enabled; use packet metadata */
499 } iff_tx_exp_policy_t;
500 static iff_tx_exp_policy_t if_fake_tx_exp_policy = IFF_TX_EXP_POLICY_DISABLED;
501 
502 static int
503 feth_fake_tx_exp_policy_sysctl SYSCTL_HANDLER_ARGS
504 {
505 #pragma unused(oidp, arg1, arg2)
506 	unsigned int new_value;
507 	int changed;
508 	int error;
509 
510 	error = sysctl_io_number(req, if_fake_tx_exp_policy,
511 	    sizeof(if_fake_tx_exp_policy), &new_value, &changed);
512 	FAKE_LOG(LOG_DEBUG, FE_DBGF_CONTROL,
513 	    "if_fake_tx_exp_policy: %u -> %u (%d)",
514 	    if_fake_tx_exp_policy, new_value, changed);
515 	if (error == 0 && changed != 0) {
516 		if (new_value > IFF_TX_EXP_POLICY_METADATA ||
517 		    new_value < IFF_TX_EXP_POLICY_DISABLED) {
518 			return EINVAL;
519 		}
520 		if_fake_tx_exp_policy = new_value;
521 	}
522 	return 0;
523 }
524 SYSCTL_PROC(_net_link_fake, OID_AUTO, tx_exp_policy,
525     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
526     feth_fake_tx_exp_policy_sysctl, "IU",
527     "Fake interface handling policy for expired TX attempts "
528     "(0 disabled, 1 drop and notify, 2 notify only, 3 packet metadata)");
529 
530 /* sysctl net.link.fake.tx_completion_mode */
531 typedef enum {
532 	IFF_TX_COMPL_MODE_SYNC = 0,
533 	IFF_TX_COMPL_MODE_ASYNC = 1,
534 } iff_tx_completion_mode_t;
535 static iff_tx_completion_mode_t if_tx_completion_mode = IFF_TX_COMPL_MODE_SYNC;
536 static int
537 feth_fake_tx_completion_mode_sysctl SYSCTL_HANDLER_ARGS
538 {
539 #pragma unused(oidp, arg1, arg2)
540 	unsigned int new_value;
541 	int changed;
542 	int error;
543 
544 	error = sysctl_io_number(req, if_tx_completion_mode,
545 	    sizeof(if_tx_completion_mode), &new_value, &changed);
546 	if (error == 0 && changed != 0) {
547 		if (new_value > IFF_TX_COMPL_MODE_ASYNC ||
548 		    new_value < IFF_TX_COMPL_MODE_SYNC) {
549 			return EINVAL;
550 		}
551 		if_tx_completion_mode = new_value;
552 	}
553 	return 0;
554 }
555 SYSCTL_PROC(_net_link_fake, OID_AUTO, tx_completion_mode,
556     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
557     feth_fake_tx_completion_mode_sysctl, "IU",
558     "Fake interface tx completion mode (0 synchronous, 1 asynchronous)");
559 
560 /* sysctl net.link.fake.llink_cnt */
561 
562 /* The maximum number of logical links (including default link) */
563 #define FETH_MAX_LLINKS 16
564 /*
565  * The default number of logical links (including default link).
566  * Zero means logical link mode is disabled.
567  */
568 #define FETH_DEF_LLINKS 0
569 
570 static uint32_t if_fake_llink_cnt = FETH_DEF_LLINKS;
571 static int
572 feth_fake_llink_cnt_sysctl SYSCTL_HANDLER_ARGS
573 {
574 #pragma unused(oidp, arg1, arg2)
575 	unsigned int new_value;
576 	int changed;
577 	int error;
578 
579 	error = sysctl_io_number(req, if_fake_llink_cnt,
580 	    sizeof(if_fake_llink_cnt), &new_value, &changed);
581 	if (error == 0 && changed != 0) {
582 		if (new_value > FETH_MAX_LLINKS) {
583 			return EINVAL;
584 		}
585 		if_fake_llink_cnt = new_value;
586 	}
587 	return 0;
588 }
589 
590 SYSCTL_PROC(_net_link_fake, OID_AUTO, llink_cnt,
591     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
592     feth_fake_llink_cnt_sysctl, "IU",
593     "Fake interface logical link count");
594 
595 /* sysctl net.link.fake.qset_cnt */
596 
597 /* The maximum number of qsets for each logical link */
598 #define FETH_MAX_QSETS  16
599 /* The default number of qsets for each logical link */
600 #define FETH_DEF_QSETS  4
601 
602 static uint32_t if_fake_qset_cnt = FETH_DEF_QSETS;
603 static int
604 feth_fake_qset_cnt_sysctl SYSCTL_HANDLER_ARGS
605 {
606 #pragma unused(oidp, arg1, arg2)
607 	unsigned int new_value;
608 	int changed;
609 	int error;
610 
611 	error = sysctl_io_number(req, if_fake_qset_cnt,
612 	    sizeof(if_fake_qset_cnt), &new_value, &changed);
613 	if (error == 0 && changed != 0) {
614 		if (new_value == 0 ||
615 		    new_value > FETH_MAX_QSETS) {
616 			return EINVAL;
617 		}
618 		if_fake_qset_cnt = new_value;
619 	}
620 	return 0;
621 }
622 
623 SYSCTL_PROC(_net_link_fake, OID_AUTO, qset_cnt,
624     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
625     feth_fake_qset_cnt_sysctl, "IU",
626     "Fake interface queue set count");
627 
628 
/*
 * _mbuf_adjust_pkthdr_and_data
 * - advance the mbuf's data pointer by 'len' bytes and shrink both the
 *   buffer length and the packet-header length to match; a negative
 *   'len' moves the data pointer back and grows the lengths
 * - NOTE(review): no bounds checking here; callers presumably guarantee
 *   that 'len' stays within the mbuf's data area — confirm at call sites
 */
static void
_mbuf_adjust_pkthdr_and_data(mbuf_t m, int len)
{
	mbuf_setdata(m, (char *)mbuf_data(m) + len, mbuf_len(m) - len);
	mbuf_pkthdr_adjustlen(m, -len);
}
635 
636 static inline void *
get_bpf_header(mbuf_t m,struct ether_header * eh_p,struct ether_vlan_header * evl_p,size_t * header_len)637 get_bpf_header(mbuf_t m, struct ether_header * eh_p,
638     struct ether_vlan_header * evl_p, size_t * header_len)
639 {
640 	void *  header;
641 
642 	/* no VLAN tag, just use the ethernet header */
643 	if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
644 		header = eh_p;
645 		*header_len = sizeof(*eh_p);
646 		goto done;
647 	}
648 
649 	/* has VLAN tag, populate the ether VLAN header */
650 	bcopy(eh_p, evl_p,
651 	    offsetof(struct ether_header, ether_type));   /* dst+src ether */
652 	evl_p->evl_encap_proto = htons(ETHERTYPE_VLAN);   /* VLAN encap */
653 	evl_p->evl_tag = htons(m->m_pkthdr.vlan_tag);     /* tag */
654 	evl_p->evl_proto = eh_p->ether_type;              /* proto */
655 	*header_len = sizeof(*evl_p);
656 	header = evl_p;
657 
658 done:
659 	return header;
660 }
661 
/* function-pointer type matching the bpf_tap_in/bpf_tap_out KPIs */
typedef void (*_tap_func)(ifnet_t interface, u_int32_t dlt, mbuf_t packet,
    void *__sized_by(header_len) header, size_t header_len);
664 
665 static void
fake_bpf_tap_common(ifnet_t ifp,mbuf_t m,struct ether_header * eh_p,_tap_func func)666 fake_bpf_tap_common(ifnet_t ifp, mbuf_t m, struct ether_header * eh_p,
667     _tap_func func)
668 {
669 	struct ether_vlan_header        evl;
670 	void *                          header;
671 	size_t                          header_len;
672 
673 	header = get_bpf_header(m, eh_p, &evl, &header_len);
674 	(*func)(ifp, DLT_EN10MB, m, header, header_len);
675 }
676 
/* tap an inbound packet to BPF (VLAN header included when tagged) */
static inline void
fake_bpf_tap_in(ifnet_t ifp, mbuf_t m, struct ether_header * eh_p)
{
	fake_bpf_tap_common(ifp, m, eh_p, bpf_tap_in);
}
682 
683 
/* tap an outbound packet to BPF (VLAN header included when tagged) */
static inline void
fake_bpf_tap_out(ifnet_t ifp, mbuf_t m, struct ether_header * eh_p)
{
	fake_bpf_tap_common(ifp, m, eh_p, bpf_tap_out);
}
689 
690 /**
691 ** virtual ethernet structures, types
692 **/
693 
694 #define IFF_NUM_TX_RINGS_WMM_MODE       4
695 #define IFF_NUM_RX_RINGS_WMM_MODE       1
696 #define IFF_MAX_TX_RINGS        IFF_NUM_TX_RINGS_WMM_MODE
697 #define IFF_MAX_RX_RINGS        IFF_NUM_RX_RINGS_WMM_MODE
698 #define IFF_NUM_TX_QUEUES_WMM_MODE      4
699 #define IFF_NUM_RX_QUEUES_WMM_MODE      1
700 #define IFF_MAX_TX_QUEUES       IFF_NUM_TX_QUEUES_WMM_MODE
701 #define IFF_MAX_RX_QUEUES       IFF_NUM_RX_QUEUES_WMM_MODE
702 
703 #define IFF_MAX_BATCH_SIZE 32
704 
705 typedef uint16_t        iff_flags_t;
706 #define IFF_FLAGS_HWCSUM                0x0001
707 #define IFF_FLAGS_BSD_MODE              0x0002
708 #define IFF_FLAGS_DETACHING             0x0004
709 #define IFF_FLAGS_WMM_MODE              0x0008
710 #define IFF_FLAGS_MULTIBUFLETS          0x0010
711 #define IFF_FLAGS_TSO_SUPPORT           0x0020
712 #define IFF_FLAGS_LRO                   0x0040
713 #define IFF_FLAGS_VLAN_MTU              0x0080
714 #define IFF_FLAGS_VLAN_TAGGING          0x0100
715 #define IFF_FLAGS_SEPARATE_FRAME_HEADER 0x0200
716 #define IFF_FLAGS_NX_ATTACHED           0x0400
717 
718 #if SKYWALK
719 
/* netif nexus provider/instance UUID pair for a fake interface */
typedef struct {
	uuid_t                  fnx_provider;
	uuid_t                  fnx_instance;
} fake_nx, *fake_nx_t;

/* a single netif queue belonging to a queue set */
typedef struct {
	kern_netif_queue_t      fq_queue;
} fake_queue;

/* a queue set: its rx/tx queues plus logical-link bookkeeping */
typedef struct {
	kern_netif_qset_t       fqs_qset; /* provided by xnu */
	fake_queue              fqs_rx_queue[IFF_MAX_RX_QUEUES];
	fake_queue              fqs_tx_queue[IFF_MAX_TX_QUEUES];
	uint32_t                fqs_rx_queue_cnt;
	uint32_t                fqs_tx_queue_cnt;
	uint32_t                fqs_llink_idx;  /* index of the owning llink */
	uint32_t                fqs_idx;        /* index within the llink */
	uint32_t                fqs_dequeue_cnt;
	uint64_t                fqs_id;
	boolean_t               fqs_combined_mode;
} fake_qset;

/* a logical link and its queue sets */
typedef struct {
	uint64_t                fl_id;
	uint32_t                fl_idx;
	uint32_t                fl_qset_cnt;    /* valid entries in fl_qset */
	fake_qset               fl_qset[FETH_MAX_QSETS];
} fake_llink, * fake_llink_t;
748 
749 static kern_pbufpool_t         S_pp;
750 
751 #define IFF_TT_OUTPUT   0x01 /* generate trace_tag on output */
752 #define IFF_TT_INPUT    0x02 /* generate trace_tag on input */
753 static int if_fake_trace_tag_flags = 0;
754 SYSCTL_INT(_net_link_fake, OID_AUTO, trace_tag, CTLFLAG_RW | CTLFLAG_LOCKED,
755     &if_fake_trace_tag_flags, 0, "Fake interface generate trace_tag");
756 static packet_trace_tag_t if_fake_trace_tag_current = 1;
757 
758 #endif /* SKYWALK */
759 
/*
 * struct if_fake
 * - per-interface private state for a "feth" instance, retrieved from
 *   an ifnet via ifnet_get_if_fake()
 */
struct if_fake {
	char                    iff_name[IFNAMSIZ]; /* our unique id */
	ifnet_t                 iff_ifp;
	iff_flags_t             iff_flags;      /* IFF_FLAGS_* bits */
	uint32_t                iff_retain_count;
	ifnet_t                 iff_peer;       /* the other end */
	int                     iff_media_current;
	int                     iff_media_active;
	uint32_t                iff_media_count;
	int                     iff_media_list[IF_FAKE_MEDIA_LIST_MAX];
	boolean_t               iff_start_busy;
	unsigned int            iff_max_mtu;
	uint32_t                iff_fcs;        /* frame check sequence in use */
	uint32_t                iff_trailer_length;
#if SKYWALK
	fake_nx                 iff_nx;         /* netif nexus UUIDs */
	struct netif_stats      *iff_nifs;
	uint32_t                iff_nifs_ref;
	uint32_t                iff_llink_cnt;
	kern_channel_ring_t     iff_rx_ring[IFF_MAX_RX_RINGS];
	kern_channel_ring_t     iff_tx_ring[IFF_MAX_TX_RINGS];
	fake_llink_t            iff_llink __counted_by(FETH_MAX_LLINKS);
	thread_call_t           iff_doorbell_tcall;
	thread_call_t           iff_if_adv_tcall; /* interface advisory timer */
	boolean_t               iff_doorbell_tcall_active;
	boolean_t               iff_waiting_for_tcall;
	boolean_t               iff_channel_connected;
	iff_pktpool_mode_t      iff_pp_mode;    /* global/private/split pools */
	kern_pbufpool_t         iff_rx_pp;
	kern_pbufpool_t         iff_tx_pp;
	uint32_t                iff_tx_headroom;
	unsigned int            iff_adv_interval; /* ms; 0 means no advisories */
	uint32_t                iff_tx_drop_rate; /* drop 1 in N tx pkts; 0 = none */
	uint32_t                iff_tx_pkts_count;
	iff_tx_completion_mode_t iff_tx_completion_mode;
	bool                    iff_intf_adv_enabled;
	void                    *iff_intf_adv_kern_ctx;
	kern_nexus_capab_interface_advisory_notify_fn_t iff_intf_adv_notify;
	iff_tx_exp_policy_t     iff_tx_exp_policy;
#endif /* SKYWALK */
};

typedef struct if_fake * if_fake_ref;
803 
804 static if_fake_ref
805 ifnet_get_if_fake(ifnet_t ifp);
806 
/* TRUE if the interface is attached as a BSD interface (IFF_FLAGS_BSD_MODE) */
static inline boolean_t
feth_in_bsd_mode(if_fake_ref fakeif)
{
	return (fakeif->iff_flags & IFF_FLAGS_BSD_MODE) != 0;
}

/* mark the interface as detaching; observable via feth_is_detaching() */
static inline void
feth_set_detaching(if_fake_ref fakeif)
{
	fakeif->iff_flags |= IFF_FLAGS_DETACHING;
}

/* TRUE once feth_set_detaching() has been called on this interface */
static inline boolean_t
feth_is_detaching(if_fake_ref fakeif)
{
	return (fakeif->iff_flags & IFF_FLAGS_DETACHING) != 0;
}
824 
825 static int
feth_enable_dequeue_stall(ifnet_t ifp,uint32_t enable)826 feth_enable_dequeue_stall(ifnet_t ifp, uint32_t enable)
827 {
828 	int error;
829 
830 	if (enable != 0) {
831 		error = ifnet_disable_output(ifp);
832 	} else {
833 		error = ifnet_enable_output(ifp);
834 	}
835 
836 	return error;
837 }
838 
839 #if SKYWALK
/* TRUE if the interface runs in 802.11 WMM mode (IFF_FLAGS_WMM_MODE) */
static inline boolean_t
feth_in_wmm_mode(if_fake_ref fakeif)
{
	return (fakeif->iff_flags & IFF_FLAGS_WMM_MODE) != 0;
}

/* TRUE if packets are built from multiple buflets (IFF_FLAGS_MULTIBUFLETS) */
static inline boolean_t
feth_using_multibuflets(if_fake_ref fakeif)
{
	return (fakeif->iff_flags & IFF_FLAGS_MULTIBUFLETS) != 0;
}
static void feth_detach_netif_nexus(if_fake_ref fakeif);

/* TRUE if a non-zero interface-advisory interval was configured */
static inline boolean_t
feth_has_intf_advisory_configured(if_fake_ref fakeif)
{
	return fakeif->iff_adv_interval > 0;
}
858 #endif /* SKYWALK */
859 
/* TRUE if TSO offload support is enabled (IFF_FLAGS_TSO_SUPPORT) */
static inline bool
feth_supports_tso(if_fake_ref fakeif)
{
	return (fakeif->iff_flags & IFF_FLAGS_TSO_SUPPORT) != 0;
}

/* enable TSO offload support for this interface */
static inline void
feth_set_supports_tso(if_fake_ref fakeif)
{
	fakeif->iff_flags |= IFF_FLAGS_TSO_SUPPORT;
}

/* TRUE if VLAN MTU support is enabled (IFF_FLAGS_VLAN_MTU) */
static inline bool
feth_supports_vlan_mtu(if_fake_ref fakeif)
{
	return (fakeif->iff_flags & IFF_FLAGS_VLAN_MTU) != 0;
}

/* enable VLAN MTU support for this interface */
static inline void
feth_set_supports_vlan_mtu(if_fake_ref fakeif)
{
	fakeif->iff_flags |= IFF_FLAGS_VLAN_MTU;
}

/* TRUE if VLAN tagging support is enabled (IFF_FLAGS_VLAN_TAGGING) */
static inline bool
feth_supports_vlan_tagging(if_fake_ref fakeif)
{
	return (fakeif->iff_flags & IFF_FLAGS_VLAN_TAGGING) != 0;
}

/* enable VLAN tagging support for this interface */
static inline void
feth_set_supports_vlan_tagging(if_fake_ref fakeif)
{
	fakeif->iff_flags |= IFF_FLAGS_VLAN_TAGGING;
}
895 
896 
/* upper bound on feth unit numbers, and on zone elements derived from it */
#define FETH_MAXUNIT    IF_MAXUNIT
#define FETH_ZONE_MAX_ELEM      MIN(IFNETS_MAX, FETH_MAXUNIT)

/* forward declarations: cloner callbacks and data-path entry points */
static  int feth_clone_create(struct if_clone *, u_int32_t, void *);
static  int feth_clone_destroy(ifnet_t);
static  int feth_output(ifnet_t ifp, struct mbuf *m);
static  void feth_start(ifnet_t ifp);
static  int feth_ioctl(ifnet_t ifp, u_long cmd, void * addr);
static  int feth_config(ifnet_t ifp, ifnet_t peer);
static  void feth_if_free(ifnet_t ifp);
static  void feth_ifnet_set_attrs(if_fake_ref fakeif, ifnet_t ifp);
static  void feth_free(if_fake_ref fakeif);

/* interface cloner that creates/destroys "feth<unit>" interfaces */
static struct if_clone
    feth_cloner = IF_CLONE_INITIALIZER(FAKE_ETHER_NAME,
    feth_clone_create,
    feth_clone_destroy,
    0,
    FETH_MAXUNIT);
static  void interface_link_event(ifnet_t ifp, u_int32_t event_code);
917 
/* some media words to pretend to be ethernet */
#define FAKE_DEFAULT_MEDIA      IFM_MAKEWORD(IFM_ETHER, IFM_10G_T, IFM_FDX, 0)
/* media list reported by feth interfaces; includes FAKE_DEFAULT_MEDIA (10G FDX) */
static int default_media_words[] = {
	IFM_MAKEWORD(IFM_ETHER, 0, 0, 0),
	FAKE_DEFAULT_MEDIA,
	IFM_MAKEWORD(IFM_ETHER, IFM_2500_T, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_5000_T, IFM_FDX, 0),

	IFM_MAKEWORD(IFM_ETHER, IFM_10G_KX4, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_20G_KR2, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_2500_SX, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_25G_KR, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_40G_SR4, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_50G_CR2, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_56G_R4, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_100G_CR4, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_400G_AUI8, IFM_FDX, 0),
};
/* number of entries in default_media_words */
#define default_media_words_count (sizeof(default_media_words)          \
	                           / sizeof (default_media_words[0]))
938 
/**
** feth locks
**/

/* single module-wide mutex serializing access to feth global state */
static LCK_GRP_DECLARE(feth_lck_grp, "fake");
static LCK_MTX_DECLARE(feth_lck_mtx, &feth_lck_grp);
945 
/* Acquire the module-wide feth mutex. */
static inline void
feth_lock(void)
{
	lck_mtx_lock(&feth_lck_mtx);
}
951 
/* Release the module-wide feth mutex. */
static inline void
feth_unlock(void)
{
	lck_mtx_unlock(&feth_lck_mtx);
}
957 
958 static inline int
get_max_mtu(int bsd_mode,unsigned int max_mtu)959 get_max_mtu(int bsd_mode, unsigned int max_mtu)
960 {
961 	unsigned int    mtu;
962 
963 	if (bsd_mode != 0) {
964 		mtu = (njcl > 0) ? (M16KCLBYTES - ETHER_HDR_LEN)
965 		    : MBIGCLBYTES - ETHER_HDR_LEN;
966 		if (mtu > max_mtu) {
967 			mtu = max_mtu;
968 		}
969 	} else {
970 		mtu = max_mtu;
971 	}
972 	return mtu;
973 }
974 
975 static inline unsigned int
feth_max_mtu(ifnet_t ifp)976 feth_max_mtu(ifnet_t ifp)
977 {
978 	if_fake_ref     fakeif;
979 	unsigned int    max_mtu = ETHERMTU;
980 
981 	feth_lock();
982 	fakeif = ifnet_get_if_fake(ifp);
983 	if (fakeif != NULL) {
984 		max_mtu = fakeif->iff_max_mtu;
985 	}
986 	feth_unlock();
987 	return max_mtu;
988 }
989 
/*
 * feth_free
 * - final teardown of an if_fake instance once its retain count has
 *   dropped to zero: releases packet-pool references (skywalk mode),
 *   frees the logical-link array and the structure itself
 */
static void
feth_free(if_fake_ref fakeif)
{
	VERIFY(fakeif->iff_retain_count == 0);
#if SKYWALK
	if (!feth_in_bsd_mode(fakeif)) {
		if (fakeif->iff_pp_mode == IFF_PP_MODE_GLOBAL) {
			/* both directions must reference the shared pool S_pp */
			VERIFY(fakeif->iff_rx_pp == S_pp);
			VERIFY(fakeif->iff_tx_pp == S_pp);
			pp_release(fakeif->iff_rx_pp);
			fakeif->iff_rx_pp = NULL;
			pp_release(fakeif->iff_tx_pp);
			fakeif->iff_tx_pp = NULL;
			feth_lock();
			/* last reference: drop the global pool itself */
			if (S_pp != NULL && S_pp->pp_refcnt == 1) {
				pp_release(S_pp);
				S_pp = NULL;
			}
			feth_unlock();
		} else {
			/* private pool(s); each direction holds its own retain */
			if (fakeif->iff_rx_pp != NULL) {
				pp_release(fakeif->iff_rx_pp);
				fakeif->iff_rx_pp = NULL;
			}
			if (fakeif->iff_tx_pp != NULL) {
				pp_release(fakeif->iff_tx_pp);
				fakeif->iff_tx_pp = NULL;
			}
		}
	}
#endif /* SKYWALK */

	FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE, "%s", fakeif->iff_name);
	if (fakeif->iff_llink != NULL) {
		kfree_type(fake_llink, FETH_MAX_LLINKS, fakeif->iff_llink);
	}
	kfree_type(struct if_fake, fakeif);
}
1028 
1029 static void
feth_release(if_fake_ref fakeif)1030 feth_release(if_fake_ref fakeif)
1031 {
1032 	u_int32_t               old_retain_count;
1033 
1034 	old_retain_count = OSDecrementAtomic(&fakeif->iff_retain_count);
1035 	switch (old_retain_count) {
1036 	case 0:
1037 		VERIFY(old_retain_count != 0);
1038 		break;
1039 	case 1:
1040 		feth_free(fakeif);
1041 		break;
1042 	default:
1043 		break;
1044 	}
1045 	return;
1046 }
1047 
1048 #if SKYWALK
1049 
/*
 * Take an additional reference on the if_fake instance; paired with
 * feth_release().
 */
static void
feth_retain(if_fake_ref fakeif)
{
	OSIncrementAtomic(&fakeif->iff_retain_count);
}
1055 
1056 static void
feth_packet_pool_init_prepare(if_fake_ref fakeif,struct kern_pbufpool_init * pp_init)1057 feth_packet_pool_init_prepare(if_fake_ref fakeif,
1058     struct kern_pbufpool_init *pp_init)
1059 {
1060 	uint32_t max_mtu = fakeif->iff_max_mtu;
1061 	uint32_t buflet_size = if_fake_buflet_size;
1062 
1063 	bzero(pp_init, sizeof(*pp_init));
1064 	pp_init->kbi_version = KERN_PBUFPOOL_CURRENT_VERSION;
1065 	pp_init->kbi_flags |= KBIF_VIRTUAL_DEVICE;
1066 	pp_init->kbi_packets = 1024; /* TBD configurable */
1067 	if (feth_supports_tso(fakeif)) {
1068 		buflet_size = if_fake_tso_buffer_size;
1069 	}
1070 	if (feth_using_multibuflets(fakeif)) {
1071 		pp_init->kbi_bufsize = buflet_size;
1072 		pp_init->kbi_max_frags = howmany(max_mtu, buflet_size);
1073 		pp_init->kbi_buflets = pp_init->kbi_packets *
1074 		    pp_init->kbi_max_frags;
1075 		pp_init->kbi_flags |= KBIF_BUFFER_ON_DEMAND;
1076 	} else {
1077 		pp_init->kbi_bufsize = max(max_mtu, buflet_size);
1078 		pp_init->kbi_max_frags = 1;
1079 		pp_init->kbi_buflets = pp_init->kbi_packets;
1080 	}
1081 	pp_init->kbi_buf_seg_size = skmem_usr_buf_seg_size;
1082 	if (if_fake_user_access != 0) {
1083 		pp_init->kbi_flags |= KBIF_USER_ACCESS;
1084 	}
1085 	pp_init->kbi_ctx = NULL;
1086 	pp_init->kbi_ctx_retain = NULL;
1087 	pp_init->kbi_ctx_release = NULL;
1088 }
1089 
1090 static errno_t
feth_packet_pool_make(if_fake_ref fakeif)1091 feth_packet_pool_make(if_fake_ref fakeif)
1092 {
1093 	struct kern_pbufpool_init pp_init;
1094 	errno_t err;
1095 
1096 	feth_packet_pool_init_prepare(fakeif, &pp_init);
1097 
1098 	switch (fakeif->iff_pp_mode) {
1099 	case IFF_PP_MODE_GLOBAL:
1100 		feth_lock();
1101 		if (S_pp == NULL) {
1102 			(void)snprintf((char *)pp_init.kbi_name,
1103 			    sizeof(pp_init.kbi_name), "%s", "feth shared pp");
1104 			err = kern_pbufpool_create(&pp_init, &S_pp, NULL);
1105 		}
1106 		pp_retain(S_pp);
1107 		feth_unlock();
1108 		fakeif->iff_rx_pp = S_pp;
1109 		pp_retain(S_pp);
1110 		fakeif->iff_tx_pp = S_pp;
1111 		break;
1112 	case IFF_PP_MODE_PRIVATE:
1113 		(void)snprintf((char *)pp_init.kbi_name,
1114 		    sizeof(pp_init.kbi_name), "%s pp", fakeif->iff_name);
1115 		err = kern_pbufpool_create(&pp_init, &fakeif->iff_rx_pp, NULL);
1116 		pp_retain(fakeif->iff_rx_pp);
1117 		fakeif->iff_tx_pp = fakeif->iff_rx_pp;
1118 		break;
1119 	case IFF_PP_MODE_PRIVATE_SPLIT:
1120 		(void)snprintf((char *)pp_init.kbi_name,
1121 		    sizeof(pp_init.kbi_name), "%s rx pp", fakeif->iff_name);
1122 		pp_init.kbi_flags &= ~(KBIF_IODIR_IN | KBIF_IODIR_OUT |
1123 		    KBIF_BUFFER_ON_DEMAND | KBIF_KERNEL_READONLY);
1124 		pp_init.kbi_flags |= (KBIF_IODIR_IN | KBIF_BUFFER_ON_DEMAND);
1125 		pp_init.kbi_packets = 1024;
1126 		pp_init.kbi_bufsize = if_fake_link_layer_aggregation_factor * 1024;
1127 		err = kern_pbufpool_create(&pp_init, &fakeif->iff_rx_pp, NULL);
1128 		if (err != 0) {
1129 			FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
1130 			    "rx pp create failed %d", err);
1131 			return err;
1132 		}
1133 		pp_init.kbi_flags &= ~(KBIF_IODIR_IN | KBIF_IODIR_OUT |
1134 		    KBIF_BUFFER_ON_DEMAND | KBIF_KERNEL_READONLY);
1135 		pp_init.kbi_flags |= KBIF_IODIR_OUT;
1136 		pp_init.kbi_packets = 1024;            /* TBD configurable */
1137 		pp_init.kbi_bufsize = fakeif->iff_max_mtu;
1138 		(void)snprintf((char *)pp_init.kbi_name,
1139 		    sizeof(pp_init.kbi_name), "%s tx pp", fakeif->iff_name);
1140 		err = kern_pbufpool_create(&pp_init, &fakeif->iff_tx_pp, NULL);
1141 		if (err != 0) {
1142 			FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
1143 			    "tx pp create failed %d", err);
1144 			pp_release(fakeif->iff_rx_pp);
1145 			return err;
1146 		}
1147 		break;
1148 	default:
1149 		VERIFY(0);
1150 		__builtin_unreachable();
1151 	}
1152 
1153 	return 0;
1154 }
1155 
1156 static void
feth_packet_set_trace_tag(kern_packet_t ph,int flag)1157 feth_packet_set_trace_tag(kern_packet_t ph, int flag)
1158 {
1159 	if (if_fake_trace_tag_flags & flag) {
1160 		if (++if_fake_trace_tag_current == 0) {
1161 			if_fake_trace_tag_current = 1;
1162 		}
1163 		kern_packet_set_trace_tag(ph, if_fake_trace_tag_current);
1164 	}
1165 }
1166 
/*
 * feth_clone_packet
 * - produce a receive-side copy of the single-buflet packet `sph' for
 *   destination interface `dif' (used in private-split pool mode)
 * - packs consecutive small packets into one destination buffer: *pdph is
 *   the previously produced packet; when the new payload fits into the
 *   space past that packet's data limit, the buffer is shared via a light
 *   clone, otherwise a fresh packet/buffer is allocated
 * - on success, *pdph is replaced with the new packet; returns 0 or errno
 */
static errno_t
feth_clone_packet(if_fake_ref dif, kern_packet_t sph, kern_packet_t *pdph)
{
	errno_t err = 0;
	kern_pbufpool_t pp = dif->iff_rx_pp;
	kern_packet_t dph = 0, dph0 = 0;
	kern_buflet_t sbuf, dbuf0 = NULL, dbuf;
	void *saddr, *daddr;
	uint32_t soff, doff;
	uint32_t slen, dlen;
	uint32_t dlim0, dlim;

	sbuf = kern_packet_get_next_buflet(sph, NULL);
	saddr = kern_buflet_get_data_address(sbuf);
	doff = soff = kern_buflet_get_data_offset(sbuf);
	dlen = slen = kern_buflet_get_data_length(sbuf);

	/* packet clone is only supported for single-buflet */
	ASSERT(kern_packet_get_buflet_count(sph) == 1);
	ASSERT(soff == kern_packet_get_headroom(sph));
	ASSERT(slen == kern_packet_get_data_length(sph));

	/* dlim0 = room remaining past the previous packet's data limit */
	dph0 = *pdph;
	if (dph0 == 0) {
		dlim0 = 0;
	} else {
		dbuf0 = kern_packet_get_next_buflet(dph0, NULL);
		ASSERT(kern_buflet_get_object_limit(dbuf0) ==
		    PP_BUF_OBJ_SIZE_DEF(pp));
		ASSERT(kern_buflet_get_data_limit(dbuf0) % 16 == 0);
		dlim0 = ((uintptr_t)kern_buflet_get_object_address(dbuf0) +
		    kern_buflet_get_object_limit(dbuf0)) -
		    ((uintptr_t)kern_buflet_get_data_address(dbuf0) +
		    kern_buflet_get_data_limit(dbuf0));
	}

	if (doff + dlen > dlim0) {
		/* does not fit: allocate a fresh packet with its own buffer */
		err = kern_pbufpool_alloc_nosleep(pp, 1, &dph);
		if (err != 0) {
			STATS_INC(dif->iff_nifs, NETIF_STATS_DROP);
			STATS_INC(dif->iff_nifs, NETIF_STATS_DROP_NOMEM_PKT);
			return err;
		}
		dbuf = kern_packet_get_next_buflet(dph, NULL);
		ASSERT(kern_buflet_get_data_address(dbuf) ==
		    kern_buflet_get_object_address(dbuf));
		daddr = kern_buflet_get_data_address(dbuf);
		dlim = kern_buflet_get_object_limit(dbuf);
		ASSERT(dlim == PP_BUF_OBJ_SIZE_DEF(pp));
	} else {
		/* fits: share the previous packet's buffer via a light clone,
		 * placing the new data right after the previous data limit */
		err = kern_packet_clone_nosleep(dph0, &dph, KPKT_COPY_LIGHT);
		if (err != 0) {
			FAKE_LOG(LOG_INFO, FE_DBGF_OUTPUT,
			    "packet clone err %d", err);
			return err;
		}
		dbuf = kern_packet_get_next_buflet(dph, NULL);
		ASSERT(kern_buflet_get_object_address(dbuf) ==
		    kern_buflet_get_object_address(dbuf0));
		daddr = (void *)((uintptr_t)kern_buflet_get_data_address(dbuf0) +
		    kern_buflet_get_data_limit(dbuf0));
		dlim = dlim0;
	}

	ASSERT(doff + dlen <= dlim);

	ASSERT((uintptr_t)daddr % 16 == 0);

	bcopy((const void *)((uintptr_t)saddr + soff),
	    (void *)((uintptr_t)daddr + doff), slen);

	/* round the data limit to 16 bytes so the next clone stays aligned */
	dlim = MIN(dlim, P2ROUNDUP(doff + dlen, 16));
	err = kern_buflet_set_data_address(dbuf, daddr);
	VERIFY(err == 0);
	err = kern_buflet_set_data_limit(dbuf, dlim);
	VERIFY(err == 0);
	err = kern_buflet_set_data_length(dbuf, dlen);
	VERIFY(err == 0);
	err = kern_buflet_set_data_offset(dbuf, doff);
	VERIFY(err == 0);
	/* mirror the source packet's metadata and finalize */
	err = kern_packet_set_headroom(dph, doff);
	VERIFY(err == 0);
	err = kern_packet_set_link_header_length(dph,
	    kern_packet_get_link_header_length(sph));
	VERIFY(err == 0);
	err = kern_packet_set_service_class(dph,
	    kern_packet_get_service_class(sph));
	VERIFY(err == 0);
	err = kern_packet_finalize(dph);
	VERIFY(err == 0);
	*pdph = dph;

	return err;
}
1261 
1262 static inline void
feth_copy_buflet(kern_buflet_t sbuf,kern_buflet_t dbuf)1263 feth_copy_buflet(kern_buflet_t sbuf, kern_buflet_t dbuf)
1264 {
1265 	errno_t err;
1266 	uint32_t off, len;
1267 	uint8_t *saddr, *daddr;
1268 
1269 	saddr = kern_buflet_get_data_address(sbuf);
1270 	off = kern_buflet_get_data_offset(sbuf);
1271 	len = kern_buflet_get_data_length(sbuf);
1272 	daddr = kern_buflet_get_data_address(dbuf);
1273 	bcopy((saddr + off), (daddr + off), len);
1274 	err = kern_buflet_set_data_offset(dbuf, off);
1275 	VERIFY(err == 0);
1276 	err = kern_buflet_set_data_length(dbuf, len);
1277 	VERIFY(err == 0);
1278 }
1279 
1280 static int
feth_add_packet_trailer(kern_packet_t ph,void * trailer,size_t trailer_len)1281 feth_add_packet_trailer(kern_packet_t ph, void *trailer, size_t trailer_len)
1282 {
1283 	errno_t err = 0;
1284 
1285 	ASSERT(trailer_len <= FETH_TRAILER_LENGTH_MAX);
1286 
1287 	kern_buflet_t buf = NULL, iter = NULL;
1288 	while ((iter = kern_packet_get_next_buflet(ph, iter)) != NULL) {
1289 		buf = iter;
1290 	}
1291 	ASSERT(buf != NULL);
1292 
1293 	uint32_t dlim = kern_buflet_get_data_limit(buf);
1294 	uint32_t doff = kern_buflet_get_data_offset(buf);
1295 	uint32_t dlen = kern_buflet_get_data_length(buf);
1296 
1297 	size_t trailer_room = dlim - doff - dlen;
1298 
1299 	if (trailer_room < trailer_len) {
1300 		FAKE_LOG(LOG_INFO, FE_DBGF_OUTPUT, "not enough room");
1301 		return ERANGE;
1302 	}
1303 
1304 	void *data = (void *)((uintptr_t)kern_buflet_get_data_address(buf) + doff + dlen);
1305 	memcpy(data, trailer, trailer_len);
1306 
1307 	err = kern_buflet_set_data_length(buf, dlen + trailer_len);
1308 	VERIFY(err == 0);
1309 
1310 	err = kern_packet_finalize(ph);
1311 	VERIFY(err == 0);
1312 
1313 	FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT, "%zuB trailer added", trailer_len);
1314 
1315 	return 0;
1316 }
1317 
1318 static int
feth_add_packet_fcs(kern_packet_t ph)1319 feth_add_packet_fcs(kern_packet_t ph)
1320 {
1321 	uint32_t crc = 0;
1322 	int err;
1323 
1324 	ASSERT(sizeof(crc) == ETHER_CRC_LEN);
1325 
1326 	kern_buflet_t buf = NULL;
1327 	while ((buf = kern_packet_get_next_buflet(ph, buf)) != NULL) {
1328 		uint32_t doff = kern_buflet_get_data_offset(buf);
1329 		uint32_t dlen = kern_buflet_get_data_length(buf);
1330 		void *data = (void *)((uintptr_t)kern_buflet_get_data_address(buf) + doff);
1331 		crc = crc32(crc, data, dlen);
1332 	}
1333 
1334 	err = feth_add_packet_trailer(ph, &crc, ETHER_CRC_LEN);
1335 	if (!err) {
1336 		return err;
1337 	}
1338 
1339 	err = kern_packet_set_link_ethfcs(ph);
1340 	VERIFY(err == 0);
1341 
1342 	return 0;
1343 }
1344 
/*
 * feth_copy_packet
 * - deep-copy packet `sph' into a new packet allocated from `dif''s RX
 *   pool; supports single-buflet packets and (in multi-buflet mode)
 *   chained multi-buflet packets
 * - on success stores the new packet in *pdph and returns 0; on failure
 *   frees any partially built packet and returns the error
 */
static errno_t
feth_copy_packet(if_fake_ref dif, kern_packet_t sph, kern_packet_t *pdph)
{
	errno_t err = 0;
	uint16_t i, bufcnt;
	mach_vm_address_t baddr;
	kern_buflet_t sbuf = NULL, dbuf = NULL;
	kern_pbufpool_t pp = dif->iff_rx_pp;
	kern_packet_t dph;
	boolean_t multi_buflet = feth_using_multibuflets(dif);

	bufcnt = kern_packet_get_buflet_count(sph);
	ASSERT((bufcnt == 1) || multi_buflet);
	*pdph = 0;

	err = kern_pbufpool_alloc_nosleep(pp, 1, &dph);
	if (err != 0) {
		STATS_INC(dif->iff_nifs, NETIF_STATS_DROP);
		STATS_INC(dif->iff_nifs, NETIF_STATS_DROP_NOMEM_PKT);
		return err;
	}

	/* pre-constructed single buflet packet copy */
	sbuf = kern_packet_get_next_buflet(sph, NULL);
	dbuf = kern_packet_get_next_buflet(dph, NULL);
	feth_copy_buflet(sbuf, dbuf);

	if (!multi_buflet) {
		goto done;
	}

	/* un-constructed multi-buflet packet copy */
	for (i = 1; i < bufcnt; i++) {
		kern_buflet_t dbuf_next = NULL;

		sbuf = kern_packet_get_next_buflet(sph, sbuf);
		VERIFY(sbuf != NULL);
		err = kern_pbufpool_alloc_buflet_nosleep(pp, &dbuf_next);
		if (err != 0) {
			STATS_INC(dif->iff_nifs, NETIF_STATS_DROP);
			STATS_INC(dif->iff_nifs, NETIF_STATS_DROP_NOMEM_BUF);
			break;
		}
		ASSERT(dbuf_next != NULL);
		feth_copy_buflet(sbuf, dbuf_next);
		err = kern_packet_add_buflet(dph, dbuf, dbuf_next);
		VERIFY(err == 0);
		dbuf = dbuf_next;
	}
	if (__improbable(err != 0)) {
		/* partial failure: verify each attached buflet has a backing
		 * buffer, then free the whole partially-built packet */
		dbuf = NULL;
		while (i-- != 0) {
			dbuf = kern_packet_get_next_buflet(dph, dbuf);
			VERIFY(dbuf != NULL);
			baddr = (mach_vm_address_t)
			    kern_buflet_get_data_address(dbuf);
			VERIFY(baddr != 0);
		}
		kern_pbufpool_free(pp, dph);
		dph = 0;
	}

done:
	if (__probable(err == 0)) {
		/* mirror the source packet's metadata and finalize */
		err = kern_packet_set_headroom(dph,
		    kern_packet_get_headroom(sph));
		VERIFY(err == 0);
		err = kern_packet_set_link_header_length(dph,
		    kern_packet_get_link_header_length(sph));
		VERIFY(err == 0);
		err = kern_packet_set_service_class(dph,
		    kern_packet_get_service_class(sph));
		VERIFY(err == 0);
		err = kern_packet_finalize(dph);
		VERIFY(err == 0);
		VERIFY(bufcnt == kern_packet_get_buflet_count(dph));
		*pdph = dph;
	}
	return err;
}
1425 
1426 static inline void
feth_update_pkt_tso_metadata_for_rx(kern_packet_t ph)1427 feth_update_pkt_tso_metadata_for_rx(kern_packet_t ph)
1428 {
1429 	/*
1430 	 * Nothing to do if not a TSO offloaded packet.
1431 	 */
1432 	uint16_t seg_sz = 0;
1433 	seg_sz = kern_packet_get_protocol_segment_size(ph);
1434 	if (seg_sz == 0) {
1435 		return;
1436 	}
1437 	/*
1438 	 * For RX, make the packet appear as a fully validated LRO packet.
1439 	 */
1440 	packet_csum_flags_t csum_flags = PACKET_CSUM_IP_CHECKED |
1441 	    PACKET_CSUM_IP_VALID | PACKET_CSUM_DATA_VALID |
1442 	    PACKET_CSUM_PSEUDO_HDR;
1443 	(void) kern_packet_set_inet_checksum(ph, csum_flags, 0, 0xFFFF, FALSE);
1444 	return;
1445 }
1446 
/*
 * feth_rx_submit
 * - deliver `n_pkts' packets transmitted by source interface `sif' into
 *   the default RX ring of destination (peer) interface `dif'
 * - depending on dif's pool mode the packet is handed over directly
 *   (GLOBAL; sphs[i] is consumed and zeroed), deep-copied (PRIVATE), or
 *   clone-packed (PRIVATE_SPLIT)
 * - applies the source's configured trailer/FCS, taps BPF, attaches the
 *   packets to ring slots and notifies the channel
 */
static void
feth_rx_submit(if_fake_ref sif, if_fake_ref dif, kern_packet_t sphs[],
    uint32_t n_pkts)
{
	errno_t err = 0;
	struct kern_channel_ring_stat_increment stats;
	kern_channel_ring_t rx_ring = NULL;
	kern_channel_slot_t rx_slot = NULL, last_rx_slot = NULL;
	kern_packet_t sph = 0, dph = 0;

	memset(&stats, 0, sizeof(stats));

	/* no RX ring (channel not connected): drop everything */
	rx_ring = dif->iff_rx_ring[0];
	if (rx_ring == NULL) {
		return;
	}

	kr_enter(rx_ring, TRUE);
	kern_channel_reclaim(rx_ring);
	rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);

	/* stop early when the ring runs out of free slots */
	for (uint32_t i = 0; i < n_pkts && rx_slot != NULL; i++) {
		sph = sphs[i];

		switch (dif->iff_pp_mode) {
		case IFF_PP_MODE_GLOBAL:
			/* shared pool: hand the packet over as-is */
			sphs[i] = 0;
			dph = sph;
			feth_update_pkt_tso_metadata_for_rx(dph);
			err = kern_packet_finalize(dph);
			VERIFY(err == 0);
			break;
		case IFF_PP_MODE_PRIVATE:
			err = feth_copy_packet(dif, sph, &dph);
			break;
		case IFF_PP_MODE_PRIVATE_SPLIT:
			err = feth_clone_packet(dif, sph, &dph);
			break;
		default:
			VERIFY(0);
			__builtin_unreachable();
		}
		/* copy/clone failure: skip this packet */
		if (__improbable(err != 0)) {
			continue;
		}

		/* apply the sender's configured trailer and/or FCS */
		if (sif->iff_trailer_length != 0) {
			feth_add_packet_trailer(dph, feth_trailer,
			    sif->iff_trailer_length);
		}
		if (sif->iff_fcs != 0) {
			feth_add_packet_fcs(dph);
		}
		feth_packet_set_trace_tag(dph, IFF_TT_INPUT);
		bpf_tap_packet_in(dif->iff_ifp, DLT_EN10MB, dph, NULL, 0);
		stats.kcrsi_slots_transferred++;
		stats.kcrsi_bytes_transferred
		        += kern_packet_get_data_length(dph);

		/* attach the packet to the RX ring */
		err = kern_channel_slot_attach_packet(rx_ring, rx_slot, dph);
		VERIFY(err == 0);
		last_rx_slot = rx_slot;
		rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
	}

	/* publish the attached packets and update interface stats */
	if (last_rx_slot != NULL) {
		kern_channel_advance_slot(rx_ring, last_rx_slot);
		kern_channel_increment_ring_net_stats(rx_ring, dif->iff_ifp,
		    &stats);
	}

	if (rx_ring != NULL) {
		kr_exit(rx_ring);
		kern_channel_notify(rx_ring, 0);
	}
}
1524 
/*
 * feth_rx_queue_submit
 * - logical-link variant of feth_rx_submit(): deliver `n_pkts' packets
 *   from `sif' to the default RX netif queue of the (llink_idx, qset_idx)
 *   queue set on peer `dif'
 * - validates the llink/qset indices and the presence of a default queue,
 *   performs the same pool-mode-dependent handover/copy/clone, then
 *   enqueues each packet, flushing on the last one
 */
static void
feth_rx_queue_submit(if_fake_ref sif, if_fake_ref dif, uint32_t llink_idx,
    uint32_t qset_idx, kern_packet_t sphs[], uint32_t n_pkts)
{
	errno_t err = 0;
	kern_netif_queue_t queue;
	kern_packet_t sph = 0, dph = 0;
	fake_llink *llink;
	fake_qset *qset;

	/* validate the logical-link index against the peer's configuration */
	if (llink_idx >= dif->iff_llink_cnt) {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_INPUT,
		    "invalid llink_idx idx %d (max %d) on peer %s",
		    llink_idx, dif->iff_llink_cnt, dif->iff_name);
		return;
	}
	llink = &dif->iff_llink[llink_idx];
	/* validate the queue-set index within that logical link */
	if (qset_idx >= llink->fl_qset_cnt) {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_INPUT,
		    "invalid qset_idx %d (max %d) on peer %s",
		    qset_idx, llink->fl_qset_cnt, dif->iff_name);
		return;
	}
	qset = &dif->iff_llink[llink_idx].fl_qset[qset_idx];
	queue = qset->fqs_rx_queue[0].fq_queue;
	if (queue == NULL) {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_INPUT,
		    "NULL default queue (llink_idx %d, qset_idx %d) on peer %s",
		    llink_idx, qset_idx, dif->iff_name);
		return;
	}
	for (uint32_t i = 0; i < n_pkts; i++) {
		uint32_t flags;

		sph = sphs[i];

		switch (dif->iff_pp_mode) {
		case IFF_PP_MODE_GLOBAL:
			/* shared pool: hand the packet over as-is */
			sphs[i] = 0;
			dph = sph;
			feth_update_pkt_tso_metadata_for_rx(dph);
			break;
		case IFF_PP_MODE_PRIVATE:
			err = feth_copy_packet(dif, sph, &dph);
			break;
		case IFF_PP_MODE_PRIVATE_SPLIT:
			err = feth_clone_packet(dif, sph, &dph);
			break;
		default:
			VERIFY(0);
			__builtin_unreachable();
		}
		/* copy/clone failure: skip this packet */
		if (__improbable(err != 0)) {
			continue;
		}

		/* apply the sender's configured trailer and/or FCS */
		if (sif->iff_trailer_length != 0) {
			feth_add_packet_trailer(dph, feth_trailer,
			    sif->iff_trailer_length);
		}
		if (sif->iff_fcs != 0) {
			feth_add_packet_fcs(dph);
		}
		feth_packet_set_trace_tag(dph, IFF_TT_INPUT);
		bpf_tap_packet_in(dif->iff_ifp, DLT_EN10MB, dph, NULL, 0);

		/* flush the queue along with the final packet of the batch */
		flags = (i == n_pkts - 1) ?
		    KERN_NETIF_QUEUE_RX_ENQUEUE_FLAG_FLUSH : 0;
		kern_netif_queue_rx_enqueue(queue, dph, 1, flags);
	}
}
1596 
1597 static void
feth_tx_complete(if_fake_ref fakeif,kern_packet_t phs[],uint32_t nphs)1598 feth_tx_complete(if_fake_ref fakeif, kern_packet_t phs[], uint32_t nphs)
1599 {
1600 	for (uint32_t i = 0; i < nphs; i++) {
1601 		kern_packet_t ph = phs[i];
1602 		if (ph == 0) {
1603 			continue;
1604 		}
1605 		int err = kern_packet_set_tx_completion_status(ph, 0);
1606 		VERIFY(err == 0);
1607 		kern_packet_tx_completion(ph, fakeif->iff_ifp);
1608 		kern_pbufpool_free(fakeif->iff_tx_pp, phs[i]);
1609 		phs[i] = 0;
1610 	}
1611 }
1612 
1613 #define NSEC_PER_USEC 1000ull
/*
 * feth_packet_has_expired
 * - check the packet's expire-time metadata against the current time
 * - returns true when the deadline has already passed; in that case
 *   *out_deadline (if non-NULL) is set to the packet's expiration
 *   deadline in mach time units
 * - NOTE(review): a packet whose remaining lifetime is within
 *   if_fake_expiration_threshold_us falls through to `out' without being
 *   marked expired (only the above-threshold case logs) — confirm intended.
 *   Also, time_until_expiration is int64_t but logged with %llu.
 */
static bool
feth_packet_has_expired(if_fake_ref __unused fakeif, kern_packet_t ph,
    uint64_t *out_deadline)
{
	uint64_t now;
	uint64_t packet_expire_time_mach;
	int64_t time_until_expiration;
	errno_t err;
	bool expired = false;

	/* mach timebase, captured lazily on first use */
	static mach_timebase_info_data_t clock_timebase = {0, 0};

	if (clock_timebase.denom == 0) {
		clock_timebase_info(&clock_timebase);
		VERIFY(clock_timebase.denom != 0);
	}

	/* no expire-time metadata on the packet: treat as not expired */
	err = kern_packet_get_expire_time(ph, &packet_expire_time_mach);
	if (err) {
		goto out;
	}

	now = mach_absolute_time();
	time_until_expiration = packet_expire_time_mach - now;
	if (time_until_expiration < 0) {
		/* The packet had expired */
		expired = true;
		goto out;
	}

	/* Convert the time_delta from mach ticks to nanoseconds */
	time_until_expiration *= clock_timebase.numer;
	time_until_expiration /= clock_timebase.denom;
	/* convert from nanoseconds to microseconds */
	time_until_expiration /= 1000ull;

	if (if_fake_expiration_threshold_us < time_until_expiration) {
		/* packet has some life ahead of it */
		FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
		    "Packet has %llu usec until expiration",
		    time_until_expiration);
		goto out;
	}

out:
	if (expired && out_deadline) {
		*out_deadline = packet_expire_time_mach;
	}

	return expired;
}
1669 
1670 static errno_t
feth_get_packet_notification_details(if_fake_ref fakeif,kern_packet_t ph,packet_id_t * pkt_id,uint32_t * nx_port_id)1671 feth_get_packet_notification_details(if_fake_ref fakeif, kern_packet_t ph,
1672     packet_id_t *pkt_id, uint32_t *nx_port_id)
1673 {
1674 	errno_t err = 0;
1675 
1676 	err = kern_packet_get_packetid(ph, pkt_id);
1677 	if (err != 0) {
1678 		FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
1679 		    "%s err=%d getting packetid", fakeif->iff_name, err);
1680 		return err;
1681 	}
1682 
1683 	err = kern_packet_get_tx_nexus_port_id(ph, nx_port_id);
1684 	if (err != 0) {
1685 		FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
1686 		    "%s err=%d getting nx_port_id", fakeif->iff_name, err);
1687 		return err;
1688 	}
1689 
1690 	return 0;
1691 }
1692 
1693 static packet_expiry_action_t
feth_get_effective_expn_action(if_fake_ref fakeif,kern_packet_t ph)1694 feth_get_effective_expn_action(if_fake_ref fakeif, kern_packet_t ph)
1695 {
1696 	errno_t err;
1697 	packet_expiry_action_t expiry_action;
1698 
1699 	switch (fakeif->iff_tx_exp_policy) {
1700 	case IFF_TX_EXP_POLICY_DISABLED:
1701 		expiry_action = PACKET_EXPIRY_ACTION_NONE;
1702 		break;
1703 	case IFF_TX_EXP_POLICY_NOTIFY_ONLY:
1704 		expiry_action = PACKET_EXPIRY_ACTION_NOTIFY;
1705 		break;
1706 	case IFF_TX_EXP_POLICY_DROP_AND_NOTIFY:
1707 		expiry_action = PACKET_EXPIRY_ACTION_DROP;
1708 		break;
1709 	case IFF_TX_EXP_POLICY_METADATA:
1710 		err = kern_packet_get_expiry_action(ph, &expiry_action);
1711 		if (err != 0) {
1712 			if (err != ENOENT) {
1713 				FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
1714 				    "Error %d when getting expiry action",
1715 				    err);
1716 			}
1717 			expiry_action = PACKET_EXPIRY_ACTION_NONE;
1718 		}
1719 		break;
1720 	default:
1721 		FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
1722 		    "Unrecognized value %d for \"net.link.fake.tx_exp_policy\"",
1723 		    fakeif->iff_tx_exp_policy);
1724 		expiry_action = PACKET_EXPIRY_ACTION_NONE;
1725 	}
1726 
1727 	return expiry_action;
1728 }
1729 
/* returns true if the packet is selected for expiration and should be dropped */
static bool
feth_tx_expired_error(if_fake_ref fakeif, kern_packet_t ph)
{
	int err = 0;
	uint32_t nx_port_id = 0;
	os_channel_event_packet_transmit_expired_t expn = {0};
	packet_expiry_action_t expiry_action = PACKET_EXPIRY_ACTION_NONE;

	FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC, "%s", fakeif->iff_name);

	/* consult the policy only for packets that actually expired */
	if (feth_packet_has_expired(fakeif, ph, &expn.packet_tx_expiration_deadline)) {
		expiry_action = feth_get_effective_expn_action(fakeif, ph);
	}

	bool drop_packet = (expiry_action == PACKET_EXPIRY_ACTION_DROP);
	/* NOTIFY and DROP both raise a channel event; only DROP returns true */
	if (expiry_action != PACKET_EXPIRY_ACTION_NONE) {
		/* set the expiration status code */
		expn.packet_tx_expiration_status = drop_packet ?
		    CHANNEL_EVENT_PKT_TRANSMIT_EXPIRED_ERR_EXPIRED_DROPPED :
		    CHANNEL_EVENT_PKT_TRANSMIT_EXPIRED_ERR_EXPIRED_NOT_DROPPED;

		/* Mark the expiration timestamp */
		expn.packet_tx_expiration_timestamp = mach_absolute_time();

		err = feth_get_packet_notification_details(fakeif, ph,
		    &expn.packet_id, &nx_port_id);

		if (err == 0) {
			err = kern_channel_event_transmit_expired(
				fakeif->iff_ifp, &expn, nx_port_id);
			FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
			    "%s sent expiry notification on nexus port "
			    "%u notif code %u",
			    fakeif->iff_name, nx_port_id,
			    expn.packet_tx_expiration_status);
		}
		if (err != 0) {
			FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
			    "%s err=%d, nx_port_id: 0x%x",
			    fakeif->iff_name, err, nx_port_id);
		}
	}

	return drop_packet;
}
1776 
/* returns true if the packet is selected for TX error & dropped */
static bool
feth_tx_complete_error(if_fake_ref fakeif, kern_packet_t ph)
{
	int err;

	/* only the packet whose ordinal equals the configured drop rate
	 * is faulted; rate 0 disables the simulation entirely */
	if (fakeif->iff_tx_drop_rate == 0 ||
	    fakeif->iff_tx_pkts_count != fakeif->iff_tx_drop_rate) {
		return false;
	}
	/* simulate TX completion error on the packet */
	if (fakeif->iff_tx_completion_mode == IFF_TX_COMPL_MODE_SYNC) {
		/* synchronous mode: report the failure via the packet's
		 * completion status */
		err = kern_packet_set_tx_completion_status(ph,
		    CHANNEL_EVENT_PKT_TRANSMIT_STATUS_ERR_RETRY_FAILED);
		VERIFY(err == 0);
		kern_packet_tx_completion(ph, fakeif->iff_ifp);
	} else {
		/* asynchronous mode: deliver a transmit-status channel
		 * event to the originating nexus port */
		uint32_t nx_port_id = 0;
		os_channel_event_packet_transmit_status_t pkt_tx_status = {0};

		pkt_tx_status.packet_status =
		    CHANNEL_EVENT_PKT_TRANSMIT_STATUS_ERR_RETRY_FAILED;
		err = feth_get_packet_notification_details(fakeif, ph,
		    &pkt_tx_status.packet_id, &nx_port_id);
		if (err == 0) {
			err = kern_channel_event_transmit_status(
				fakeif->iff_ifp, &pkt_tx_status, nx_port_id);
		}
		if (err != 0) {
			FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
			    "%s err=%d, nx_port_id: 0x%x",
			    fakeif->iff_name, err, nx_port_id);
		}
	}

	return true;
}
1814 
/*
 * feth_if_adv
 * - thread-call worker that periodically emits a synthetic interface
 *   advisory report (fixed 1Gbps figures) while a channel is connected
 * - reschedules itself every iff_adv_interval milliseconds until the
 *   interface detaches or the channel disconnects
 */
static void
feth_if_adv(thread_call_param_t arg0, thread_call_param_t arg1)
{
#pragma unused(arg1)
	errno_t                            error;
	if_fake_ref                        fakeif = (if_fake_ref)arg0;
	struct ifnet_interface_advisory    if_adv;
	struct ifnet_stats_param           if_stat;

	/* bail out if the interface is detaching or disconnected */
	feth_lock();
	if (feth_is_detaching(fakeif) || !fakeif->iff_channel_connected) {
		feth_unlock();
		return;
	}
	feth_unlock();

	/* NOTE(review): iff_intf_adv_enabled is read without holding the
	 * feth lock here — confirm it is safe to read unlocked */
	if (!fakeif->iff_intf_adv_enabled) {
		goto done;
	}

	error = ifnet_stat(fakeif->iff_ifp, &if_stat);
	if (error != 0) {
		FAKE_LOG(LOG_NOTICE, 0, "%s: ifnet_stat() failed %d",
		    fakeif->iff_name, error);
		goto done;
	}
	/* fabricate a TX-direction advisory with constant 1Gbps figures */
	if_adv.header.version = IF_INTERFACE_ADVISORY_VERSION_CURRENT;
	if_adv.header.direction = IF_INTERFACE_ADVISORY_DIRECTION_TX;
	if_adv.header.interface_type =
	    IF_INTERFACE_ADVISORY_INTERFACE_TYPE_WIFI;
	if_adv.capacity.timestamp = mach_absolute_time();
	if_adv.capacity.rate_trend_suggestion =
	    IF_INTERFACE_ADVISORY_RATE_SUGGESTION_RAMP_NEUTRAL;
	if_adv.capacity.max_bandwidth = 1000 * 1000 * 1000; /* 1Gbps */
	if_adv.capacity.total_byte_count = if_stat.packets_out;
	if_adv.capacity.average_throughput = 1000 * 1000 * 1000; /* 1Gbps */
	if_adv.capacity.flushable_queue_size = UINT32_MAX;
	if_adv.capacity.non_flushable_queue_size = UINT32_MAX;
	if_adv.capacity.average_delay = 1; /* ms */

	error = fakeif->iff_intf_adv_notify(fakeif->iff_intf_adv_kern_ctx,
	    &if_adv);
	if (error != 0) {
		FAKE_LOG(LOG_NOTICE, 0,
		    "%s: interface advisory report failed %d",
		    fakeif->iff_name, error);
	}

done:
	/* re-arm the thread call unless the interface is going away */
	feth_lock();
	if (!feth_is_detaching(fakeif) && fakeif->iff_channel_connected) {
		uint64_t deadline;
		clock_interval_to_deadline(fakeif->iff_adv_interval,
		    NSEC_PER_MSEC, &deadline);
		thread_call_enter_delayed(fakeif->iff_if_adv_tcall, deadline);
	}
	feth_unlock();
}
1873 
/*
 * feth_if_adv_tcall_create
 * - allocate and arm the interface-advisory thread call (feth_if_adv)
 * - takes a reference on fakeif that is held for the lifetime of the
 *   thread call; released in feth_nx_pre_disconnect() after the tcall
 *   is cancelled and freed
 * - returns ENXIO if the thread call could not be allocated
 */
static int
feth_if_adv_tcall_create(if_fake_ref fakeif)
{
	uint64_t deadline;

	feth_lock();
	ASSERT(fakeif->iff_if_adv_tcall == NULL);
	ASSERT(fakeif->iff_adv_interval > 0);
	ASSERT(fakeif->iff_channel_connected);
	/* ONCE option: each invocation must explicitly re-arm itself */
	fakeif->iff_if_adv_tcall =
	    thread_call_allocate_with_options(feth_if_adv,
	    (thread_call_param_t)fakeif, THREAD_CALL_PRIORITY_KERNEL,
	    THREAD_CALL_OPTIONS_ONCE);
	if (fakeif->iff_if_adv_tcall == NULL) {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
		    "%s if_adv tcall alloc failed",
		    fakeif->iff_name);
		feth_unlock();
		return ENXIO;
	}
	/* retain for the interface advisory thread call */
	feth_retain(fakeif);
	clock_interval_to_deadline(fakeif->iff_adv_interval,
	    NSEC_PER_MSEC, &deadline);
	thread_call_enter_delayed(fakeif->iff_if_adv_tcall, deadline);
	feth_unlock();
	return 0;
}
1902 
1903 /**
1904 ** nexus netif domain provider
1905 **/
/* netif domain provider init: nothing to set up, always succeeds */
static errno_t
feth_nxdp_init(kern_nexus_domain_provider_t domprov)
{
#pragma unused(domprov)
	return 0;
}
1912 
/* netif domain provider fini: nothing to tear down */
static void
feth_nxdp_fini(kern_nexus_domain_provider_t domprov)
{
#pragma unused(domprov)
}
1918 
1919 static uuid_t                   feth_nx_dom_prov;
1920 
1921 static errno_t
feth_register_nexus_domain_provider(void)1922 feth_register_nexus_domain_provider(void)
1923 {
1924 	const struct kern_nexus_domain_provider_init dp_init = {
1925 		.nxdpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
1926 		.nxdpi_flags = 0,
1927 		.nxdpi_init = feth_nxdp_init,
1928 		.nxdpi_fini = feth_nxdp_fini
1929 	};
1930 	errno_t                         err = 0;
1931 
1932 	/* feth_nxdp_init() is called before this function returns */
1933 	err = kern_nexus_register_domain_provider(NEXUS_TYPE_NET_IF,
1934 	    (const uint8_t *)
1935 	    "com.apple.feth",
1936 	    &dp_init, sizeof(dp_init),
1937 	    &feth_nx_dom_prov);
1938 	if (err != 0) {
1939 		FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
1940 		    "failed to register domain provider");
1941 		return err;
1942 	}
1943 	return 0;
1944 }
1945 
1946 /**
1947 ** netif nexus routines
1948 **/
1949 static if_fake_ref
feth_nexus_context(kern_nexus_t nexus)1950 feth_nexus_context(kern_nexus_t nexus)
1951 {
1952 	if_fake_ref fakeif;
1953 
1954 	fakeif = (if_fake_ref)kern_nexus_get_context(nexus);
1955 	assert(fakeif != NULL);
1956 	return fakeif;
1957 }
1958 
1959 static uint8_t
feth_find_tx_ring_by_svc(kern_packet_svc_class_t svc_class)1960 feth_find_tx_ring_by_svc(kern_packet_svc_class_t svc_class)
1961 {
1962 	switch (svc_class) {
1963 	case KPKT_SC_VO:
1964 		return 0;
1965 	case KPKT_SC_VI:
1966 		return 1;
1967 	case KPKT_SC_BE:
1968 		return 2;
1969 	case KPKT_SC_BK:
1970 		return 3;
1971 	default:
1972 		VERIFY(0);
1973 		return 0;
1974 	}
1975 }
1976 
/*
 * feth_nx_ring_init
 * - record the newly created channel ring in the per-interface ring table
 * - in WMM mode a TX ring is slotted by its service class; otherwise
 *   there is a single TX ring at index 0 (and always one RX ring)
 * - also caches a pointer to the netif stats block for this nexus
 */
static errno_t
feth_nx_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_t channel, kern_channel_ring_t ring, boolean_t is_tx_ring,
    void **ring_ctx)
{
	if_fake_ref     fakeif;
	int             err;
#pragma unused(nxprov, channel, ring_ctx)
	feth_lock();
	fakeif = feth_nexus_context(nexus);
	if (feth_is_detaching(fakeif)) {
		/* interface is going away; silently ignore the ring */
		feth_unlock();
		return 0;
	}
	if (is_tx_ring) {
		if (feth_in_wmm_mode(fakeif)) {
			kern_packet_svc_class_t svc_class;
			uint8_t ring_idx;

			/* place the ring in the slot for its service class */
			err = kern_channel_get_service_class(ring, &svc_class);
			VERIFY(err == 0);
			ring_idx = feth_find_tx_ring_by_svc(svc_class);
			VERIFY(ring_idx < IFF_NUM_TX_RINGS_WMM_MODE);
			VERIFY(fakeif->iff_tx_ring[ring_idx] == NULL);
			fakeif->iff_tx_ring[ring_idx] = ring;
		} else {
			VERIFY(fakeif->iff_tx_ring[0] == NULL);
			fakeif->iff_tx_ring[0] = ring;
		}
	} else {
		VERIFY(fakeif->iff_rx_ring[0] == NULL);
		fakeif->iff_rx_ring[0] = ring;
	}
	fakeif->iff_nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
	feth_unlock();
	FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE, "%s: %s ring init",
	    fakeif->iff_name, is_tx_ring ? "TX" : "RX");
	return 0;
}
2016 
/*
 * feth_nx_ring_fini
 * - forget the ring being torn down; when the last TX ring goes away,
 *   also take ownership of the async doorbell thread call and free it
 * - if the thread call could not be cancelled (it is running), wait for
 *   the running invocation to finish via the
 *   iff_waiting_for_tcall/msleep handshake with feth_async_doorbell()
 *   before freeing it; the tcall's fakeif reference is dropped here
 */
static void
feth_nx_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t ring)
{
#pragma unused(nxprov, ring)
	if_fake_ref     fakeif;
	thread_call_t   tcall = NULL;

	feth_lock();
	fakeif = feth_nexus_context(nexus);
	if (fakeif->iff_rx_ring[0] == ring) {
		fakeif->iff_rx_ring[0] = NULL;
		FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
		    "%s: RX ring fini", fakeif->iff_name);
	} else if (feth_in_wmm_mode(fakeif)) {
		int i;
		/* clear this ring's slot */
		for (i = 0; i < IFF_MAX_TX_RINGS; i++) {
			if (fakeif->iff_tx_ring[i] == ring) {
				fakeif->iff_tx_ring[i] = NULL;
				break;
			}
		}
		/* claim the doorbell tcall only if no TX rings remain */
		for (i = 0; i < IFF_MAX_TX_RINGS; i++) {
			if (fakeif->iff_tx_ring[i] != NULL) {
				break;
			}
		}
		if (i == IFF_MAX_TX_RINGS) {
			tcall = fakeif->iff_doorbell_tcall;
			fakeif->iff_doorbell_tcall = NULL;
		}
		FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
		    "%s: TX ring fini", fakeif->iff_name);
	} else if (fakeif->iff_tx_ring[0] == ring) {
		/* single-ring mode: last TX ring, claim the tcall */
		tcall = fakeif->iff_doorbell_tcall;
		fakeif->iff_doorbell_tcall = NULL;
		fakeif->iff_tx_ring[0] = NULL;
	}
	fakeif->iff_nifs = NULL;
	feth_unlock();
	if (tcall != NULL) {
		boolean_t       success;

		success = thread_call_cancel_wait(tcall);
		FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
		    "%s: thread_call_cancel %s", fakeif->iff_name,
		    success ? "SUCCESS" : "FAILURE");
		if (!success) {
			/*
			 * Cancel failed: the callout is (or was) running.
			 * Wait for feth_async_doorbell() to clear
			 * iff_doorbell_tcall_active and wake us up.
			 */
			feth_lock();
			if (fakeif->iff_doorbell_tcall_active) {
				fakeif->iff_waiting_for_tcall = TRUE;
				FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
				    "%s: *waiting for threadcall",
				    fakeif->iff_name);
				do {
					msleep(fakeif, &feth_lck_mtx,
					    PZERO, "feth threadcall", 0);
				} while (fakeif->iff_doorbell_tcall_active);
				FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
				    "%s: ^threadcall done",
				    fakeif->iff_name);
				fakeif->iff_waiting_for_tcall = FALSE;
			}
			feth_unlock();
		}
		success = thread_call_free(tcall);
		FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
		    "%s: thread_call_free %s",
		    fakeif->iff_name,
		    success ? "SUCCESS" : "FAILURE");
		/* drop the reference taken when the tcall was allocated */
		feth_release(fakeif);
		VERIFY(success == TRUE);
	}
}
2091 
/* channel pre-connect callback: no per-channel state, always accept */
static errno_t
feth_nx_pre_connect(kern_nexus_provider_t nxprov,
    proc_t proc, kern_nexus_t nexus, nexus_port_t port, kern_channel_t channel,
    void **channel_context)
{
#pragma unused(nxprov, proc, nexus, port, channel, channel_context)
	return 0;
}
2100 
/*
 * feth_nx_connected
 * - mark the channel connected; takes a fakeif reference that is dropped
 *   in feth_nx_pre_disconnect()
 * - if interface advisory reporting is configured, start the periodic
 *   advisory thread call
 */
static errno_t
feth_nx_connected(kern_nexus_provider_t nxprov,
    kern_nexus_t nexus, kern_channel_t channel)
{
#pragma unused(nxprov, channel)
	int err;
	if_fake_ref fakeif;

	fakeif = feth_nexus_context(nexus);
	feth_lock();
	if (feth_is_detaching(fakeif)) {
		feth_unlock();
		return EBUSY;
	}
	/* retained while the channel stays connected */
	feth_retain(fakeif);
	fakeif->iff_channel_connected = TRUE;
	feth_unlock();
	if (feth_has_intf_advisory_configured(fakeif)) {
		err = feth_if_adv_tcall_create(fakeif);
		if (err != 0) {
			/*
			 * NOTE(review): on this error path the retain and
			 * iff_channel_connected above are not rolled back
			 * here -- presumably feth_nx_pre_disconnect() is
			 * still invoked and cleans them up; confirm.
			 */
			return err;
		}
	}
	FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE, "%s: connected channel %p",
	    fakeif->iff_name, channel);
	return 0;
}
2128 
/*
 * feth_nx_pre_disconnect
 * - bring the interface down, mark the channel disconnected, and tear
 *   down the interface-advisory thread call (cancelling/freeing it and
 *   dropping the reference it held)
 * - finally drops the reference taken in feth_nx_connected(), guarded
 *   by the saved `connected` flag so a never-connected channel is safe
 */
static void
feth_nx_pre_disconnect(kern_nexus_provider_t nxprov,
    kern_nexus_t nexus, kern_channel_t channel)
{
#pragma unused(nxprov, channel)
	if_fake_ref fakeif;
	thread_call_t tcall;
	boolean_t connected;

	fakeif = feth_nexus_context(nexus);
	FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
	    "%s: pre-disconnect channel %p",
	    fakeif->iff_name, channel);
	/* Quiesce the interface and flush any pending outbound packets. */
	if_down(fakeif->iff_ifp);
	feth_lock();
	connected = fakeif->iff_channel_connected;
	fakeif->iff_channel_connected = FALSE;
	/* take ownership of the advisory tcall outside the lock */
	tcall = fakeif->iff_if_adv_tcall;
	fakeif->iff_if_adv_tcall = NULL;
	feth_unlock();
	if (tcall != NULL) {
		(void) thread_call_cancel_wait(tcall);
		if (!thread_call_free(tcall)) {
			/* tcall re-armed itself in between; cancel again */
			boolean_t freed;
			(void) thread_call_cancel_wait(tcall);
			freed = thread_call_free(tcall);
			VERIFY(freed);
		}
		/* release for the interface advisory thread call */
		feth_release(fakeif);
	}
	if (connected) {
		/* release for the reference taken in feth_nx_connected() */
		feth_release(fakeif);
	}
}
2165 
2166 static void
feth_nx_disconnected(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel)2167 feth_nx_disconnected(kern_nexus_provider_t nxprov,
2168     kern_nexus_t nexus, kern_channel_t channel)
2169 {
2170 #pragma unused(nxprov, channel)
2171 	if_fake_ref fakeif;
2172 
2173 	fakeif = feth_nexus_context(nexus);
2174 	FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE, "%s: disconnected channel %p",
2175 	    fakeif->iff_name, channel);
2176 }
2177 
/* slot init callback: no per-slot state or properties, always succeeds */
static errno_t
feth_nx_slot_init(kern_nexus_provider_t nxprov,
    kern_nexus_t nexus, kern_channel_ring_t ring, kern_channel_slot_t slot,
    uint32_t slot_index, struct kern_slot_prop **slot_prop_addr,
    void **slot_context)
{
#pragma unused(nxprov, nexus, ring, slot, slot_index, slot_prop_addr, slot_context)
	return 0;
}
2187 
/* slot fini callback: nothing to release per slot */
static void
feth_nx_slot_fini(kern_nexus_provider_t nxprov,
    kern_nexus_t nexus, kern_channel_ring_t ring, kern_channel_slot_t slot,
    uint32_t slot_index)
{
#pragma unused(nxprov, nexus, ring, slot, slot_index)
}
2195 
/*
 * feth_nx_sync_tx
 * - TX sync: drain the TX ring, detach each packet, tap it to bpf, and
 *   hand it to the peer interface's RX path in batches of up to
 *   IFF_MAX_BATCH_SIZE ("crossed-over" feth pair semantics)
 * - packets are dropped (freed) when a TX error is being simulated or
 *   when the peer's channel is not connected
 * - runs entirely under feth_lock(); ring/stat updates happen at the end
 */
static errno_t
feth_nx_sync_tx(kern_nexus_provider_t nxprov,
    kern_nexus_t nexus, kern_channel_ring_t tx_ring, uint32_t flags)
{
#pragma unused(nxprov)
	if_fake_ref             fakeif;
	ifnet_t                 ifp;
	kern_channel_slot_t     last_tx_slot = NULL;
	ifnet_t                 peer_ifp;
	if_fake_ref             peer_fakeif = NULL;
	struct kern_channel_ring_stat_increment stats;
	kern_channel_slot_t     tx_slot;
	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
	kern_packet_t           pkts[IFF_MAX_BATCH_SIZE];
	uint32_t                n_pkts = 0;

	memset(&stats, 0, sizeof(stats));

	STATS_INC(nifs, NETIF_STATS_TX_SYNC);
	fakeif = feth_nexus_context(nexus);
	FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
	    "%s ring %d flags 0x%x", fakeif->iff_name,
	    tx_ring->ckr_ring_id, flags);
	(void)flags;
	feth_lock();
	if (feth_is_detaching(fakeif) || !fakeif->iff_channel_connected) {
		feth_unlock();
		return 0;
	}
	ifp = fakeif->iff_ifp;
	peer_ifp = fakeif->iff_peer;
	/*
	 * Validate the peer: no peer, a detaching peer, or (unless TX
	 * expiration simulation is enabled) a peer without a connected
	 * channel all mean there is nowhere to deliver -- bail out.
	 */
	if (peer_ifp != NULL) {
		peer_fakeif = ifnet_get_if_fake(peer_ifp);
		if (peer_fakeif != NULL) {
			if (feth_is_detaching(peer_fakeif)) {
				FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
				    "%s peer fakeif %s is detaching",
				    fakeif->iff_name, peer_fakeif->iff_name);
				goto done;
			}
			if (!peer_fakeif->iff_channel_connected) {
				if (fakeif->iff_tx_exp_policy ==
				    IFF_TX_EXP_POLICY_DISABLED) {
					FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
					    "%s peer fakeif %s channel not connected, expn: %d",
					    fakeif->iff_name, peer_fakeif->iff_name,
					    fakeif->iff_tx_exp_policy);
					goto done;
				}
			}
		} else {
			FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
			    "%s no peer fakeif (peer %p)",
			    fakeif->iff_name, peer_ifp);
			goto done;
		}
	} else {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
		    "%s no peer", fakeif->iff_name);
		goto done;
	}
	tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
	while (tx_slot != NULL) {
		uint16_t off;
		kern_packet_t sph;

		/* detach the packet from the TX ring */
		sph = kern_channel_slot_get_packet(tx_ring, tx_slot);
		VERIFY(sph != 0);
		kern_channel_slot_detach_packet(tx_ring, tx_slot, sph);

		/* bpf tap output */
		off = kern_packet_get_headroom(sph);
		VERIFY(off >= fakeif->iff_tx_headroom);
		kern_packet_set_link_header_length(sph, ETHER_HDR_LEN);
		feth_packet_set_trace_tag(sph, IFF_TT_OUTPUT);
		bpf_tap_packet_out(ifp, DLT_EN10MB, sph, NULL, 0);

		/* drop packets, if requested */
		fakeif->iff_tx_pkts_count++;
		if (feth_tx_expired_error(fakeif, sph) ||
		    feth_tx_complete_error(fakeif, sph) ||
		    !peer_fakeif->iff_channel_connected) {
			fakeif->iff_tx_pkts_count = 0;
			kern_pbufpool_free(fakeif->iff_tx_pp, sph);
			STATS_INC(nifs, NETIF_STATS_DROP);
			goto next_tx_slot;
		}

		ASSERT(sph != 0);
		STATS_INC(nifs, NETIF_STATS_TX_COPY_DIRECT);
		STATS_INC(nifs, NETIF_STATS_TX_PACKETS);

		stats.kcrsi_slots_transferred++;
		stats.kcrsi_bytes_transferred
		        += kern_packet_get_data_length(sph);

		/* prepare batch for receiver */
		pkts[n_pkts++] = sph;
		if (n_pkts == IFF_MAX_BATCH_SIZE) {
			/* full batch: push to the peer and complete TX */
			feth_rx_submit(fakeif, peer_fakeif, pkts, n_pkts);
			feth_tx_complete(fakeif, pkts, n_pkts);
			n_pkts = 0;
		}

next_tx_slot:
		last_tx_slot = tx_slot;
		tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
	}

	/* catch last batch for receiver */
	if (n_pkts != 0) {
		feth_rx_submit(fakeif, peer_fakeif, pkts, n_pkts);
		feth_tx_complete(fakeif, pkts, n_pkts);
		n_pkts = 0;
	}

	/* advance the ring past everything consumed and update stats */
	if (last_tx_slot != NULL) {
		kern_channel_advance_slot(tx_ring, last_tx_slot);
		kern_channel_increment_ring_net_stats(tx_ring, ifp, &stats);
	}
done:
	feth_unlock();
	return 0;
}
2321 
2322 static errno_t
feth_nx_sync_rx(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring,uint32_t flags)2323 feth_nx_sync_rx(kern_nexus_provider_t nxprov,
2324     kern_nexus_t nexus, kern_channel_ring_t ring, uint32_t flags)
2325 {
2326 #pragma unused(nxprov, ring, flags)
2327 	if_fake_ref             fakeif;
2328 	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
2329 
2330 	STATS_INC(nifs, NETIF_STATS_RX_SYNC);
2331 	fakeif = feth_nexus_context(nexus);
2332 	FAKE_LOG(LOG_DEBUG, FE_DBGF_INPUT, "%s", fakeif->iff_name);
2333 	return 0;
2334 }
2335 
2336 static errno_t
feth_nx_tx_dequeue_driver_managed(if_fake_ref fakeif,boolean_t doorbell_ctxt)2337 feth_nx_tx_dequeue_driver_managed(if_fake_ref fakeif, boolean_t doorbell_ctxt)
2338 {
2339 	int i;
2340 	errno_t error = 0;
2341 	boolean_t more;
2342 
2343 	for (i = 0; i < IFF_NUM_TX_RINGS_WMM_MODE; i++) {
2344 		kern_channel_ring_t ring = fakeif->iff_tx_ring[i];
2345 		if (ring != NULL) {
2346 			error = kern_channel_tx_refill(ring, UINT32_MAX,
2347 			    UINT32_MAX, doorbell_ctxt, &more);
2348 		}
2349 		if (error != 0) {
2350 			FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
2351 			    "%s: TX refill ring %d (%s) %d",
2352 			    fakeif->iff_name, ring->ckr_ring_id,
2353 			    doorbell_ctxt ? "sync" : "async", error);
2354 			if (!((error == EAGAIN) || (error == EBUSY))) {
2355 				break;
2356 			}
2357 		} else {
2358 			FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
2359 			    "%s: TX refilled ring %d (%s)",
2360 			    fakeif->iff_name, ring->ckr_ring_id,
2361 			    doorbell_ctxt ? "sync" : "async");
2362 		}
2363 	}
2364 	return error;
2365 }
2366 
/*
 * feth_async_doorbell
 * - thread call that performs a deferred (asynchronous) TX refill
 * - cooperates with feth_nx_ring_fini(): clears
 *   iff_doorbell_tcall_active and wakes any waiter so the ring teardown
 *   path can safely free this thread call
 * - note: the `done:` label is reached with feth_lock held on both the
 *   early-exit path (goto) and the normal path (re-acquired above it)
 */
static void
feth_async_doorbell(thread_call_param_t arg0, thread_call_param_t arg1)
{
#pragma unused(arg1)
	errno_t                 error;
	if_fake_ref             fakeif = (if_fake_ref)arg0;
	kern_channel_ring_t     ring;
	boolean_t               more;

	feth_lock();
	ring = fakeif->iff_tx_ring[0];
	if (feth_is_detaching(fakeif) ||
	    !fakeif->iff_channel_connected ||
	    ring == NULL) {
		goto done;
	}
	/* mark the callout as running; drop the lock across the refill */
	fakeif->iff_doorbell_tcall_active = TRUE;
	feth_unlock();
	if (feth_in_wmm_mode(fakeif)) {
		error = feth_nx_tx_dequeue_driver_managed(fakeif, FALSE);
	} else {
		error = kern_channel_tx_refill(ring, UINT32_MAX,
		    UINT32_MAX, FALSE, &more);
	}
	if (error != 0) {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT, "%s: TX refill failed %d",
		    fakeif->iff_name, error);
	} else {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT, "%s: TX refilled",
		    fakeif->iff_name);
	}

	feth_lock();
done:
	fakeif->iff_doorbell_tcall_active = FALSE;
	if (fakeif->iff_waiting_for_tcall) {
		/* feth_nx_ring_fini() is msleep'ing on fakeif; wake it */
		FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
		    "%s: threadcall waking up waiter", fakeif->iff_name);
		wakeup((caddr_t)fakeif);
	}
	feth_unlock();
}
2409 
/*
 * feth_schedule_async_doorbell
 * - schedule feth_async_doorbell() to run; lazily allocates the thread
 *   call on first use and takes a fakeif reference for it (released in
 *   feth_nx_ring_fini() when the tcall is freed)
 * - a no-op if the interface is detaching or the channel is gone
 */
static void
feth_schedule_async_doorbell(if_fake_ref fakeif)
{
	thread_call_t   tcall;

	feth_lock();
	if (feth_is_detaching(fakeif) || !fakeif->iff_channel_connected) {
		feth_unlock();
		return;
	}
	tcall = fakeif->iff_doorbell_tcall;
	if (tcall != NULL) {
		thread_call_enter(tcall);
	} else {
		/* first doorbell: allocate a ONCE thread call */
		tcall = thread_call_allocate_with_options(feth_async_doorbell,
		    (thread_call_param_t)fakeif,
		    THREAD_CALL_PRIORITY_KERNEL,
		    THREAD_CALL_OPTIONS_ONCE);
		if (tcall == NULL) {
			/* allocation failure: this doorbell is dropped */
			FAKE_LOG(LOG_NOTICE, FE_DBGF_OUTPUT,
			    "%s tcall alloc failed", fakeif->iff_name);
		} else {
			fakeif->iff_doorbell_tcall = tcall;
			/* retained for the lifetime of the thread call */
			feth_retain(fakeif);
			thread_call_enter(tcall);
		}
	}
	feth_unlock();
}
2439 
2440 static errno_t
feth_nx_tx_doorbell(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring,uint32_t flags)2441 feth_nx_tx_doorbell(kern_nexus_provider_t nxprov,
2442     kern_nexus_t nexus, kern_channel_ring_t ring, uint32_t flags)
2443 {
2444 #pragma unused(nxprov, ring, flags)
2445 	errno_t         error;
2446 	if_fake_ref     fakeif;
2447 
2448 	fakeif = feth_nexus_context(nexus);
2449 	FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT, "%s", fakeif->iff_name);
2450 
2451 	if ((flags & KERN_NEXUS_TXDOORBELLF_ASYNC_REFILL) == 0) {
2452 		boolean_t       more;
2453 		/* synchronous tx refill */
2454 		if (feth_in_wmm_mode(fakeif)) {
2455 			error = feth_nx_tx_dequeue_driver_managed(fakeif, TRUE);
2456 		} else {
2457 			error = kern_channel_tx_refill(ring, UINT32_MAX,
2458 			    UINT32_MAX, TRUE, &more);
2459 		}
2460 		if (error != 0) {
2461 			FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
2462 			    "%s: TX refill (sync) %d", fakeif->iff_name, error);
2463 		} else {
2464 			FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
2465 			    "%s: TX refilled (sync)", fakeif->iff_name);
2466 		}
2467 	} else {
2468 		FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
2469 		    "%s: schedule async refill", fakeif->iff_name);
2470 		feth_schedule_async_doorbell(fakeif);
2471 	}
2472 	return 0;
2473 }
2474 
2475 static errno_t
feth_netif_prepare(kern_nexus_t nexus,ifnet_t ifp)2476 feth_netif_prepare(kern_nexus_t nexus, ifnet_t ifp)
2477 {
2478 	if_fake_ref fakeif;
2479 
2480 	fakeif = (if_fake_ref)kern_nexus_get_context(nexus);
2481 	feth_ifnet_set_attrs(fakeif, ifp);
2482 	return 0;
2483 }
2484 
/*
 * feth_nx_intf_adv_config
 * - capability callback: enable/disable interface advisory reporting;
 *   the flag is consumed (unlocked) by feth_if_adv()
 */
static errno_t
feth_nx_intf_adv_config(void *prov_ctx, bool enable)
{
	if_fake_ref fakeif = prov_ctx;

	feth_lock();
	fakeif->iff_intf_adv_enabled = enable;
	feth_unlock();
	FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
	    "%s enable %d", fakeif->iff_name, enable);
	return 0;
}
2497 
2498 static errno_t
fill_capab_interface_advisory(if_fake_ref fakeif,void * contents,uint32_t * len)2499 fill_capab_interface_advisory(if_fake_ref fakeif, void *contents, uint32_t *len)
2500 {
2501 	struct kern_nexus_capab_interface_advisory *capab = contents;
2502 
2503 	if (*len != sizeof(*capab)) {
2504 		return EINVAL;
2505 	}
2506 	if (capab->kncia_version !=
2507 	    KERN_NEXUS_CAPAB_INTERFACE_ADVISORY_VERSION_1) {
2508 		return EINVAL;
2509 	}
2510 	if (!feth_has_intf_advisory_configured(fakeif)) {
2511 		return ENOTSUP;
2512 	}
2513 	VERIFY(capab->kncia_notify != NULL);
2514 	fakeif->iff_intf_adv_kern_ctx = capab->kncia_kern_context;
2515 	fakeif->iff_intf_adv_notify = capab->kncia_notify;
2516 	capab->kncia_provider_context = fakeif;
2517 	capab->kncia_config = feth_nx_intf_adv_config;
2518 	return 0;
2519 }
2520 
2521 static errno_t
feth_notify_steering_info(void * prov_ctx,void * qset_ctx,struct ifnet_traffic_descriptor_common * td,bool add)2522 feth_notify_steering_info(void *prov_ctx, void *qset_ctx,
2523     struct ifnet_traffic_descriptor_common *td, bool add)
2524 {
2525 #pragma unused(td)
2526 	if_fake_ref fakeif = prov_ctx;
2527 	fake_qset *qset = qset_ctx;
2528 
2529 	FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
2530 	    "%s: notify_steering_info: qset_id 0x%llx, %s",
2531 	    fakeif->iff_name, qset->fqs_id, add ? "add" : "remove");
2532 	return 0;
2533 }
2534 
2535 static errno_t
fill_capab_qset_extensions(if_fake_ref fakeif,void * contents,uint32_t * len)2536 fill_capab_qset_extensions(if_fake_ref fakeif, void *contents, uint32_t *len)
2537 {
2538 	struct kern_nexus_capab_qset_extensions *capab = contents;
2539 
2540 	if (*len != sizeof(*capab)) {
2541 		return EINVAL;
2542 	}
2543 	if (capab->cqe_version !=
2544 	    KERN_NEXUS_CAPAB_QSET_EXTENSIONS_VERSION_1) {
2545 		return EINVAL;
2546 	}
2547 	capab->cqe_prov_ctx = fakeif;
2548 	capab->cqe_notify_steering_info = feth_notify_steering_info;
2549 	return 0;
2550 }
2551 
2552 static errno_t
feth_nx_capab_config(kern_nexus_provider_t nxprov,kern_nexus_t nx,kern_nexus_capab_t capab,void * contents,uint32_t * len)2553 feth_nx_capab_config(kern_nexus_provider_t nxprov, kern_nexus_t nx,
2554     kern_nexus_capab_t capab, void *contents, uint32_t *len)
2555 {
2556 #pragma unused(nxprov)
2557 	errno_t error;
2558 	if_fake_ref fakeif;
2559 
2560 	fakeif = feth_nexus_context(nx);
2561 	FAKE_LOG(LOG_DEBUG, FE_DBGF_CONTROL, "%s", fakeif->iff_name);
2562 
2563 	switch (capab) {
2564 	case KERN_NEXUS_CAPAB_INTERFACE_ADVISORY:
2565 		error = fill_capab_interface_advisory(fakeif, contents, len);
2566 		break;
2567 	case KERN_NEXUS_CAPAB_QSET_EXTENSIONS:
2568 		error = fill_capab_qset_extensions(fakeif, contents, len);
2569 		break;
2570 	default:
2571 		error = ENOTSUP;
2572 		break;
2573 	}
2574 	return error;
2575 }
2576 
2577 static int
feth_set_tso_mtu(ifnet_t ifp,uint32_t tso_v4_mtu,uint32_t tso_v6_mtu)2578 feth_set_tso_mtu(ifnet_t ifp, uint32_t tso_v4_mtu, uint32_t tso_v6_mtu)
2579 {
2580 	int     error;
2581 
2582 	error = ifnet_set_tso_mtu(ifp, AF_INET, tso_v4_mtu);
2583 	if (error != 0) {
2584 		FAKE_LOG(LOG_NOTICE, FE_DBGF_CONTROL,
2585 		    "set TSO MTU IPv4 failed on %s, err %d",
2586 		    if_name(ifp), error);
2587 		return error;
2588 	}
2589 	error = ifnet_set_tso_mtu(ifp, AF_INET6, tso_v6_mtu);
2590 	if (error != 0) {
2591 		FAKE_LOG(LOG_NOTICE, FE_DBGF_CONTROL,
2592 		    "set TSO MTU IPv6 failed on %s, err %d",
2593 		    if_name(ifp), error);
2594 		return error;
2595 	}
2596 	return 0;
2597 }
2598 
2599 static int
feth_set_tso_offload(ifnet_t ifp)2600 feth_set_tso_offload(ifnet_t ifp)
2601 {
2602 	ifnet_offload_t offload;
2603 	int error;
2604 
2605 	offload = IFNET_TSO_IPV4 | IFNET_TSO_IPV6;
2606 	error = ifnet_set_offload(ifp, offload);
2607 	if (error != 0) {
2608 		FAKE_LOG(LOG_NOTICE, FE_DBGF_CONTROL,
2609 		    "set TSO offload failed on %s, err %d",
2610 		    if_name(ifp), error);
2611 		goto done;
2612 	}
2613 	error = feth_set_tso_mtu(ifp, if_fake_tso_buffer_size,
2614 	    if_fake_tso_buffer_size);
2615 done:
2616 	return error;
2617 }
2618 
/*
 * create_netif_provider_and_instance
 * - register a per-interface netif nexus provider
 *   ("com.apple.netif.<name>") and allocate a provider instance, which
 *   creates and returns the ifnet
 * - in WMM mode configures 4 TX rings / WMM queue mapping via the
 *   nexus attributes
 * - on failure the provider registration is rolled back; the `failed`
 *   label is also the success path's exit (it only frees nexus_attr and
 *   returns err, which is 0 on success)
 */
static errno_t
create_netif_provider_and_instance(if_fake_ref fakeif,
    struct ifnet_init_eparams * init_params, ifnet_t *ifp,
    uuid_t * provider, uuid_t * instance)
{
	errno_t                 err;
	nexus_controller_t      controller = kern_nexus_shared_controller();
	struct kern_nexus_net_init net_init;
	nexus_name_t            provider_name;
	nexus_attr_t            nexus_attr = NULL;
	struct kern_nexus_provider_init prov_init = {
		.nxpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
		.nxpi_flags = NXPIF_VIRTUAL_DEVICE,
		.nxpi_pre_connect = feth_nx_pre_connect,
		.nxpi_connected = feth_nx_connected,
		.nxpi_pre_disconnect = feth_nx_pre_disconnect,
		.nxpi_disconnected = feth_nx_disconnected,
		.nxpi_ring_init = feth_nx_ring_init,
		.nxpi_ring_fini = feth_nx_ring_fini,
		.nxpi_slot_init = feth_nx_slot_init,
		.nxpi_slot_fini = feth_nx_slot_fini,
		.nxpi_sync_tx = feth_nx_sync_tx,
		.nxpi_sync_rx = feth_nx_sync_rx,
		.nxpi_tx_doorbell = feth_nx_tx_doorbell,
		.nxpi_config_capab = feth_nx_capab_config,
	};

	_CASSERT(IFF_MAX_RX_RINGS == 1);
	err = kern_nexus_attr_create(&nexus_attr);
	if (err != 0) {
		FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
		    "nexus attribute creation failed, error %d", err);
		goto failed;
	}
	if (feth_in_wmm_mode(fakeif)) {
		/* 4 TX rings (one per access category) with WMM mapping */
		err = kern_nexus_attr_set(nexus_attr, NEXUS_ATTR_TX_RINGS,
		    IFF_NUM_TX_RINGS_WMM_MODE);
		VERIFY(err == 0);
		err = kern_nexus_attr_set(nexus_attr, NEXUS_ATTR_RX_RINGS,
		    IFF_NUM_RX_RINGS_WMM_MODE);
		VERIFY(err == 0);
		err = kern_nexus_attr_set(nexus_attr, NEXUS_ATTR_QMAP,
		    NEXUS_QMAP_TYPE_WMM);
		VERIFY(err == 0);
	}

	err = kern_nexus_attr_set(nexus_attr, NEXUS_ATTR_ANONYMOUS, 1);
	VERIFY(err == 0);
	snprintf((char *)provider_name, sizeof(provider_name),
	    "com.apple.netif.%s", fakeif->iff_name);
	err = kern_nexus_controller_register_provider(controller,
	    feth_nx_dom_prov,
	    provider_name,
	    &prov_init,
	    sizeof(prov_init),
	    nexus_attr,
	    provider);
	if (err != 0) {
		FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
		    "register provider failed, error %d", err);
		goto failed;
	}
	bzero(&net_init, sizeof(net_init));
	net_init.nxneti_version = KERN_NEXUS_NET_CURRENT_VERSION;
	net_init.nxneti_flags = 0;
	net_init.nxneti_eparams = init_params;
	net_init.nxneti_lladdr = NULL;
	net_init.nxneti_prepare = feth_netif_prepare;
	net_init.nxneti_rx_pbufpool = fakeif->iff_rx_pp;
	net_init.nxneti_tx_pbufpool = fakeif->iff_tx_pp;
	err = kern_nexus_controller_alloc_net_provider_instance(controller,
	    *provider,
	    fakeif,
	    NULL,
	    instance,
	    &net_init,
	    ifp);
	if (err != 0) {
		/* undo the provider registration on instance failure */
		FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
		    "alloc_net_provider_instance failed, %d", err);
		kern_nexus_controller_deregister_provider(controller,
		    *provider);
		uuid_clear(*provider);
		goto failed;
	}
	if (feth_supports_tso(fakeif)) {
		/*
		 * NOTE(review): if TSO setup fails here, the provider and
		 * instance created above are not torn down on this path --
		 * presumably the caller cleans up on a nonzero return;
		 * confirm.
		 */
		if ((err = feth_set_tso_offload(*ifp)) != 0) {
			goto failed;
		}
	}

failed:
	if (nexus_attr != NULL) {
		kern_nexus_attr_destroy(nexus_attr);
	}
	return err;
}
2716 
2717 /*
2718  * The nif_stats need to be referenced because we don't want it set
2719  * to NULL until the last llink is removed.
2720  */
2721 static void
get_nexus_stats(if_fake_ref fakeif,kern_nexus_t nexus)2722 get_nexus_stats(if_fake_ref fakeif, kern_nexus_t nexus)
2723 {
2724 	if (++fakeif->iff_nifs_ref == 1) {
2725 		ASSERT(fakeif->iff_nifs == NULL);
2726 		fakeif->iff_nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
2727 	}
2728 }
2729 
2730 static void
clear_nexus_stats(if_fake_ref fakeif)2731 clear_nexus_stats(if_fake_ref fakeif)
2732 {
2733 	if (--fakeif->iff_nifs_ref == 0) {
2734 		ASSERT(fakeif->iff_nifs != NULL);
2735 		fakeif->iff_nifs = NULL;
2736 	}
2737 }
2738 
/*
 * feth_nx_qset_init
 * - initialize queue set qset_idx on logical link llink_ctx: record the
 *   kernel qset handle and id in our fake_qset slot and hand the slot
 *   back as the qset context
 * - ENXIO while detaching, EINVAL for an out-of-range index
 */
static errno_t
feth_nx_qset_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    void *llink_ctx, uint8_t qset_idx, uint64_t qset_id, kern_netif_qset_t qset,
    void **qset_ctx)
{
#pragma unused(nxprov)
	if_fake_ref fakeif;
	fake_llink *fl = llink_ctx;
	fake_qset *fqs;

	feth_lock();
	fakeif = feth_nexus_context(nexus);
	if (feth_is_detaching(fakeif)) {
		feth_unlock();
		FAKE_LOG(LOG_DEBUG, FE_DBGF_CONTROL,
		    "%s: detaching", fakeif->iff_name);
		return ENXIO;
	}
	if (qset_idx >= fl->fl_qset_cnt) {
		feth_unlock();
		FAKE_LOG(LOG_DEBUG, FE_DBGF_CONTROL,
		    "%s: invalid qset_idx %d", fakeif->iff_name, qset_idx);
		return EINVAL;
	}
	fqs = &fl->fl_qset[qset_idx];
	ASSERT(fqs->fqs_qset == NULL);
	fqs->fqs_qset = qset;
	fqs->fqs_id = qset_id;
	*qset_ctx = fqs;

	/* XXX This should really be done during registration */
	get_nexus_stats(fakeif, nexus);
	feth_unlock();
	return 0;
}
2774 
2775 static void
feth_nx_qset_fini(kern_nexus_provider_t nxprov,kern_nexus_t nexus,void * qset_ctx)2776 feth_nx_qset_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
2777     void *qset_ctx)
2778 {
2779 #pragma unused(nxprov)
2780 	if_fake_ref fakeif;
2781 	fake_qset *fqs = qset_ctx;
2782 
2783 	feth_lock();
2784 	fakeif = feth_nexus_context(nexus);
2785 	clear_nexus_stats(fakeif);
2786 	ASSERT(fqs->fqs_qset != NULL);
2787 	fqs->fqs_qset = NULL;
2788 	fqs->fqs_id = 0;
2789 	feth_unlock();
2790 }
2791 
/*
 * Nexus queue init callback: bind the kernel queue handle to the tx or
 * rx slot identified by (qset_ctx, qidx, tx) and return the slot via
 * *queue_ctx.
 *
 * Returns ENXIO if the interface is detaching, EINVAL if qidx is out of
 * range for the chosen direction, 0 on success.
 */
static errno_t
feth_nx_queue_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    void *qset_ctx, uint8_t qidx, bool tx, kern_netif_queue_t queue,
    void **queue_ctx)
{
#pragma unused(nxprov)
	if_fake_ref fakeif;
	fake_qset *fqs = qset_ctx;
	fake_queue *fq;

	feth_lock();
	fakeif = feth_nexus_context(nexus);
	if (feth_is_detaching(fakeif)) {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_CONTROL,
		    "%s: detaching", fakeif->iff_name);
		feth_unlock();
		return ENXIO;
	}
	/* pick the per-direction queue slot, validating the index */
	if (tx) {
		if (qidx >= fqs->fqs_tx_queue_cnt) {
			FAKE_LOG(LOG_DEBUG, FE_DBGF_CONTROL,
			    "%s: invalid tx qidx %d", fakeif->iff_name, qidx);
			feth_unlock();
			return EINVAL;
		}
		fq = &fqs->fqs_tx_queue[qidx];
	} else {
		if (qidx >= fqs->fqs_rx_queue_cnt) {
			FAKE_LOG(LOG_DEBUG, FE_DBGF_CONTROL,
			    "%s: invalid rx qidx %d", fakeif->iff_name, qidx);
			feth_unlock();
			return EINVAL;
		}
		fq = &fqs->fqs_rx_queue[qidx];
	}
	/* the slot must be free; remember the kernel queue handle */
	ASSERT(fq->fq_queue == NULL);
	fq->fq_queue = queue;
	*queue_ctx = fq;
	feth_unlock();
	return 0;
}
2833 
2834 static void
feth_nx_queue_fini(kern_nexus_provider_t nxprov,kern_nexus_t nexus,void * queue_ctx)2835 feth_nx_queue_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
2836     void *queue_ctx)
2837 {
2838 #pragma unused(nxprov, nexus)
2839 	fake_queue *fq = queue_ctx;
2840 
2841 	feth_lock();
2842 	ASSERT(fq->fq_queue != NULL);
2843 	fq->fq_queue = NULL;
2844 	feth_unlock();
2845 }
2846 
/*
 * Deliver a chain of tx packets from fakeif to its peer.
 *
 * Walks the singly-linked packet chain 'sph', taps each packet out to
 * bpf, optionally drops it to simulate tx expiration/completion errors,
 * and accumulates the survivors into batches of up to IFF_MAX_BATCH_SIZE
 * that are submitted to the peer's rx queue and then tx-completed.
 * Both callers in this file invoke this while holding the feth lock.
 */
static void
feth_nx_tx_queue_deliver_pkt_chain(if_fake_ref fakeif, kern_packet_t sph,
    struct netif_stats *nifs, if_fake_ref peer_fakeif,
    uint32_t llink_idx, uint32_t qset_idx)
{
	kern_packet_t pkts[IFF_MAX_BATCH_SIZE];
	uint32_t n_pkts = 0;

	FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
	    "%s -> %s", fakeif->iff_name, peer_fakeif->iff_name);

	while (sph != 0) {
		uint16_t off;
		kern_packet_t next;

		/* unlink this packet from the chain before handing it off */
		next = kern_packet_get_next(sph);
		kern_packet_set_next(sph, 0);

		/* bpf tap output */
		off = kern_packet_get_headroom(sph);
		VERIFY(off >= fakeif->iff_tx_headroom);
		kern_packet_set_link_header_length(sph, ETHER_HDR_LEN);
		feth_packet_set_trace_tag(sph, IFF_TT_OUTPUT);
		bpf_tap_packet_out(fakeif->iff_ifp, DLT_EN10MB, sph, NULL, 0);

		/* drop packets, if requested */
		fakeif->iff_tx_pkts_count++;
		if (feth_tx_expired_error(fakeif, sph) ||
		    feth_tx_complete_error(fakeif, sph)) {
			/* simulated tx error: free the packet and count a drop */
			fakeif->iff_tx_pkts_count = 0;
			kern_pbufpool_free(fakeif->iff_tx_pp, sph);
			STATS_INC(nifs, NETIF_STATS_DROP);
			goto next_pkt;
		}
		ASSERT(sph != 0);
		STATS_INC(nifs, NETIF_STATS_TX_COPY_DIRECT);
		STATS_INC(nifs, NETIF_STATS_TX_PACKETS);

		/* prepare batch for receiver */
		pkts[n_pkts++] = sph;
		if (n_pkts == IFF_MAX_BATCH_SIZE) {
			/* batch full: submit to peer rx, then complete tx */
			feth_rx_queue_submit(fakeif, peer_fakeif, llink_idx,
			    qset_idx, pkts, n_pkts);
			feth_tx_complete(fakeif, pkts, n_pkts);
			n_pkts = 0;
		}
next_pkt:
		sph = next;
	}
	/* catch last batch for receiver */
	if (n_pkts != 0) {
		feth_rx_queue_submit(fakeif, peer_fakeif, llink_idx, qset_idx,
		    pkts, n_pkts);
		feth_tx_complete(fakeif, pkts, n_pkts);
		n_pkts = 0;
	}
}
2904 
2905 static errno_t
feth_nx_tx_qset_notify(kern_nexus_provider_t nxprov,kern_nexus_t nexus,void * qset_ctx,uint32_t flags)2906 feth_nx_tx_qset_notify(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
2907     void *qset_ctx, uint32_t flags)
2908 {
2909 #pragma unused(nxprov)
2910 	if_fake_ref             fakeif;
2911 	ifnet_t                 ifp;
2912 	ifnet_t                 peer_ifp;
2913 	if_fake_ref             peer_fakeif = NULL;
2914 	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
2915 	fake_qset               *qset = qset_ctx;
2916 	boolean_t               detaching, connected;
2917 	uint32_t                i;
2918 	errno_t                 err;
2919 
2920 	STATS_INC(nifs, NETIF_STATS_TX_SYNC);
2921 	fakeif = feth_nexus_context(nexus);
2922 	FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
2923 	    "%s qset %p, idx %d, flags 0x%x", fakeif->iff_name, qset,
2924 	    qset->fqs_idx, flags);
2925 
2926 	feth_lock();
2927 	detaching = feth_is_detaching(fakeif);
2928 	connected = fakeif->iff_channel_connected;
2929 	if (detaching || !connected) {
2930 		FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
2931 		    "%s: detaching %s, channel connected %s",
2932 		    fakeif->iff_name,
2933 		    (detaching ? "true" : "false"),
2934 		    (connected ? "true" : "false"));
2935 		feth_unlock();
2936 		return 0;
2937 	}
2938 	ifp = fakeif->iff_ifp;
2939 	peer_ifp = fakeif->iff_peer;
2940 	if (peer_ifp != NULL) {
2941 		peer_fakeif = ifnet_get_if_fake(peer_ifp);
2942 		if (peer_fakeif != NULL) {
2943 			detaching = feth_is_detaching(peer_fakeif);
2944 			connected = peer_fakeif->iff_channel_connected;
2945 			if (detaching || !connected) {
2946 				FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
2947 				    "peer %s: detaching %s, "
2948 				    "channel connected %s",
2949 				    peer_fakeif->iff_name,
2950 				    (detaching ? "true" : "false"),
2951 				    (connected ? "true" : "false"));
2952 				goto done;
2953 			}
2954 		} else {
2955 			FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
2956 			    "peer_fakeif is NULL");
2957 			goto done;
2958 		}
2959 	} else {
2960 		FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT, "peer_ifp is NULL");
2961 		goto done;
2962 	}
2963 
2964 	if (if_fake_switch_combined_mode &&
2965 	    qset->fqs_dequeue_cnt >= if_fake_switch_mode_frequency) {
2966 		if (qset->fqs_combined_mode) {
2967 			kern_netif_set_qset_separate(qset->fqs_qset);
2968 		} else {
2969 			kern_netif_set_qset_combined(qset->fqs_qset);
2970 		}
2971 		qset->fqs_combined_mode = !qset->fqs_combined_mode;
2972 		qset->fqs_dequeue_cnt = 0;
2973 	}
2974 
2975 	for (i = 0; i < qset->fqs_tx_queue_cnt; i++) {
2976 		kern_packet_t sph = 0;
2977 		kern_netif_queue_t queue = qset->fqs_tx_queue[i].fq_queue;
2978 		boolean_t more = FALSE;
2979 
2980 		err = kern_netif_queue_tx_dequeue(queue, UINT32_MAX, UINT32_MAX,
2981 		    &more, &sph);
2982 		if (err != 0 && err != EAGAIN) {
2983 			FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
2984 			    "%s queue %p dequeue failed: err "
2985 			    "%d", fakeif->iff_name, queue, err);
2986 		}
2987 		feth_nx_tx_queue_deliver_pkt_chain(fakeif, sph, nifs,
2988 		    peer_fakeif, qset->fqs_llink_idx, qset->fqs_idx);
2989 	}
2990 
2991 done:
2992 	feth_unlock();
2993 	return 0;
2994 }
2995 
2996 
2997 static errno_t
feth_nx_queue_tx_push(kern_nexus_provider_t nxprov,kern_nexus_t nexus,void * queue_ctx,kern_packet_t * ph,uint32_t * packetCount,uint32_t * byteCount)2998 feth_nx_queue_tx_push(kern_nexus_provider_t nxprov,
2999     kern_nexus_t nexus, void *queue_ctx, kern_packet_t *ph,
3000     uint32_t *packetCount, uint32_t *byteCount)
3001 {
3002 #pragma unused(nxprov)
3003 	if_fake_ref             fakeif;
3004 	ifnet_t                 ifp;
3005 	ifnet_t                 peer_ifp;
3006 	if_fake_ref             peer_fakeif = NULL;
3007 	struct netif_stats     *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
3008 	fake_queue             *fq = queue_ctx;
3009 	boolean_t               detaching, connected;
3010 
3011 	STATS_INC(nifs, NETIF_STATS_TX_SYNC);
3012 	fakeif = feth_nexus_context(nexus);
3013 	FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT, "%s queue %p", fakeif->iff_name, fq);
3014 
3015 	feth_lock();
3016 
3017 	detaching = feth_is_detaching(fakeif);
3018 	connected = fakeif->iff_channel_connected;
3019 	if (detaching || !connected) {
3020 		FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
3021 		    "%s: detaching %s, channel connected %s",
3022 		    fakeif->iff_name,
3023 		    (detaching ? "true" : "false"),
3024 		    (connected ? "true" : "false"));
3025 		goto done;
3026 	}
3027 	ifp = fakeif->iff_ifp;
3028 	peer_ifp = fakeif->iff_peer;
3029 	if (peer_ifp != NULL) {
3030 		peer_fakeif = ifnet_get_if_fake(peer_ifp);
3031 		if (peer_fakeif != NULL) {
3032 			detaching = feth_is_detaching(peer_fakeif);
3033 			connected = peer_fakeif->iff_channel_connected;
3034 			if (detaching || !connected) {
3035 				FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
3036 				    "peer %s: detaching %s, "
3037 				    "channel connected %s",
3038 				    peer_fakeif->iff_name,
3039 				    (detaching ? "true" : "false"),
3040 				    (connected ? "true" : "false"));
3041 				goto done;
3042 			}
3043 		} else {
3044 			FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
3045 			    "peer_fakeif is NULL");
3046 			goto done;
3047 		}
3048 	} else {
3049 		FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT, "peer_ifp is NULL");
3050 		goto done;
3051 	}
3052 
3053 	*packetCount = *byteCount = 0;
3054 
3055 	kern_packet_t sph = *ph;
3056 	while (sph != 0) {
3057 		(*packetCount)++;
3058 		*byteCount += kern_packet_get_data_length(sph);
3059 		sph = kern_packet_get_next(sph);
3060 	}
3061 
3062 	feth_nx_tx_queue_deliver_pkt_chain(fakeif, *ph, nifs,
3063 	    peer_fakeif, 0, 0);
3064 
3065 	*ph = 0;
3066 
3067 done:
3068 	feth_unlock();
3069 	return 0;
3070 }
3071 
3072 
3073 static void
fill_qset_info_and_params(if_fake_ref fakeif,fake_llink * llink_info,uint32_t qset_idx,struct kern_nexus_netif_llink_qset_init * qset_init,bool is_def,bool is_low_latency)3074 fill_qset_info_and_params(if_fake_ref fakeif, fake_llink *llink_info,
3075     uint32_t qset_idx, struct kern_nexus_netif_llink_qset_init *qset_init,
3076     bool is_def, bool is_low_latency)
3077 {
3078 	fake_qset *qset_info = &llink_info->fl_qset[qset_idx];
3079 
3080 	qset_init->nlqi_flags =
3081 	    (is_def ? KERN_NEXUS_NET_LLINK_QSET_DEFAULT : 0) |
3082 	    (is_low_latency ? KERN_NEXUS_NET_LLINK_QSET_LOW_LATENCY : 0) |
3083 	    KERN_NEXUS_NET_LLINK_QSET_AQM;
3084 
3085 	if (feth_in_wmm_mode(fakeif)) {
3086 		qset_init->nlqi_flags |= KERN_NEXUS_NET_LLINK_QSET_WMM_MODE;
3087 		qset_init->nlqi_num_txqs = IFF_NUM_TX_QUEUES_WMM_MODE;
3088 		qset_init->nlqi_num_rxqs = IFF_NUM_RX_QUEUES_WMM_MODE;
3089 	} else {
3090 		qset_init->nlqi_num_txqs = 1;
3091 		qset_init->nlqi_num_rxqs = 1;
3092 	}
3093 	qset_info->fqs_tx_queue_cnt = qset_init->nlqi_num_txqs;
3094 	qset_info->fqs_rx_queue_cnt = qset_init->nlqi_num_rxqs;
3095 
3096 	/* These are needed for locating the peer qset */
3097 	qset_info->fqs_llink_idx = llink_info->fl_idx;
3098 	qset_info->fqs_idx = qset_idx;
3099 }
3100 
3101 static void
fill_llink_info_and_params(if_fake_ref fakeif,uint32_t llink_idx,struct kern_nexus_netif_llink_init * llink_init,uint32_t llink_id,struct kern_nexus_netif_llink_qset_init * qset_init,uint32_t qset_cnt,uint32_t flags)3102 fill_llink_info_and_params(if_fake_ref fakeif, uint32_t llink_idx,
3103     struct kern_nexus_netif_llink_init *llink_init, uint32_t llink_id,
3104     struct kern_nexus_netif_llink_qset_init *qset_init, uint32_t qset_cnt,
3105     uint32_t flags)
3106 {
3107 	fake_llink *llink_info = &fakeif->iff_llink[llink_idx];
3108 	uint32_t i;
3109 	bool create_ll_qset = if_fake_low_latency && (llink_idx != 0);
3110 
3111 	for (i = 0; i < qset_cnt; i++) {
3112 		fill_qset_info_and_params(fakeif, llink_info, i,
3113 		    &qset_init[i], i == 0, create_ll_qset && i == 1);
3114 	}
3115 	llink_info->fl_idx = llink_idx;
3116 
3117 	/* This doesn't have to be the same as llink_idx */
3118 	llink_info->fl_id = llink_id;
3119 	llink_info->fl_qset_cnt = qset_cnt;
3120 
3121 	llink_init->nli_link_id = llink_id;
3122 	llink_init->nli_num_qsets = qset_cnt;
3123 	llink_init->nli_qsets = qset_init;
3124 	llink_init->nli_flags = flags;
3125 	llink_init->nli_ctx = llink_info;
3126 }
3127 
/*
 * Create the non-default logical links (indices 1..if_fake_llink_cnt-1)
 * on the interface's nexus. The default llink (index 0) is created as
 * part of the provider instance. On failure, every llink added so far
 * is removed again and iff_llink_cnt is reset to 0.
 */
static errno_t
create_non_default_llinks(if_fake_ref fakeif)
{
	struct kern_nexus *nx;
	fake_nx_t fnx = &fakeif->iff_nx;
	struct kern_nexus_netif_llink_init llink_init;
	struct kern_nexus_netif_llink_qset_init qset_init[FETH_MAX_QSETS];
	errno_t err;
	uint64_t llink_id;
	uint32_t i;

	nx = nx_find(fnx->fnx_instance, FALSE);
	if (nx == NULL) {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
		    "%s: nx not found", fakeif->iff_name);
		return ENXIO;
	}
	/* Default llink starts at index 0 */
	for (i = 1; i < if_fake_llink_cnt; i++) {
		llink_id = (uint64_t)i;

		/*
		 * The llink_init and qset_init structures are reused for
		 * each llink creation.
		 */
		fill_llink_info_and_params(fakeif, i, &llink_init,
		    llink_id, qset_init, if_fake_qset_cnt, 0);
		err = kern_nexus_netif_llink_add(nx, &llink_init);
		if (err != 0) {
			FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
			    "%s: llink add failed, error %d",
			    fakeif->iff_name, err);
			goto fail;
		}
		fakeif->iff_llink_cnt++;
	}
	nx_release(nx);
	return 0;

fail:
	/*
	 * NOTE(review): this cleanup loop starts at index 0 and so also
	 * removes the default llink, while remove_non_default_llinks()
	 * deliberately skips index 0 — confirm this asymmetry is intended.
	 */
	for (i = 0; i < fakeif->iff_llink_cnt; i++) {
		int                     error;
		fake_llink * __single   ll = &fakeif->iff_llink[i];

		error = kern_nexus_netif_llink_remove(nx, ll->fl_id);
		if (error != 0) {
			FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
			    "%s: llink remove failed, llink_id 0x%llx, "
			    "error %d", fakeif->iff_name,
			    ll->fl_id, error);
		}
		ll->fl_id = 0;
	}
	fakeif->iff_llink_cnt = 0;
	nx_release(nx);
	return err;
}
3185 
/*
 * Register a netif nexus provider for this interface and allocate a
 * provider instance with logical-link support (the default llink plus
 * its qsets), then create any additional non-default llinks and enable
 * TSO if configured.
 *
 * On success err is 0 and control still flows through the 'failed'
 * label, which only frees the temporary nexus attribute. Returns the
 * first error encountered otherwise.
 */
static errno_t
create_netif_llink_provider_and_instance(if_fake_ref fakeif,
    struct ifnet_init_eparams * init_params, ifnet_t *ifp,
    uuid_t * provider, uuid_t * instance)
{
	errno_t                 err;
	nexus_controller_t      controller = kern_nexus_shared_controller();
	struct kern_nexus_net_init net_init;
	struct kern_nexus_netif_llink_init llink_init;
	struct kern_nexus_netif_llink_qset_init qsets[FETH_MAX_QSETS];

	nexus_name_t            provider_name;
	nexus_attr_t            nexus_attr = NULL;
	/* callbacks the netif nexus invokes back into this driver */
	struct kern_nexus_netif_provider_init prov_init = {
		.nxnpi_version = KERN_NEXUS_DOMAIN_PROVIDER_NETIF,
		.nxnpi_flags = NXPIF_VIRTUAL_DEVICE,
		.nxnpi_pre_connect = feth_nx_pre_connect,
		.nxnpi_connected = feth_nx_connected,
		.nxnpi_pre_disconnect = feth_nx_pre_disconnect,
		.nxnpi_disconnected = feth_nx_disconnected,
		.nxnpi_qset_init = feth_nx_qset_init,
		.nxnpi_qset_fini = feth_nx_qset_fini,
		.nxnpi_queue_init = feth_nx_queue_init,
		.nxnpi_queue_fini = feth_nx_queue_fini,
		.nxnpi_tx_qset_notify = feth_nx_tx_qset_notify,
		.nxnpi_config_capab = feth_nx_capab_config,
		.nxnpi_queue_tx_push = feth_nx_queue_tx_push
	};

	err = kern_nexus_attr_create(&nexus_attr);
	if (err != 0) {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
		    "nexus attribute creation failed, error %d", err);
		goto failed;
	}

	err = kern_nexus_attr_set(nexus_attr, NEXUS_ATTR_ANONYMOUS, 1);
	VERIFY(err == 0);

	/* provider name is derived from the interface name */
	snprintf((char *)provider_name, sizeof(provider_name),
	    "com.apple.netif.%s", fakeif->iff_name);
	err = kern_nexus_controller_register_provider(controller,
	    feth_nx_dom_prov,
	    provider_name,
	    (struct kern_nexus_provider_init *)&prov_init,
	    sizeof(prov_init),
	    nexus_attr,
	    provider);
	if (err != 0) {
		FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
		    "register provider failed, error %d", err);
		goto failed;
	}
	bzero(&net_init, sizeof(net_init));
	net_init.nxneti_version = KERN_NEXUS_NET_CURRENT_VERSION;
	net_init.nxneti_flags = 0;
	net_init.nxneti_eparams = init_params;
	net_init.nxneti_lladdr = NULL;
	net_init.nxneti_prepare = feth_netif_prepare;
	net_init.nxneti_rx_pbufpool = fakeif->iff_rx_pp;
	net_init.nxneti_tx_pbufpool = fakeif->iff_tx_pp;

	/*
	 * Assume llink id is same as the index for if_fake.
	 * This is not required for other drivers.
	 */
	_CASSERT(NETIF_LLINK_ID_DEFAULT == 0);
	fill_llink_info_and_params(fakeif, 0, &llink_init,
	    NETIF_LLINK_ID_DEFAULT, qsets, if_fake_qset_cnt,
	    KERN_NEXUS_NET_LLINK_DEFAULT);

	net_init.nxneti_llink = &llink_init;

	err = kern_nexus_controller_alloc_net_provider_instance(controller,
	    *provider, fakeif, NULL, instance, &net_init, ifp);
	if (err != 0) {
		FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
		    "alloc_net_provider_instance failed, %d", err);
		/* roll back the provider registration */
		kern_nexus_controller_deregister_provider(controller,
		    *provider);
		uuid_clear(*provider);
		goto failed;
	}
	/* account for the default llink created with the instance */
	fakeif->iff_llink_cnt++;

	if (if_fake_llink_cnt > 1) {
		err = create_non_default_llinks(fakeif);
		if (err != 0) {
			FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
			    "create_non_default_llinks failed, %d", err);
			feth_detach_netif_nexus(fakeif);
			goto failed;
		}
	}
	if (feth_supports_tso(fakeif)) {
		/*
		 * NOTE(review): on feth_set_tso_offload() failure the
		 * provider instance is not torn down here — confirm the
		 * caller's error path performs that cleanup.
		 */
		if ((err = feth_set_tso_offload(*ifp)) != 0) {
			goto failed;
		}
	}
failed:
	if (nexus_attr != NULL) {
		kern_nexus_attr_destroy(nexus_attr);
	}
	return err;
}
3291 
3292 static errno_t
feth_attach_netif_nexus(if_fake_ref fakeif,struct ifnet_init_eparams * init_params,ifnet_t * ifp)3293 feth_attach_netif_nexus(if_fake_ref fakeif,
3294     struct ifnet_init_eparams * init_params, ifnet_t *ifp)
3295 {
3296 	errno_t                 error;
3297 	fake_nx_t               nx = &fakeif->iff_nx;
3298 
3299 	error = feth_packet_pool_make(fakeif);
3300 	if (error != 0) {
3301 		return error;
3302 	}
3303 	if (if_fake_llink_cnt == 0) {
3304 		return create_netif_provider_and_instance(fakeif, init_params,
3305 		           ifp, &nx->fnx_provider, &nx->fnx_instance);
3306 	} else {
3307 		return create_netif_llink_provider_and_instance(fakeif,
3308 		           init_params, ifp, &nx->fnx_provider,
3309 		           &nx->fnx_instance);
3310 	}
3311 }
3312 
3313 static void
remove_non_default_llinks(const char * name,fake_nx_t fnx,fake_llink_t llink __counted_by (FETH_MAX_LLINKS),uint32_t llink_cnt)3314 remove_non_default_llinks(const char * name, fake_nx_t fnx,
3315     fake_llink_t llink __counted_by(FETH_MAX_LLINKS),
3316     uint32_t llink_cnt)
3317 {
3318 	struct kern_nexus *nx;
3319 	uint32_t i;
3320 
3321 	if (llink_cnt <= 1) {
3322 		goto done;
3323 	}
3324 	nx = nx_find(fnx->fnx_instance, FALSE);
3325 	if (nx == NULL) {
3326 		FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
3327 		    "%s: nx not found", name);
3328 		goto done;
3329 	}
3330 	/* Default llink (at index 0) is freed separately */
3331 	for (i = 1; i < llink_cnt; i++) {
3332 		int err;
3333 
3334 		err = kern_nexus_netif_llink_remove(nx, llink[i].fl_id);
3335 		if (err != 0) {
3336 			FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
3337 			    "%s: llink remove failed, llink_id 0x%llx, "
3338 			    "error %d", name,
3339 			    llink[i].fl_id, err);
3340 		}
3341 	}
3342 	nx_release(nx);
3343 done:
3344 	return;
3345 }
3346 
3347 static void
detach_provider_and_instance(uuid_t provider,uuid_t instance)3348 detach_provider_and_instance(uuid_t provider, uuid_t instance)
3349 {
3350 	nexus_controller_t controller = kern_nexus_shared_controller();
3351 	errno_t err;
3352 
3353 	if (!uuid_is_null(instance)) {
3354 		err = kern_nexus_controller_free_provider_instance(controller,
3355 		    instance);
3356 		if (err != 0) {
3357 			FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
3358 			    "free_provider_instance failed %d", err);
3359 		} else {
3360 			FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
3361 			    "deregister_instance");
3362 		}
3363 	}
3364 	if (!uuid_is_null(provider)) {
3365 		err = kern_nexus_controller_deregister_provider(controller,
3366 		    provider);
3367 		if (err != 0) {
3368 			FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
3369 			    "deregister_provider %d", err);
3370 		} else {
3371 			FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
3372 			    "deregister_provider");
3373 		}
3374 	}
3375 	return;
3376 }
3377 
/*
 * Detach the netif nexus from the interface: snapshot and clear the
 * nexus/llink state under the feth lock, then perform the actual
 * teardown (non-default llink removal, provider/instance destruction,
 * llink array free) after dropping the lock.
 */
static void
feth_detach_netif_nexus(if_fake_ref fakeif)
{
	fake_nx         fnx;
	fake_llink_t    llink __counted_by(FETH_MAX_LLINKS);
	uint32_t        llink_cnt;

	feth_lock();
	/* take ownership of the nexus state so nobody else can use it */
	fnx = fakeif->iff_nx;
	bzero(&fakeif->iff_nx, sizeof(fakeif->iff_nx));
	llink = fakeif->iff_llink;
	fakeif->iff_llink = NULL;
	llink_cnt = fakeif->iff_llink_cnt;
	fakeif->iff_llink_cnt = 0;
	feth_unlock();
	remove_non_default_llinks(fakeif->iff_name, &fnx, llink, llink_cnt);
	detach_provider_and_instance(fnx.fnx_provider, fnx.fnx_instance);
	if (llink != NULL) {
		kfree_type(fake_llink, FETH_MAX_LLINKS, llink);
	}
	return;
}
3400 #endif /* SKYWALK */
3401 
3402 /**
3403 ** feth interface routines
3404 **/
3405 static void
feth_ifnet_set_attrs(if_fake_ref fakeif,ifnet_t ifp)3406 feth_ifnet_set_attrs(if_fake_ref fakeif, ifnet_t ifp)
3407 {
3408 	errno_t         error;
3409 	ifnet_offload_t offload = 0;
3410 
3411 	ifnet_set_addrlen(ifp, ETHER_ADDR_LEN);
3412 	ifnet_set_baudrate(ifp, 0);
3413 	ifnet_set_mtu(ifp, ETHERMTU);
3414 	ifnet_set_flags(ifp,
3415 	    IFF_BROADCAST | IFF_MULTICAST | IFF_SIMPLEX,
3416 	    0xffff);
3417 	ifnet_set_hdrlen(ifp, sizeof(struct ether_header));
3418 	if ((fakeif->iff_flags & IFF_FLAGS_LRO) != 0) {
3419 		offload |= IFNET_LRO;
3420 	}
3421 	if ((fakeif->iff_flags & IFF_FLAGS_HWCSUM) != 0) {
3422 		offload |= IFNET_CSUM_IP | IFNET_CSUM_TCP | IFNET_CSUM_UDP |
3423 		    IFNET_CSUM_TCPIPV6 | IFNET_CSUM_UDPIPV6;
3424 	}
3425 	if (feth_supports_tso(fakeif)) {
3426 		offload |= IFNET_TSO_IPV4 | IFNET_TSO_IPV6;
3427 	}
3428 	if (feth_supports_vlan_tagging(fakeif)) {
3429 		offload |= IFNET_VLAN_TAGGING;
3430 	} else if (feth_supports_vlan_mtu(fakeif)) {
3431 		offload |= IFNET_VLAN_MTU;
3432 	}
3433 	error = ifnet_set_offload(ifp, offload);
3434 	if (error != 0) {
3435 		FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
3436 		    "ifnet_set_offload(%s, 0x%x) failed, %d",
3437 		    ifp->if_xname, offload, error);
3438 	} else {
3439 		FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
3440 		    "ifnet_set_offload(%s, 0x%x) succeeded",
3441 		    ifp->if_xname, offload);
3442 	}
3443 }
3444 
3445 static void
interface_link_event(ifnet_t ifp,u_int32_t event_code)3446 interface_link_event(ifnet_t ifp, u_int32_t event_code)
3447 {
3448 	struct event {
3449 		u_int32_t ifnet_family;
3450 		u_int32_t unit;
3451 		char if_name[IFNAMSIZ];
3452 	};
3453 	_Alignas(struct kern_event_msg) char message[sizeof(struct kern_event_msg) + sizeof(struct event)] = { 0 };
3454 	struct kern_event_msg *header = (struct kern_event_msg*)message;
3455 	struct event *data = (struct event *)(header + 1);
3456 
3457 	header->total_size   = sizeof(message);
3458 	header->vendor_code  = KEV_VENDOR_APPLE;
3459 	header->kev_class    = KEV_NETWORK_CLASS;
3460 	header->kev_subclass = KEV_DL_SUBCLASS;
3461 	header->event_code   = event_code;
3462 	data->ifnet_family   = ifnet_family(ifp);
3463 	data->unit           = (u_int32_t)ifnet_unit(ifp);
3464 	strlcpy(data->if_name, ifnet_name(ifp), IFNAMSIZ);
3465 	ifnet_event(ifp, header);
3466 }
3467 
3468 static if_fake_ref
ifnet_get_if_fake(ifnet_t ifp)3469 ifnet_get_if_fake(ifnet_t ifp)
3470 {
3471 	return (if_fake_ref)ifnet_softc(ifp);
3472 }
3473 
/*
 * Clone-create handler for "feth": allocate and attach a new fake
 * ethernet interface for the given unit.
 *
 * In BSD mode the interface uses the classic ifnet_allocate_extended /
 * ifnet_attach path; otherwise it is created as a Skywalk-native netif
 * (optionally with logical links and WMM/multi-buflet/TSO features).
 * Returns 0 on success or an errno on failure.
 */
static int
feth_clone_create(struct if_clone *ifc, u_int32_t unit, __unused void *params)
{
	bool                            bsd_mode;
	int                             error;
	if_fake_ref                     fakeif;
	struct ifnet_init_eparams       feth_init;
	fake_llink                     *iff_llink = NULL;
	ifnet_t                         ifp;
	uint8_t                         mac_address[ETHER_ADDR_LEN];
	bool                            multi_buflet;
	iff_pktpool_mode_t              pktpool_mode;
	bool                            tso_support;

	/* make local copy of globals needed to make consistency checks below */
	bsd_mode = (if_fake_bsd_mode != 0);
	multi_buflet = (if_fake_multibuflet != 0);
	tso_support = (if_fake_tso_support != 0);
	pktpool_mode = if_fake_pktpool_mode;

	if (!bsd_mode) {
		/* consistency checks */
		if (if_fake_llink_cnt == 0 &&
		    strbufcmp(sk_ll_prefix, FAKE_ETHER_NAME) == 0) {
			FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
			    "feth used as ifname prefix but logical link "
			    "support in feth is disabled.");
			return EINVAL;
		}
		if (tso_support && pktpool_mode != IFF_PP_MODE_GLOBAL) {
			FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
			    "TSO mode requires global packet pool mode");
			return EINVAL;
		}
		if (multi_buflet && pktpool_mode == IFF_PP_MODE_PRIVATE_SPLIT) {
			FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
			    "multi-buflet not supported for split rx & tx pool");
			return EINVAL;
		}
		/* llink array is only used in Skywalk (non-BSD) mode */
		iff_llink = kalloc_type(fake_llink,
		    FETH_MAX_LLINKS, Z_WAITOK_ZERO);
		if (iff_llink == NULL) {
			return ENOBUFS;
		}
	}
	fakeif = kalloc_type(struct if_fake, Z_WAITOK_ZERO_NOFAIL);
	fakeif->iff_llink = iff_llink;
	fakeif->iff_retain_count = 1;
#define FAKE_ETHER_NAME_LEN     (sizeof(FAKE_ETHER_NAME) - 1)
	_CASSERT(FAKE_ETHER_NAME_LEN == 4);
	/* MAC address: "feth" prefix plus the unit number in the last 2 bytes */
	bcopy(FAKE_ETHER_NAME, mac_address, FAKE_ETHER_NAME_LEN);
	mac_address[ETHER_ADDR_LEN - 2] = (unit & 0xff00) >> 8;
	mac_address[ETHER_ADDR_LEN - 1] = unit & 0xff;
	if (bsd_mode) {
		fakeif->iff_flags |= IFF_FLAGS_BSD_MODE;
	}
	if (if_fake_hwcsum != 0) {
		fakeif->iff_flags |= IFF_FLAGS_HWCSUM;
	}
	if (if_fake_lro != 0) {
		fakeif->iff_flags |= IFF_FLAGS_LRO;
	}
	if (if_fake_vlan_tagging != 0) {
		/* support VLAN tagging in hardware */
		feth_set_supports_vlan_tagging(fakeif);
	} else {
		/* support VLAN mtu-sized packets */
		feth_set_supports_vlan_mtu(fakeif);
	}
	if (if_fake_separate_frame_header != 0) {
		fakeif->iff_flags |= IFF_FLAGS_SEPARATE_FRAME_HEADER;
	}
	fakeif->iff_max_mtu = get_max_mtu(bsd_mode, if_fake_max_mtu);
	fakeif->iff_fcs = if_fake_fcs;
	fakeif->iff_trailer_length = if_fake_trailer_length;

	/* use the interface name as the unique id for ifp recycle */
	if ((unsigned int)
	    snprintf(fakeif->iff_name, sizeof(fakeif->iff_name), "%s%d",
	    ifc->ifc_name, unit) >= sizeof(fakeif->iff_name)) {
		feth_release(fakeif);
		return EINVAL;
	}
	bzero(&feth_init, sizeof(feth_init));
	feth_init.ver = IFNET_INIT_CURRENT_VERSION;
	feth_init.len = sizeof(feth_init);
	if (feth_in_bsd_mode(fakeif)) {
		/* pick the classic output model: txstart or legacy output */
		if (if_fake_txstart != 0) {
			feth_init.start = feth_start;
		} else {
			feth_init.flags |= IFNET_INIT_LEGACY;
			feth_init.output = feth_output;
		}
		if (tso_support) {
			feth_set_supports_tso(fakeif);
		}
	}
#if SKYWALK
	else {
		feth_init.flags |= IFNET_INIT_SKYWALK_NATIVE;
		/*
		 * Currently we support WMM mode only for Skywalk native
		 * interface.
		 */
		if (if_fake_wmm_mode != 0) {
			fakeif->iff_flags |= IFF_FLAGS_WMM_MODE;
		}

		if (multi_buflet) {
			fakeif->iff_flags |= IFF_FLAGS_MULTIBUFLETS;
		}

		fakeif->iff_pp_mode = pktpool_mode;
		if (tso_support) {
			feth_set_supports_tso(fakeif);
		}

		fakeif->iff_tx_headroom = if_fake_tx_headroom;
		fakeif->iff_adv_interval = if_fake_if_adv_interval;
		if (fakeif->iff_adv_interval > 0) {
			feth_init.flags |= IFNET_INIT_IF_ADV;
		}
		/* tx error-injection knobs */
		fakeif->iff_tx_drop_rate = if_fake_tx_drops;
		fakeif->iff_tx_completion_mode = if_tx_completion_mode;
		fakeif->iff_tx_exp_policy = if_fake_tx_exp_policy;
	}
	feth_init.tx_headroom = fakeif->iff_tx_headroom;
#endif /* SKYWALK */
	if (if_fake_nxattach == 0) {
		feth_init.flags |= IFNET_INIT_NX_NOAUTO;
	}
	feth_init.uniqueid = fakeif->iff_name;
	feth_init.uniqueid_len = strlen(fakeif->iff_name);
	feth_init.name = ifc->ifc_name;
	feth_init.unit = unit;
	feth_init.family = IFNET_FAMILY_ETHERNET;
	feth_init.type = IFT_ETHER;
	feth_init.demux = ether_demux;
	feth_init.add_proto = ether_add_proto;
	feth_init.del_proto = ether_del_proto;
	feth_init.check_multi = ether_check_multi;
	feth_init.framer_extended = ether_frameout_extended;
	feth_init.softc = fakeif;
	feth_init.ioctl = feth_ioctl;
	feth_init.set_bpf_tap = NULL;
	feth_init.detach = feth_if_free;
	feth_init.broadcast_addr = etherbroadcastaddr;
	feth_init.broadcast_len = ETHER_ADDR_LEN;
	if (feth_in_bsd_mode(fakeif)) {
		error = ifnet_allocate_extended(&feth_init, &ifp);
		if (error) {
			feth_release(fakeif);
			return error;
		}
		feth_ifnet_set_attrs(fakeif, ifp);
		if (feth_supports_tso(fakeif)) {
			feth_set_tso_mtu(ifp, IP_MAXPACKET, IP_MAXPACKET);
		}
	}
#if SKYWALK
	else {
		if (feth_in_wmm_mode(fakeif)) {
			feth_init.output_sched_model =
			    IFNET_SCHED_MODEL_DRIVER_MANAGED;
		}
		error = feth_attach_netif_nexus(fakeif, &feth_init, &ifp);
		if (error != 0) {
			feth_release(fakeif);
			return error;
		}
		/* take an additional reference to ensure that it doesn't go away */
		feth_retain(fakeif);
		fakeif->iff_flags |= IFF_FLAGS_NX_ATTACHED;
		fakeif->iff_ifp = ifp;
	}
#endif /* SKYWALK */
	/* seed the supported media list from the defaults */
	fakeif->iff_media_count = MIN(default_media_words_count, IF_FAKE_MEDIA_LIST_MAX);
	bcopy(default_media_words, fakeif->iff_media_list,
	    fakeif->iff_media_count * sizeof(fakeif->iff_media_list[0]));
	if (feth_in_bsd_mode(fakeif)) {
		error = ifnet_attach(ifp, NULL);
		if (error) {
			ifnet_release(ifp);
			feth_release(fakeif);
			return error;
		}
		fakeif->iff_ifp = ifp;
	}

	ifnet_set_lladdr(ifp, mac_address, sizeof(mac_address));

	/* attach as ethernet */
	bpfattach(ifp, DLT_EN10MB, sizeof(struct ether_header));
	return 0;
}
3669 
/*
 * Clone-destroy handler: mark the interface as detaching, break the
 * peer crossover (feth_config with a NULL peer), tear down the netif
 * nexus if one was attached, and detach the ifnet. The detaching flag
 * is checked under the lock, so a second concurrent call is a no-op.
 */
static int
feth_clone_destroy(ifnet_t ifp)
{
	if_fake_ref     fakeif;
#if SKYWALK
	boolean_t       nx_attached = FALSE;
#endif /* SKYWALK */

	feth_lock();
	fakeif = ifnet_get_if_fake(ifp);
	if (fakeif == NULL || feth_is_detaching(fakeif)) {
		/* already gone or already being torn down */
		feth_unlock();
		return 0;
	}
	feth_set_detaching(fakeif);
#if SKYWALK
	nx_attached = (fakeif->iff_flags & IFF_FLAGS_NX_ATTACHED) != 0;
#endif /* SKYWALK */
	feth_unlock();
	feth_config(ifp, NULL);
#if SKYWALK
	if (nx_attached) {
		feth_detach_netif_nexus(fakeif);
		/* drop the extra reference taken at create time */
		feth_release(fakeif);
	}
#endif /* SKYWALK */
	ifnet_detach(ifp);
	return 0;
}
3699 
3700 static void
feth_enqueue_input(ifnet_t ifp,struct mbuf * m)3701 feth_enqueue_input(ifnet_t ifp, struct mbuf * m)
3702 {
3703 	struct ifnet_stat_increment_param stats = {};
3704 
3705 	stats.packets_in = 1;
3706 	stats.bytes_in = (uint32_t)mbuf_pkthdr_len(m) + ETHER_HDR_LEN;
3707 	ifnet_input(ifp, m, &stats);
3708 }
3709 
3710 
3711 static int
feth_add_mbuf_trailer(struct mbuf * m,void * trailer,size_t trailer_len)3712 feth_add_mbuf_trailer(struct mbuf *m, void *trailer, size_t trailer_len)
3713 {
3714 	int ret;
3715 	ASSERT(trailer_len <= FETH_TRAILER_LENGTH_MAX);
3716 
3717 	ret = m_append(m, trailer_len, (caddr_t)trailer);
3718 	if (ret == 1) {
3719 		FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
3720 		    "%zuB trailer added", trailer_len);
3721 		return 0;
3722 	}
3723 	FAKE_LOG(LOG_NOTICE, FE_DBGF_OUTPUT, "m_append failed");
3724 	return ENOTSUP;
3725 }
3726 
3727 static int
feth_add_mbuf_fcs(struct mbuf * m)3728 feth_add_mbuf_fcs(struct mbuf *m)
3729 {
3730 	uint32_t pkt_len, offset = 0;
3731 	uint32_t crc = 0;
3732 	int err = 0;
3733 
3734 	ASSERT(sizeof(crc) == ETHER_CRC_LEN);
3735 
3736 	pkt_len = m->m_pkthdr.len;
3737 	struct mbuf *iter = m;
3738 	while (iter != NULL && offset < pkt_len) {
3739 		uint32_t frag_len = iter->m_len;
3740 		ASSERT(frag_len <= (pkt_len - offset));
3741 		crc = crc32(crc, mtod(iter, void *), frag_len);
3742 		offset += frag_len;
3743 		iter = iter->m_next;
3744 	}
3745 
3746 	err = feth_add_mbuf_trailer(m, &crc, ETHER_CRC_LEN);
3747 	if (err != 0) {
3748 		return err;
3749 	}
3750 
3751 	m->m_flags |= M_HASFCS;
3752 
3753 	return 0;
3754 }
3755 
/*
 * Deliver an outbound frame from `ifp' directly into `peer''s input path,
 * optionally faking hardware checksum offload and appending a trailer
 * and/or an Ethernet FCS first.  Consumes `m' on all paths.
 */
static void
feth_output_common(ifnet_t ifp, struct mbuf * m, ifnet_t peer,
    iff_flags_t flags, bool fcs, void *trailer, size_t trailer_len)
{
	void *                  frame_header;

	if ((flags & IFF_FLAGS_HWCSUM) != 0) {
		/* pretend the "hardware" verified every checksum */
		m->m_pkthdr.csum_data = 0xffff;
		m->m_pkthdr.csum_flags =
		    CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
		    CSUM_IP_CHECKED | CSUM_IP_VALID;
	}

	(void)ifnet_stat_increment_out(ifp, 1, m->m_pkthdr.len, 0);
	if (trailer_len != 0 && trailer != NULL) {
		/* best effort; on failure the frame simply has no trailer */
		feth_add_mbuf_trailer(m, trailer, trailer_len);
	}
	if (fcs) {
		feth_add_mbuf_fcs(m);
	}
	if ((flags & IFF_FLAGS_SEPARATE_FRAME_HEADER) != 0) {
		/*
		 * Present the Ethernet header out-of-band: make it
		 * contiguous, record it as the frame header, then trim it
		 * from the data portion of the packet.
		 */
		m = m_copyup(m, ETHER_HDR_LEN, 0);
		if (m == NULL) {
			/* NOTE(review): assumes m_copyup frees the chain on
			 * failure — confirm against mbuf docs */
			FAKE_LOG(LOG_NOTICE, FE_DBGF_OUTPUT, "m_copyup failed");
			goto done;
		}
		frame_header = mbuf_data(m);
		mbuf_pkthdr_setheader(m, frame_header);
		m_adj(m, ETHER_HDR_LEN);
		FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
		    "%s: frame 0x%llx data 0x%llx len %ld",
		    ifp->if_xname,
		    (uint64_t)VM_KERNEL_ADDRPERM(frame_header),
		    (uint64_t)VM_KERNEL_ADDRPERM(mtod(m, void *)),
		    mbuf_len(m));
	} else {
		/* header stays in-line; just advance data/len past it */
		frame_header = mbuf_data(m);
		mbuf_pkthdr_setheader(m, frame_header);
		_mbuf_adjust_pkthdr_and_data(m, ETHER_HDR_LEN);
	}

	/* tap it out */
	if (ifp->if_bpf != NULL) {
		fake_bpf_tap_out(ifp, m, frame_header);
	}

	/* tap it in */
	if (peer->if_bpf != NULL) {
		fake_bpf_tap_in(peer, m, frame_header);
	}

	/* hand the frame to the peer as if it had been received there */
	(void)mbuf_pkthdr_setrcvif(m, peer);
	feth_enqueue_input(peer, m);
done:
	return;
}
3812 
3813 static void
feth_start(ifnet_t ifp)3814 feth_start(ifnet_t ifp)
3815 {
3816 	if_fake_ref     fakeif;
3817 	iff_flags_t     flags = 0;
3818 	bool            fcs;
3819 	struct mbuf *   m;
3820 	ifnet_t         peer = NULL;
3821 	size_t          trailer_len;
3822 
3823 	feth_lock();
3824 	fakeif = ifnet_get_if_fake(ifp);
3825 	if (fakeif == NULL) {
3826 		feth_unlock();
3827 		return;
3828 	}
3829 
3830 	if (fakeif->iff_start_busy) {
3831 		feth_unlock();
3832 		return;
3833 	}
3834 
3835 	peer = fakeif->iff_peer;
3836 	flags = fakeif->iff_flags;
3837 	fcs = fakeif->iff_fcs;
3838 	trailer_len = fakeif->iff_trailer_length;
3839 
3840 	fakeif->iff_start_busy = TRUE;
3841 	feth_unlock();
3842 	for (;;) {
3843 		if (ifnet_dequeue(ifp, &m) != 0) {
3844 			break;
3845 		}
3846 		if (peer == NULL) {
3847 			m_freem(m);
3848 			continue;
3849 		}
3850 		if (m != NULL) {
3851 			feth_output_common(ifp, m, peer, flags, fcs,
3852 			    feth_trailer, trailer_len);
3853 		}
3854 	}
3855 	feth_lock();
3856 	fakeif = ifnet_get_if_fake(ifp);
3857 	if (fakeif != NULL) {
3858 		fakeif->iff_start_busy = FALSE;
3859 	}
3860 	feth_unlock();
3861 }
3862 
3863 static int
feth_output(ifnet_t ifp,struct mbuf * m)3864 feth_output(ifnet_t ifp, struct mbuf * m)
3865 {
3866 	if_fake_ref             fakeif;
3867 	iff_flags_t             flags;
3868 	bool                    fcs;
3869 	size_t                  trailer_len;
3870 	ifnet_t                 peer = NULL;
3871 
3872 	if (m == NULL) {
3873 		return 0;
3874 	}
3875 	feth_lock();
3876 	fakeif = ifnet_get_if_fake(ifp);
3877 	if (fakeif != NULL) {
3878 		peer = fakeif->iff_peer;
3879 		flags = fakeif->iff_flags;
3880 		fcs = fakeif->iff_fcs;
3881 		trailer_len = fakeif->iff_trailer_length;
3882 	}
3883 	feth_unlock();
3884 	if (peer == NULL) {
3885 		m_freem(m);
3886 		ifnet_stat_increment_out(ifp, 0, 0, 1);
3887 		return 0;
3888 	}
3889 	feth_output_common(ifp, m, peer, flags, fcs, feth_trailer, trailer_len);
3890 	return 0;
3891 }
3892 
/*
 * Connect `ifp' to `peer' (peer != NULL) or disconnect it from its
 * current peer (peer == NULL).  The cross-over link is symmetric: both
 * iff_peer fields are updated together under the feth lock, and the
 * matching link-status events are posted after the lock is dropped.
 *
 * Returns 0, EINVAL if either softc is missing, or EBUSY if either
 * side is detaching or already paired.
 */
static int
feth_config(ifnet_t ifp, ifnet_t peer)
{
	int             connected = FALSE;
	int             disconnected = FALSE;
	int             error = 0;
	if_fake_ref     fakeif = NULL;

	feth_lock();
	fakeif = ifnet_get_if_fake(ifp);
	if (fakeif == NULL) {
		error = EINVAL;
		goto done;
	}
	if (peer != NULL) {
		/* connect to peer */
		if_fake_ref     peer_fakeif;

		peer_fakeif = ifnet_get_if_fake(peer);
		if (peer_fakeif == NULL) {
			error = EINVAL;
			goto done;
		}
		/* refuse if either side is going away or already paired */
		if (feth_is_detaching(fakeif) ||
		    feth_is_detaching(peer_fakeif) ||
		    peer_fakeif->iff_peer != NULL ||
		    fakeif->iff_peer != NULL) {
			error = EBUSY;
			goto done;
		}
#if SKYWALK
		/* both ends must use the same packet-pool mode */
		if (fakeif->iff_pp_mode !=
		    peer_fakeif->iff_pp_mode) {
			error = EINVAL;
			goto done;
		}
#endif /* SKYWALK */
		fakeif->iff_peer = peer;
		peer_fakeif->iff_peer = ifp;
		connected = TRUE;
	} else if (fakeif->iff_peer != NULL) {
		/* disconnect from peer */
		if_fake_ref     peer_fakeif;

		/* reuse `peer' so the event code below can target it */
		peer = fakeif->iff_peer;
		peer_fakeif = ifnet_get_if_fake(peer);
		if (peer_fakeif == NULL) {
			/* should not happen */
			error = EINVAL;
			goto done;
		}
		fakeif->iff_peer = NULL;
		peer_fakeif->iff_peer = NULL;
		disconnected = TRUE;
	}

done:
	feth_unlock();

	/* generate link status event if we connect or disconnect */
	if (connected) {
		interface_link_event(ifp, KEV_DL_LINK_ON);
		interface_link_event(peer, KEV_DL_LINK_ON);
	} else if (disconnected) {
		interface_link_event(ifp, KEV_DL_LINK_OFF);
		interface_link_event(peer, KEV_DL_LINK_OFF);
	}
	return error;
}
3962 
3963 static int
feth_set_media(ifnet_t ifp,struct if_fake_request * iffr)3964 feth_set_media(ifnet_t ifp, struct if_fake_request * iffr)
3965 {
3966 	if_fake_ref     fakeif;
3967 	int             error;
3968 
3969 	if (iffr->iffr_media.iffm_count > IF_FAKE_MEDIA_LIST_MAX) {
3970 		/* list is too long */
3971 		return EINVAL;
3972 	}
3973 	feth_lock();
3974 	fakeif = ifnet_get_if_fake(ifp);
3975 	if (fakeif == NULL) {
3976 		error = EINVAL;
3977 		goto done;
3978 	}
3979 	fakeif->iff_media_count = iffr->iffr_media.iffm_count;
3980 	bcopy(iffr->iffr_media.iffm_list, fakeif->iff_media_list,
3981 	    iffr->iffr_media.iffm_count * sizeof(fakeif->iff_media_list[0]));
3982 #if 0
3983 	/* XXX: "auto-negotiate" active with peer? */
3984 	/* generate link status event? */
3985 	fakeif->iff_media_current = iffr->iffr_media.iffm_current;
3986 #endif
3987 	error = 0;
3988 done:
3989 	feth_unlock();
3990 	return error;
3991 }
3992 
3993 static int
if_fake_request_copyin(user_addr_t user_addr,struct if_fake_request * iffr,u_int32_t len)3994 if_fake_request_copyin(user_addr_t user_addr,
3995     struct if_fake_request *iffr, u_int32_t len)
3996 {
3997 	int     error;
3998 
3999 	if (user_addr == USER_ADDR_NULL || len < sizeof(*iffr)) {
4000 		error = EINVAL;
4001 		goto done;
4002 	}
4003 	error = copyin(user_addr, iffr, sizeof(*iffr));
4004 	if (error != 0) {
4005 		goto done;
4006 	}
4007 	if (iffr->iffr_reserved[0] != 0 || iffr->iffr_reserved[1] != 0 ||
4008 	    iffr->iffr_reserved[2] != 0 || iffr->iffr_reserved[3] != 0) {
4009 		error = EINVAL;
4010 		goto done;
4011 	}
4012 done:
4013 	return error;
4014 }
4015 
/*
 * Handle privileged SIOCSDRVSPEC sub-commands (the caller has already
 * passed the proc_suser() check in feth_ioctl()).
 */
static int
feth_set_drvspec(ifnet_t ifp, uint32_t cmd, u_int32_t len,
    user_addr_t user_addr)
{
	int                     error;
	struct if_fake_request  iffr;
	ifnet_t                 peer;

	switch (cmd) {
	case IF_FAKE_S_CMD_SET_PEER:
		error = if_fake_request_copyin(user_addr, &iffr, len);
		if (error != 0) {
			break;
		}
		if (iffr.iffr_peer_name[0] == '\0') {
			/* empty name means disconnect from the current peer */
			error = feth_config(ifp, NULL);
			break;
		}

		/* ensure nul termination */
		iffr.iffr_peer_name[IFNAMSIZ - 1] = '\0';
		peer = ifunit(iffr.iffr_peer_name);
		if (peer == NULL) {
			error = ENXIO;
			break;
		}
		/* the peer must itself be a feth interface */
		if (ifnet_type(peer) != IFT_ETHER) {
			error = EINVAL;
			break;
		}
		if (strcmp(ifnet_name(peer), FAKE_ETHER_NAME) != 0) {
			error = EINVAL;
			break;
		}
		error = feth_config(ifp, peer);
		break;
	case IF_FAKE_S_CMD_SET_MEDIA:
		error = if_fake_request_copyin(user_addr, &iffr, len);
		if (error != 0) {
			break;
		}
		error = feth_set_media(ifp, &iffr);
		break;
	case IF_FAKE_S_CMD_SET_DEQUEUE_STALL:
		error = if_fake_request_copyin(user_addr, &iffr, len);
		if (error != 0) {
			break;
		}
		error = feth_enable_dequeue_stall(ifp,
		    iffr.iffr_dequeue_stall);
		break;
	default:
		error = EOPNOTSUPP;
		break;
	}
	return error;
}
4073 
4074 static int
feth_get_drvspec(ifnet_t ifp,u_int32_t cmd,u_int32_t len,user_addr_t user_addr)4075 feth_get_drvspec(ifnet_t ifp, u_int32_t cmd, u_int32_t len,
4076     user_addr_t user_addr)
4077 {
4078 	int                     error = EOPNOTSUPP;
4079 	if_fake_ref             fakeif;
4080 	struct if_fake_request  iffr;
4081 	ifnet_t                 peer;
4082 
4083 	switch (cmd) {
4084 	case IF_FAKE_G_CMD_GET_PEER:
4085 		if (len < sizeof(iffr)) {
4086 			error = EINVAL;
4087 			break;
4088 		}
4089 		feth_lock();
4090 		fakeif = ifnet_get_if_fake(ifp);
4091 		if (fakeif == NULL) {
4092 			feth_unlock();
4093 			error = EOPNOTSUPP;
4094 			break;
4095 		}
4096 		peer = fakeif->iff_peer;
4097 		feth_unlock();
4098 		bzero(&iffr, sizeof(iffr));
4099 		if (peer != NULL) {
4100 			strlcpy(iffr.iffr_peer_name,
4101 			    if_name(peer),
4102 			    sizeof(iffr.iffr_peer_name));
4103 		}
4104 		error = copyout(&iffr, user_addr, sizeof(iffr));
4105 		break;
4106 	default:
4107 		break;
4108 	}
4109 	return error;
4110 }
4111 
/*
 * Accessor union for the 32-bit and 64-bit variants of struct ifdrv
 * carried by SIOC[SG]DRVSPEC{32,64}; lets feth_ioctl() pick the right
 * layout without repeated casts.
 */
union ifdrvu {
	struct ifdrv32  *ifdrvu_32;
	struct ifdrv64  *ifdrvu_64;
	void            *ifdrvu_p;
};
4117 
/*
 * ioctl handler for feth interfaces.  Dispatches address/MTU/flags/media
 * requests and forwards the driver-specific SIOC[SG]DRVSPEC commands to
 * feth_set_drvspec()/feth_get_drvspec().  Returns 0 or an errno.
 */
static int
feth_ioctl(ifnet_t ifp, u_long cmd, void * data)
{
	unsigned int            count;
	struct ifdevmtu *       devmtu_p;
	union ifdrvu            drv;
	uint32_t                drv_cmd;
	uint32_t                drv_len;
	boolean_t               drv_set_command = FALSE;
	int                     error = 0;
	struct ifmediareq32 *   ifmr;
	struct ifreq *          ifr;
	if_fake_ref             fakeif;
	int                     status;
	user_addr_t             user_addr;

	ifr = (struct ifreq *)data;
	switch (cmd) {
	case SIOCSIFADDR:
		/* assigning an address implicitly brings the interface up */
		ifnet_set_flags(ifp, IFF_UP, IFF_UP);
		break;

	case SIOCGIFMEDIA32:
	case SIOCGIFMEDIA64:
		feth_lock();
		fakeif = ifnet_get_if_fake(ifp);
		if (fakeif == NULL) {
			feth_unlock();
			return EOPNOTSUPP;
		}
		/* link reads as "active" only while connected to a peer */
		status = (fakeif->iff_peer != NULL)
		    ? (IFM_AVALID | IFM_ACTIVE) : IFM_AVALID;
		/*
		 * NOTE(review): the 32-bit layout is used to access fields
		 * for both SIOCGIFMEDIA32 and SIOCGIFMEDIA64; this relies
		 * on the two structs sharing a common prefix up to the
		 * user list pointer — confirm against ifmediareq{32,64}.
		 */
		ifmr = (struct ifmediareq32 *)data;
		user_addr = (cmd == SIOCGIFMEDIA64) ?
		    ((struct ifmediareq64 *)data)->ifmu_ulist :
		    CAST_USER_ADDR_T(((struct ifmediareq32 *)data)->ifmu_ulist);
		count = ifmr->ifm_count;
		ifmr->ifm_active = (fakeif->iff_peer != NULL)
		    ? FAKE_DEFAULT_MEDIA : IFM_ETHER;
		ifmr->ifm_current = IFM_ETHER;
		ifmr->ifm_mask = 0;
		ifmr->ifm_status = status;
		if (user_addr == USER_ADDR_NULL) {
			/* size query: report how many media words we have */
			ifmr->ifm_count = fakeif->iff_media_count;
		} else if (count > 0) {
			/* copy out at most what the caller asked for */
			if (count > fakeif->iff_media_count) {
				count = fakeif->iff_media_count;
			}
			ifmr->ifm_count = count;
			error = copyout(&fakeif->iff_media_list, user_addr,
			    count * sizeof(int));
		}
		feth_unlock();
		break;

	case SIOCGIFDEVMTU:
		devmtu_p = &ifr->ifr_devmtu;
		devmtu_p->ifdm_current = ifnet_mtu(ifp);
		devmtu_p->ifdm_max = feth_max_mtu(ifp);
		devmtu_p->ifdm_min = IF_MINMTU;
		break;

	case SIOCSIFMTU:
		if ((unsigned int)ifr->ifr_mtu > feth_max_mtu(ifp) ||
		    ifr->ifr_mtu < IF_MINMTU) {
			error = EINVAL;
		} else {
			error = ifnet_set_mtu(ifp, ifr->ifr_mtu);
		}
		break;

	case SIOCSDRVSPEC32:
	case SIOCSDRVSPEC64:
		/* set commands are privileged */
		error = proc_suser(current_proc());
		if (error != 0) {
			break;
		}
		drv_set_command = TRUE;
		OS_FALLTHROUGH;
	case SIOCGDRVSPEC32:
	case SIOCGDRVSPEC64:
		/* unpack the 32/64-bit ifdrv into a common form */
		drv.ifdrvu_p = data;
		if (cmd == SIOCGDRVSPEC32 || cmd == SIOCSDRVSPEC32) {
			drv_cmd = drv.ifdrvu_32->ifd_cmd;
			drv_len = drv.ifdrvu_32->ifd_len;
			user_addr = CAST_USER_ADDR_T(drv.ifdrvu_32->ifd_data);
		} else {
			drv_cmd = drv.ifdrvu_64->ifd_cmd;
			drv_len = drv.ifdrvu_64->ifd_len;
			user_addr = drv.ifdrvu_64->ifd_data;
		}
		if (drv_set_command) {
			error = feth_set_drvspec(ifp, drv_cmd, drv_len,
			    user_addr);
		} else {
			error = feth_get_drvspec(ifp, drv_cmd, drv_len,
			    user_addr);
		}
		break;

	case SIOCSIFLLADDR:
		error = ifnet_set_lladdr(ifp, ifr->ifr_addr.sa_data,
		    ifr->ifr_addr.sa_len);
		break;

	case SIOCSIFFLAGS:
		/* keep IFF_RUNNING in sync with IFF_UP */
		if ((ifp->if_flags & IFF_UP) != 0) {
			/* marked up, set running if not already set */
			if ((ifp->if_flags & IFF_RUNNING) == 0) {
				/* set running */
				error = ifnet_set_flags(ifp, IFF_RUNNING,
				    IFF_RUNNING);
			}
		} else if ((ifp->if_flags & IFF_RUNNING) != 0) {
			/* marked down, clear running */
			error = ifnet_set_flags(ifp, 0, IFF_RUNNING);
		}
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		/* multicast membership changes are accepted and ignored */
		error = 0;
		break;
	case SIOCSIFCAP: {
		uint32_t        cap;

		/* only LRO is togglable, and only if enabled at creation */
		feth_lock();
		fakeif = ifnet_get_if_fake(ifp);
		if (fakeif == NULL ||
		    (fakeif->iff_flags & IFF_FLAGS_LRO) == 0) {
			feth_unlock();
			return EOPNOTSUPP;
		}
		feth_unlock();
		cap = (ifr->ifr_reqcap & IFCAP_LRO) != 0 ? IFCAP_LRO : 0;
		error = ifnet_set_capabilities_enabled(ifp, cap, IFCAP_LRO);
		break;
	}
	default:
		error = EOPNOTSUPP;
		break;
	}
	return error;
}
4262 
/*
 * ifnet detach completion callback (installed as feth_init.detach):
 * severs the ifnet->softc link and drops the softc and ifnet references
 * once the interface is fully detached.
 */
static void
feth_if_free(ifnet_t ifp)
{
	if_fake_ref             fakeif;

	if (ifp == NULL) {
		return;
	}
	feth_lock();
	fakeif = ifnet_get_if_fake(ifp);
	if (fakeif == NULL) {
		feth_unlock();
		return;
	}
	/* clear the softc link; ifnet_get_if_fake() presumably reads it */
	ifp->if_softc = NULL;
#if SKYWALK
	/* the doorbell thread call must be gone before we release */
	VERIFY(fakeif->iff_doorbell_tcall == NULL);
#endif /* SKYWALK */
	feth_unlock();
	feth_release(fakeif);
	ifnet_release(ifp);
	return;
}
4286 
4287 __private_extern__ void
if_fake_init(void)4288 if_fake_init(void)
4289 {
4290 	int error;
4291 
4292 #if SKYWALK
4293 	(void)feth_register_nexus_domain_provider();
4294 #endif /* SKYWALK */
4295 	error = if_clone_attach(&feth_cloner);
4296 	if (error != 0) {
4297 		return;
4298 	}
4299 	return;
4300 }
4301