/* xref: /xnu-11215.41.3/bsd/net/if_fake.c (revision 33de042d024d46de5ff4e89f2471de6608e37fa4) */
1 /*
2  * Copyright (c) 2015-2024 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 /*
30  * if_fake.c
31  * - fake network interface used for testing
32  * - "feth" (e.g. "feth0", "feth1") is a virtual ethernet interface that allows
33  *   two instances to have their output/input paths "crossed-over" so that
34  *   output on one is input on the other
35  */
36 
37 /*
38  * Modification History:
39  *
40  * September 9, 2015	Dieter Siegmund ([email protected])
41  * - created
42  */
43 
44 #include <sys/param.h>
45 #include <sys/kernel.h>
46 #include <sys/malloc.h>
47 #include <sys/mbuf.h>
48 #include <sys/queue.h>
49 #include <sys/socket.h>
50 #include <sys/sockio.h>
51 #include <sys/sysctl.h>
52 #include <sys/systm.h>
53 #include <sys/kern_event.h>
54 #include <sys/mcache.h>
55 #include <sys/syslog.h>
56 
57 #include <net/bpf.h>
58 #include <net/ethernet.h>
59 #include <net/if.h>
60 #include <net/if_vlan_var.h>
61 #include <net/if_fake_var.h>
62 #include <net/if_arp.h>
63 #include <net/if_dl.h>
64 #include <net/if_ether.h>
65 #include <net/if_types.h>
66 #include <libkern/OSAtomic.h>
67 
68 #include <net/dlil.h>
69 
70 #include <net/kpi_interface.h>
71 #include <net/kpi_protocol.h>
72 
73 #include <kern/locks.h>
74 #include <kern/zalloc.h>
75 
76 #include <mach/mach_time.h>
77 
78 #include <os/log.h>
79 
80 #ifdef INET
81 #include <netinet/in.h>
82 #include <netinet/if_ether.h>
83 #endif
84 
85 #include <net/if_media.h>
86 #include <net/ether_if_module.h>
87 #if SKYWALK
88 #include <skywalk/os_skywalk_private.h>
89 #include <skywalk/nexus/netif/nx_netif.h>
90 #include <skywalk/channel/channel_var.h>
91 #endif /* SKYWALK */
92 
93 /*
94  * if_fake_debug, FE_DBGF_*
95  * - 'if_fake_debug' is a bitmask of FE_DBGF_* flags that can be set
96  *   to enable additional logs for the corresponding fake function
97  * - "sysctl net.link.fake.debug" controls the value of
98  *   'if_fake_debug'
99  */
100 static uint32_t if_fake_debug = 0;
101 
102 #define FE_DBGF_LIFECYCLE               0x0001
103 #define FE_DBGF_INPUT                   0x0002
104 #define FE_DBGF_OUTPUT                  0x0004
105 #define FE_DBGF_CONTROL                 0x0008
106 #define FE_DBGF_MISC                    0x0010
107 
108 /*
109  * if_fake_log_level
110  * - 'if_fake_log_level' ensures that by default important logs are
111  *   logged regardless of if_fake_debug by comparing the log level
112  *   in FAKE_LOG to if_fake_log_level
113  * - use "sysctl net.link.fake.log_level" controls the value of
114  *   'if_fake_log_level'
115  * - the default value of 'if_fake_log_level' is LOG_NOTICE; important
116  *   logs must use LOG_NOTICE to ensure they appear by default
117  */
/* fully parenthesize the argument to avoid operator-precedence surprises */
#define FAKE_DBGF_ENABLED(__flag)     ((if_fake_debug & (__flag)) != 0)

/*
 * FAKE_LOG
 * - macro to generate the specified log conditionally based on
 *   the specified log level and debug flags
 * - arguments are parenthesized in the expansion so that callers may
 *   pass arbitrary expressions without precedence hazards
 */
#define FAKE_LOG(__level, __dbgf, __string, ...)                        \
	do {                                                            \
	        if ((__level) <= if_fake_log_level ||                   \
	            FAKE_DBGF_ENABLED(__dbgf)) {                        \
	                os_log(OS_LOG_DEFAULT, "%s: " __string,         \
	                       __func__, ## __VA_ARGS__);               \
	        }                                                       \
	} while (0)
133 
134 static boolean_t
is_power_of_two(unsigned int val)135 is_power_of_two(unsigned int val)
136 {
137 	return (val & (val - 1)) == 0;
138 }
139 
140 #define FAKE_ETHER_NAME         "feth"
141 
142 SYSCTL_DECL(_net_link);
143 SYSCTL_NODE(_net_link, OID_AUTO, fake, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
144     "Fake interface");
145 
146 static int if_fake_txstart = 1;
147 SYSCTL_INT(_net_link_fake, OID_AUTO, txstart, CTLFLAG_RW | CTLFLAG_LOCKED,
148     &if_fake_txstart, 0, "Fake interface TXSTART mode");
149 
150 static int if_fake_hwcsum = 0;
151 SYSCTL_INT(_net_link_fake, OID_AUTO, hwcsum, CTLFLAG_RW | CTLFLAG_LOCKED,
152     &if_fake_hwcsum, 0, "Fake interface simulate hardware checksum");
153 
154 static int if_fake_vlan_tagging = 1;
155 SYSCTL_INT(_net_link_fake, OID_AUTO, vlan_tagging, CTLFLAG_RW | CTLFLAG_LOCKED,
156     &if_fake_vlan_tagging, 0, "Fake interface VLAN tagging");
157 
158 static int if_fake_nxattach = 0;
159 SYSCTL_INT(_net_link_fake, OID_AUTO, nxattach, CTLFLAG_RW | CTLFLAG_LOCKED,
160     &if_fake_nxattach, 0, "Fake interface auto-attach nexus");
161 
162 static int if_fake_bsd_mode = 1;
163 SYSCTL_INT(_net_link_fake, OID_AUTO, bsd_mode, CTLFLAG_RW | CTLFLAG_LOCKED,
164     &if_fake_bsd_mode, 0, "Fake interface attach as BSD interface");
165 
166 static int if_fake_log_level = LOG_NOTICE;
167 SYSCTL_INT(_net_link_fake, OID_AUTO, log_level, CTLFLAG_RW | CTLFLAG_LOCKED,
168     &if_fake_log_level, 0, "Fake interface log level");
169 
170 SYSCTL_INT(_net_link_fake, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
171     &if_fake_debug, 0, "Fake interface debug flags");
172 
173 static int if_fake_wmm_mode = 0;
174 SYSCTL_INT(_net_link_fake, OID_AUTO, wmm_mode, CTLFLAG_RW | CTLFLAG_LOCKED,
175     &if_fake_wmm_mode, 0, "Fake interface in 802.11 WMM mode");
176 
177 static int if_fake_multibuflet = 0;
178 SYSCTL_INT(_net_link_fake, OID_AUTO, multibuflet, CTLFLAG_RW | CTLFLAG_LOCKED,
179     &if_fake_multibuflet, 0, "Fake interface using multi-buflet packets");
180 
181 static int if_fake_low_latency = 0;
182 SYSCTL_INT(_net_link_fake, OID_AUTO, low_latency, CTLFLAG_RW | CTLFLAG_LOCKED,
183     &if_fake_low_latency, 0, "Fake interface with a low latency qset");
184 
185 static int if_fake_switch_combined_mode = 0;
186 SYSCTL_INT(_net_link_fake, OID_AUTO, switch_combined_mode,
187     CTLFLAG_RW | CTLFLAG_LOCKED, &if_fake_switch_combined_mode, 0,
188     "Switch a qset between combined and separate mode during dequeues");
189 
190 static int if_fake_switch_mode_frequency = 10;
191 SYSCTL_INT(_net_link_fake, OID_AUTO, switch_mode_frequency,
192     CTLFLAG_RW | CTLFLAG_LOCKED, &if_fake_switch_mode_frequency, 0,
193     "The number of dequeues before we switch between the combined and separated mode");
194 
195 static int if_fake_tso_support = 0;
196 SYSCTL_INT(_net_link_fake, OID_AUTO, tso_support, CTLFLAG_RW | CTLFLAG_LOCKED,
197     &if_fake_tso_support, 0, "Fake interface with support for TSO offload");
198 
199 #define DEFAULT_EXPIRATION_THRESHOLD 500 /* usec */
200 static int if_fake_expiration_threshold_us = DEFAULT_EXPIRATION_THRESHOLD;
201 SYSCTL_INT(_net_link_fake, OID_AUTO, expiration_threshold, CTLFLAG_RW | CTLFLAG_LOCKED,
202     &if_fake_expiration_threshold_us, DEFAULT_EXPIRATION_THRESHOLD,
203     "Expiration threshold (usec) for expiration testing");
204 
205 static int if_fake_lro = 0;
206 SYSCTL_INT(_net_link_fake, OID_AUTO, lro, CTLFLAG_RW | CTLFLAG_LOCKED,
207     &if_fake_lro, 0, "Fake interface report LRO capability");
208 
209 static int if_fake_separate_frame_header = 0;
210 SYSCTL_INT(_net_link_fake, OID_AUTO, separate_frame_header,
211     CTLFLAG_RW | CTLFLAG_LOCKED,
212     &if_fake_separate_frame_header, 0, "Put frame header in separate mbuf");
213 
214 typedef enum {
215 	IFF_PP_MODE_GLOBAL = 0,         /* share a global pool */
216 	IFF_PP_MODE_PRIVATE = 1,        /* creates its own rx/tx pool */
217 	IFF_PP_MODE_PRIVATE_SPLIT = 2,  /* creates its own split rx & tx pool */
218 } iff_pktpool_mode_t;
219 static iff_pktpool_mode_t if_fake_pktpool_mode = IFF_PP_MODE_GLOBAL;
220 SYSCTL_INT(_net_link_fake, OID_AUTO, pktpool_mode, CTLFLAG_RW | CTLFLAG_LOCKED,
221     &if_fake_pktpool_mode, IFF_PP_MODE_GLOBAL,
222     "Fake interface packet pool mode (0 global, 1 private, 2 private split");
223 
224 #define FETH_LINK_LAYER_AGGRETATION_FACTOR_MAX 512
225 #define FETH_LINK_LAYER_AGGRETATION_FACTOR_DEF 96
226 static int if_fake_link_layer_aggregation_factor =
227     FETH_LINK_LAYER_AGGRETATION_FACTOR_DEF;
228 static int
229 feth_link_layer_aggregation_factor_sysctl SYSCTL_HANDLER_ARGS
230 {
231 #pragma unused(oidp, arg1, arg2)
232 	unsigned int new_value;
233 	int changed;
234 	int error;
235 
236 	error = sysctl_io_number(req, if_fake_link_layer_aggregation_factor,
237 	    sizeof(if_fake_link_layer_aggregation_factor), &new_value,
238 	    &changed);
239 	if (error == 0 && changed != 0) {
240 		if (new_value <= 0 ||
241 		    new_value > FETH_LINK_LAYER_AGGRETATION_FACTOR_MAX) {
242 			return EINVAL;
243 		}
244 		if_fake_link_layer_aggregation_factor = new_value;
245 	}
246 	return error;
247 }
248 
249 SYSCTL_PROC(_net_link_fake, OID_AUTO, link_layer_aggregation_factor,
250     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
251     0, 0, feth_link_layer_aggregation_factor_sysctl, "IU",
252     "Fake interface link layer aggregation factor");
253 
254 #define FETH_TX_HEADROOM_MAX      32
255 static unsigned int if_fake_tx_headroom = FETH_TX_HEADROOM_MAX;
256 static int
257 feth_tx_headroom_sysctl SYSCTL_HANDLER_ARGS
258 {
259 #pragma unused(oidp, arg1, arg2)
260 	unsigned int new_value;
261 	int changed;
262 	int error;
263 
264 	error = sysctl_io_number(req, if_fake_tx_headroom,
265 	    sizeof(if_fake_tx_headroom), &new_value, &changed);
266 	if (error == 0 && changed != 0) {
267 		if (new_value > FETH_TX_HEADROOM_MAX ||
268 		    (new_value % 8) != 0) {
269 			return EINVAL;
270 		}
271 		if_fake_tx_headroom = new_value;
272 	}
273 	return 0;
274 }
275 
276 SYSCTL_PROC(_net_link_fake, OID_AUTO, tx_headroom,
277     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
278     0, 0, feth_tx_headroom_sysctl, "IU", "Fake ethernet Tx headroom");
279 
280 static int if_fake_fcs = 0;
281 SYSCTL_INT(_net_link_fake, OID_AUTO, fcs, CTLFLAG_RW | CTLFLAG_LOCKED,
282     &if_fake_fcs, 0, "Fake interface using frame check sequence");
283 
284 #define FETH_TRAILER_LENGTH_MAX 28
285 char feth_trailer[FETH_TRAILER_LENGTH_MAX + 1] = "trailertrailertrailertrailer";
286 static unsigned int if_fake_trailer_length = 0;
287 static int
288 feth_trailer_length_sysctl SYSCTL_HANDLER_ARGS
289 {
290 #pragma unused(oidp, arg1, arg2)
291 	unsigned int new_value;
292 	int changed;
293 	int error;
294 
295 	error = sysctl_io_number(req, if_fake_trailer_length,
296 	    sizeof(if_fake_trailer_length), &new_value, &changed);
297 	if (error == 0 && changed != 0) {
298 		if (new_value > FETH_TRAILER_LENGTH_MAX) {
299 			return EINVAL;
300 		}
301 		if_fake_trailer_length = new_value;
302 	}
303 	return 0;
304 }
305 
306 SYSCTL_PROC(_net_link_fake, OID_AUTO, trailer_length,
307     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
308     feth_trailer_length_sysctl, "IU", "Fake interface frame trailer length");
309 
310 /* sysctl net.link.fake.max_mtu */
311 #define FETH_MAX_MTU_DEFAULT    2048
312 #define FETH_MAX_MTU_MAX        ((16 * 1024) - ETHER_HDR_LEN)
313 
314 static unsigned int if_fake_max_mtu = FETH_MAX_MTU_DEFAULT;
315 
316 /* sysctl net.link.fake.buflet_size */
317 #define FETH_BUFLET_SIZE_MIN            512
318 #define FETH_BUFLET_SIZE_MAX            (32 * 1024)
319 #define FETH_TSO_BUFLET_SIZE            (16 * 1024)
320 
321 static unsigned int if_fake_buflet_size = FETH_BUFLET_SIZE_MIN;
322 static unsigned int if_fake_tso_buffer_size = FETH_TSO_BUFLET_SIZE;
323 
324 static int
325 feth_tso_buffer_size_sysctl SYSCTL_HANDLER_ARGS
326 {
327 #pragma unused(oidp, arg1, arg2)
328 	unsigned int new_value;
329 	int changed;
330 	int error;
331 
332 	error = sysctl_io_number(req, if_fake_tso_buffer_size,
333 	    sizeof(if_fake_tso_buffer_size), &new_value, &changed);
334 	if (error == 0 && changed != 0) {
335 		/* must be a power of 2 between min and max */
336 		if (new_value > FETH_BUFLET_SIZE_MAX ||
337 		    new_value < FETH_BUFLET_SIZE_MIN ||
338 		    !is_power_of_two(new_value)) {
339 			return EINVAL;
340 		}
341 		if_fake_tso_buffer_size = new_value;
342 	}
343 	return 0;
344 }
345 
346 SYSCTL_PROC(_net_link_fake, OID_AUTO, tso_buf_size,
347     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
348     0, 0, feth_tso_buffer_size_sysctl, "IU", "Fake interface TSO buffer size");
349 
350 static int
351 feth_max_mtu_sysctl SYSCTL_HANDLER_ARGS
352 {
353 #pragma unused(oidp, arg1, arg2)
354 	unsigned int new_value;
355 	int changed;
356 	int error;
357 
358 	error = sysctl_io_number(req, if_fake_max_mtu,
359 	    sizeof(if_fake_max_mtu), &new_value, &changed);
360 	if (error == 0 && changed != 0) {
361 		if (new_value > FETH_MAX_MTU_MAX ||
362 		    new_value < ETHERMTU ||
363 		    new_value <= if_fake_buflet_size) {
364 			return EINVAL;
365 		}
366 		if_fake_max_mtu = new_value;
367 	}
368 	return 0;
369 }
370 
371 SYSCTL_PROC(_net_link_fake, OID_AUTO, max_mtu,
372     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
373     0, 0, feth_max_mtu_sysctl, "IU", "Fake interface maximum MTU");
374 
375 static int
376 feth_buflet_size_sysctl SYSCTL_HANDLER_ARGS
377 {
378 #pragma unused(oidp, arg1, arg2)
379 	unsigned int new_value;
380 	int changed;
381 	int error;
382 
383 	error = sysctl_io_number(req, if_fake_buflet_size,
384 	    sizeof(if_fake_buflet_size), &new_value, &changed);
385 	if (error == 0 && changed != 0) {
386 		/* must be a power of 2 between min and max */
387 		if (new_value > FETH_BUFLET_SIZE_MAX ||
388 		    new_value < FETH_BUFLET_SIZE_MIN ||
389 		    !is_power_of_two(new_value) ||
390 		    new_value >= if_fake_max_mtu) {
391 			return EINVAL;
392 		}
393 		if_fake_buflet_size = new_value;
394 	}
395 	return 0;
396 }
397 
398 SYSCTL_PROC(_net_link_fake, OID_AUTO, buflet_size,
399     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
400     0, 0, feth_buflet_size_sysctl, "IU", "Fake interface buflet size");
401 
402 static unsigned int if_fake_user_access = 0;
403 
404 static int
405 feth_user_access_sysctl SYSCTL_HANDLER_ARGS
406 {
407 #pragma unused(oidp, arg1, arg2)
408 	unsigned int new_value;
409 	int changed;
410 	int error;
411 
412 	error = sysctl_io_number(req, if_fake_user_access,
413 	    sizeof(if_fake_user_access), &new_value, &changed);
414 	if (error == 0 && changed != 0) {
415 		if (new_value != 0) {
416 			if (new_value != 1) {
417 				return EINVAL;
418 			}
419 		}
420 		if_fake_user_access = new_value;
421 	}
422 	return 0;
423 }
424 
425 SYSCTL_PROC(_net_link_fake, OID_AUTO, user_access,
426     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
427     0, 0, feth_user_access_sysctl, "IU", "Fake interface user access");
428 
429 /* sysctl net.link.fake.if_adv_intvl (unit: millisecond) */
430 #define FETH_IF_ADV_INTVL_MIN            10
431 #define FETH_IF_ADV_INTVL_MAX            INT_MAX
432 
433 static int if_fake_if_adv_interval = 0; /* no interface advisory */
434 static int
435 feth_if_adv_interval_sysctl SYSCTL_HANDLER_ARGS
436 {
437 #pragma unused(oidp, arg1, arg2)
438 	unsigned int new_value;
439 	int changed;
440 	int error;
441 
442 	error = sysctl_io_number(req, if_fake_if_adv_interval,
443 	    sizeof(if_fake_if_adv_interval), &new_value, &changed);
444 	if (error == 0 && changed != 0) {
445 		if ((new_value != 0) && (new_value > FETH_IF_ADV_INTVL_MAX ||
446 		    new_value < FETH_IF_ADV_INTVL_MIN)) {
447 			return EINVAL;
448 		}
449 		if_fake_if_adv_interval = new_value;
450 	}
451 	return 0;
452 }
453 
454 SYSCTL_PROC(_net_link_fake, OID_AUTO, if_adv_intvl,
455     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
456     feth_if_adv_interval_sysctl, "IU",
457     "Fake interface will generate interface advisories reports at the specified interval in ms");
458 
459 /* sysctl net.link.fake.tx_drops */
460 /*
461  * Fake ethernet will drop packet on the transmit path at the specified
462  * rate, i.e drop one in every if_fake_tx_drops number of packets.
463  */
464 #define FETH_TX_DROPS_MIN            0
465 #define FETH_TX_DROPS_MAX            INT_MAX
466 static int if_fake_tx_drops = 0; /* no packets are dropped */
467 static int
468 feth_fake_tx_drops_sysctl SYSCTL_HANDLER_ARGS
469 {
470 #pragma unused(oidp, arg1, arg2)
471 	unsigned int new_value;
472 	int changed;
473 	int error;
474 
475 	error = sysctl_io_number(req, if_fake_tx_drops,
476 	    sizeof(if_fake_tx_drops), &new_value, &changed);
477 	if (error == 0 && changed != 0) {
478 		if (new_value > FETH_TX_DROPS_MAX ||
479 		    new_value < FETH_TX_DROPS_MIN) {
480 			return EINVAL;
481 		}
482 		if_fake_tx_drops = new_value;
483 	}
484 	return 0;
485 }
486 
487 SYSCTL_PROC(_net_link_fake, OID_AUTO, tx_drops,
488     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
489     feth_fake_tx_drops_sysctl, "IU",
490     "Fake interface will intermittently drop packets on Tx path");
491 
492 /* sysctl.net.link.fake.tx_exp_policy */
493 
494 typedef enum {
495 	IFF_TX_EXP_POLICY_DISABLED = 0,          /* Expiry notification disabled */
496 	IFF_TX_EXP_POLICY_DROP_AND_NOTIFY = 1,   /* Expiry notification enabled; drop + notify mode */
497 	IFF_TX_EXP_POLICY_NOTIFY_ONLY = 2,       /* Expiry notification enabled; notify only mode */
498 	IFF_TX_EXP_POLICY_METADATA = 3,          /* Expiry notification enabled; use packet metadata */
499 } iff_tx_exp_policy_t;
500 static iff_tx_exp_policy_t if_fake_tx_exp_policy = IFF_TX_EXP_POLICY_DISABLED;
501 
502 static int
503 feth_fake_tx_exp_policy_sysctl SYSCTL_HANDLER_ARGS
504 {
505 #pragma unused(oidp, arg1, arg2)
506 	unsigned int new_value;
507 	int changed;
508 	int error;
509 
510 	error = sysctl_io_number(req, if_fake_tx_exp_policy,
511 	    sizeof(if_fake_tx_exp_policy), &new_value, &changed);
512 	FAKE_LOG(LOG_DEBUG, FE_DBGF_CONTROL,
513 	    "if_fake_tx_exp_policy: %u -> %u (%d)",
514 	    if_fake_tx_exp_policy, new_value, changed);
515 	if (error == 0 && changed != 0) {
516 		if (new_value > IFF_TX_EXP_POLICY_METADATA ||
517 		    new_value < IFF_TX_EXP_POLICY_DISABLED) {
518 			return EINVAL;
519 		}
520 		if_fake_tx_exp_policy = new_value;
521 	}
522 	return 0;
523 }
524 SYSCTL_PROC(_net_link_fake, OID_AUTO, tx_exp_policy,
525     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
526     feth_fake_tx_exp_policy_sysctl, "IU",
527     "Fake interface handling policy for expired TX attempts "
528     "(0 disabled, 1 drop and notify, 2 notify only, 3 packet metadata)");
529 
530 /* sysctl net.link.fake.tx_completion_mode */
531 typedef enum {
532 	IFF_TX_COMPL_MODE_SYNC = 0,
533 	IFF_TX_COMPL_MODE_ASYNC = 1,
534 } iff_tx_completion_mode_t;
535 static iff_tx_completion_mode_t if_tx_completion_mode = IFF_TX_COMPL_MODE_SYNC;
536 static int
537 feth_fake_tx_completion_mode_sysctl SYSCTL_HANDLER_ARGS
538 {
539 #pragma unused(oidp, arg1, arg2)
540 	unsigned int new_value;
541 	int changed;
542 	int error;
543 
544 	error = sysctl_io_number(req, if_tx_completion_mode,
545 	    sizeof(if_tx_completion_mode), &new_value, &changed);
546 	if (error == 0 && changed != 0) {
547 		if (new_value > IFF_TX_COMPL_MODE_ASYNC ||
548 		    new_value < IFF_TX_COMPL_MODE_SYNC) {
549 			return EINVAL;
550 		}
551 		if_tx_completion_mode = new_value;
552 	}
553 	return 0;
554 }
555 SYSCTL_PROC(_net_link_fake, OID_AUTO, tx_completion_mode,
556     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
557     feth_fake_tx_completion_mode_sysctl, "IU",
558     "Fake interface tx completion mode (0 synchronous, 1 asynchronous)");
559 
560 /* sysctl net.link.fake.llink_cnt */
561 
562 /* The maximum number of logical links (including default link) */
563 #define FETH_MAX_LLINKS 16
564 /*
565  * The default number of logical links (including default link).
566  * Zero means logical link mode is disabled.
567  */
568 #define FETH_DEF_LLINKS 0
569 
570 static uint32_t if_fake_llink_cnt = FETH_DEF_LLINKS;
571 static int
572 feth_fake_llink_cnt_sysctl SYSCTL_HANDLER_ARGS
573 {
574 #pragma unused(oidp, arg1, arg2)
575 	unsigned int new_value;
576 	int changed;
577 	int error;
578 
579 	error = sysctl_io_number(req, if_fake_llink_cnt,
580 	    sizeof(if_fake_llink_cnt), &new_value, &changed);
581 	if (error == 0 && changed != 0) {
582 		if (new_value > FETH_MAX_LLINKS) {
583 			return EINVAL;
584 		}
585 		if_fake_llink_cnt = new_value;
586 	}
587 	return 0;
588 }
589 
590 SYSCTL_PROC(_net_link_fake, OID_AUTO, llink_cnt,
591     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
592     feth_fake_llink_cnt_sysctl, "IU",
593     "Fake interface logical link count");
594 
595 /* sysctl net.link.fake.qset_cnt */
596 
597 /* The maximum number of qsets for each logical link */
598 #define FETH_MAX_QSETS  16
599 /* The default number of qsets for each logical link */
600 #define FETH_DEF_QSETS  4
601 
602 static uint32_t if_fake_qset_cnt = FETH_DEF_QSETS;
603 static int
604 feth_fake_qset_cnt_sysctl SYSCTL_HANDLER_ARGS
605 {
606 #pragma unused(oidp, arg1, arg2)
607 	unsigned int new_value;
608 	int changed;
609 	int error;
610 
611 	error = sysctl_io_number(req, if_fake_qset_cnt,
612 	    sizeof(if_fake_qset_cnt), &new_value, &changed);
613 	if (error == 0 && changed != 0) {
614 		if (new_value == 0 ||
615 		    new_value > FETH_MAX_QSETS) {
616 			return EINVAL;
617 		}
618 		if_fake_qset_cnt = new_value;
619 	}
620 	return 0;
621 }
622 
623 SYSCTL_PROC(_net_link_fake, OID_AUTO, qset_cnt,
624     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
625     feth_fake_qset_cnt_sysctl, "IU",
626     "Fake interface queue set count");
627 
628 
629 static void
_mbuf_adjust_pkthdr_and_data(mbuf_t m,int len)630 _mbuf_adjust_pkthdr_and_data(mbuf_t m, int len)
631 {
632 	mbuf_setdata(m, (char *)mbuf_data(m) + len, mbuf_len(m) - len);
633 	mbuf_pkthdr_adjustlen(m, -len);
634 }
635 
636 static inline void *
get_bpf_header(mbuf_t m,struct ether_header * eh_p,struct ether_vlan_header * evl_p,size_t * header_len)637 get_bpf_header(mbuf_t m, struct ether_header * eh_p,
638     struct ether_vlan_header * evl_p, size_t * header_len)
639 {
640 	void *  header;
641 
642 	/* no VLAN tag, just use the ethernet header */
643 	if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
644 		header = eh_p;
645 		*header_len = sizeof(*eh_p);
646 		goto done;
647 	}
648 
649 	/* has VLAN tag, populate the ether VLAN header */
650 	bcopy(eh_p, evl_p,
651 	    offsetof(struct ether_header, ether_type));   /* dst+src ether */
652 	evl_p->evl_encap_proto = htons(ETHERTYPE_VLAN);   /* VLAN encap */
653 	evl_p->evl_tag = htons(m->m_pkthdr.vlan_tag);     /* tag */
654 	evl_p->evl_proto = eh_p->ether_type;              /* proto */
655 	*header_len = sizeof(*evl_p);
656 	header = evl_p;
657 
658 done:
659 	return header;
660 }
661 
662 typedef void (*_tap_func)(ifnet_t interface, u_int32_t dlt, mbuf_t packet,
663     void *__sized_by(header_len) header, size_t header_len);
664 
665 static void
fake_bpf_tap_common(ifnet_t ifp,mbuf_t m,struct ether_header * eh_p,_tap_func func)666 fake_bpf_tap_common(ifnet_t ifp, mbuf_t m, struct ether_header * eh_p,
667     _tap_func func)
668 {
669 	struct ether_vlan_header        evl;
670 	void *                          header;
671 	size_t                          header_len;
672 
673 	header = get_bpf_header(m, eh_p, &evl, &header_len);
674 	(*func)(ifp, DLT_EN10MB, m, header, header_len);
675 }
676 
/* Tap an inbound packet to BPF listeners attached to 'ifp'. */
static inline void
fake_bpf_tap_in(ifnet_t ifp, mbuf_t m, struct ether_header * eh_p)
{
	fake_bpf_tap_common(ifp, m, eh_p, bpf_tap_in);
}
682 
683 
/* Tap an outbound packet to BPF listeners attached to 'ifp'. */
static inline void
fake_bpf_tap_out(ifnet_t ifp, mbuf_t m, struct ether_header * eh_p)
{
	fake_bpf_tap_common(ifp, m, eh_p, bpf_tap_out);
}
689 
690 /**
691 ** virtual ethernet structures, types
692 **/
693 
694 #define IFF_NUM_TX_RINGS_WMM_MODE       4
695 #define IFF_NUM_RX_RINGS_WMM_MODE       1
696 #define IFF_MAX_TX_RINGS        IFF_NUM_TX_RINGS_WMM_MODE
697 #define IFF_MAX_RX_RINGS        IFF_NUM_RX_RINGS_WMM_MODE
698 #define IFF_NUM_TX_QUEUES_WMM_MODE      4
699 #define IFF_NUM_RX_QUEUES_WMM_MODE      1
700 #define IFF_MAX_TX_QUEUES       IFF_NUM_TX_QUEUES_WMM_MODE
701 #define IFF_MAX_RX_QUEUES       IFF_NUM_RX_QUEUES_WMM_MODE
702 
703 #define IFF_MAX_BATCH_SIZE 32
704 
705 typedef uint16_t        iff_flags_t;
706 #define IFF_FLAGS_HWCSUM                0x0001
707 #define IFF_FLAGS_BSD_MODE              0x0002
708 #define IFF_FLAGS_DETACHING             0x0004
709 #define IFF_FLAGS_WMM_MODE              0x0008
710 #define IFF_FLAGS_MULTIBUFLETS          0x0010
711 #define IFF_FLAGS_TSO_SUPPORT           0x0020
712 #define IFF_FLAGS_LRO                   0x0040
713 #define IFF_FLAGS_VLAN_MTU              0x0080
714 #define IFF_FLAGS_VLAN_TAGGING          0x0100
715 #define IFF_FLAGS_SEPARATE_FRAME_HEADER 0x0200
716 
717 #if SKYWALK
718 
719 typedef struct {
720 	uuid_t                  fnx_provider;
721 	uuid_t                  fnx_instance;
722 } fake_nx, *fake_nx_t;
723 
724 typedef struct {
725 	kern_netif_queue_t      fq_queue;
726 } fake_queue;
727 
728 typedef struct {
729 	kern_netif_qset_t       fqs_qset; /* provided by xnu */
730 	fake_queue              fqs_rx_queue[IFF_MAX_RX_QUEUES];
731 	fake_queue              fqs_tx_queue[IFF_MAX_TX_QUEUES];
732 	uint32_t                fqs_rx_queue_cnt;
733 	uint32_t                fqs_tx_queue_cnt;
734 	uint32_t                fqs_llink_idx;
735 	uint32_t                fqs_idx;
736 	uint32_t                fqs_dequeue_cnt;
737 	uint64_t                fqs_id;
738 	boolean_t               fqs_combined_mode;
739 } fake_qset;
740 
741 typedef struct {
742 	uint64_t                fl_id;
743 	uint32_t                fl_idx;
744 	uint32_t                fl_qset_cnt;
745 	fake_qset               fl_qset[FETH_MAX_QSETS];
746 } fake_llink;
747 
748 static kern_pbufpool_t         S_pp;
749 
750 #define IFF_TT_OUTPUT   0x01 /* generate trace_tag on output */
751 #define IFF_TT_INPUT    0x02 /* generate trace_tag on input */
752 static int if_fake_trace_tag_flags = 0;
753 SYSCTL_INT(_net_link_fake, OID_AUTO, trace_tag, CTLFLAG_RW | CTLFLAG_LOCKED,
754     &if_fake_trace_tag_flags, 0, "Fake interface generate trace_tag");
755 static packet_trace_tag_t if_fake_trace_tag_current = 1;
756 
757 #endif /* SKYWALK */
758 
/*
 * struct if_fake
 * - per-instance state for a fake ("feth") interface
 * - reference-counted via iff_retain_count; iff_peer is the ifnet this
 *   instance is crossed-over with so that its output becomes the peer's
 *   input
 */
struct if_fake {
	char                    iff_name[IFNAMSIZ]; /* our unique id */
	ifnet_t                 iff_ifp;
	iff_flags_t             iff_flags;      /* IFF_FLAGS_* bits */
	uint32_t                iff_retain_count;
	ifnet_t                 iff_peer;       /* the other end */
	int                     iff_media_current;
	int                     iff_media_active;
	uint32_t                iff_media_count;
	int                     iff_media_list[IF_FAKE_MEDIA_LIST_MAX];
	boolean_t               iff_start_busy;
	unsigned int            iff_max_mtu;
	uint32_t                iff_fcs;        /* frame check sequence in use */
	uint32_t                iff_trailer_length;
#if SKYWALK
	fake_nx                 iff_nx;         /* netif nexus provider/instance UUIDs */
	struct netif_stats      *iff_nifs;
	uint32_t                iff_nifs_ref;
	uint32_t                iff_llink_cnt;  /* number of logical links */
	kern_channel_ring_t     iff_rx_ring[IFF_MAX_RX_RINGS];
	kern_channel_ring_t     iff_tx_ring[IFF_MAX_TX_RINGS];
	fake_llink             *iff_llink __counted_by(FETH_MAX_LLINKS);
	thread_call_t           iff_doorbell_tcall;
	thread_call_t           iff_if_adv_tcall; /* interface advisory timer */
	boolean_t               iff_doorbell_tcall_active;
	boolean_t               iff_waiting_for_tcall;
	boolean_t               iff_channel_connected;
	iff_pktpool_mode_t      iff_pp_mode;    /* global/private/split packet pools */
	kern_pbufpool_t         iff_rx_pp;
	kern_pbufpool_t         iff_tx_pp;
	uint32_t                iff_tx_headroom;
	unsigned int            iff_adv_interval; /* ms; 0 disables advisories */
	uint32_t                iff_tx_drop_rate; /* drop one in N on Tx; 0 = none */
	uint32_t                iff_tx_pkts_count;
	iff_tx_completion_mode_t iff_tx_completion_mode;
	bool                    iff_intf_adv_enabled;
	void                    *iff_intf_adv_kern_ctx;
	kern_nexus_capab_interface_advisory_notify_fn_t iff_intf_adv_notify;
	iff_tx_exp_policy_t     iff_tx_exp_policy;
#endif /* SKYWALK */
};
800 
801 typedef struct if_fake * if_fake_ref;
802 
803 static if_fake_ref
804 ifnet_get_if_fake(ifnet_t ifp);
805 
806 static inline boolean_t
feth_in_bsd_mode(if_fake_ref fakeif)807 feth_in_bsd_mode(if_fake_ref fakeif)
808 {
809 	return (fakeif->iff_flags & IFF_FLAGS_BSD_MODE) != 0;
810 }
811 
812 static inline void
feth_set_detaching(if_fake_ref fakeif)813 feth_set_detaching(if_fake_ref fakeif)
814 {
815 	fakeif->iff_flags |= IFF_FLAGS_DETACHING;
816 }
817 
818 static inline boolean_t
feth_is_detaching(if_fake_ref fakeif)819 feth_is_detaching(if_fake_ref fakeif)
820 {
821 	return (fakeif->iff_flags & IFF_FLAGS_DETACHING) != 0;
822 }
823 
824 static int
feth_enable_dequeue_stall(ifnet_t ifp,uint32_t enable)825 feth_enable_dequeue_stall(ifnet_t ifp, uint32_t enable)
826 {
827 	int error;
828 
829 	if (enable != 0) {
830 		error = ifnet_disable_output(ifp);
831 	} else {
832 		error = ifnet_enable_output(ifp);
833 	}
834 
835 	return error;
836 }
837 
#if SKYWALK
/* TRUE when the interface operates in 802.11 WMM mode */
static inline boolean_t
feth_in_wmm_mode(if_fake_ref fakeif)
{
	return (fakeif->iff_flags & IFF_FLAGS_WMM_MODE) ? TRUE : FALSE;
}

/* TRUE when the interface uses multi-buflet packets */
static inline boolean_t
feth_using_multibuflets(if_fake_ref fakeif)
{
	return (fakeif->iff_flags & IFF_FLAGS_MULTIBUFLETS) ? TRUE : FALSE;
}
static void feth_detach_netif_nexus(if_fake_ref fakeif);

/* a non-zero advisory interval means interface advisories are enabled */
static inline boolean_t
feth_has_intf_advisory_configured(if_fake_ref fakeif)
{
	return fakeif->iff_adv_interval != 0;
}
#endif /* SKYWALK */
858 
859 static inline bool
feth_supports_tso(if_fake_ref fakeif)860 feth_supports_tso(if_fake_ref fakeif)
861 {
862 	return (fakeif->iff_flags & IFF_FLAGS_TSO_SUPPORT) != 0;
863 }
864 
865 static inline void
feth_set_supports_tso(if_fake_ref fakeif)866 feth_set_supports_tso(if_fake_ref fakeif)
867 {
868 	fakeif->iff_flags |= IFF_FLAGS_TSO_SUPPORT;
869 }
870 
871 static inline bool
feth_supports_vlan_mtu(if_fake_ref fakeif)872 feth_supports_vlan_mtu(if_fake_ref fakeif)
873 {
874 	return (fakeif->iff_flags & IFF_FLAGS_VLAN_MTU) != 0;
875 }
876 
877 static inline void
feth_set_supports_vlan_mtu(if_fake_ref fakeif)878 feth_set_supports_vlan_mtu(if_fake_ref fakeif)
879 {
880 	fakeif->iff_flags |= IFF_FLAGS_VLAN_MTU;
881 }
882 
883 static inline bool
feth_supports_vlan_tagging(if_fake_ref fakeif)884 feth_supports_vlan_tagging(if_fake_ref fakeif)
885 {
886 	return (fakeif->iff_flags & IFF_FLAGS_VLAN_TAGGING) != 0;
887 }
888 
static inline void
feth_set_supports_vlan_tagging(if_fake_ref fakeif)
{
	/* mark the interface as supporting hardware-style VLAN tagging */
	fakeif->iff_flags |= IFF_FLAGS_VLAN_TAGGING;
}
894 
895 
#define FETH_MAXUNIT    IF_MAXUNIT
#define FETH_ZONE_MAX_ELEM      MIN(IFNETS_MAX, FETH_MAXUNIT)

/* if_clone callbacks and ifnet handlers implemented later in this file */
static  int feth_clone_create(struct if_clone *, u_int32_t, void *);
static  int feth_clone_destroy(ifnet_t);
static  int feth_output(ifnet_t ifp, struct mbuf *m);
static  void feth_start(ifnet_t ifp);
static  int feth_ioctl(ifnet_t ifp, u_long cmd, void * addr);
static  int feth_config(ifnet_t ifp, ifnet_t peer);
static  void feth_if_free(ifnet_t ifp);
static  void feth_ifnet_set_attrs(if_fake_ref fakeif, ifnet_t ifp);
static  void feth_free(if_fake_ref fakeif);

/* cloner providing the "feth<unit>" interface family */
static struct if_clone
    feth_cloner = IF_CLONE_INITIALIZER(FAKE_ETHER_NAME,
    feth_clone_create,
    feth_clone_destroy,
    0,
    FETH_MAXUNIT);
static  void interface_link_event(ifnet_t ifp, u_int32_t event_code);

/* some media words to pretend to be ethernet */
#define FAKE_DEFAULT_MEDIA      IFM_MAKEWORD(IFM_ETHER, IFM_10G_T, IFM_FDX, 0)
static int default_media_words[] = {
	IFM_MAKEWORD(IFM_ETHER, 0, 0, 0),
	FAKE_DEFAULT_MEDIA,
	IFM_MAKEWORD(IFM_ETHER, IFM_2500_T, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_5000_T, IFM_FDX, 0),

	IFM_MAKEWORD(IFM_ETHER, IFM_10G_KX4, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_20G_KR2, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_2500_SX, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_25G_KR, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_40G_SR4, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_50G_CR2, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_56G_R4, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_100G_CR4, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_400G_AUI8, IFM_FDX, 0),
};
/* number of entries in default_media_words */
#define default_media_words_count (sizeof(default_media_words)          \
	                           / sizeof (default_media_words[0]))
937 
938 /**
939 ** veth locks
940 **/
941 
942 static LCK_GRP_DECLARE(feth_lck_grp, "fake");
943 static LCK_MTX_DECLARE(feth_lck_mtx, &feth_lck_grp);
944 
static inline void
feth_lock(void)
{
	/* acquire the global feth mutex protecting module-wide state */
	lck_mtx_lock(&feth_lck_mtx);
}
950 
static inline void
feth_unlock(void)
{
	/* release the global feth mutex */
	lck_mtx_unlock(&feth_lck_mtx);
}
956 
957 static inline int
get_max_mtu(int bsd_mode,unsigned int max_mtu)958 get_max_mtu(int bsd_mode, unsigned int max_mtu)
959 {
960 	unsigned int    mtu;
961 
962 	if (bsd_mode != 0) {
963 		mtu = (njcl > 0) ? (M16KCLBYTES - ETHER_HDR_LEN)
964 		    : MBIGCLBYTES - ETHER_HDR_LEN;
965 		if (mtu > max_mtu) {
966 			mtu = max_mtu;
967 		}
968 	} else {
969 		mtu = max_mtu;
970 	}
971 	return mtu;
972 }
973 
974 static inline unsigned int
feth_max_mtu(ifnet_t ifp)975 feth_max_mtu(ifnet_t ifp)
976 {
977 	if_fake_ref     fakeif;
978 	unsigned int    max_mtu = ETHERMTU;
979 
980 	feth_lock();
981 	fakeif = ifnet_get_if_fake(ifp);
982 	if (fakeif != NULL) {
983 		max_mtu = fakeif->iff_max_mtu;
984 	}
985 	feth_unlock();
986 	return max_mtu;
987 }
988 
static void
feth_free(if_fake_ref fakeif)
{
	/*
	 * Final teardown of an if_fake instance; invoked by feth_release()
	 * when the last reference is dropped.
	 */
	VERIFY(fakeif->iff_retain_count == 0);
#if SKYWALK
	if (!feth_in_bsd_mode(fakeif)) {
		/* drop the skywalk packet pool references */
		if (fakeif->iff_pp_mode == IFF_PP_MODE_GLOBAL) {
			/* both directions alias the shared global pool */
			VERIFY(fakeif->iff_rx_pp == S_pp);
			VERIFY(fakeif->iff_tx_pp == S_pp);
			pp_release(fakeif->iff_rx_pp);
			fakeif->iff_rx_pp = NULL;
			pp_release(fakeif->iff_tx_pp);
			fakeif->iff_tx_pp = NULL;
			feth_lock();
			/*
			 * Tear down the shared pool itself when only one
			 * reference remains — presumably the creation
			 * reference (no other interface is using it).
			 */
			if (S_pp != NULL && S_pp->pp_refcnt == 1) {
				pp_release(S_pp);
				S_pp = NULL;
			}
			feth_unlock();
		} else {
			/* private pools: rx/tx may alias or be split */
			if (fakeif->iff_rx_pp != NULL) {
				pp_release(fakeif->iff_rx_pp);
				fakeif->iff_rx_pp = NULL;
			}
			if (fakeif->iff_tx_pp != NULL) {
				pp_release(fakeif->iff_tx_pp);
				fakeif->iff_tx_pp = NULL;
			}
		}
	}
#endif /* SKYWALK */

	FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE, "%s", fakeif->iff_name);
	kfree_type(fake_llink, FETH_MAX_LLINKS, fakeif->iff_llink);
	kfree_type(struct if_fake, fakeif);
}
1025 
1026 static void
feth_release(if_fake_ref fakeif)1027 feth_release(if_fake_ref fakeif)
1028 {
1029 	u_int32_t               old_retain_count;
1030 
1031 	old_retain_count = OSDecrementAtomic(&fakeif->iff_retain_count);
1032 	switch (old_retain_count) {
1033 	case 0:
1034 		VERIFY(old_retain_count != 0);
1035 		break;
1036 	case 1:
1037 		feth_free(fakeif);
1038 		break;
1039 	default:
1040 		break;
1041 	}
1042 	return;
1043 }
1044 
1045 #if SKYWALK
1046 
static void
feth_retain(if_fake_ref fakeif)
{
	/* take an additional reference; paired with feth_release() */
	OSIncrementAtomic(&fakeif->iff_retain_count);
}
1052 
1053 static void
feth_packet_pool_init_prepare(if_fake_ref fakeif,struct kern_pbufpool_init * pp_init)1054 feth_packet_pool_init_prepare(if_fake_ref fakeif,
1055     struct kern_pbufpool_init *pp_init)
1056 {
1057 	uint32_t max_mtu = fakeif->iff_max_mtu;
1058 	uint32_t buflet_size = if_fake_buflet_size;
1059 
1060 	bzero(pp_init, sizeof(*pp_init));
1061 	pp_init->kbi_version = KERN_PBUFPOOL_CURRENT_VERSION;
1062 	pp_init->kbi_flags |= KBIF_VIRTUAL_DEVICE;
1063 	pp_init->kbi_packets = 1024; /* TBD configurable */
1064 	if (feth_supports_tso(fakeif)) {
1065 		buflet_size = if_fake_tso_buffer_size;
1066 	}
1067 	if (feth_using_multibuflets(fakeif)) {
1068 		pp_init->kbi_bufsize = buflet_size;
1069 		pp_init->kbi_max_frags = howmany(max_mtu, buflet_size);
1070 		pp_init->kbi_buflets = pp_init->kbi_packets *
1071 		    pp_init->kbi_max_frags;
1072 		pp_init->kbi_flags |= KBIF_BUFFER_ON_DEMAND;
1073 	} else {
1074 		pp_init->kbi_bufsize = max(max_mtu, buflet_size);
1075 		pp_init->kbi_max_frags = 1;
1076 		pp_init->kbi_buflets = pp_init->kbi_packets;
1077 	}
1078 	pp_init->kbi_buf_seg_size = skmem_usr_buf_seg_size;
1079 	if (if_fake_user_access != 0) {
1080 		pp_init->kbi_flags |= KBIF_USER_ACCESS;
1081 	}
1082 	pp_init->kbi_ctx = NULL;
1083 	pp_init->kbi_ctx_retain = NULL;
1084 	pp_init->kbi_ctx_release = NULL;
1085 }
1086 
1087 static errno_t
feth_packet_pool_make(if_fake_ref fakeif)1088 feth_packet_pool_make(if_fake_ref fakeif)
1089 {
1090 	struct kern_pbufpool_init pp_init;
1091 	errno_t err;
1092 
1093 	feth_packet_pool_init_prepare(fakeif, &pp_init);
1094 
1095 	switch (fakeif->iff_pp_mode) {
1096 	case IFF_PP_MODE_GLOBAL:
1097 		feth_lock();
1098 		if (S_pp == NULL) {
1099 			(void)snprintf((char *)pp_init.kbi_name,
1100 			    sizeof(pp_init.kbi_name), "%s", "feth shared pp");
1101 			err = kern_pbufpool_create(&pp_init, &S_pp, NULL);
1102 		}
1103 		pp_retain(S_pp);
1104 		feth_unlock();
1105 		fakeif->iff_rx_pp = S_pp;
1106 		pp_retain(S_pp);
1107 		fakeif->iff_tx_pp = S_pp;
1108 		break;
1109 	case IFF_PP_MODE_PRIVATE:
1110 		(void)snprintf((char *)pp_init.kbi_name,
1111 		    sizeof(pp_init.kbi_name), "%s pp", fakeif->iff_name);
1112 		err = kern_pbufpool_create(&pp_init, &fakeif->iff_rx_pp, NULL);
1113 		pp_retain(fakeif->iff_rx_pp);
1114 		fakeif->iff_tx_pp = fakeif->iff_rx_pp;
1115 		break;
1116 	case IFF_PP_MODE_PRIVATE_SPLIT:
1117 		(void)snprintf((char *)pp_init.kbi_name,
1118 		    sizeof(pp_init.kbi_name), "%s rx pp", fakeif->iff_name);
1119 		pp_init.kbi_flags &= ~(KBIF_IODIR_IN | KBIF_IODIR_OUT |
1120 		    KBIF_BUFFER_ON_DEMAND | KBIF_KERNEL_READONLY);
1121 		pp_init.kbi_flags |= (KBIF_IODIR_IN | KBIF_BUFFER_ON_DEMAND);
1122 		pp_init.kbi_packets = 1024;
1123 		pp_init.kbi_bufsize = if_fake_link_layer_aggregation_factor * 1024;
1124 		err = kern_pbufpool_create(&pp_init, &fakeif->iff_rx_pp, NULL);
1125 		if (err != 0) {
1126 			FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
1127 			    "rx pp create failed %d", err);
1128 			return err;
1129 		}
1130 		pp_init.kbi_flags &= ~(KBIF_IODIR_IN | KBIF_IODIR_OUT |
1131 		    KBIF_BUFFER_ON_DEMAND | KBIF_KERNEL_READONLY);
1132 		pp_init.kbi_flags |= KBIF_IODIR_OUT;
1133 		pp_init.kbi_packets = 1024;            /* TBD configurable */
1134 		pp_init.kbi_bufsize = fakeif->iff_max_mtu;
1135 		(void)snprintf((char *)pp_init.kbi_name,
1136 		    sizeof(pp_init.kbi_name), "%s tx pp", fakeif->iff_name);
1137 		err = kern_pbufpool_create(&pp_init, &fakeif->iff_tx_pp, NULL);
1138 		if (err != 0) {
1139 			FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
1140 			    "tx pp create failed %d", err);
1141 			pp_release(fakeif->iff_rx_pp);
1142 			return err;
1143 		}
1144 		break;
1145 	default:
1146 		VERIFY(0);
1147 		__builtin_unreachable();
1148 	}
1149 
1150 	return 0;
1151 }
1152 
1153 static void
feth_packet_set_trace_tag(kern_packet_t ph,int flag)1154 feth_packet_set_trace_tag(kern_packet_t ph, int flag)
1155 {
1156 	if (if_fake_trace_tag_flags & flag) {
1157 		if (++if_fake_trace_tag_current == 0) {
1158 			if_fake_trace_tag_current = 1;
1159 		}
1160 		kern_packet_set_trace_tag(ph, if_fake_trace_tag_current);
1161 	}
1162 }
1163 
static errno_t
feth_clone_packet(if_fake_ref dif, kern_packet_t sph, kern_packet_t *pdph)
{
	/*
	 * Copy the single-buflet source packet `sph' into destination
	 * interface `dif' (IFF_PP_MODE_PRIVATE_SPLIT path). When *pdph
	 * carries the previously produced packet and its underlying
	 * buffer object still has room past the data limit, the new
	 * payload is packed into that same buffer via a light clone;
	 * otherwise a fresh packet is allocated. On success *pdph is
	 * replaced with the new packet.
	 */
	errno_t err = 0;
	kern_pbufpool_t pp = dif->iff_rx_pp;
	kern_packet_t dph = 0, dph0 = 0;
	kern_buflet_t sbuf, dbuf0 = NULL, dbuf;
	void *saddr, *daddr;
	uint32_t soff, doff;
	uint32_t slen, dlen;
	uint32_t dlim0, dlim;

	/* destination reuses the source's data offset and length */
	sbuf = kern_packet_get_next_buflet(sph, NULL);
	saddr = kern_buflet_get_data_address(sbuf);
	doff = soff = kern_buflet_get_data_offset(sbuf);
	dlen = slen = kern_buflet_get_data_length(sbuf);

	/* packet clone is only supported for single-buflet */
	ASSERT(kern_packet_get_buflet_count(sph) == 1);
	ASSERT(soff == kern_packet_get_headroom(sph));
	ASSERT(slen == kern_packet_get_data_length(sph));

	dph0 = *pdph;
	if (dph0 == 0) {
		/* no previous packet: force a fresh allocation below */
		dlim0 = 0;
	} else {
		/*
		 * Room left in the previous packet's buffer object: the
		 * span between the end of its data limit and the end of
		 * the underlying buffer. Data limits stay 16-byte
		 * aligned, so packed payloads stay aligned too.
		 */
		dbuf0 = kern_packet_get_next_buflet(dph0, NULL);
		ASSERT(kern_buflet_get_object_limit(dbuf0) ==
		    PP_BUF_OBJ_SIZE_DEF(pp));
		ASSERT(kern_buflet_get_data_limit(dbuf0) % 16 == 0);
		dlim0 = ((uintptr_t)kern_buflet_get_object_address(dbuf0) +
		    kern_buflet_get_object_limit(dbuf0)) -
		    ((uintptr_t)kern_buflet_get_data_address(dbuf0) +
		    kern_buflet_get_data_limit(dbuf0));
	}

	if (doff + dlen > dlim0) {
		/* not enough room: allocate a new packet from the pool */
		err = kern_pbufpool_alloc_nosleep(pp, 1, &dph);
		if (err != 0) {
			STATS_INC(dif->iff_nifs, NETIF_STATS_DROP);
			STATS_INC(dif->iff_nifs, NETIF_STATS_DROP_NOMEM_PKT);
			return err;
		}
		dbuf = kern_packet_get_next_buflet(dph, NULL);
		ASSERT(kern_buflet_get_data_address(dbuf) ==
		    kern_buflet_get_object_address(dbuf));
		daddr = kern_buflet_get_data_address(dbuf);
		dlim = kern_buflet_get_object_limit(dbuf);
		ASSERT(dlim == PP_BUF_OBJ_SIZE_DEF(pp));
	} else {
		/*
		 * Enough room: clone the previous packet (sharing its
		 * buffer) and point the clone's data area just past the
		 * previous packet's data limit.
		 */
		err = kern_packet_clone_nosleep(dph0, &dph, KPKT_COPY_LIGHT);
		if (err != 0) {
			FAKE_LOG(LOG_INFO, FE_DBGF_OUTPUT,
			    "packet clone err %d", err);
			return err;
		}
		dbuf = kern_packet_get_next_buflet(dph, NULL);
		ASSERT(kern_buflet_get_object_address(dbuf) ==
		    kern_buflet_get_object_address(dbuf0));
		daddr = (void *)((uintptr_t)kern_buflet_get_data_address(dbuf0) +
		    kern_buflet_get_data_limit(dbuf0));
		dlim = dlim0;
	}

	ASSERT(doff + dlen <= dlim);

	ASSERT((uintptr_t)daddr % 16 == 0);

	/* copy the payload into the destination data area */
	bcopy((const void *)((uintptr_t)saddr + soff),
	    (void *)((uintptr_t)daddr + doff), slen);

	/* trim the data limit to a 16-byte-aligned end of payload */
	dlim = MIN(dlim, P2ROUNDUP(doff + dlen, 16));
	err = kern_buflet_set_data_address(dbuf, daddr);
	VERIFY(err == 0);
	err = kern_buflet_set_data_limit(dbuf, dlim);
	VERIFY(err == 0);
	err = kern_buflet_set_data_length(dbuf, dlen);
	VERIFY(err == 0);
	err = kern_buflet_set_data_offset(dbuf, doff);
	VERIFY(err == 0);
	err = kern_packet_set_headroom(dph, doff);
	VERIFY(err == 0);
	err = kern_packet_set_link_header_length(dph,
	    kern_packet_get_link_header_length(sph));
	VERIFY(err == 0);
	err = kern_packet_set_service_class(dph,
	    kern_packet_get_service_class(sph));
	VERIFY(err == 0);
	err = kern_packet_finalize(dph);
	VERIFY(err == 0);
	*pdph = dph;

	return err;
}
1258 
1259 static inline void
feth_copy_buflet(kern_buflet_t sbuf,kern_buflet_t dbuf)1260 feth_copy_buflet(kern_buflet_t sbuf, kern_buflet_t dbuf)
1261 {
1262 	errno_t err;
1263 	uint32_t off, len;
1264 	uint8_t *saddr, *daddr;
1265 
1266 	saddr = kern_buflet_get_data_address(sbuf);
1267 	off = kern_buflet_get_data_offset(sbuf);
1268 	len = kern_buflet_get_data_length(sbuf);
1269 	daddr = kern_buflet_get_data_address(dbuf);
1270 	bcopy((saddr + off), (daddr + off), len);
1271 	err = kern_buflet_set_data_offset(dbuf, off);
1272 	VERIFY(err == 0);
1273 	err = kern_buflet_set_data_length(dbuf, len);
1274 	VERIFY(err == 0);
1275 }
1276 
1277 static int
feth_add_packet_trailer(kern_packet_t ph,void * trailer,size_t trailer_len)1278 feth_add_packet_trailer(kern_packet_t ph, void *trailer, size_t trailer_len)
1279 {
1280 	errno_t err = 0;
1281 
1282 	ASSERT(trailer_len <= FETH_TRAILER_LENGTH_MAX);
1283 
1284 	kern_buflet_t buf = NULL, iter = NULL;
1285 	while ((iter = kern_packet_get_next_buflet(ph, iter)) != NULL) {
1286 		buf = iter;
1287 	}
1288 	ASSERT(buf != NULL);
1289 
1290 	uint32_t dlim = kern_buflet_get_data_limit(buf);
1291 	uint32_t doff = kern_buflet_get_data_offset(buf);
1292 	uint32_t dlen = kern_buflet_get_data_length(buf);
1293 
1294 	size_t trailer_room = dlim - doff - dlen;
1295 
1296 	if (trailer_room < trailer_len) {
1297 		FAKE_LOG(LOG_INFO, FE_DBGF_OUTPUT, "not enough room");
1298 		return ERANGE;
1299 	}
1300 
1301 	void *data = (void *)((uintptr_t)kern_buflet_get_data_address(buf) + doff + dlen);
1302 	memcpy(data, trailer, trailer_len);
1303 
1304 	err = kern_buflet_set_data_length(buf, dlen + trailer_len);
1305 	VERIFY(err == 0);
1306 
1307 	err = kern_packet_finalize(ph);
1308 	VERIFY(err == 0);
1309 
1310 	FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT, "%zuB trailer added", trailer_len);
1311 
1312 	return 0;
1313 }
1314 
1315 static int
feth_add_packet_fcs(kern_packet_t ph)1316 feth_add_packet_fcs(kern_packet_t ph)
1317 {
1318 	uint32_t crc = 0;
1319 	int err;
1320 
1321 	ASSERT(sizeof(crc) == ETHER_CRC_LEN);
1322 
1323 	kern_buflet_t buf = NULL;
1324 	while ((buf = kern_packet_get_next_buflet(ph, buf)) != NULL) {
1325 		uint32_t doff = kern_buflet_get_data_offset(buf);
1326 		uint32_t dlen = kern_buflet_get_data_length(buf);
1327 		void *data = (void *)((uintptr_t)kern_buflet_get_data_address(buf) + doff);
1328 		crc = crc32(crc, data, dlen);
1329 	}
1330 
1331 	err = feth_add_packet_trailer(ph, &crc, ETHER_CRC_LEN);
1332 	if (!err) {
1333 		return err;
1334 	}
1335 
1336 	err = kern_packet_set_link_ethfcs(ph);
1337 	VERIFY(err == 0);
1338 
1339 	return 0;
1340 }
1341 
static errno_t
feth_copy_packet(if_fake_ref dif, kern_packet_t sph, kern_packet_t *pdph)
{
	/*
	 * Deep-copy packet `sph' into a new packet allocated from the
	 * destination interface's RX pool (IFF_PP_MODE_PRIVATE path).
	 * Multi-buflet sources are copied buflet by buflet, allocating
	 * additional buflets on demand. On success *pdph holds the new,
	 * finalized packet; on failure it is left at 0 and any partially
	 * built packet is freed.
	 */
	errno_t err = 0;
	uint16_t i, bufcnt;
	mach_vm_address_t baddr;
	kern_buflet_t sbuf = NULL, dbuf = NULL;
	kern_pbufpool_t pp = dif->iff_rx_pp;
	kern_packet_t dph;
	boolean_t multi_buflet = feth_using_multibuflets(dif);

	bufcnt = kern_packet_get_buflet_count(sph);
	ASSERT((bufcnt == 1) || multi_buflet);
	*pdph = 0;

	err = kern_pbufpool_alloc_nosleep(pp, 1, &dph);
	if (err != 0) {
		STATS_INC(dif->iff_nifs, NETIF_STATS_DROP);
		STATS_INC(dif->iff_nifs, NETIF_STATS_DROP_NOMEM_PKT);
		return err;
	}

	/* pre-constructed single buflet packet copy */
	sbuf = kern_packet_get_next_buflet(sph, NULL);
	dbuf = kern_packet_get_next_buflet(dph, NULL);
	feth_copy_buflet(sbuf, dbuf);

	if (!multi_buflet) {
		goto done;
	}

	/* un-constructed multi-buflet packet copy */
	for (i = 1; i < bufcnt; i++) {
		kern_buflet_t dbuf_next = NULL;

		sbuf = kern_packet_get_next_buflet(sph, sbuf);
		VERIFY(sbuf != NULL);
		/* allocate and chain one destination buflet per source */
		err = kern_pbufpool_alloc_buflet_nosleep(pp, &dbuf_next);
		if (err != 0) {
			STATS_INC(dif->iff_nifs, NETIF_STATS_DROP);
			STATS_INC(dif->iff_nifs, NETIF_STATS_DROP_NOMEM_BUF);
			break;
		}
		ASSERT(dbuf_next != NULL);
		feth_copy_buflet(sbuf, dbuf_next);
		err = kern_packet_add_buflet(dph, dbuf, dbuf_next);
		VERIFY(err == 0);
		dbuf = dbuf_next;
	}
	if (__improbable(err != 0)) {
		/*
		 * Allocation failed mid-chain: walk the buflets already
		 * attached (sanity-checking their buffers) and free the
		 * partial packet; the pool free reclaims the chain.
		 */
		dbuf = NULL;
		while (i-- != 0) {
			dbuf = kern_packet_get_next_buflet(dph, dbuf);
			VERIFY(dbuf != NULL);
			baddr = (mach_vm_address_t)
			    kern_buflet_get_data_address(dbuf);
			VERIFY(baddr != 0);
		}
		kern_pbufpool_free(pp, dph);
		dph = 0;
	}

done:
	if (__probable(err == 0)) {
		/* propagate metadata from the source and finalize */
		err = kern_packet_set_headroom(dph,
		    kern_packet_get_headroom(sph));
		VERIFY(err == 0);
		err = kern_packet_set_link_header_length(dph,
		    kern_packet_get_link_header_length(sph));
		VERIFY(err == 0);
		err = kern_packet_set_service_class(dph,
		    kern_packet_get_service_class(sph));
		VERIFY(err == 0);
		err = kern_packet_finalize(dph);
		VERIFY(err == 0);
		VERIFY(bufcnt == kern_packet_get_buflet_count(dph));
		*pdph = dph;
	}
	return err;
}
1422 
1423 static inline void
feth_update_pkt_tso_metadata_for_rx(kern_packet_t ph)1424 feth_update_pkt_tso_metadata_for_rx(kern_packet_t ph)
1425 {
1426 	/*
1427 	 * Nothing to do if not a TSO offloaded packet.
1428 	 */
1429 	uint16_t seg_sz = 0;
1430 	seg_sz = kern_packet_get_protocol_segment_size(ph);
1431 	if (seg_sz == 0) {
1432 		return;
1433 	}
1434 	/*
1435 	 * For RX, make the packet appear as a fully validated LRO packet.
1436 	 */
1437 	packet_csum_flags_t csum_flags = PACKET_CSUM_IP_CHECKED |
1438 	    PACKET_CSUM_IP_VALID | PACKET_CSUM_DATA_VALID |
1439 	    PACKET_CSUM_PSEUDO_HDR;
1440 	(void) kern_packet_set_inet_checksum(ph, csum_flags, 0, 0xFFFF, FALSE);
1441 	return;
1442 }
1443 
1444 static void
feth_rx_submit(if_fake_ref sif,if_fake_ref dif,kern_packet_t sphs[],uint32_t n_pkts)1445 feth_rx_submit(if_fake_ref sif, if_fake_ref dif, kern_packet_t sphs[],
1446     uint32_t n_pkts)
1447 {
1448 	errno_t err = 0;
1449 	struct kern_channel_ring_stat_increment stats;
1450 	kern_channel_ring_t rx_ring = NULL;
1451 	kern_channel_slot_t rx_slot = NULL, last_rx_slot = NULL;
1452 	kern_packet_t sph = 0, dph = 0;
1453 
1454 	memset(&stats, 0, sizeof(stats));
1455 
1456 	rx_ring = dif->iff_rx_ring[0];
1457 	if (rx_ring == NULL) {
1458 		return;
1459 	}
1460 
1461 	kr_enter(rx_ring, TRUE);
1462 	kern_channel_reclaim(rx_ring);
1463 	rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);
1464 
1465 	for (uint32_t i = 0; i < n_pkts && rx_slot != NULL; i++) {
1466 		sph = sphs[i];
1467 
1468 		switch (dif->iff_pp_mode) {
1469 		case IFF_PP_MODE_GLOBAL:
1470 			sphs[i] = 0;
1471 			dph = sph;
1472 			feth_update_pkt_tso_metadata_for_rx(dph);
1473 			err = kern_packet_finalize(dph);
1474 			VERIFY(err == 0);
1475 			break;
1476 		case IFF_PP_MODE_PRIVATE:
1477 			err = feth_copy_packet(dif, sph, &dph);
1478 			break;
1479 		case IFF_PP_MODE_PRIVATE_SPLIT:
1480 			err = feth_clone_packet(dif, sph, &dph);
1481 			break;
1482 		default:
1483 			VERIFY(0);
1484 			__builtin_unreachable();
1485 		}
1486 		if (__improbable(err != 0)) {
1487 			continue;
1488 		}
1489 
1490 		if (sif->iff_trailer_length != 0) {
1491 			feth_add_packet_trailer(dph, feth_trailer,
1492 			    sif->iff_trailer_length);
1493 		}
1494 		if (sif->iff_fcs != 0) {
1495 			feth_add_packet_fcs(dph);
1496 		}
1497 		feth_packet_set_trace_tag(dph, IFF_TT_INPUT);
1498 		bpf_tap_packet_in(dif->iff_ifp, DLT_EN10MB, dph, NULL, 0);
1499 		stats.kcrsi_slots_transferred++;
1500 		stats.kcrsi_bytes_transferred
1501 		        += kern_packet_get_data_length(dph);
1502 
1503 		/* attach the packet to the RX ring */
1504 		err = kern_channel_slot_attach_packet(rx_ring, rx_slot, dph);
1505 		VERIFY(err == 0);
1506 		last_rx_slot = rx_slot;
1507 		rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
1508 	}
1509 
1510 	if (last_rx_slot != NULL) {
1511 		kern_channel_advance_slot(rx_ring, last_rx_slot);
1512 		kern_channel_increment_ring_net_stats(rx_ring, dif->iff_ifp,
1513 		    &stats);
1514 	}
1515 
1516 	if (rx_ring != NULL) {
1517 		kr_exit(rx_ring);
1518 		kern_channel_notify(rx_ring, 0);
1519 	}
1520 }
1521 
static void
feth_rx_queue_submit(if_fake_ref sif, if_fake_ref dif, uint32_t llink_idx,
    uint32_t qset_idx, kern_packet_t sphs[], uint32_t n_pkts)
{
	/*
	 * Logical-link variant of feth_rx_submit(): deliver `n_pkts'
	 * packets from `sif' to the default RX queue of the given
	 * logical link / queue set on the peer `dif'. Invalid indices
	 * or a missing default queue cause the whole batch to be
	 * silently dropped (logged at debug level).
	 */
	errno_t err = 0;
	kern_netif_queue_t queue;
	kern_packet_t sph = 0, dph = 0;
	fake_llink *llink;
	fake_qset *qset;

	/* validate the logical link index against the peer's config */
	if (llink_idx >= dif->iff_llink_cnt) {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_INPUT,
		    "invalid llink_idx idx %d (max %d) on peer %s",
		    llink_idx, dif->iff_llink_cnt, dif->iff_name);
		return;
	}
	llink = &dif->iff_llink[llink_idx];
	/* validate the queue set index within the logical link */
	if (qset_idx >= llink->fl_qset_cnt) {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_INPUT,
		    "invalid qset_idx %d (max %d) on peer %s",
		    qset_idx, llink->fl_qset_cnt, dif->iff_name);
		return;
	}
	qset = &dif->iff_llink[llink_idx].fl_qset[qset_idx];
	/* packets go to the queue set's default (index 0) RX queue */
	queue = qset->fqs_rx_queue[0].fq_queue;
	if (queue == NULL) {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_INPUT,
		    "NULL default queue (llink_idx %d, qset_idx %d) on peer %s",
		    llink_idx, qset_idx, dif->iff_name);
		return;
	}
	for (uint32_t i = 0; i < n_pkts; i++) {
		uint32_t flags;

		sph = sphs[i];

		switch (dif->iff_pp_mode) {
		case IFF_PP_MODE_GLOBAL:
			/* shared pool: hand over directly; zero the entry */
			sphs[i] = 0;
			dph = sph;
			feth_update_pkt_tso_metadata_for_rx(dph);
			break;
		case IFF_PP_MODE_PRIVATE:
			err = feth_copy_packet(dif, sph, &dph);
			break;
		case IFF_PP_MODE_PRIVATE_SPLIT:
			err = feth_clone_packet(dif, sph, &dph);
			break;
		default:
			VERIFY(0);
			__builtin_unreachable();
		}
		if (__improbable(err != 0)) {
			/* copy/clone failed: skip this packet */
			continue;
		}

		/* apply the sender's configured trailer / FCS simulation */
		if (sif->iff_trailer_length != 0) {
			feth_add_packet_trailer(dph, feth_trailer,
			    sif->iff_trailer_length);
		}
		if (sif->iff_fcs != 0) {
			feth_add_packet_fcs(dph);
		}
		feth_packet_set_trace_tag(dph, IFF_TT_INPUT);
		bpf_tap_packet_in(dif->iff_ifp, DLT_EN10MB, dph, NULL, 0);

		/* flush the queue when enqueueing the final packet */
		flags = (i == n_pkts - 1) ?
		    KERN_NETIF_QUEUE_RX_ENQUEUE_FLAG_FLUSH : 0;
		kern_netif_queue_rx_enqueue(queue, dph, 1, flags);
	}
}
1593 
1594 static void
feth_tx_complete(if_fake_ref fakeif,kern_packet_t phs[],uint32_t nphs)1595 feth_tx_complete(if_fake_ref fakeif, kern_packet_t phs[], uint32_t nphs)
1596 {
1597 	for (uint32_t i = 0; i < nphs; i++) {
1598 		kern_packet_t ph = phs[i];
1599 		if (ph == 0) {
1600 			continue;
1601 		}
1602 		int err = kern_packet_set_tx_completion_status(ph, 0);
1603 		VERIFY(err == 0);
1604 		kern_packet_tx_completion(ph, fakeif->iff_ifp);
1605 		kern_pbufpool_free(fakeif->iff_tx_pp, phs[i]);
1606 		phs[i] = 0;
1607 	}
1608 }
1609 
1610 #define NSEC_PER_USEC 1000ull
1611 /*
1612  * Calculate the time delta that passed from `since' to `until'.
1613  * If `until' happens before `since', returns negative value.
1614  */
1615 static bool
feth_packet_has_expired(if_fake_ref __unused fakeif,kern_packet_t ph,uint64_t * out_deadline)1616 feth_packet_has_expired(if_fake_ref __unused fakeif, kern_packet_t ph,
1617     uint64_t *out_deadline)
1618 {
1619 	uint64_t now;
1620 	uint64_t packet_expire_time_mach;
1621 	int64_t time_until_expiration;
1622 	errno_t err;
1623 	bool expired = false;
1624 
1625 	static mach_timebase_info_data_t clock_timebase = {0, 0};
1626 
1627 	if (clock_timebase.denom == 0) {
1628 		clock_timebase_info(&clock_timebase);
1629 		VERIFY(clock_timebase.denom != 0);
1630 	}
1631 
1632 	err = kern_packet_get_expire_time(ph, &packet_expire_time_mach);
1633 	if (err) {
1634 		goto out;
1635 	}
1636 
1637 	now = mach_absolute_time();
1638 	time_until_expiration = packet_expire_time_mach - now;
1639 	if (time_until_expiration < 0) {
1640 		/* The packet had expired */
1641 		expired = true;
1642 		goto out;
1643 	}
1644 
1645 	/* Convert the time_delta from mach ticks to nanoseconds */
1646 	time_until_expiration *= clock_timebase.numer;
1647 	time_until_expiration /= clock_timebase.denom;
1648 	/* convert from nanoseconds to microseconds */
1649 	time_until_expiration /= 1000ull;
1650 
1651 	if (if_fake_expiration_threshold_us < time_until_expiration) {
1652 		/* packet has some life ahead of it */
1653 		FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
1654 		    "Packet has %llu usec until expiration",
1655 		    time_until_expiration);
1656 		goto out;
1657 	}
1658 
1659 out:
1660 	if (expired && out_deadline) {
1661 		*out_deadline = packet_expire_time_mach;
1662 	}
1663 
1664 	return expired;
1665 }
1666 
1667 static errno_t
feth_get_packet_notification_details(if_fake_ref fakeif,kern_packet_t ph,packet_id_t * pkt_id,uint32_t * nx_port_id)1668 feth_get_packet_notification_details(if_fake_ref fakeif, kern_packet_t ph,
1669     packet_id_t *pkt_id, uint32_t *nx_port_id)
1670 {
1671 	errno_t err = 0;
1672 
1673 	err = kern_packet_get_packetid(ph, pkt_id);
1674 	if (err != 0) {
1675 		FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
1676 		    "%s err=%d getting packetid", fakeif->iff_name, err);
1677 		return err;
1678 	}
1679 
1680 	err = kern_packet_get_tx_nexus_port_id(ph, nx_port_id);
1681 	if (err != 0) {
1682 		FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
1683 		    "%s err=%d getting nx_port_id", fakeif->iff_name, err);
1684 		return err;
1685 	}
1686 
1687 	return 0;
1688 }
1689 
static packet_expiry_action_t
feth_get_effective_expn_action(if_fake_ref fakeif, kern_packet_t ph)
{
	/*
	 * Map the interface's TX expiration policy (set via the
	 * net.link.fake.tx_exp_policy sysctl) to the expiry action to
	 * take for this packet. In METADATA mode the action is taken
	 * from the packet itself, defaulting to NONE when absent.
	 */
	errno_t err;
	packet_expiry_action_t expiry_action;

	switch (fakeif->iff_tx_exp_policy) {
	case IFF_TX_EXP_POLICY_DISABLED:
		expiry_action = PACKET_EXPIRY_ACTION_NONE;
		break;
	case IFF_TX_EXP_POLICY_NOTIFY_ONLY:
		expiry_action = PACKET_EXPIRY_ACTION_NOTIFY;
		break;
	case IFF_TX_EXP_POLICY_DROP_AND_NOTIFY:
		expiry_action = PACKET_EXPIRY_ACTION_DROP;
		break;
	case IFF_TX_EXP_POLICY_METADATA:
		/* consult the per-packet expiry action metadata */
		err = kern_packet_get_expiry_action(ph, &expiry_action);
		if (err != 0) {
			/* ENOENT just means no metadata; stay quiet */
			if (err != ENOENT) {
				FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
				    "Error %d when getting expiry action",
				    err);
			}
			expiry_action = PACKET_EXPIRY_ACTION_NONE;
		}
		break;
	default:
		FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
		    "Unrecognized value %d for \"net.link.fake.tx_exp_policy\"",
		    fakeif->iff_tx_exp_policy);
		expiry_action = PACKET_EXPIRY_ACTION_NONE;
	}

	return expiry_action;
}
1726 
/* returns true if the packet is selected for expiration and should be dropped */
static bool
feth_tx_expired_error(if_fake_ref fakeif, kern_packet_t ph)
{
	/*
	 * Simulate TX expiration handling: if the packet has expired (or
	 * is about to), apply the configured expiry action and post a
	 * "transmit expired" channel event to the originating nexus
	 * port. Returns true when the action is DROP, i.e. the caller
	 * must discard the packet.
	 */
	int err = 0;
	uint32_t nx_port_id = 0;
	os_channel_event_packet_transmit_expired_t expn = {0};
	packet_expiry_action_t expiry_action = PACKET_EXPIRY_ACTION_NONE;

	FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC, "%s", fakeif->iff_name);

	if (feth_packet_has_expired(fakeif, ph, &expn.packet_tx_expiration_deadline)) {
		expiry_action = feth_get_effective_expn_action(fakeif, ph);
	}

	bool drop_packet = (expiry_action == PACKET_EXPIRY_ACTION_DROP);
	if (expiry_action != PACKET_EXPIRY_ACTION_NONE) {
		/* set the expiration status code */
		expn.packet_tx_expiration_status = drop_packet ?
		    CHANNEL_EVENT_PKT_TRANSMIT_EXPIRED_ERR_EXPIRED_DROPPED :
		    CHANNEL_EVENT_PKT_TRANSMIT_EXPIRED_ERR_EXPIRED_NOT_DROPPED;

		/* Mark the expiration timestamp */
		expn.packet_tx_expiration_timestamp = mach_absolute_time();

		err = feth_get_packet_notification_details(fakeif, ph,
		    &expn.packet_id, &nx_port_id);

		if (err == 0) {
			/* deliver the expiry event to the originating port */
			err = kern_channel_event_transmit_expired(
				fakeif->iff_ifp, &expn, nx_port_id);
			FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
			    "%s sent expiry notification on nexus port "
			    "%u notif code %u",
			    fakeif->iff_name, nx_port_id,
			    expn.packet_tx_expiration_status);
		}
		if (err != 0) {
			FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
			    "%s err=%d, nx_port_id: 0x%x",
			    fakeif->iff_name, err, nx_port_id);
		}
	}

	return drop_packet;
}
1773 
/* returns true if the packet is selected for TX error & dropped */
static bool
feth_tx_complete_error(if_fake_ref fakeif, kern_packet_t ph)
{
	/*
	 * Simulate a TX completion error for one out of every
	 * iff_tx_drop_rate packets (iff_tx_pkts_count is presumably
	 * maintained by the caller — it is only compared here). In SYNC
	 * mode the error is reported through the packet's completion
	 * status; otherwise an async "transmit status" channel event is
	 * posted to the originating nexus port.
	 */
	int err;

	if (fakeif->iff_tx_drop_rate == 0 ||
	    fakeif->iff_tx_pkts_count != fakeif->iff_tx_drop_rate) {
		/* error injection disabled, or not this packet's turn */
		return false;
	}
	/* simulate TX completion error on the packet */
	if (fakeif->iff_tx_completion_mode == IFF_TX_COMPL_MODE_SYNC) {
		err = kern_packet_set_tx_completion_status(ph,
		    CHANNEL_EVENT_PKT_TRANSMIT_STATUS_ERR_RETRY_FAILED);
		VERIFY(err == 0);
		kern_packet_tx_completion(ph, fakeif->iff_ifp);
	} else {
		uint32_t nx_port_id = 0;
		os_channel_event_packet_transmit_status_t pkt_tx_status = {0};

		pkt_tx_status.packet_status =
		    CHANNEL_EVENT_PKT_TRANSMIT_STATUS_ERR_RETRY_FAILED;
		err = feth_get_packet_notification_details(fakeif, ph,
		    &pkt_tx_status.packet_id, &nx_port_id);
		if (err == 0) {
			err = kern_channel_event_transmit_status(
				fakeif->iff_ifp, &pkt_tx_status, nx_port_id);
		}
		if (err != 0) {
			FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
			    "%s err=%d, nx_port_id: 0x%x",
			    fakeif->iff_name, err, nx_port_id);
		}
	}

	return true;
}
1811 
static void
feth_if_adv(thread_call_param_t arg0, thread_call_param_t arg1)
{
#pragma unused(arg1)
	/*
	 * Interface-advisory thread call: periodically synthesize a
	 * fake TX capacity advisory report and deliver it via the
	 * registered notify callback, then re-arm itself while the
	 * channel stays connected and the interface is not detaching.
	 */
	errno_t                            error;
	if_fake_ref                        fakeif = (if_fake_ref)arg0;
	struct ifnet_interface_advisory    if_adv;
	struct ifnet_stats_param           if_stat;

	feth_lock();
	if (feth_is_detaching(fakeif) || !fakeif->iff_channel_connected) {
		feth_unlock();
		return;
	}
	feth_unlock();

	/*
	 * NOTE(review): iff_intf_adv_enabled and iff_channel_connected
	 * are re-read below without the feth lock held — presumably
	 * benign for this test interface; confirm if hardening.
	 */
	if (!fakeif->iff_intf_adv_enabled) {
		goto done;
	}

	error = ifnet_stat(fakeif->iff_ifp, &if_stat);
	if (error != 0) {
		FAKE_LOG(LOG_NOTICE, 0, "%s: ifnet_stat() failed %d",
		    fakeif->iff_name, error);
		goto done;
	}
	/* fabricate a plausible-looking 1Gbps WiFi TX advisory */
	if_adv.header.version = IF_INTERFACE_ADVISORY_VERSION_CURRENT;
	if_adv.header.direction = IF_INTERFACE_ADVISORY_DIRECTION_TX;
	if_adv.header.interface_type =
	    IF_INTERFACE_ADVISORY_INTERFACE_TYPE_WIFI;
	if_adv.capacity.timestamp = mach_absolute_time();
	if_adv.capacity.rate_trend_suggestion =
	    IF_INTERFACE_ADVISORY_RATE_SUGGESTION_RAMP_NEUTRAL;
	if_adv.capacity.max_bandwidth = 1000 * 1000 * 1000; /* 1Gbps */
	if_adv.capacity.total_byte_count = if_stat.packets_out;
	if_adv.capacity.average_throughput = 1000 * 1000 * 1000; /* 1Gbps */
	if_adv.capacity.flushable_queue_size = UINT32_MAX;
	if_adv.capacity.non_flushable_queue_size = UINT32_MAX;
	if_adv.capacity.average_delay = 1; /* ms */

	error = fakeif->iff_intf_adv_notify(fakeif->iff_intf_adv_kern_ctx,
	    &if_adv);
	if (error != 0) {
		FAKE_LOG(LOG_NOTICE, 0,
		    "%s: interface advisory report failed %d",
		    fakeif->iff_name, error);
	}

done:
	/* re-arm the periodic advisory while the channel stays up */
	feth_lock();
	if (!feth_is_detaching(fakeif) && fakeif->iff_channel_connected) {
		uint64_t deadline;
		clock_interval_to_deadline(fakeif->iff_adv_interval,
		    NSEC_PER_MSEC, &deadline);
		thread_call_enter_delayed(fakeif->iff_if_adv_tcall, deadline);
	}
	feth_unlock();
}
1870 
1871 static int
feth_if_adv_tcall_create(if_fake_ref fakeif)1872 feth_if_adv_tcall_create(if_fake_ref fakeif)
1873 {
1874 	uint64_t deadline;
1875 
1876 	feth_lock();
1877 	ASSERT(fakeif->iff_if_adv_tcall == NULL);
1878 	ASSERT(fakeif->iff_adv_interval > 0);
1879 	ASSERT(fakeif->iff_channel_connected);
1880 	fakeif->iff_if_adv_tcall =
1881 	    thread_call_allocate_with_options(feth_if_adv,
1882 	    (thread_call_param_t)fakeif, THREAD_CALL_PRIORITY_KERNEL,
1883 	    THREAD_CALL_OPTIONS_ONCE);
1884 	if (fakeif->iff_if_adv_tcall == NULL) {
1885 		FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
1886 		    "%s if_adv tcall alloc failed",
1887 		    fakeif->iff_name);
1888 		return ENXIO;
1889 	}
1890 	/* retain for the interface advisory thread call */
1891 	feth_retain(fakeif);
1892 	clock_interval_to_deadline(fakeif->iff_adv_interval,
1893 	    NSEC_PER_MSEC, &deadline);
1894 	thread_call_enter_delayed(fakeif->iff_if_adv_tcall, deadline);
1895 	feth_unlock();
1896 	return 0;
1897 }
1898 
/*
 * feth_if_adv_tcall_destroy
 * Cancel, wait out, and free the interface advisory thread call, then
 * drop the fakeif reference taken in feth_if_adv_tcall_create().
 * thread_call_free() can fail if feth_if_adv() re-armed the call in the
 * window after the first cancel; the second cancel/free pair closes
 * that race, and the VERIFY insists the second attempt succeeds.
 */
static void
feth_if_adv_tcall_destroy(if_fake_ref fakeif)
{
	thread_call_t tcall;

	feth_lock();
	ASSERT(fakeif->iff_if_adv_tcall != NULL);
	tcall = fakeif->iff_if_adv_tcall;
	feth_unlock();
	/* cancel/free must happen outside the lock */
	(void) thread_call_cancel_wait(tcall);
	if (!thread_call_free(tcall)) {
		boolean_t freed;
		/* the callout re-armed itself; cancel and free again */
		(void) thread_call_cancel_wait(tcall);
		freed = thread_call_free(tcall);
		VERIFY(freed);
	}
	feth_lock();
	fakeif->iff_if_adv_tcall = NULL;
	feth_unlock();
	/* release for the interface advisory thread call */
	feth_release(fakeif);
}
1921 
1922 
1923 /**
1924 ** nexus netif domain provider
1925 **/
1926 static errno_t
feth_nxdp_init(kern_nexus_domain_provider_t domprov)1927 feth_nxdp_init(kern_nexus_domain_provider_t domprov)
1928 {
1929 #pragma unused(domprov)
1930 	return 0;
1931 }
1932 
1933 static void
feth_nxdp_fini(kern_nexus_domain_provider_t domprov)1934 feth_nxdp_fini(kern_nexus_domain_provider_t domprov)
1935 {
1936 #pragma unused(domprov)
1937 }
1938 
/* UUID of the netif nexus domain provider registered for feth */
static uuid_t                   feth_nx_dom_prov;
1940 
1941 static errno_t
feth_register_nexus_domain_provider(void)1942 feth_register_nexus_domain_provider(void)
1943 {
1944 	const struct kern_nexus_domain_provider_init dp_init = {
1945 		.nxdpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
1946 		.nxdpi_flags = 0,
1947 		.nxdpi_init = feth_nxdp_init,
1948 		.nxdpi_fini = feth_nxdp_fini
1949 	};
1950 	errno_t                         err = 0;
1951 
1952 	/* feth_nxdp_init() is called before this function returns */
1953 	err = kern_nexus_register_domain_provider(NEXUS_TYPE_NET_IF,
1954 	    (const uint8_t *)
1955 	    "com.apple.feth",
1956 	    &dp_init, sizeof(dp_init),
1957 	    &feth_nx_dom_prov);
1958 	if (err != 0) {
1959 		FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
1960 		    "failed to register domain provider");
1961 		return err;
1962 	}
1963 	return 0;
1964 }
1965 
1966 /**
1967 ** netif nexus routines
1968 **/
1969 static if_fake_ref
feth_nexus_context(kern_nexus_t nexus)1970 feth_nexus_context(kern_nexus_t nexus)
1971 {
1972 	if_fake_ref fakeif;
1973 
1974 	fakeif = (if_fake_ref)kern_nexus_get_context(nexus);
1975 	assert(fakeif != NULL);
1976 	return fakeif;
1977 }
1978 
1979 static uint8_t
feth_find_tx_ring_by_svc(kern_packet_svc_class_t svc_class)1980 feth_find_tx_ring_by_svc(kern_packet_svc_class_t svc_class)
1981 {
1982 	switch (svc_class) {
1983 	case KPKT_SC_VO:
1984 		return 0;
1985 	case KPKT_SC_VI:
1986 		return 1;
1987 	case KPKT_SC_BE:
1988 		return 2;
1989 	case KPKT_SC_BK:
1990 		return 3;
1991 	default:
1992 		VERIFY(0);
1993 		return 0;
1994 	}
1995 }
1996 
/*
 * feth_nx_ring_init
 * Ring setup callback.  Records the new ring in the fakeif: in WMM mode
 * each TX ring lands in the slot matching its service class (see
 * feth_find_tx_ring_by_svc()); otherwise TX and RX each use slot 0.
 * Also caches a pointer to the netif nexus statistics.
 * Returns 0 even while detaching (the ring is simply not recorded).
 */
static errno_t
feth_nx_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_t channel, kern_channel_ring_t ring, boolean_t is_tx_ring,
    void **ring_ctx)
{
	if_fake_ref     fakeif;
	int             err;
#pragma unused(nxprov, channel, ring_ctx)
	feth_lock();
	fakeif = feth_nexus_context(nexus);
	if (feth_is_detaching(fakeif)) {
		feth_unlock();
		return 0;
	}
	if (is_tx_ring) {
		if (feth_in_wmm_mode(fakeif)) {
			kern_packet_svc_class_t svc_class;
			uint8_t ring_idx;

			/* file the ring under its service class's slot */
			err = kern_channel_get_service_class(ring, &svc_class);
			VERIFY(err == 0);
			ring_idx = feth_find_tx_ring_by_svc(svc_class);
			VERIFY(ring_idx < IFF_NUM_TX_RINGS_WMM_MODE);
			VERIFY(fakeif->iff_tx_ring[ring_idx] == NULL);
			fakeif->iff_tx_ring[ring_idx] = ring;
		} else {
			VERIFY(fakeif->iff_tx_ring[0] == NULL);
			fakeif->iff_tx_ring[0] = ring;
		}
	} else {
		VERIFY(fakeif->iff_rx_ring[0] == NULL);
		fakeif->iff_rx_ring[0] = ring;
	}
	fakeif->iff_nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
	feth_unlock();
	FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE, "%s: %s ring init",
	    fakeif->iff_name, is_tx_ring ? "TX" : "RX");
	return 0;
}
2036 
/*
 * feth_nx_ring_fini
 * Ring teardown callback.  Clears the fakeif's pointer to the ring; when
 * the last TX ring goes away it also takes ownership of the async
 * doorbell thread call and destroys it outside the lock.  If the cancel
 * races with an in-flight doorbell callout, this blocks on msleep()
 * until feth_async_doorbell() signals completion, then frees the call
 * and drops the reference taken in feth_schedule_async_doorbell().
 */
static void
feth_nx_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t ring)
{
#pragma unused(nxprov, ring)
	if_fake_ref     fakeif;
	thread_call_t   tcall = NULL;

	feth_lock();
	fakeif = feth_nexus_context(nexus);
	if (fakeif->iff_rx_ring[0] == ring) {
		fakeif->iff_rx_ring[0] = NULL;
		FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
		    "%s: RX ring fini", fakeif->iff_name);
	} else if (feth_in_wmm_mode(fakeif)) {
		int i;
		/* clear this ring's slot */
		for (i = 0; i < IFF_MAX_TX_RINGS; i++) {
			if (fakeif->iff_tx_ring[i] == ring) {
				fakeif->iff_tx_ring[i] = NULL;
				break;
			}
		}
		/* if no TX rings remain, claim the doorbell tcall */
		for (i = 0; i < IFF_MAX_TX_RINGS; i++) {
			if (fakeif->iff_tx_ring[i] != NULL) {
				break;
			}
		}
		if (i == IFF_MAX_TX_RINGS) {
			tcall = fakeif->iff_doorbell_tcall;
			fakeif->iff_doorbell_tcall = NULL;
		}
		FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
		    "%s: TX ring fini", fakeif->iff_name);
	} else if (fakeif->iff_tx_ring[0] == ring) {
		tcall = fakeif->iff_doorbell_tcall;
		fakeif->iff_doorbell_tcall = NULL;
		fakeif->iff_tx_ring[0] = NULL;
	}
	fakeif->iff_nifs = NULL;
	feth_unlock();
	if (tcall != NULL) {
		boolean_t       success;

		success = thread_call_cancel_wait(tcall);
		FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
		    "%s: thread_call_cancel %s", fakeif->iff_name,
		    success ? "SUCCESS" : "FAILURE");
		if (!success) {
			/* callout already running; wait for it to finish */
			feth_lock();
			if (fakeif->iff_doorbell_tcall_active) {
				fakeif->iff_waiting_for_tcall = TRUE;
				FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
				    "%s: *waiting for threadcall",
				    fakeif->iff_name);
				do {
					msleep(fakeif, &feth_lck_mtx,
					    PZERO, "feth threadcall", 0);
				} while (fakeif->iff_doorbell_tcall_active);
				FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
				    "%s: ^threadcall done",
				    fakeif->iff_name);
				fakeif->iff_waiting_for_tcall = FALSE;
			}
			feth_unlock();
		}
		success = thread_call_free(tcall);
		FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
		    "%s: thread_call_free %s",
		    fakeif->iff_name,
		    success ? "SUCCESS" : "FAILURE");
		/* drop the reference held by the doorbell thread call */
		feth_release(fakeif);
		VERIFY(success == TRUE);
	}
}
2111 
2112 static errno_t
feth_nx_pre_connect(kern_nexus_provider_t nxprov,proc_t proc,kern_nexus_t nexus,nexus_port_t port,kern_channel_t channel,void ** channel_context)2113 feth_nx_pre_connect(kern_nexus_provider_t nxprov,
2114     proc_t proc, kern_nexus_t nexus, nexus_port_t port, kern_channel_t channel,
2115     void **channel_context)
2116 {
2117 #pragma unused(nxprov, proc, nexus, port, channel, channel_context)
2118 	return 0;
2119 }
2120 
/*
 * feth_nx_connected
 * Channel connect callback: takes a reference on fakeif for the life of
 * the channel and marks it connected; refuses (EBUSY) while detaching.
 * If interface advisories are configured, starts the periodic advisory
 * thread call.
 * NOTE(review): when feth_if_adv_tcall_create() fails, the reference
 * and iff_channel_connected remain set on the error return -- presumably
 * the disconnect path still runs and undoes them; confirm.
 */
static errno_t
feth_nx_connected(kern_nexus_provider_t nxprov,
    kern_nexus_t nexus, kern_channel_t channel)
{
#pragma unused(nxprov, channel)
	int err;
	if_fake_ref fakeif;

	fakeif = feth_nexus_context(nexus);
	feth_lock();
	if (feth_is_detaching(fakeif)) {
		feth_unlock();
		return EBUSY;
	}
	/* retained for the duration of the channel connection */
	feth_retain(fakeif);
	fakeif->iff_channel_connected = TRUE;
	feth_unlock();
	if (feth_has_intf_advisory_configured(fakeif)) {
		err = feth_if_adv_tcall_create(fakeif);
		if (err != 0) {
			return err;
		}
	}
	FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE, "%s: connected channel %p",
	    fakeif->iff_name, channel);
	return 0;
}
2148 
/*
 * feth_nx_pre_disconnect
 * Channel pre-disconnect callback: brings the interface down to flush
 * pending output, marks the channel disconnected, and tears down the
 * interface advisory thread call if one was started.
 * NOTE(review): iff_if_adv_tcall is read without holding feth_lock --
 * presumably safe because connect/disconnect are serialized; confirm.
 */
static void
feth_nx_pre_disconnect(kern_nexus_provider_t nxprov,
    kern_nexus_t nexus, kern_channel_t channel)
{
#pragma unused(nxprov, channel)
	if_fake_ref fakeif;

	fakeif = feth_nexus_context(nexus);
	FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
	    "%s: pre-disconnect channel %p",
	    fakeif->iff_name, channel);
	/* Quiesce the interface and flush any pending outbound packets. */
	if_down(fakeif->iff_ifp);
	feth_lock();
	fakeif->iff_channel_connected = FALSE;
	feth_unlock();
	if (fakeif->iff_if_adv_tcall != NULL) {
		feth_if_adv_tcall_destroy(fakeif);
	}
}
2169 
2170 static void
feth_nx_disconnected(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel)2171 feth_nx_disconnected(kern_nexus_provider_t nxprov,
2172     kern_nexus_t nexus, kern_channel_t channel)
2173 {
2174 #pragma unused(nxprov, channel)
2175 	if_fake_ref fakeif;
2176 
2177 	fakeif = feth_nexus_context(nexus);
2178 	FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE, "%s: disconnected channel %p",
2179 	    fakeif->iff_name, channel);
2180 	feth_release(fakeif);
2181 }
2182 
2183 static errno_t
feth_nx_slot_init(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring,kern_channel_slot_t slot,uint32_t slot_index,struct kern_slot_prop ** slot_prop_addr,void ** slot_context)2184 feth_nx_slot_init(kern_nexus_provider_t nxprov,
2185     kern_nexus_t nexus, kern_channel_ring_t ring, kern_channel_slot_t slot,
2186     uint32_t slot_index, struct kern_slot_prop **slot_prop_addr,
2187     void **slot_context)
2188 {
2189 #pragma unused(nxprov, nexus, ring, slot, slot_index, slot_prop_addr, slot_context)
2190 	return 0;
2191 }
2192 
2193 static void
feth_nx_slot_fini(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring,kern_channel_slot_t slot,uint32_t slot_index)2194 feth_nx_slot_fini(kern_nexus_provider_t nxprov,
2195     kern_nexus_t nexus, kern_channel_ring_t ring, kern_channel_slot_t slot,
2196     uint32_t slot_index)
2197 {
2198 #pragma unused(nxprov, nexus, ring, slot, slot_index)
2199 }
2200 
/*
 * feth_nx_sync_tx
 * TX sync callback: drains the TX ring and "crosses over" each packet
 * to the peer feth interface's RX path in batches of up to
 * IFF_MAX_BATCH_SIZE.  For every slot: detach the packet, bpf-tap it,
 * optionally drop it (expired/complete-error injection or peer channel
 * not connected), otherwise batch it for feth_rx_submit() followed by
 * feth_tx_complete().  Runs entirely under feth_lock; ring/stat
 * advancement happens only if at least one slot was consumed.
 * Always returns 0.
 */
static errno_t
feth_nx_sync_tx(kern_nexus_provider_t nxprov,
    kern_nexus_t nexus, kern_channel_ring_t tx_ring, uint32_t flags)
{
#pragma unused(nxprov)
	if_fake_ref             fakeif;
	ifnet_t                 ifp;
	kern_channel_slot_t     last_tx_slot = NULL;
	ifnet_t                 peer_ifp;
	if_fake_ref             peer_fakeif = NULL;
	struct kern_channel_ring_stat_increment stats;
	kern_channel_slot_t     tx_slot;
	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
	kern_packet_t           pkts[IFF_MAX_BATCH_SIZE];
	uint32_t                n_pkts = 0;

	memset(&stats, 0, sizeof(stats));

	STATS_INC(nifs, NETIF_STATS_TX_SYNC);
	fakeif = feth_nexus_context(nexus);
	FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
	    "%s ring %d flags 0x%x", fakeif->iff_name,
	    tx_ring->ckr_ring_id, flags);
	(void)flags;
	feth_lock();
	if (feth_is_detaching(fakeif) || !fakeif->iff_channel_connected) {
		feth_unlock();
		return 0;
	}
	ifp = fakeif->iff_ifp;
	/* locate the peer; without a usable peer there is nowhere to send */
	peer_ifp = fakeif->iff_peer;
	if (peer_ifp != NULL) {
		peer_fakeif = ifnet_get_if_fake(peer_ifp);
		if (peer_fakeif != NULL) {
			if (feth_is_detaching(peer_fakeif)) {
				FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
				    "%s peer fakeif %s is detaching",
				    fakeif->iff_name, peer_fakeif->iff_name);
				goto done;
			}
			if (!peer_fakeif->iff_channel_connected) {
				/*
				 * With TX expiration disabled there is no
				 * reason to process the ring; otherwise fall
				 * through so expiration logic can run.
				 */
				if (fakeif->iff_tx_exp_policy ==
				    IFF_TX_EXP_POLICY_DISABLED) {
					FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
					    "%s peer fakeif %s channel not connected, expn: %d",
					    fakeif->iff_name, peer_fakeif->iff_name,
					    fakeif->iff_tx_exp_policy);
					goto done;
				}
			}
		} else {
			FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
			    "%s no peer fakeif (peer %p)",
			    fakeif->iff_name, peer_ifp);
			goto done;
		}
	} else {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
		    "%s no peer", fakeif->iff_name);
		goto done;
	}
	tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
	while (tx_slot != NULL) {
		uint16_t off;
		kern_packet_t sph;

		/* detach the packet from the TX ring */
		sph = kern_channel_slot_get_packet(tx_ring, tx_slot);
		VERIFY(sph != 0);
		kern_channel_slot_detach_packet(tx_ring, tx_slot, sph);

		/* bpf tap output */
		off = kern_packet_get_headroom(sph);
		VERIFY(off >= fakeif->iff_tx_headroom);
		kern_packet_set_link_header_length(sph, ETHER_HDR_LEN);
		feth_packet_set_trace_tag(sph, IFF_TT_OUTPUT);
		bpf_tap_packet_out(ifp, DLT_EN10MB, sph, NULL, 0);

		/* drop packets, if requested */
		fakeif->iff_tx_pkts_count++;
		if (feth_tx_expired_error(fakeif, sph) ||
		    feth_tx_complete_error(fakeif, sph) ||
		    !peer_fakeif->iff_channel_connected) {
			fakeif->iff_tx_pkts_count = 0;
			kern_pbufpool_free(fakeif->iff_tx_pp, sph);
			STATS_INC(nifs, NETIF_STATS_DROP);
			goto next_tx_slot;
		}

		ASSERT(sph != 0);
		STATS_INC(nifs, NETIF_STATS_TX_COPY_DIRECT);
		STATS_INC(nifs, NETIF_STATS_TX_PACKETS);

		stats.kcrsi_slots_transferred++;
		stats.kcrsi_bytes_transferred
		        += kern_packet_get_data_length(sph);

		/* prepare batch for receiver */
		pkts[n_pkts++] = sph;
		if (n_pkts == IFF_MAX_BATCH_SIZE) {
			feth_rx_submit(fakeif, peer_fakeif, pkts, n_pkts);
			feth_tx_complete(fakeif, pkts, n_pkts);
			n_pkts = 0;
		}

next_tx_slot:
		last_tx_slot = tx_slot;
		tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
	}

	/* catch last batch for receiver */
	if (n_pkts != 0) {
		feth_rx_submit(fakeif, peer_fakeif, pkts, n_pkts);
		feth_tx_complete(fakeif, pkts, n_pkts);
		n_pkts = 0;
	}

	/* advance the ring past everything consumed and publish stats */
	if (last_tx_slot != NULL) {
		kern_channel_advance_slot(tx_ring, last_tx_slot);
		kern_channel_increment_ring_net_stats(tx_ring, ifp, &stats);
	}
done:
	feth_unlock();
	return 0;
}
2326 
2327 static errno_t
feth_nx_sync_rx(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring,uint32_t flags)2328 feth_nx_sync_rx(kern_nexus_provider_t nxprov,
2329     kern_nexus_t nexus, kern_channel_ring_t ring, uint32_t flags)
2330 {
2331 #pragma unused(nxprov, ring, flags)
2332 	if_fake_ref             fakeif;
2333 	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
2334 
2335 	STATS_INC(nifs, NETIF_STATS_RX_SYNC);
2336 	fakeif = feth_nexus_context(nexus);
2337 	FAKE_LOG(LOG_DEBUG, FE_DBGF_INPUT, "%s", fakeif->iff_name);
2338 	return 0;
2339 }
2340 
2341 static errno_t
feth_nx_tx_dequeue_driver_managed(if_fake_ref fakeif,boolean_t doorbell_ctxt)2342 feth_nx_tx_dequeue_driver_managed(if_fake_ref fakeif, boolean_t doorbell_ctxt)
2343 {
2344 	int i;
2345 	errno_t error = 0;
2346 	boolean_t more;
2347 
2348 	for (i = 0; i < IFF_NUM_TX_RINGS_WMM_MODE; i++) {
2349 		kern_channel_ring_t ring = fakeif->iff_tx_ring[i];
2350 		if (ring != NULL) {
2351 			error = kern_channel_tx_refill(ring, UINT32_MAX,
2352 			    UINT32_MAX, doorbell_ctxt, &more);
2353 		}
2354 		if (error != 0) {
2355 			FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
2356 			    "%s: TX refill ring %d (%s) %d",
2357 			    fakeif->iff_name, ring->ckr_ring_id,
2358 			    doorbell_ctxt ? "sync" : "async", error);
2359 			if (!((error == EAGAIN) || (error == EBUSY))) {
2360 				break;
2361 			}
2362 		} else {
2363 			FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
2364 			    "%s: TX refilled ring %d (%s)",
2365 			    fakeif->iff_name, ring->ckr_ring_id,
2366 			    doorbell_ctxt ? "sync" : "async");
2367 		}
2368 	}
2369 	return error;
2370 }
2371 
/*
 * feth_async_doorbell
 * Thread-call work function for asynchronous TX doorbells: refills the
 * TX ring(s) outside the doorbell context.  Marks itself active around
 * the refill so feth_nx_ring_fini() can msleep() until it finishes, and
 * wakes any such waiter on exit.
 * Note: the early "goto done" is taken with feth_lock held; the done
 * path runs locked in both cases and unlocks at the end.
 */
static void
feth_async_doorbell(thread_call_param_t arg0, thread_call_param_t arg1)
{
#pragma unused(arg1)
	errno_t                 error;
	if_fake_ref             fakeif = (if_fake_ref)arg0;
	kern_channel_ring_t     ring;
	boolean_t               more;

	feth_lock();
	ring = fakeif->iff_tx_ring[0];
	if (feth_is_detaching(fakeif) ||
	    !fakeif->iff_channel_connected ||
	    ring == NULL) {
		goto done;
	}
	fakeif->iff_doorbell_tcall_active = TRUE;
	feth_unlock();
	/* refill without holding the lock */
	if (feth_in_wmm_mode(fakeif)) {
		error = feth_nx_tx_dequeue_driver_managed(fakeif, FALSE);
	} else {
		error = kern_channel_tx_refill(ring, UINT32_MAX,
		    UINT32_MAX, FALSE, &more);
	}
	if (error != 0) {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT, "%s: TX refill failed %d",
		    fakeif->iff_name, error);
	} else {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT, "%s: TX refilled",
		    fakeif->iff_name);
	}

	feth_lock();
done:
	fakeif->iff_doorbell_tcall_active = FALSE;
	/* ring fini may be blocked waiting for this callout to drain */
	if (fakeif->iff_waiting_for_tcall) {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
		    "%s: threadcall waking up waiter", fakeif->iff_name);
		wakeup((caddr_t)fakeif);
	}
	feth_unlock();
}
2414 
/*
 * feth_schedule_async_doorbell
 * Schedule feth_async_doorbell() to run.  Lazily allocates the doorbell
 * thread call on first use (taking a fakeif reference that is dropped
 * by feth_nx_ring_fini() when the call is freed).  Does nothing while
 * the interface is detaching or the channel is not connected.
 */
static void
feth_schedule_async_doorbell(if_fake_ref fakeif)
{
	thread_call_t   tcall;

	feth_lock();
	if (feth_is_detaching(fakeif) || !fakeif->iff_channel_connected) {
		feth_unlock();
		return;
	}
	tcall = fakeif->iff_doorbell_tcall;
	if (tcall != NULL) {
		thread_call_enter(tcall);
	} else {
		/* first doorbell: allocate the thread call under the lock */
		tcall = thread_call_allocate_with_options(feth_async_doorbell,
		    (thread_call_param_t)fakeif,
		    THREAD_CALL_PRIORITY_KERNEL,
		    THREAD_CALL_OPTIONS_ONCE);
		if (tcall == NULL) {
			FAKE_LOG(LOG_NOTICE, FE_DBGF_OUTPUT,
			    "%s tcall alloc failed", fakeif->iff_name);
		} else {
			fakeif->iff_doorbell_tcall = tcall;
			/* retained for the doorbell thread call */
			feth_retain(fakeif);
			thread_call_enter(tcall);
		}
	}
	feth_unlock();
}
2444 
2445 static errno_t
feth_nx_tx_doorbell(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring,uint32_t flags)2446 feth_nx_tx_doorbell(kern_nexus_provider_t nxprov,
2447     kern_nexus_t nexus, kern_channel_ring_t ring, uint32_t flags)
2448 {
2449 #pragma unused(nxprov, ring, flags)
2450 	errno_t         error;
2451 	if_fake_ref     fakeif;
2452 
2453 	fakeif = feth_nexus_context(nexus);
2454 	FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT, "%s", fakeif->iff_name);
2455 
2456 	if ((flags & KERN_NEXUS_TXDOORBELLF_ASYNC_REFILL) == 0) {
2457 		boolean_t       more;
2458 		/* synchronous tx refill */
2459 		if (feth_in_wmm_mode(fakeif)) {
2460 			error = feth_nx_tx_dequeue_driver_managed(fakeif, TRUE);
2461 		} else {
2462 			error = kern_channel_tx_refill(ring, UINT32_MAX,
2463 			    UINT32_MAX, TRUE, &more);
2464 		}
2465 		if (error != 0) {
2466 			FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
2467 			    "%s: TX refill (sync) %d", fakeif->iff_name, error);
2468 		} else {
2469 			FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
2470 			    "%s: TX refilled (sync)", fakeif->iff_name);
2471 		}
2472 	} else {
2473 		FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
2474 		    "%s: schedule async refill", fakeif->iff_name);
2475 		feth_schedule_async_doorbell(fakeif);
2476 	}
2477 	return 0;
2478 }
2479 
2480 static errno_t
feth_netif_prepare(kern_nexus_t nexus,ifnet_t ifp)2481 feth_netif_prepare(kern_nexus_t nexus, ifnet_t ifp)
2482 {
2483 	if_fake_ref fakeif;
2484 
2485 	fakeif = (if_fake_ref)kern_nexus_get_context(nexus);
2486 	feth_ifnet_set_attrs(fakeif, ifp);
2487 	return 0;
2488 }
2489 
2490 static errno_t
feth_nx_intf_adv_config(void * prov_ctx,bool enable)2491 feth_nx_intf_adv_config(void *prov_ctx, bool enable)
2492 {
2493 	if_fake_ref fakeif = prov_ctx;
2494 
2495 	feth_lock();
2496 	fakeif->iff_intf_adv_enabled = enable;
2497 	feth_unlock();
2498 	FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
2499 	    "%s enable %d", fakeif->iff_name, enable);
2500 	return 0;
2501 }
2502 
2503 static errno_t
fill_capab_interface_advisory(if_fake_ref fakeif,void * contents,uint32_t * len)2504 fill_capab_interface_advisory(if_fake_ref fakeif, void *contents, uint32_t *len)
2505 {
2506 	struct kern_nexus_capab_interface_advisory *capab = contents;
2507 
2508 	if (*len != sizeof(*capab)) {
2509 		return EINVAL;
2510 	}
2511 	if (capab->kncia_version !=
2512 	    KERN_NEXUS_CAPAB_INTERFACE_ADVISORY_VERSION_1) {
2513 		return EINVAL;
2514 	}
2515 	if (!feth_has_intf_advisory_configured(fakeif)) {
2516 		return ENOTSUP;
2517 	}
2518 	VERIFY(capab->kncia_notify != NULL);
2519 	fakeif->iff_intf_adv_kern_ctx = capab->kncia_kern_context;
2520 	fakeif->iff_intf_adv_notify = capab->kncia_notify;
2521 	capab->kncia_provider_context = fakeif;
2522 	capab->kncia_config = feth_nx_intf_adv_config;
2523 	return 0;
2524 }
2525 
2526 static errno_t
feth_notify_steering_info(void * prov_ctx,void * qset_ctx,struct ifnet_traffic_descriptor_common * td,bool add)2527 feth_notify_steering_info(void *prov_ctx, void *qset_ctx,
2528     struct ifnet_traffic_descriptor_common *td, bool add)
2529 {
2530 #pragma unused(td)
2531 	if_fake_ref fakeif = prov_ctx;
2532 	fake_qset *qset = qset_ctx;
2533 
2534 	FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
2535 	    "%s: notify_steering_info: qset_id 0x%llx, %s",
2536 	    fakeif->iff_name, qset->fqs_id, add ? "add" : "remove");
2537 	return 0;
2538 }
2539 
2540 static errno_t
fill_capab_qset_extensions(if_fake_ref fakeif,void * contents,uint32_t * len)2541 fill_capab_qset_extensions(if_fake_ref fakeif, void *contents, uint32_t *len)
2542 {
2543 	struct kern_nexus_capab_qset_extensions *capab = contents;
2544 
2545 	if (*len != sizeof(*capab)) {
2546 		return EINVAL;
2547 	}
2548 	if (capab->cqe_version !=
2549 	    KERN_NEXUS_CAPAB_QSET_EXTENSIONS_VERSION_1) {
2550 		return EINVAL;
2551 	}
2552 	capab->cqe_prov_ctx = fakeif;
2553 	capab->cqe_notify_steering_info = feth_notify_steering_info;
2554 	return 0;
2555 }
2556 
2557 static errno_t
feth_nx_capab_config(kern_nexus_provider_t nxprov,kern_nexus_t nx,kern_nexus_capab_t capab,void * contents,uint32_t * len)2558 feth_nx_capab_config(kern_nexus_provider_t nxprov, kern_nexus_t nx,
2559     kern_nexus_capab_t capab, void *contents, uint32_t *len)
2560 {
2561 #pragma unused(nxprov)
2562 	errno_t error;
2563 	if_fake_ref fakeif;
2564 
2565 	fakeif = feth_nexus_context(nx);
2566 	FAKE_LOG(LOG_DEBUG, FE_DBGF_CONTROL, "%s", fakeif->iff_name);
2567 
2568 	switch (capab) {
2569 	case KERN_NEXUS_CAPAB_INTERFACE_ADVISORY:
2570 		error = fill_capab_interface_advisory(fakeif, contents, len);
2571 		break;
2572 	case KERN_NEXUS_CAPAB_QSET_EXTENSIONS:
2573 		error = fill_capab_qset_extensions(fakeif, contents, len);
2574 		break;
2575 	default:
2576 		error = ENOTSUP;
2577 		break;
2578 	}
2579 	return error;
2580 }
2581 
2582 static int
feth_set_tso_mtu(ifnet_t ifp,uint32_t tso_v4_mtu,uint32_t tso_v6_mtu)2583 feth_set_tso_mtu(ifnet_t ifp, uint32_t tso_v4_mtu, uint32_t tso_v6_mtu)
2584 {
2585 	int     error;
2586 
2587 	error = ifnet_set_tso_mtu(ifp, AF_INET, tso_v4_mtu);
2588 	if (error != 0) {
2589 		FAKE_LOG(LOG_NOTICE, FE_DBGF_CONTROL,
2590 		    "set TSO MTU IPv4 failed on %s, err %d",
2591 		    if_name(ifp), error);
2592 		return error;
2593 	}
2594 	error = ifnet_set_tso_mtu(ifp, AF_INET6, tso_v6_mtu);
2595 	if (error != 0) {
2596 		FAKE_LOG(LOG_NOTICE, FE_DBGF_CONTROL,
2597 		    "set TSO MTU IPv6 failed on %s, err %d",
2598 		    if_name(ifp), error);
2599 		return error;
2600 	}
2601 	return 0;
2602 }
2603 
2604 static int
feth_set_tso_offload(ifnet_t ifp)2605 feth_set_tso_offload(ifnet_t ifp)
2606 {
2607 	ifnet_offload_t offload;
2608 	int error;
2609 
2610 	offload = IFNET_TSO_IPV4 | IFNET_TSO_IPV6;
2611 	error = ifnet_set_offload(ifp, offload);
2612 	if (error != 0) {
2613 		FAKE_LOG(LOG_NOTICE, FE_DBGF_CONTROL,
2614 		    "set TSO offload failed on %s, err %d",
2615 		    if_name(ifp), error);
2616 		goto done;
2617 	}
2618 	error = feth_set_tso_mtu(ifp, if_fake_tso_buffer_size,
2619 	    if_fake_tso_buffer_size);
2620 done:
2621 	return error;
2622 }
2623 
/*
 * create_netif_provider_and_instance
 * Register a per-interface netif nexus provider named
 * "com.apple.netif.<ifname>" under the feth domain provider and
 * allocate its provider instance, which also creates the ifnet.
 * In WMM mode the nexus attributes request the WMM ring/queue layout.
 * On success *provider, *instance and *ifp are filled in.  On failure
 * after provider registration, the provider is deregistered and
 * *provider cleared.
 * NOTE(review): if feth_set_tso_offload() fails, the function returns
 * the error while leaving the provider instance allocated -- presumably
 * the caller tears it down via the returned uuids; confirm.
 */
static errno_t
create_netif_provider_and_instance(if_fake_ref fakeif,
    struct ifnet_init_eparams * init_params, ifnet_t *ifp,
    uuid_t * provider, uuid_t * instance)
{
	errno_t                 err;
	nexus_controller_t      controller = kern_nexus_shared_controller();
	struct kern_nexus_net_init net_init;
	nexus_name_t            provider_name;
	nexus_attr_t            nexus_attr = NULL;
	struct kern_nexus_provider_init prov_init = {
		.nxpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
		.nxpi_flags = NXPIF_VIRTUAL_DEVICE,
		.nxpi_pre_connect = feth_nx_pre_connect,
		.nxpi_connected = feth_nx_connected,
		.nxpi_pre_disconnect = feth_nx_pre_disconnect,
		.nxpi_disconnected = feth_nx_disconnected,
		.nxpi_ring_init = feth_nx_ring_init,
		.nxpi_ring_fini = feth_nx_ring_fini,
		.nxpi_slot_init = feth_nx_slot_init,
		.nxpi_slot_fini = feth_nx_slot_fini,
		.nxpi_sync_tx = feth_nx_sync_tx,
		.nxpi_sync_rx = feth_nx_sync_rx,
		.nxpi_tx_doorbell = feth_nx_tx_doorbell,
		.nxpi_config_capab = feth_nx_capab_config,
	};

	_CASSERT(IFF_MAX_RX_RINGS == 1);
	err = kern_nexus_attr_create(&nexus_attr);
	if (err != 0) {
		FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
		    "nexus attribute creation failed, error %d", err);
		goto failed;
	}
	/* WMM mode: one TX ring per service class, WMM queue mapping */
	if (feth_in_wmm_mode(fakeif)) {
		err = kern_nexus_attr_set(nexus_attr, NEXUS_ATTR_TX_RINGS,
		    IFF_NUM_TX_RINGS_WMM_MODE);
		VERIFY(err == 0);
		err = kern_nexus_attr_set(nexus_attr, NEXUS_ATTR_RX_RINGS,
		    IFF_NUM_RX_RINGS_WMM_MODE);
		VERIFY(err == 0);
		err = kern_nexus_attr_set(nexus_attr, NEXUS_ATTR_QMAP,
		    NEXUS_QMAP_TYPE_WMM);
		VERIFY(err == 0);
	}

	err = kern_nexus_attr_set(nexus_attr, NEXUS_ATTR_ANONYMOUS, 1);
	VERIFY(err == 0);
	snprintf((char *)provider_name, sizeof(provider_name),
	    "com.apple.netif.%s", fakeif->iff_name);
	err = kern_nexus_controller_register_provider(controller,
	    feth_nx_dom_prov,
	    provider_name,
	    &prov_init,
	    sizeof(prov_init),
	    nexus_attr,
	    provider);
	if (err != 0) {
		FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
		    "register provider failed, error %d", err);
		goto failed;
	}
	/* allocate the provider instance; this also creates the ifnet */
	bzero(&net_init, sizeof(net_init));
	net_init.nxneti_version = KERN_NEXUS_NET_CURRENT_VERSION;
	net_init.nxneti_flags = 0;
	net_init.nxneti_eparams = init_params;
	net_init.nxneti_lladdr = NULL;
	net_init.nxneti_prepare = feth_netif_prepare;
	net_init.nxneti_rx_pbufpool = fakeif->iff_rx_pp;
	net_init.nxneti_tx_pbufpool = fakeif->iff_tx_pp;
	err = kern_nexus_controller_alloc_net_provider_instance(controller,
	    *provider,
	    fakeif,
	    NULL,
	    instance,
	    &net_init,
	    ifp);
	if (err != 0) {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
		    "alloc_net_provider_instance failed, %d", err);
		/* undo the provider registration on instance failure */
		kern_nexus_controller_deregister_provider(controller,
		    *provider);
		uuid_clear(*provider);
		goto failed;
	}
	if (feth_supports_tso(fakeif)) {
		if ((err = feth_set_tso_offload(*ifp)) != 0) {
			goto failed;
		}
	}

failed:
	if (nexus_attr != NULL) {
		kern_nexus_attr_destroy(nexus_attr);
	}
	return err;
}
2721 
2722 /*
2723  * The nif_stats need to be referenced because we don't want it set
2724  * to NULL until the last llink is removed.
2725  */
2726 static void
get_nexus_stats(if_fake_ref fakeif,kern_nexus_t nexus)2727 get_nexus_stats(if_fake_ref fakeif, kern_nexus_t nexus)
2728 {
2729 	if (++fakeif->iff_nifs_ref == 1) {
2730 		ASSERT(fakeif->iff_nifs == NULL);
2731 		fakeif->iff_nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
2732 	}
2733 }
2734 
2735 static void
clear_nexus_stats(if_fake_ref fakeif)2736 clear_nexus_stats(if_fake_ref fakeif)
2737 {
2738 	if (--fakeif->iff_nifs_ref == 0) {
2739 		ASSERT(fakeif->iff_nifs != NULL);
2740 		fakeif->iff_nifs = NULL;
2741 	}
2742 }
2743 
/*
 * feth_nx_qset_init
 * Qset setup callback: record the kernel qset handle and id in the
 * llink's qset slot indexed by qset_idx, hand that slot back as the
 * qset context, and take a reference on the nexus stats.
 * Returns ENXIO while detaching, EINVAL for an out-of-range qset_idx.
 */
static errno_t
feth_nx_qset_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    void *llink_ctx, uint8_t qset_idx, uint64_t qset_id, kern_netif_qset_t qset,
    void **qset_ctx)
{
#pragma unused(nxprov)
	if_fake_ref fakeif;
	fake_llink *fl = llink_ctx;
	fake_qset *fqs;

	feth_lock();
	fakeif = feth_nexus_context(nexus);
	if (feth_is_detaching(fakeif)) {
		feth_unlock();
		FAKE_LOG(LOG_DEBUG, FE_DBGF_CONTROL,
		    "%s: detaching", fakeif->iff_name);
		return ENXIO;
	}
	if (qset_idx >= fl->fl_qset_cnt) {
		feth_unlock();
		FAKE_LOG(LOG_DEBUG, FE_DBGF_CONTROL,
		    "%s: invalid qset_idx %d", fakeif->iff_name, qset_idx);
		return EINVAL;
	}
	fqs = &fl->fl_qset[qset_idx];
	ASSERT(fqs->fqs_qset == NULL);
	fqs->fqs_qset = qset;
	fqs->fqs_id = qset_id;
	*qset_ctx = fqs;

	/* XXX This should really be done during registration */
	get_nexus_stats(fakeif, nexus);
	feth_unlock();
	return 0;
}
2779 
2780 static void
feth_nx_qset_fini(kern_nexus_provider_t nxprov,kern_nexus_t nexus,void * qset_ctx)2781 feth_nx_qset_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
2782     void *qset_ctx)
2783 {
2784 #pragma unused(nxprov)
2785 	if_fake_ref fakeif;
2786 	fake_qset *fqs = qset_ctx;
2787 
2788 	feth_lock();
2789 	fakeif = feth_nexus_context(nexus);
2790 	clear_nexus_stats(fakeif);
2791 	ASSERT(fqs->fqs_qset != NULL);
2792 	fqs->fqs_qset = NULL;
2793 	fqs->fqs_id = 0;
2794 	feth_unlock();
2795 }
2796 
2797 static errno_t
feth_nx_queue_init(kern_nexus_provider_t nxprov,kern_nexus_t nexus,void * qset_ctx,uint8_t qidx,bool tx,kern_netif_queue_t queue,void ** queue_ctx)2798 feth_nx_queue_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
2799     void *qset_ctx, uint8_t qidx, bool tx, kern_netif_queue_t queue,
2800     void **queue_ctx)
2801 {
2802 #pragma unused(nxprov)
2803 	if_fake_ref fakeif;
2804 	fake_qset *fqs = qset_ctx;
2805 	fake_queue *fq;
2806 
2807 	feth_lock();
2808 	fakeif = feth_nexus_context(nexus);
2809 	if (feth_is_detaching(fakeif)) {
2810 		FAKE_LOG(LOG_DEBUG, FE_DBGF_CONTROL,
2811 		    "%s: detaching", fakeif->iff_name);
2812 		feth_unlock();
2813 		return ENXIO;
2814 	}
2815 	if (tx) {
2816 		if (qidx >= fqs->fqs_tx_queue_cnt) {
2817 			FAKE_LOG(LOG_DEBUG, FE_DBGF_CONTROL,
2818 			    "%s: invalid tx qidx %d", fakeif->iff_name, qidx);
2819 			feth_unlock();
2820 			return EINVAL;
2821 		}
2822 		fq = &fqs->fqs_tx_queue[qidx];
2823 	} else {
2824 		if (qidx >= fqs->fqs_rx_queue_cnt) {
2825 			FAKE_LOG(LOG_DEBUG, FE_DBGF_CONTROL,
2826 			    "%s: invalid rx qidx %d", fakeif->iff_name, qidx);
2827 			feth_unlock();
2828 			return EINVAL;
2829 		}
2830 		fq = &fqs->fqs_rx_queue[qidx];
2831 	}
2832 	ASSERT(fq->fq_queue == NULL);
2833 	fq->fq_queue = queue;
2834 	*queue_ctx = fq;
2835 	feth_unlock();
2836 	return 0;
2837 }
2838 
2839 static void
feth_nx_queue_fini(kern_nexus_provider_t nxprov,kern_nexus_t nexus,void * queue_ctx)2840 feth_nx_queue_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
2841     void *queue_ctx)
2842 {
2843 #pragma unused(nxprov, nexus)
2844 	fake_queue *fq = queue_ctx;
2845 
2846 	feth_lock();
2847 	ASSERT(fq->fq_queue != NULL);
2848 	fq->fq_queue = NULL;
2849 	feth_unlock();
2850 }
2851 
/*
 * feth_nx_tx_queue_deliver_pkt_chain
 * - walk a chain of tx packets (linked via kern_packet_get_next),
 *   tap each to bpf, optionally drop it per the tx error test knobs,
 *   and deliver the survivors to the peer interface's rx queue in
 *   batches of at most IFF_MAX_BATCH_SIZE
 * - the chain is fully consumed: each packet is either freed (drop
 *   path) or submitted to the peer and tx-completed
 * - called with the feth lock held (callers hold it around delivery)
 */
static void
feth_nx_tx_queue_deliver_pkt_chain(if_fake_ref fakeif, kern_packet_t sph,
    struct netif_stats *nifs, if_fake_ref peer_fakeif,
    uint32_t llink_idx, uint32_t qset_idx)
{
	kern_packet_t pkts[IFF_MAX_BATCH_SIZE];
	uint32_t n_pkts = 0;

	FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
	    "%s -> %s", fakeif->iff_name, peer_fakeif->iff_name);

	while (sph != 0) {
		uint16_t off;
		kern_packet_t next;

		/* detach the head packet from the chain before handling it */
		next = kern_packet_get_next(sph);
		kern_packet_set_next(sph, 0);

		/* bpf tap output */
		off = kern_packet_get_headroom(sph);
		VERIFY(off >= fakeif->iff_tx_headroom);
		kern_packet_set_link_header_length(sph, ETHER_HDR_LEN);
		feth_packet_set_trace_tag(sph, IFF_TT_OUTPUT);
		bpf_tap_packet_out(fakeif->iff_ifp, DLT_EN10MB, sph, NULL, 0);

		/* drop packets, if requested */
		fakeif->iff_tx_pkts_count++;
		if (feth_tx_expired_error(fakeif, sph) ||
		    feth_tx_complete_error(fakeif, sph)) {
			/* counter resets so drops are spaced by the drop rate */
			fakeif->iff_tx_pkts_count = 0;
			kern_pbufpool_free(fakeif->iff_tx_pp, sph);
			STATS_INC(nifs, NETIF_STATS_DROP);
			goto next_pkt;
		}
		ASSERT(sph != 0);
		STATS_INC(nifs, NETIF_STATS_TX_COPY_DIRECT);
		STATS_INC(nifs, NETIF_STATS_TX_PACKETS);

		/* prepare batch for receiver */
		pkts[n_pkts++] = sph;
		if (n_pkts == IFF_MAX_BATCH_SIZE) {
			/* batch full: hand it to the peer and complete tx */
			feth_rx_queue_submit(fakeif, peer_fakeif, llink_idx,
			    qset_idx, pkts, n_pkts);
			feth_tx_complete(fakeif, pkts, n_pkts);
			n_pkts = 0;
		}
next_pkt:
		sph = next;
	}
	/* catch last batch for receiver */
	if (n_pkts != 0) {
		feth_rx_queue_submit(fakeif, peer_fakeif, llink_idx, qset_idx,
		    pkts, n_pkts);
		feth_tx_complete(fakeif, pkts, n_pkts);
		n_pkts = 0;
	}
}
2909 
/*
 * feth_nx_tx_qset_notify
 * - netif tx doorbell for a qset: drains every tx queue of the qset
 *   and delivers the dequeued chains to the peer interface's rx path
 * - silently returns 0 (nothing to do) when either this interface or
 *   its peer is detaching or not channel-connected
 * - when the if_fake_switch_combined_mode test knob is set, toggles
 *   the qset between combined and separate mode every
 *   if_fake_switch_mode_frequency dequeues
 */
static errno_t
feth_nx_tx_qset_notify(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    void *qset_ctx, uint32_t flags)
{
#pragma unused(nxprov)
	if_fake_ref             fakeif;
	ifnet_t                 ifp;
	ifnet_t                 peer_ifp;
	if_fake_ref             peer_fakeif = NULL;
	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
	fake_qset               *qset = qset_ctx;
	boolean_t               detaching, connected;
	uint32_t                i;
	errno_t                 err;

	STATS_INC(nifs, NETIF_STATS_TX_SYNC);
	fakeif = feth_nexus_context(nexus);
	FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
	    "%s qset %p, idx %d, flags 0x%x", fakeif->iff_name, qset,
	    qset->fqs_idx, flags);

	feth_lock();
	/* bail if this interface is going away or has no channel */
	detaching = feth_is_detaching(fakeif);
	connected = fakeif->iff_channel_connected;
	if (detaching || !connected) {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
		    "%s: detaching %s, channel connected %s",
		    fakeif->iff_name,
		    (detaching ? "true" : "false"),
		    (connected ? "true" : "false"));
		feth_unlock();
		return 0;
	}
	/* NOTE(review): ifp is assigned here but not used below */
	ifp = fakeif->iff_ifp;
	peer_ifp = fakeif->iff_peer;
	/* the peer must exist, and also be attached and connected */
	if (peer_ifp != NULL) {
		peer_fakeif = ifnet_get_if_fake(peer_ifp);
		if (peer_fakeif != NULL) {
			detaching = feth_is_detaching(peer_fakeif);
			connected = peer_fakeif->iff_channel_connected;
			if (detaching || !connected) {
				FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
				    "peer %s: detaching %s, "
				    "channel connected %s",
				    peer_fakeif->iff_name,
				    (detaching ? "true" : "false"),
				    (connected ? "true" : "false"));
				goto done;
			}
		} else {
			FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
			    "peer_fakeif is NULL");
			goto done;
		}
	} else {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT, "peer_ifp is NULL");
		goto done;
	}

	/* test knob: periodically flip combined/separate qset mode */
	if (if_fake_switch_combined_mode &&
	    qset->fqs_dequeue_cnt >= if_fake_switch_mode_frequency) {
		if (qset->fqs_combined_mode) {
			kern_netif_set_qset_separate(qset->fqs_qset);
		} else {
			kern_netif_set_qset_combined(qset->fqs_qset);
		}
		qset->fqs_combined_mode = !qset->fqs_combined_mode;
		qset->fqs_dequeue_cnt = 0;
	}

	/* drain each tx queue and hand its chain to the peer's rx path */
	for (i = 0; i < qset->fqs_tx_queue_cnt; i++) {
		kern_packet_t sph = 0;
		kern_netif_queue_t queue = qset->fqs_tx_queue[i].fq_queue;
		boolean_t more = FALSE;

		err = kern_netif_queue_tx_dequeue(queue, UINT32_MAX, UINT32_MAX,
		    &more, &sph);
		if (err != 0 && err != EAGAIN) {
			FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
			    "%s queue %p dequeue failed: err "
			    "%d", fakeif->iff_name, queue, err);
		}
		/* sph may be 0 here; the deliver routine handles an empty chain */
		feth_nx_tx_queue_deliver_pkt_chain(fakeif, sph, nifs,
		    peer_fakeif, qset->fqs_llink_idx, qset->fqs_idx);
	}

done:
	feth_unlock();
	return 0;
}
3000 
3001 
3002 static errno_t
feth_nx_queue_tx_push(kern_nexus_provider_t nxprov,kern_nexus_t nexus,void * queue_ctx,kern_packet_t * ph,uint32_t * packetCount,uint32_t * byteCount)3003 feth_nx_queue_tx_push(kern_nexus_provider_t nxprov,
3004     kern_nexus_t nexus, void *queue_ctx, kern_packet_t *ph,
3005     uint32_t *packetCount, uint32_t *byteCount)
3006 {
3007 #pragma unused(nxprov)
3008 	if_fake_ref             fakeif;
3009 	ifnet_t                 ifp;
3010 	ifnet_t                 peer_ifp;
3011 	if_fake_ref             peer_fakeif = NULL;
3012 	struct netif_stats     *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
3013 	fake_queue             *fq = queue_ctx;
3014 	boolean_t               detaching, connected;
3015 
3016 	STATS_INC(nifs, NETIF_STATS_TX_SYNC);
3017 	fakeif = feth_nexus_context(nexus);
3018 	FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT, "%s queue %p", fakeif->iff_name, fq);
3019 
3020 	feth_lock();
3021 
3022 	detaching = feth_is_detaching(fakeif);
3023 	connected = fakeif->iff_channel_connected;
3024 	if (detaching || !connected) {
3025 		FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
3026 		    "%s: detaching %s, channel connected %s",
3027 		    fakeif->iff_name,
3028 		    (detaching ? "true" : "false"),
3029 		    (connected ? "true" : "false"));
3030 		goto done;
3031 	}
3032 	ifp = fakeif->iff_ifp;
3033 	peer_ifp = fakeif->iff_peer;
3034 	if (peer_ifp != NULL) {
3035 		peer_fakeif = ifnet_get_if_fake(peer_ifp);
3036 		if (peer_fakeif != NULL) {
3037 			detaching = feth_is_detaching(peer_fakeif);
3038 			connected = peer_fakeif->iff_channel_connected;
3039 			if (detaching || !connected) {
3040 				FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
3041 				    "peer %s: detaching %s, "
3042 				    "channel connected %s",
3043 				    peer_fakeif->iff_name,
3044 				    (detaching ? "true" : "false"),
3045 				    (connected ? "true" : "false"));
3046 				goto done;
3047 			}
3048 		} else {
3049 			FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
3050 			    "peer_fakeif is NULL");
3051 			goto done;
3052 		}
3053 	} else {
3054 		FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT, "peer_ifp is NULL");
3055 		goto done;
3056 	}
3057 
3058 	*packetCount = *byteCount = 0;
3059 
3060 	kern_packet_t sph = *ph;
3061 	while (sph != 0) {
3062 		(*packetCount)++;
3063 		*byteCount += kern_packet_get_data_length(sph);
3064 		sph = kern_packet_get_next(sph);
3065 	}
3066 
3067 	feth_nx_tx_queue_deliver_pkt_chain(fakeif, *ph, nifs,
3068 	    peer_fakeif, 0, 0);
3069 
3070 	*ph = 0;
3071 
3072 done:
3073 	feth_unlock();
3074 	return 0;
3075 }
3076 
3077 
3078 static void
fill_qset_info_and_params(if_fake_ref fakeif,fake_llink * llink_info,uint32_t qset_idx,struct kern_nexus_netif_llink_qset_init * qset_init,bool is_def,bool is_low_latency)3079 fill_qset_info_and_params(if_fake_ref fakeif, fake_llink *llink_info,
3080     uint32_t qset_idx, struct kern_nexus_netif_llink_qset_init *qset_init,
3081     bool is_def, bool is_low_latency)
3082 {
3083 	fake_qset *qset_info = &llink_info->fl_qset[qset_idx];
3084 
3085 	qset_init->nlqi_flags =
3086 	    (is_def ? KERN_NEXUS_NET_LLINK_QSET_DEFAULT : 0) |
3087 	    (is_low_latency ? KERN_NEXUS_NET_LLINK_QSET_LOW_LATENCY : 0) |
3088 	    KERN_NEXUS_NET_LLINK_QSET_AQM;
3089 
3090 	if (feth_in_wmm_mode(fakeif)) {
3091 		qset_init->nlqi_flags |= KERN_NEXUS_NET_LLINK_QSET_WMM_MODE;
3092 		qset_init->nlqi_num_txqs = IFF_NUM_TX_QUEUES_WMM_MODE;
3093 		qset_init->nlqi_num_rxqs = IFF_NUM_RX_QUEUES_WMM_MODE;
3094 	} else {
3095 		qset_init->nlqi_num_txqs = 1;
3096 		qset_init->nlqi_num_rxqs = 1;
3097 	}
3098 	qset_info->fqs_tx_queue_cnt = qset_init->nlqi_num_txqs;
3099 	qset_info->fqs_rx_queue_cnt = qset_init->nlqi_num_rxqs;
3100 
3101 	/* These are needed for locating the peer qset */
3102 	qset_info->fqs_llink_idx = llink_info->fl_idx;
3103 	qset_info->fqs_idx = qset_idx;
3104 }
3105 
3106 static void
fill_llink_info_and_params(if_fake_ref fakeif,uint32_t llink_idx,struct kern_nexus_netif_llink_init * llink_init,uint32_t llink_id,struct kern_nexus_netif_llink_qset_init * qset_init,uint32_t qset_cnt,uint32_t flags)3107 fill_llink_info_and_params(if_fake_ref fakeif, uint32_t llink_idx,
3108     struct kern_nexus_netif_llink_init *llink_init, uint32_t llink_id,
3109     struct kern_nexus_netif_llink_qset_init *qset_init, uint32_t qset_cnt,
3110     uint32_t flags)
3111 {
3112 	fake_llink *llink_info = &fakeif->iff_llink[llink_idx];
3113 	uint32_t i;
3114 	bool create_ll_qset = if_fake_low_latency && (llink_idx != 0);
3115 
3116 	for (i = 0; i < qset_cnt; i++) {
3117 		fill_qset_info_and_params(fakeif, llink_info, i,
3118 		    &qset_init[i], i == 0, create_ll_qset && i == 1);
3119 	}
3120 	llink_info->fl_idx = llink_idx;
3121 
3122 	/* This doesn't have to be the same as llink_idx */
3123 	llink_info->fl_id = llink_id;
3124 	llink_info->fl_qset_cnt = qset_cnt;
3125 
3126 	llink_init->nli_link_id = llink_id;
3127 	llink_init->nli_num_qsets = qset_cnt;
3128 	llink_init->nli_qsets = qset_init;
3129 	llink_init->nli_flags = flags;
3130 	llink_init->nli_ctx = llink_info;
3131 }
3132 
/*
 * Create the non-default logical links on the netif nexus.  The
 * default llink (index 0) is created as part of the provider
 * instance; this adds llinks 1..if_fake_llink_cnt-1, each with
 * if_fake_qset_cnt qsets.  On failure, every llink counted in
 * iff_llink_cnt is removed again and the error is returned.
 */
static errno_t
create_non_default_llinks(if_fake_ref fakeif)
{
	struct kern_nexus *nx;
	fake_nx_t fnx = &fakeif->iff_nx;
	struct kern_nexus_netif_llink_init llink_init;
	struct kern_nexus_netif_llink_qset_init qset_init[FETH_MAX_QSETS];
	errno_t err;
	uint64_t llink_id;
	uint32_t i;

	/* resolve the nexus instance UUID to the kernel nexus object */
	nx = nx_find(fnx->fnx_instance, FALSE);
	if (nx == NULL) {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
		    "%s: nx not found", fakeif->iff_name);
		return ENXIO;
	}
	/* Default llink starts at index 0 */
	for (i = 1; i < if_fake_llink_cnt; i++) {
		llink_id = (uint64_t)i;

		/*
		 * The llink_init and qset_init structures are reused for
		 * each llink creation.
		 */
		fill_llink_info_and_params(fakeif, i, &llink_init,
		    llink_id, qset_init, if_fake_qset_cnt, 0);
		err = kern_nexus_netif_llink_add(nx, &llink_init);
		if (err != 0) {
			FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
			    "%s: llink add failed, error %d",
			    fakeif->iff_name, err);
			goto fail;
		}
		fakeif->iff_llink_cnt++;
	}
	nx_release(nx);
	return 0;

fail:
	/*
	 * Unwind: remove every llink counted in iff_llink_cnt.
	 * NOTE(review): this loop starts at index 0, which includes the
	 * default llink created with the provider instance, while
	 * remove_non_default_llinks() skips index 0 because "the default
	 * llink is freed separately" — confirm starting at 0 here is
	 * intentional.
	 */
	for (i = 0; i < fakeif->iff_llink_cnt; i++) {
		int                     error;
		fake_llink * __single   ll = &fakeif->iff_llink[i];

		error = kern_nexus_netif_llink_remove(nx, ll->fl_id);
		if (error != 0) {
			FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
			    "%s: llink remove failed, llink_id 0x%llx, "
			    "error %d", fakeif->iff_name,
			    ll->fl_id, error);
		}
		ll->fl_id = 0;
	}
	fakeif->iff_llink_cnt = 0;
	nx_release(nx);
	return err;
}
3190 
/*
 * Register a llink-capable netif nexus provider for this fake
 * interface and allocate a provider instance bound to an ifnet.
 * Creates the default llink as part of the instance, then (when
 * if_fake_llink_cnt > 1) the non-default llinks, and finally enables
 * TSO offload when supported.
 *
 * Note: the "failed" label is reached on BOTH the success and error
 * paths; it only destroys the temporary nexus attribute and returns
 * err (which is 0 on success).
 */
static errno_t
create_netif_llink_provider_and_instance(if_fake_ref fakeif,
    struct ifnet_init_eparams * init_params, ifnet_t *ifp,
    uuid_t * provider, uuid_t * instance)
{
	errno_t                 err;
	nexus_controller_t      controller = kern_nexus_shared_controller();
	struct kern_nexus_net_init net_init;
	struct kern_nexus_netif_llink_init llink_init;
	struct kern_nexus_netif_llink_qset_init qsets[FETH_MAX_QSETS];

	nexus_name_t            provider_name;
	nexus_attr_t            nexus_attr = NULL;
	/* callback table handed to the netif domain provider */
	struct kern_nexus_netif_provider_init prov_init = {
		.nxnpi_version = KERN_NEXUS_DOMAIN_PROVIDER_NETIF,
		.nxnpi_flags = NXPIF_VIRTUAL_DEVICE,
		.nxnpi_pre_connect = feth_nx_pre_connect,
		.nxnpi_connected = feth_nx_connected,
		.nxnpi_pre_disconnect = feth_nx_pre_disconnect,
		.nxnpi_disconnected = feth_nx_disconnected,
		.nxnpi_qset_init = feth_nx_qset_init,
		.nxnpi_qset_fini = feth_nx_qset_fini,
		.nxnpi_queue_init = feth_nx_queue_init,
		.nxnpi_queue_fini = feth_nx_queue_fini,
		.nxnpi_tx_qset_notify = feth_nx_tx_qset_notify,
		.nxnpi_config_capab = feth_nx_capab_config,
		.nxnpi_queue_tx_push = feth_nx_queue_tx_push
	};

	err = kern_nexus_attr_create(&nexus_attr);
	if (err != 0) {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
		    "nexus attribute creation failed, error %d", err);
		goto failed;
	}

	err = kern_nexus_attr_set(nexus_attr, NEXUS_ATTR_ANONYMOUS, 1);
	VERIFY(err == 0);

	/* provider name is derived from the interface name */
	snprintf((char *)provider_name, sizeof(provider_name),
	    "com.apple.netif.%s", fakeif->iff_name);
	err = kern_nexus_controller_register_provider(controller,
	    feth_nx_dom_prov,
	    provider_name,
	    (struct kern_nexus_provider_init *)&prov_init,
	    sizeof(prov_init),
	    nexus_attr,
	    provider);
	if (err != 0) {
		FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
		    "register provider failed, error %d", err);
		goto failed;
	}
	bzero(&net_init, sizeof(net_init));
	net_init.nxneti_version = KERN_NEXUS_NET_CURRENT_VERSION;
	net_init.nxneti_flags = 0;
	net_init.nxneti_eparams = init_params;
	net_init.nxneti_lladdr = NULL;
	net_init.nxneti_prepare = feth_netif_prepare;
	net_init.nxneti_rx_pbufpool = fakeif->iff_rx_pp;
	net_init.nxneti_tx_pbufpool = fakeif->iff_tx_pp;

	/*
	 * Assume llink id is same as the index for if_fake.
	 * This is not required for other drivers.
	 */
	_CASSERT(NETIF_LLINK_ID_DEFAULT == 0);
	fill_llink_info_and_params(fakeif, 0, &llink_init,
	    NETIF_LLINK_ID_DEFAULT, qsets, if_fake_qset_cnt,
	    KERN_NEXUS_NET_LLINK_DEFAULT);

	net_init.nxneti_llink = &llink_init;

	/* creates the ifnet and the default llink along with it */
	err = kern_nexus_controller_alloc_net_provider_instance(controller,
	    *provider, fakeif, NULL, instance, &net_init, ifp);
	if (err != 0) {
		FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
		    "alloc_net_provider_instance failed, %d", err);
		kern_nexus_controller_deregister_provider(controller,
		    *provider);
		uuid_clear(*provider);
		goto failed;
	}
	/* count the default llink created by the instance above */
	fakeif->iff_llink_cnt++;

	if (if_fake_llink_cnt > 1) {
		err = create_non_default_llinks(fakeif);
		if (err != 0) {
			FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
			    "create_non_default_llinks failed, %d", err);
			feth_detach_netif_nexus(fakeif);
			goto failed;
		}
	}
	if (feth_supports_tso(fakeif)) {
		/*
		 * NOTE(review): on TSO setup failure the provider instance
		 * is left allocated (unlike the llink failure path above,
		 * which detaches the nexus) — confirm the caller tears it
		 * down.
		 */
		if ((err = feth_set_tso_offload(*ifp)) != 0) {
			goto failed;
		}
	}
failed:
	/* reached on success too; only releases the temporary attribute */
	if (nexus_attr != NULL) {
		kern_nexus_attr_destroy(nexus_attr);
	}
	return err;
}
3296 
3297 static errno_t
feth_attach_netif_nexus(if_fake_ref fakeif,struct ifnet_init_eparams * init_params,ifnet_t * ifp)3298 feth_attach_netif_nexus(if_fake_ref fakeif,
3299     struct ifnet_init_eparams * init_params, ifnet_t *ifp)
3300 {
3301 	errno_t                 error;
3302 	fake_nx_t               nx = &fakeif->iff_nx;
3303 
3304 	error = feth_packet_pool_make(fakeif);
3305 	if (error != 0) {
3306 		return error;
3307 	}
3308 	if (if_fake_llink_cnt == 0) {
3309 		return create_netif_provider_and_instance(fakeif, init_params,
3310 		           ifp, &nx->fnx_provider, &nx->fnx_instance);
3311 	} else {
3312 		return create_netif_llink_provider_and_instance(fakeif,
3313 		           init_params, ifp, &nx->fnx_provider,
3314 		           &nx->fnx_instance);
3315 	}
3316 }
3317 
3318 static void
remove_non_default_llinks(if_fake_ref fakeif)3319 remove_non_default_llinks(if_fake_ref fakeif)
3320 {
3321 	struct kern_nexus *nx;
3322 	fake_nx_t fnx = &fakeif->iff_nx;
3323 	uint32_t i;
3324 
3325 	if (fakeif->iff_llink_cnt <= 1) {
3326 		return;
3327 	}
3328 	nx = nx_find(fnx->fnx_instance, FALSE);
3329 	if (nx == NULL) {
3330 		FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
3331 		    "%s: nx not found", fakeif->iff_name);
3332 		return;
3333 	}
3334 	/* Default llink (at index 0) is freed separately */
3335 	for (i = 1; i < fakeif->iff_llink_cnt; i++) {
3336 		int err;
3337 
3338 		err = kern_nexus_netif_llink_remove(nx, fakeif->
3339 		    iff_llink[i].fl_id);
3340 		if (err != 0) {
3341 			FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
3342 			    "%s: llink remove failed, llink_id 0x%llx, "
3343 			    "error %d", fakeif->iff_name,
3344 			    fakeif->iff_llink[i].fl_id, err);
3345 		}
3346 		fakeif->iff_llink[i].fl_id = 0;
3347 	}
3348 	fakeif->iff_llink_cnt = 0;
3349 	nx_release(nx);
3350 }
3351 
3352 static void
detach_provider_and_instance(uuid_t provider,uuid_t instance)3353 detach_provider_and_instance(uuid_t provider, uuid_t instance)
3354 {
3355 	nexus_controller_t controller = kern_nexus_shared_controller();
3356 	errno_t err;
3357 
3358 	if (!uuid_is_null(instance)) {
3359 		err = kern_nexus_controller_free_provider_instance(controller,
3360 		    instance);
3361 		if (err != 0) {
3362 			FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
3363 			    "free_provider_instance failed %d", err);
3364 		}
3365 		uuid_clear(instance);
3366 	}
3367 	if (!uuid_is_null(provider)) {
3368 		err = kern_nexus_controller_deregister_provider(controller,
3369 		    provider);
3370 		if (err != 0) {
3371 			FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
3372 			    "deregister_provider %d", err);
3373 		}
3374 		uuid_clear(provider);
3375 	}
3376 	return;
3377 }
3378 
3379 static void
feth_detach_netif_nexus(if_fake_ref fakeif)3380 feth_detach_netif_nexus(if_fake_ref fakeif)
3381 {
3382 	fake_nx_t fnx = &fakeif->iff_nx;
3383 
3384 	remove_non_default_llinks(fakeif);
3385 	detach_provider_and_instance(fnx->fnx_provider, fnx->fnx_instance);
3386 }
3387 
3388 #endif /* SKYWALK */
3389 
3390 /**
3391 ** feth interface routines
3392 **/
3393 static void
feth_ifnet_set_attrs(if_fake_ref fakeif,ifnet_t ifp)3394 feth_ifnet_set_attrs(if_fake_ref fakeif, ifnet_t ifp)
3395 {
3396 	errno_t         error;
3397 	ifnet_offload_t offload = 0;
3398 
3399 	ifnet_set_addrlen(ifp, ETHER_ADDR_LEN);
3400 	ifnet_set_baudrate(ifp, 0);
3401 	ifnet_set_mtu(ifp, ETHERMTU);
3402 	ifnet_set_flags(ifp,
3403 	    IFF_BROADCAST | IFF_MULTICAST | IFF_SIMPLEX,
3404 	    0xffff);
3405 	ifnet_set_hdrlen(ifp, sizeof(struct ether_header));
3406 	if ((fakeif->iff_flags & IFF_FLAGS_LRO) != 0) {
3407 		offload |= IFNET_LRO;
3408 	}
3409 	if ((fakeif->iff_flags & IFF_FLAGS_HWCSUM) != 0) {
3410 		offload |= IFNET_CSUM_IP | IFNET_CSUM_TCP | IFNET_CSUM_UDP |
3411 		    IFNET_CSUM_TCPIPV6 | IFNET_CSUM_UDPIPV6;
3412 	}
3413 	if (feth_supports_tso(fakeif)) {
3414 		offload |= IFNET_TSO_IPV4 | IFNET_TSO_IPV6;
3415 	}
3416 	if (feth_supports_vlan_tagging(fakeif)) {
3417 		offload |= IFNET_VLAN_TAGGING;
3418 	} else if (feth_supports_vlan_mtu(fakeif)) {
3419 		offload |= IFNET_VLAN_MTU;
3420 	}
3421 	error = ifnet_set_offload(ifp, offload);
3422 	if (error != 0) {
3423 		FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
3424 		    "ifnet_set_offload(%s, 0x%x) failed, %d",
3425 		    ifp->if_xname, offload, error);
3426 	} else {
3427 		FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
3428 		    "ifnet_set_offload(%s, 0x%x) succeeded",
3429 		    ifp->if_xname, offload);
3430 	}
3431 }
3432 
3433 static void
interface_link_event(ifnet_t ifp,u_int32_t event_code)3434 interface_link_event(ifnet_t ifp, u_int32_t event_code)
3435 {
3436 	struct event {
3437 		u_int32_t ifnet_family;
3438 		u_int32_t unit;
3439 		char if_name[IFNAMSIZ];
3440 	};
3441 	_Alignas(struct kern_event_msg) char message[sizeof(struct kern_event_msg) + sizeof(struct event)] = { 0 };
3442 	struct kern_event_msg *header = (struct kern_event_msg*)message;
3443 	struct event *data = (struct event *)(header + 1);
3444 
3445 	header->total_size   = sizeof(message);
3446 	header->vendor_code  = KEV_VENDOR_APPLE;
3447 	header->kev_class    = KEV_NETWORK_CLASS;
3448 	header->kev_subclass = KEV_DL_SUBCLASS;
3449 	header->event_code   = event_code;
3450 	data->ifnet_family   = ifnet_family(ifp);
3451 	data->unit           = (u_int32_t)ifnet_unit(ifp);
3452 	strlcpy(data->if_name, ifnet_name(ifp), IFNAMSIZ);
3453 	ifnet_event(ifp, header);
3454 }
3455 
3456 static if_fake_ref
ifnet_get_if_fake(ifnet_t ifp)3457 ifnet_get_if_fake(ifnet_t ifp)
3458 {
3459 	return (if_fake_ref)ifnet_softc(ifp);
3460 }
3461 
/*
 * if_clone create handler for "feth" interfaces.
 * - validates the global configuration knobs (llink/TSO/multi-buflet
 *   consistency is only enforced in native, non-BSD mode)
 * - allocates and initializes the if_fake softc from the knobs
 * - creates the ifnet either the BSD way (ifnet_allocate_extended +
 *   ifnet_attach) or via a Skywalk netif nexus attach
 * - derives the MAC address from "feth" + the unit number
 */
static int
feth_clone_create(struct if_clone *ifc, u_int32_t unit, __unused void *params)
{
	bool                            bsd_mode;
	int                             error;
	if_fake_ref                     fakeif;
	struct ifnet_init_eparams       feth_init;
	fake_llink                     *iff_llink;
	ifnet_t                         ifp;
	uint8_t                         mac_address[ETHER_ADDR_LEN];
	bool                            multi_buflet;
	iff_pktpool_mode_t              pktpool_mode;
	bool                            tso_support;

	/* make local copy of globals needed to make consistency checks below */
	bsd_mode = (if_fake_bsd_mode != 0);
	multi_buflet = (if_fake_multibuflet != 0);
	tso_support = (if_fake_tso_support != 0);
	pktpool_mode = if_fake_pktpool_mode;

	if (!bsd_mode) {
		/* consistency checks */
		if (if_fake_llink_cnt == 0 &&
		    strbufcmp(sk_ll_prefix, FAKE_ETHER_NAME) == 0) {
			FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
			    "feth used as ifname prefix but logical link "
			    "support in feth is disabled.");
			return EINVAL;
		}
		if (tso_support && pktpool_mode != IFF_PP_MODE_GLOBAL) {
			FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
			    "TSO mode requires global packet pool mode");
			return EINVAL;
		}
		if (multi_buflet && pktpool_mode == IFF_PP_MODE_PRIVATE_SPLIT) {
			FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
			    "multi-buflet not supported for split rx & tx pool");
			return EINVAL;
		}
	}

	/* llink array is allocated separately from the softc */
	iff_llink = kalloc_type(fake_llink, FETH_MAX_LLINKS, Z_WAITOK_ZERO);
	if (iff_llink == NULL) {
		return ENOBUFS;
	}
	fakeif = kalloc_type(struct if_fake, Z_WAITOK_ZERO_NOFAIL);
	fakeif->iff_llink = iff_llink;
	fakeif->iff_retain_count = 1;
#define FAKE_ETHER_NAME_LEN     (sizeof(FAKE_ETHER_NAME) - 1)
	_CASSERT(FAKE_ETHER_NAME_LEN == 4);
	/* MAC address is "feth" (4 bytes) followed by the 16-bit unit */
	bcopy(FAKE_ETHER_NAME, mac_address, FAKE_ETHER_NAME_LEN);
	mac_address[ETHER_ADDR_LEN - 2] = (unit & 0xff00) >> 8;
	mac_address[ETHER_ADDR_LEN - 1] = unit & 0xff;
	if (bsd_mode) {
		fakeif->iff_flags |= IFF_FLAGS_BSD_MODE;
	}
	if (if_fake_hwcsum != 0) {
		fakeif->iff_flags |= IFF_FLAGS_HWCSUM;
	}
	if (if_fake_lro != 0) {
		fakeif->iff_flags |= IFF_FLAGS_LRO;
	}
	if (if_fake_vlan_tagging != 0) {
		/* support VLAN tagging in hardware */
		feth_set_supports_vlan_tagging(fakeif);
	} else {
		/* support VLAN mtu-sized packets */
		feth_set_supports_vlan_mtu(fakeif);
	}
	if (if_fake_separate_frame_header != 0) {
		fakeif->iff_flags |= IFF_FLAGS_SEPARATE_FRAME_HEADER;
	}
	fakeif->iff_max_mtu = get_max_mtu(bsd_mode, if_fake_max_mtu);
	fakeif->iff_fcs = if_fake_fcs;
	fakeif->iff_trailer_length = if_fake_trailer_length;

	/* use the interface name as the unique id for ifp recycle */
	if ((unsigned int)
	    snprintf(fakeif->iff_name, sizeof(fakeif->iff_name), "%s%d",
	    ifc->ifc_name, unit) >= sizeof(fakeif->iff_name)) {
		feth_release(fakeif);
		return EINVAL;
	}
	bzero(&feth_init, sizeof(feth_init));
	feth_init.ver = IFNET_INIT_CURRENT_VERSION;
	feth_init.len = sizeof(feth_init);
	if (feth_in_bsd_mode(fakeif)) {
		/* BSD mode: classic start/output model */
		if (if_fake_txstart != 0) {
			feth_init.start = feth_start;
		} else {
			feth_init.flags |= IFNET_INIT_LEGACY;
			feth_init.output = feth_output;
		}
		if (tso_support) {
			feth_set_supports_tso(fakeif);
		}
	}
#if SKYWALK
	else {
		feth_init.flags |= IFNET_INIT_SKYWALK_NATIVE;
		/*
		 * Currently we support WMM mode only for Skywalk native
		 * interface.
		 */
		if (if_fake_wmm_mode != 0) {
			fakeif->iff_flags |= IFF_FLAGS_WMM_MODE;
		}

		if (multi_buflet) {
			fakeif->iff_flags |= IFF_FLAGS_MULTIBUFLETS;
		}

		fakeif->iff_pp_mode = pktpool_mode;
		if (tso_support) {
			feth_set_supports_tso(fakeif);
		}

		fakeif->iff_tx_headroom = if_fake_tx_headroom;
		fakeif->iff_adv_interval = if_fake_if_adv_interval;
		if (fakeif->iff_adv_interval > 0) {
			feth_init.flags |= IFNET_INIT_IF_ADV;
		}
		/* tx error-injection knobs */
		fakeif->iff_tx_drop_rate = if_fake_tx_drops;
		fakeif->iff_tx_completion_mode = if_tx_completion_mode;
		fakeif->iff_tx_exp_policy = if_fake_tx_exp_policy;
	}
	feth_init.tx_headroom = fakeif->iff_tx_headroom;
#endif /* SKYWALK */
	if (if_fake_nxattach == 0) {
		feth_init.flags |= IFNET_INIT_NX_NOAUTO;
	}
	feth_init.uniqueid = fakeif->iff_name;
	feth_init.uniqueid_len = strlen(fakeif->iff_name);
	feth_init.name = ifc->ifc_name;
	feth_init.unit = unit;
	feth_init.family = IFNET_FAMILY_ETHERNET;
	feth_init.type = IFT_ETHER;
	feth_init.demux = ether_demux;
	feth_init.add_proto = ether_add_proto;
	feth_init.del_proto = ether_del_proto;
	feth_init.check_multi = ether_check_multi;
	feth_init.framer_extended = ether_frameout_extended;
	feth_init.softc = fakeif;
	feth_init.ioctl = feth_ioctl;
	feth_init.set_bpf_tap = NULL;
	feth_init.detach = feth_if_free;
	feth_init.broadcast_addr = etherbroadcastaddr;
	feth_init.broadcast_len = ETHER_ADDR_LEN;
	if (feth_in_bsd_mode(fakeif)) {
		/* BSD mode: allocate the ifnet directly */
		error = ifnet_allocate_extended(&feth_init, &ifp);
		if (error) {
			feth_release(fakeif);
			return error;
		}
		feth_ifnet_set_attrs(fakeif, ifp);
		if (feth_supports_tso(fakeif)) {
			feth_set_tso_mtu(ifp, IP_MAXPACKET, IP_MAXPACKET);
		}
	}
#if SKYWALK
	else {
		if (feth_in_wmm_mode(fakeif)) {
			feth_init.output_sched_model =
			    IFNET_SCHED_MODEL_DRIVER_MANAGED;
		}
		/* native mode: the nexus attach creates the ifnet */
		error = feth_attach_netif_nexus(fakeif, &feth_init, &ifp);
		if (error != 0) {
			feth_release(fakeif);
			return error;
		}
		/* take an additional reference to ensure that it doesn't go away */
		feth_retain(fakeif);
		fakeif->iff_ifp = ifp;
	}
#endif /* SKYWALK */
	fakeif->iff_media_count = MIN(default_media_words_count, IF_FAKE_MEDIA_LIST_MAX);
	bcopy(default_media_words, fakeif->iff_media_list,
	    fakeif->iff_media_count * sizeof(fakeif->iff_media_list[0]));
	if (feth_in_bsd_mode(fakeif)) {
		error = ifnet_attach(ifp, NULL);
		if (error) {
			ifnet_release(ifp);
			feth_release(fakeif);
			return error;
		}
		fakeif->iff_ifp = ifp;
	}

	ifnet_set_lladdr(ifp, mac_address, sizeof(mac_address));

	/* attach as ethernet */
	bpfattach(ifp, DLT_EN10MB, sizeof(struct ether_header));
	return 0;
}
3656 
/*
 * if_clone destroy handler for "feth" interfaces.
 * - marks the fakeif as detaching under the lock so concurrent
 *   callbacks bail out; a second destroy of the same ifp is a no-op
 * - for Skywalk-native interfaces, detaches the netif nexus and drops
 *   the extra reference taken at create time
 * - un-peers the interface (feth_config with NULL) and detaches the
 *   ifnet
 */
static int
feth_clone_destroy(ifnet_t ifp)
{
	if_fake_ref     fakeif;
#if SKYWALK
	boolean_t       nx_attached = FALSE;
#endif /* SKYWALK */

	feth_lock();
	fakeif = ifnet_get_if_fake(ifp);
	if (fakeif == NULL || feth_is_detaching(fakeif)) {
		/* already destroyed or destroy in progress */
		feth_unlock();
		return 0;
	}
	feth_set_detaching(fakeif);
#if SKYWALK
	/* non-BSD mode means a netif nexus was attached at create time */
	nx_attached = !feth_in_bsd_mode(fakeif);
#endif /* SKYWALK */
	feth_unlock();

#if SKYWALK
	if (nx_attached) {
		feth_detach_netif_nexus(fakeif);
		/* drop the reference taken in feth_clone_create() */
		feth_release(fakeif);
	}
#endif /* SKYWALK */
	/* break the peer cross-over before detaching the ifnet */
	feth_config(ifp, NULL);
	ifnet_detach(ifp);
	return 0;
}
3687 
3688 static void
feth_enqueue_input(ifnet_t ifp,struct mbuf * m)3689 feth_enqueue_input(ifnet_t ifp, struct mbuf * m)
3690 {
3691 	struct ifnet_stat_increment_param stats = {};
3692 
3693 	stats.packets_in = 1;
3694 	stats.bytes_in = (uint32_t)mbuf_pkthdr_len(m) + ETHER_HDR_LEN;
3695 	ifnet_input(ifp, m, &stats);
3696 }
3697 
3698 
3699 static int
feth_add_mbuf_trailer(struct mbuf * m,void * trailer,size_t trailer_len)3700 feth_add_mbuf_trailer(struct mbuf *m, void *trailer, size_t trailer_len)
3701 {
3702 	int ret;
3703 	ASSERT(trailer_len <= FETH_TRAILER_LENGTH_MAX);
3704 
3705 	ret = m_append(m, trailer_len, (caddr_t)trailer);
3706 	if (ret == 1) {
3707 		FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
3708 		    "%zuB trailer added", trailer_len);
3709 		return 0;
3710 	}
3711 	FAKE_LOG(LOG_NOTICE, FE_DBGF_OUTPUT, "m_append failed");
3712 	return ENOTSUP;
3713 }
3714 
3715 static int
feth_add_mbuf_fcs(struct mbuf * m)3716 feth_add_mbuf_fcs(struct mbuf *m)
3717 {
3718 	uint32_t pkt_len, offset = 0;
3719 	uint32_t crc = 0;
3720 	int err = 0;
3721 
3722 	ASSERT(sizeof(crc) == ETHER_CRC_LEN);
3723 
3724 	pkt_len = m->m_pkthdr.len;
3725 	struct mbuf *iter = m;
3726 	while (iter != NULL && offset < pkt_len) {
3727 		uint32_t frag_len = iter->m_len;
3728 		ASSERT(frag_len <= (pkt_len - offset));
3729 		crc = crc32(crc, mtod(iter, void *), frag_len);
3730 		offset += frag_len;
3731 		iter = iter->m_next;
3732 	}
3733 
3734 	err = feth_add_mbuf_trailer(m, &crc, ETHER_CRC_LEN);
3735 	if (err != 0) {
3736 		return err;
3737 	}
3738 
3739 	m->m_flags |= M_HASFCS;
3740 
3741 	return 0;
3742 }
3743 
static void
/*
 * Core "crossed-over" output path: account the packet as output on `ifp',
 * optionally append a trailer and/or FCS, split or adjust the ethernet
 * header per `flags', tap BPF on both sides, and enqueue the packet as
 * input on `peer'.  Consumes `m' (either enqueued to the peer or freed on
 * the m_copyup failure path).
 */
feth_output_common(ifnet_t ifp, struct mbuf * m, ifnet_t peer,
    iff_flags_t flags, bool fcs, void *trailer, size_t trailer_len)
{
	void *                  frame_header;

	if ((flags & IFF_FLAGS_HWCSUM) != 0) {
		/* pretend hardware validated the checksums */
		m->m_pkthdr.csum_data = 0xffff;
		m->m_pkthdr.csum_flags =
		    CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
		    CSUM_IP_CHECKED | CSUM_IP_VALID;
	}

	/* NB: byte count is taken before any trailer/FCS is appended */
	(void)ifnet_stat_increment_out(ifp, 1, m->m_pkthdr.len, 0);
	if (trailer_len != 0 && trailer != NULL) {
		feth_add_mbuf_trailer(m, trailer, trailer_len);
	}
	if (fcs) {
		feth_add_mbuf_fcs(m);
	}
	if ((flags & IFF_FLAGS_SEPARATE_FRAME_HEADER) != 0) {
		/* make the ethernet header contiguous, then detach it from
		 * the data span so the frame header pointer stands alone */
		m = m_copyup(m, ETHER_HDR_LEN, 0);
		if (m == NULL) {
			/* m_copyup freed the chain on failure */
			FAKE_LOG(LOG_NOTICE, FE_DBGF_OUTPUT, "m_copyup failed");
			goto done;
		}
		frame_header = mbuf_data(m);
		mbuf_pkthdr_setheader(m, frame_header);
		m_adj(m, ETHER_HDR_LEN);
		FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
		    "%s: frame 0x%llx data 0x%llx len %ld",
		    ifp->if_xname,
		    (uint64_t)VM_KERNEL_ADDRPERM(frame_header),
		    (uint64_t)VM_KERNEL_ADDRPERM(mtod(m, void *)),
		    mbuf_len(m));
	} else {
		/* keep header in-line; just advance pkthdr/data past it */
		frame_header = mbuf_data(m);
		mbuf_pkthdr_setheader(m, frame_header);
		_mbuf_adjust_pkthdr_and_data(m, ETHER_HDR_LEN);
	}

	/* tap it out */
	if (ifp->if_bpf != NULL) {
		fake_bpf_tap_out(ifp, m, frame_header);
	}

	/* tap it in */
	if (peer->if_bpf != NULL) {
		fake_bpf_tap_in(peer, m, frame_header);
	}

	(void)mbuf_pkthdr_setrcvif(m, peer);
	feth_enqueue_input(peer, m);
done:
	return;
}
3800 
static void
/*
 * if_start handler: drain ifp's output queue, forwarding each packet to
 * the peer's input path (or dropping it when unconnected).  The
 * iff_start_busy flag guarantees a single active drainer; the feth lock
 * is dropped while dequeuing so packet processing does not hold it.
 */
feth_start(ifnet_t ifp)
{
	if_fake_ref     fakeif;
	iff_flags_t     flags = 0;
	bool            fcs;
	struct mbuf *   m;
	ifnet_t         peer = NULL;
	size_t          trailer_len;

	feth_lock();
	fakeif = ifnet_get_if_fake(ifp);
	if (fakeif == NULL) {
		feth_unlock();
		return;
	}

	if (fakeif->iff_start_busy) {
		/* another thread is already draining this queue */
		feth_unlock();
		return;
	}

	/* snapshot peer and output options while holding the lock */
	peer = fakeif->iff_peer;
	flags = fakeif->iff_flags;
	fcs = fakeif->iff_fcs;
	trailer_len = fakeif->iff_trailer_length;

	fakeif->iff_start_busy = TRUE;
	feth_unlock();
	for (;;) {
		if (ifnet_dequeue(ifp, &m) != 0) {
			/* queue empty (or dequeue not possible): done */
			break;
		}
		if (peer == NULL) {
			/* not connected: drop */
			m_freem(m);
			continue;
		}
		if (m != NULL) {
			feth_output_common(ifp, m, peer, flags, fcs,
			    feth_trailer, trailer_len);
		}
	}
	/* re-fetch fakeif: state may have changed while unlocked */
	feth_lock();
	fakeif = ifnet_get_if_fake(ifp);
	if (fakeif != NULL) {
		fakeif->iff_start_busy = FALSE;
	}
	feth_unlock();
}
3850 
3851 static int
feth_output(ifnet_t ifp,struct mbuf * m)3852 feth_output(ifnet_t ifp, struct mbuf * m)
3853 {
3854 	if_fake_ref             fakeif;
3855 	iff_flags_t             flags;
3856 	bool                    fcs;
3857 	size_t                  trailer_len;
3858 	ifnet_t                 peer = NULL;
3859 
3860 	if (m == NULL) {
3861 		return 0;
3862 	}
3863 	feth_lock();
3864 	fakeif = ifnet_get_if_fake(ifp);
3865 	if (fakeif != NULL) {
3866 		peer = fakeif->iff_peer;
3867 		flags = fakeif->iff_flags;
3868 		fcs = fakeif->iff_fcs;
3869 		trailer_len = fakeif->iff_trailer_length;
3870 	}
3871 	feth_unlock();
3872 	if (peer == NULL) {
3873 		m_freem(m);
3874 		ifnet_stat_increment_out(ifp, 0, 0, 1);
3875 		return 0;
3876 	}
3877 	feth_output_common(ifp, m, peer, flags, fcs, feth_trailer, trailer_len);
3878 	return 0;
3879 }
3880 
static int
/*
 * Connect ifp to `peer' (peer != NULL) or disconnect ifp from its current
 * peer (peer == NULL), updating both sides' iff_peer symmetrically under
 * the feth lock.  Link on/off kernel events are posted to both interfaces
 * after the lock is dropped.  Returns 0, EINVAL (missing/mismatched
 * state), or EBUSY (either side already paired).
 */
feth_config(ifnet_t ifp, ifnet_t peer)
{
	int             connected = FALSE;
	int             disconnected = FALSE;
	int             error = 0;
	if_fake_ref     fakeif = NULL;

	feth_lock();
	fakeif = ifnet_get_if_fake(ifp);
	if (fakeif == NULL) {
		error = EINVAL;
		goto done;
	}
	if (peer != NULL) {
		/* connect to peer */
		if_fake_ref     peer_fakeif;

		peer_fakeif = ifnet_get_if_fake(peer);
		if (peer_fakeif == NULL) {
			error = EINVAL;
			goto done;
		}
		/* both sides must be alive and unpaired */
		if (feth_is_detaching(fakeif) ||
		    feth_is_detaching(peer_fakeif) ||
		    peer_fakeif->iff_peer != NULL ||
		    fakeif->iff_peer != NULL) {
			error = EBUSY;
			goto done;
		}
#if SKYWALK
		/* packet-pool modes must match to cross the traffic over */
		if (fakeif->iff_pp_mode !=
		    peer_fakeif->iff_pp_mode) {
			error = EINVAL;
			goto done;
		}
#endif /* SKYWALK */
		fakeif->iff_peer = peer;
		peer_fakeif->iff_peer = ifp;
		connected = TRUE;
	} else if (fakeif->iff_peer != NULL) {
		/* disconnect from peer */
		if_fake_ref     peer_fakeif;

		peer = fakeif->iff_peer;
		peer_fakeif = ifnet_get_if_fake(peer);
		if (peer_fakeif == NULL) {
			/* should not happen */
			error = EINVAL;
			goto done;
		}
		fakeif->iff_peer = NULL;
		peer_fakeif->iff_peer = NULL;
		disconnected = TRUE;
	}

done:
	feth_unlock();

	/* generate link status event if we connect or disconnect */
	if (connected) {
		interface_link_event(ifp, KEV_DL_LINK_ON);
		interface_link_event(peer, KEV_DL_LINK_ON);
	} else if (disconnected) {
		interface_link_event(ifp, KEV_DL_LINK_OFF);
		interface_link_event(peer, KEV_DL_LINK_OFF);
	}
	return error;
}
3950 
3951 static int
feth_set_media(ifnet_t ifp,struct if_fake_request * iffr)3952 feth_set_media(ifnet_t ifp, struct if_fake_request * iffr)
3953 {
3954 	if_fake_ref     fakeif;
3955 	int             error;
3956 
3957 	if (iffr->iffr_media.iffm_count > IF_FAKE_MEDIA_LIST_MAX) {
3958 		/* list is too long */
3959 		return EINVAL;
3960 	}
3961 	feth_lock();
3962 	fakeif = ifnet_get_if_fake(ifp);
3963 	if (fakeif == NULL) {
3964 		error = EINVAL;
3965 		goto done;
3966 	}
3967 	fakeif->iff_media_count = iffr->iffr_media.iffm_count;
3968 	bcopy(iffr->iffr_media.iffm_list, fakeif->iff_media_list,
3969 	    iffr->iffr_media.iffm_count * sizeof(fakeif->iff_media_list[0]));
3970 #if 0
3971 	/* XXX: "auto-negotiate" active with peer? */
3972 	/* generate link status event? */
3973 	fakeif->iff_media_current = iffr->iffr_media.iffm_current;
3974 #endif
3975 	error = 0;
3976 done:
3977 	feth_unlock();
3978 	return error;
3979 }
3980 
3981 static int
if_fake_request_copyin(user_addr_t user_addr,struct if_fake_request * iffr,u_int32_t len)3982 if_fake_request_copyin(user_addr_t user_addr,
3983     struct if_fake_request *iffr, u_int32_t len)
3984 {
3985 	int     error;
3986 
3987 	if (user_addr == USER_ADDR_NULL || len < sizeof(*iffr)) {
3988 		error = EINVAL;
3989 		goto done;
3990 	}
3991 	error = copyin(user_addr, iffr, sizeof(*iffr));
3992 	if (error != 0) {
3993 		goto done;
3994 	}
3995 	if (iffr->iffr_reserved[0] != 0 || iffr->iffr_reserved[1] != 0 ||
3996 	    iffr->iffr_reserved[2] != 0 || iffr->iffr_reserved[3] != 0) {
3997 		error = EINVAL;
3998 		goto done;
3999 	}
4000 done:
4001 	return error;
4002 }
4003 
4004 static int
feth_set_drvspec(ifnet_t ifp,uint32_t cmd,u_int32_t len,user_addr_t user_addr)4005 feth_set_drvspec(ifnet_t ifp, uint32_t cmd, u_int32_t len,
4006     user_addr_t user_addr)
4007 {
4008 	int                     error;
4009 	struct if_fake_request  iffr;
4010 	ifnet_t                 peer;
4011 
4012 	switch (cmd) {
4013 	case IF_FAKE_S_CMD_SET_PEER:
4014 		error = if_fake_request_copyin(user_addr, &iffr, len);
4015 		if (error != 0) {
4016 			break;
4017 		}
4018 		if (iffr.iffr_peer_name[0] == '\0') {
4019 			error = feth_config(ifp, NULL);
4020 			break;
4021 		}
4022 
4023 		/* ensure nul termination */
4024 		iffr.iffr_peer_name[IFNAMSIZ - 1] = '\0';
4025 		peer = ifunit(iffr.iffr_peer_name);
4026 		if (peer == NULL) {
4027 			error = ENXIO;
4028 			break;
4029 		}
4030 		if (ifnet_type(peer) != IFT_ETHER) {
4031 			error = EINVAL;
4032 			break;
4033 		}
4034 		if (strcmp(ifnet_name(peer), FAKE_ETHER_NAME) != 0) {
4035 			error = EINVAL;
4036 			break;
4037 		}
4038 		error = feth_config(ifp, peer);
4039 		break;
4040 	case IF_FAKE_S_CMD_SET_MEDIA:
4041 		error = if_fake_request_copyin(user_addr, &iffr, len);
4042 		if (error != 0) {
4043 			break;
4044 		}
4045 		error = feth_set_media(ifp, &iffr);
4046 		break;
4047 	case IF_FAKE_S_CMD_SET_DEQUEUE_STALL:
4048 		error = if_fake_request_copyin(user_addr, &iffr, len);
4049 		if (error != 0) {
4050 			break;
4051 		}
4052 		error = feth_enable_dequeue_stall(ifp,
4053 		    iffr.iffr_dequeue_stall);
4054 		break;
4055 	default:
4056 		error = EOPNOTSUPP;
4057 		break;
4058 	}
4059 	return error;
4060 }
4061 
4062 static int
feth_get_drvspec(ifnet_t ifp,u_int32_t cmd,u_int32_t len,user_addr_t user_addr)4063 feth_get_drvspec(ifnet_t ifp, u_int32_t cmd, u_int32_t len,
4064     user_addr_t user_addr)
4065 {
4066 	int                     error = EOPNOTSUPP;
4067 	if_fake_ref             fakeif;
4068 	struct if_fake_request  iffr;
4069 	ifnet_t                 peer;
4070 
4071 	switch (cmd) {
4072 	case IF_FAKE_G_CMD_GET_PEER:
4073 		if (len < sizeof(iffr)) {
4074 			error = EINVAL;
4075 			break;
4076 		}
4077 		feth_lock();
4078 		fakeif = ifnet_get_if_fake(ifp);
4079 		if (fakeif == NULL) {
4080 			feth_unlock();
4081 			error = EOPNOTSUPP;
4082 			break;
4083 		}
4084 		peer = fakeif->iff_peer;
4085 		feth_unlock();
4086 		bzero(&iffr, sizeof(iffr));
4087 		if (peer != NULL) {
4088 			strlcpy(iffr.iffr_peer_name,
4089 			    if_name(peer),
4090 			    sizeof(iffr.iffr_peer_name));
4091 		}
4092 		error = copyout(&iffr, user_addr, sizeof(iffr));
4093 		break;
4094 	default:
4095 		break;
4096 	}
4097 	return error;
4098 }
4099 
/*
 * Accessor union for SIOC[SG]DRVSPEC payloads: the same ioctl data
 * pointer is viewed through the 32-bit or 64-bit ifdrv layout depending
 * on the ioctl variant (see feth_ioctl).
 */
union ifdrvu {
	struct ifdrv32  *ifdrvu_32;
	struct ifdrv64  *ifdrvu_64;
	void            *ifdrvu_p;
};
4105 
static int
/*
 * ioctl handler for feth interfaces: address/flags/MTU management, media
 * queries, driver-specific get/set dispatch, lladdr and capability
 * updates.  Returns 0 or an errno.
 */
feth_ioctl(ifnet_t ifp, u_long cmd, void * data)
{
	unsigned int            count;
	struct ifdevmtu *       devmtu_p;
	union ifdrvu            drv;
	uint32_t                drv_cmd;
	uint32_t                drv_len;
	boolean_t               drv_set_command = FALSE;
	int                     error = 0;
	struct ifmediareq32 *   ifmr;
	struct ifreq *          ifr;
	if_fake_ref             fakeif;
	int                     status;
	user_addr_t             user_addr;

	ifr = (struct ifreq *)data;
	switch (cmd) {
	case SIOCSIFADDR:
		ifnet_set_flags(ifp, IFF_UP, IFF_UP);
		break;

	case SIOCGIFMEDIA32:
	case SIOCGIFMEDIA64:
		feth_lock();
		fakeif = ifnet_get_if_fake(ifp);
		if (fakeif == NULL) {
			feth_unlock();
			return EOPNOTSUPP;
		}
		/* link is "active" only when a peer is connected */
		status = (fakeif->iff_peer != NULL)
		    ? (IFM_AVALID | IFM_ACTIVE) : IFM_AVALID;
		/*
		 * NOTE(review): both variants are accessed through the
		 * 32-bit struct below; presumably the fields touched
		 * (ifm_count/ifm_active/...) share layout across
		 * ifmediareq32/64 — confirm against the struct definitions.
		 */
		ifmr = (struct ifmediareq32 *)data;
		user_addr = (cmd == SIOCGIFMEDIA64) ?
		    ((struct ifmediareq64 *)data)->ifmu_ulist :
		    CAST_USER_ADDR_T(((struct ifmediareq32 *)data)->ifmu_ulist);
		count = ifmr->ifm_count;
		ifmr->ifm_active = (fakeif->iff_peer != NULL)
		    ? FAKE_DEFAULT_MEDIA : IFM_ETHER;
		ifmr->ifm_current = IFM_ETHER;
		ifmr->ifm_mask = 0;
		ifmr->ifm_status = status;
		if (user_addr == USER_ADDR_NULL) {
			/* size query: report how many media words we have */
			ifmr->ifm_count = fakeif->iff_media_count;
		} else if (count > 0) {
			/* copy out at most iff_media_count media words */
			if (count > fakeif->iff_media_count) {
				count = fakeif->iff_media_count;
			}
			ifmr->ifm_count = count;
			error = copyout(&fakeif->iff_media_list, user_addr,
			    count * sizeof(int));
		}
		feth_unlock();
		break;

	case SIOCGIFDEVMTU:
		devmtu_p = &ifr->ifr_devmtu;
		devmtu_p->ifdm_current = ifnet_mtu(ifp);
		devmtu_p->ifdm_max = feth_max_mtu(ifp);
		devmtu_p->ifdm_min = IF_MINMTU;
		break;

	case SIOCSIFMTU:
		if ((unsigned int)ifr->ifr_mtu > feth_max_mtu(ifp) ||
		    ifr->ifr_mtu < IF_MINMTU) {
			error = EINVAL;
		} else {
			error = ifnet_set_mtu(ifp, ifr->ifr_mtu);
		}
		break;

	case SIOCSDRVSPEC32:
	case SIOCSDRVSPEC64:
		/* set-commands require superuser */
		error = proc_suser(current_proc());
		if (error != 0) {
			break;
		}
		drv_set_command = TRUE;
		OS_FALLTHROUGH;
	case SIOCGDRVSPEC32:
	case SIOCGDRVSPEC64:
		/* unpack cmd/len/data from the 32- or 64-bit ifdrv layout */
		drv.ifdrvu_p = data;
		if (cmd == SIOCGDRVSPEC32 || cmd == SIOCSDRVSPEC32) {
			drv_cmd = drv.ifdrvu_32->ifd_cmd;
			drv_len = drv.ifdrvu_32->ifd_len;
			user_addr = CAST_USER_ADDR_T(drv.ifdrvu_32->ifd_data);
		} else {
			drv_cmd = drv.ifdrvu_64->ifd_cmd;
			drv_len = drv.ifdrvu_64->ifd_len;
			user_addr = drv.ifdrvu_64->ifd_data;
		}
		if (drv_set_command) {
			error = feth_set_drvspec(ifp, drv_cmd, drv_len,
			    user_addr);
		} else {
			error = feth_get_drvspec(ifp, drv_cmd, drv_len,
			    user_addr);
		}
		break;

	case SIOCSIFLLADDR:
		error = ifnet_set_lladdr(ifp, ifr->ifr_addr.sa_data,
		    ifr->ifr_addr.sa_len);
		break;

	case SIOCSIFFLAGS:
		if ((ifp->if_flags & IFF_UP) != 0) {
			/* marked up, set running if not already set */
			if ((ifp->if_flags & IFF_RUNNING) == 0) {
				/* set running */
				error = ifnet_set_flags(ifp, IFF_RUNNING,
				    IFF_RUNNING);
			}
		} else if ((ifp->if_flags & IFF_RUNNING) != 0) {
			/* marked down, clear running */
			error = ifnet_set_flags(ifp, 0, IFF_RUNNING);
		}
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		/* multicast filter changes are accepted but ignored */
		error = 0;
		break;
	case SIOCSIFCAP: {
		uint32_t        cap;

		/* only interfaces created with LRO support accept this */
		feth_lock();
		fakeif = ifnet_get_if_fake(ifp);
		if (fakeif == NULL ||
		    (fakeif->iff_flags & IFF_FLAGS_LRO) == 0) {
			feth_unlock();
			return EOPNOTSUPP;
		}
		feth_unlock();
		cap = (ifr->ifr_reqcap & IFCAP_LRO) != 0 ? IFCAP_LRO : 0;
		error = ifnet_set_capabilities_enabled(ifp, cap, IFCAP_LRO);
		break;
	}
	default:
		error = EOPNOTSUPP;
		break;
	}
	return error;
}
4250 
4251 static void
feth_if_free(ifnet_t ifp)4252 feth_if_free(ifnet_t ifp)
4253 {
4254 	if_fake_ref             fakeif;
4255 
4256 	if (ifp == NULL) {
4257 		return;
4258 	}
4259 	feth_lock();
4260 	fakeif = ifnet_get_if_fake(ifp);
4261 	if (fakeif == NULL) {
4262 		feth_unlock();
4263 		return;
4264 	}
4265 	ifp->if_softc = NULL;
4266 #if SKYWALK
4267 	VERIFY(fakeif->iff_doorbell_tcall == NULL);
4268 #endif /* SKYWALK */
4269 	feth_unlock();
4270 	feth_release(fakeif);
4271 	ifnet_release(ifp);
4272 	return;
4273 }
4274 
4275 __private_extern__ void
if_fake_init(void)4276 if_fake_init(void)
4277 {
4278 	int error;
4279 
4280 #if SKYWALK
4281 	(void)feth_register_nexus_domain_provider();
4282 #endif /* SKYWALK */
4283 	error = if_clone_attach(&feth_cloner);
4284 	if (error != 0) {
4285 		return;
4286 	}
4287 	return;
4288 }
4289