xref: /xnu-11417.121.6/bsd/net/if_fake.c (revision a1e26a70f38d1d7daa7b49b258e2f8538ad81650)
1 /*
2  * Copyright (c) 2015-2024 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 /*
30  * if_fake.c
31  * - fake network interface used for testing
32  * - "feth" (e.g. "feth0", "feth1") is a virtual ethernet interface that allows
33  *   two instances to have their output/input paths "crossed-over" so that
34  *   output on one is input on the other
35  */
36 
37 /*
38  * Modification History:
39  *
40  * September 9, 2015	Dieter Siegmund ([email protected])
41  * - created
42  */
43 
44 #include <sys/param.h>
45 #include <sys/kernel.h>
46 #include <sys/malloc.h>
47 #include <sys/mbuf.h>
48 #include <sys/queue.h>
49 #include <sys/socket.h>
50 #include <sys/sockio.h>
51 #include <sys/sysctl.h>
52 #include <sys/systm.h>
53 #include <sys/kern_event.h>
54 #include <sys/mcache.h>
55 #include <sys/syslog.h>
56 
57 #include <net/bpf.h>
58 #include <net/ethernet.h>
59 #include <net/if.h>
60 #include <net/if_vlan_var.h>
61 #include <net/if_fake_var.h>
62 #include <net/if_arp.h>
63 #include <net/if_dl.h>
64 #include <net/if_ether.h>
65 #include <net/if_types.h>
66 #include <libkern/OSAtomic.h>
67 
68 #include <net/dlil.h>
69 
70 #include <net/kpi_interface.h>
71 #include <net/kpi_protocol.h>
72 
73 #include <kern/locks.h>
74 #include <kern/zalloc.h>
75 
76 #include <mach/mach_time.h>
77 
78 #include <os/log.h>
79 
80 #ifdef INET
81 #include <netinet/in.h>
82 #include <netinet/if_ether.h>
83 #endif
84 
85 #include <net/if_media.h>
86 #include <net/ether_if_module.h>
87 #if SKYWALK
88 #include <skywalk/os_skywalk_private.h>
89 #include <skywalk/nexus/netif/nx_netif.h>
90 #include <skywalk/channel/channel_var.h>
91 #endif /* SKYWALK */
92 
93 /*
94  * if_fake_debug, FE_DBGF_*
95  * - 'if_fake_debug' is a bitmask of FE_DBGF_* flags that can be set
96  *   to enable additional logs for the corresponding fake function
97  * - "sysctl net.link.fake.debug" controls the value of
98  *   'if_fake_debug'
99  */
/* bitmask of FE_DBGF_* flags; set via "sysctl net.link.fake.debug" */
static uint32_t if_fake_debug = 0;

/* debug flag bits, one per functional area (see FAKE_LOG users) */
#define FE_DBGF_LIFECYCLE               0x0001
#define FE_DBGF_INPUT                   0x0002
#define FE_DBGF_OUTPUT                  0x0004
#define FE_DBGF_CONTROL                 0x0008
#define FE_DBGF_MISC                    0x0010
107 
108 /*
109  * if_fake_log_level
110  * - 'if_fake_log_level' ensures that by default important logs are
111  *   logged regardless of if_fake_debug by comparing the log level
112  *   in FAKE_LOG to if_fake_log_level
 * - "sysctl net.link.fake.log_level" controls the value of
114  *   'if_fake_log_level'
115  * - the default value of 'if_fake_log_level' is LOG_NOTICE; important
116  *   logs must use LOG_NOTICE to ensure they appear by default
117  */
/*
 * FAKE_DBGF_ENABLED
 * - TRUE if any of the given FE_DBGF_* bits are set in if_fake_debug
 * - '__flag' is parenthesized so that a compound expression such as
 *   (FE_DBGF_INPUT | FE_DBGF_OUTPUT) expands correctly; without the
 *   parentheses, '&' binds tighter than '|' and the test is wrong
 */
#define FAKE_DBGF_ENABLED(__flag)     ((if_fake_debug & (__flag)) != 0)

/*
 * FAKE_LOG
 * - macro to generate the specified log conditionally based on
 *   the specified log level and debug flags
 * - '__level' is parenthesized for the same macro-hygiene reason
 */
#define FAKE_LOG(__level, __dbgf, __string, ...)              \
	do {                                                            \
	        if ((__level) <= if_fake_log_level ||                 \
	            FAKE_DBGF_ENABLED(__dbgf)) {                      \
	                os_log(OS_LOG_DEFAULT, "%s: " __string, \
	                       __func__, ## __VA_ARGS__);       \
	        }                                                       \
	} while (0)
133 
134 static boolean_t
is_power_of_two(unsigned int val)135 is_power_of_two(unsigned int val)
136 {
137 	return (val & (val - 1)) == 0;
138 }
139 
/* interface name prefix used by the cloner ("feth0", "feth1", ...) */
#define FAKE_ETHER_NAME         "feth"

SYSCTL_DECL(_net_link);
/* sysctl net.link.fake: parent node for all fake-interface tunables */
SYSCTL_NODE(_net_link, OID_AUTO, fake, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
    "Fake interface");

/* sysctl net.link.fake.txstart */
static int if_fake_txstart = 1;
SYSCTL_INT(_net_link_fake, OID_AUTO, txstart, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_txstart, 0, "Fake interface TXSTART mode");

/* sysctl net.link.fake.hwcsum */
static int if_fake_hwcsum = 0;
SYSCTL_INT(_net_link_fake, OID_AUTO, hwcsum, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_hwcsum, 0, "Fake interface simulate hardware checksum");

/* sysctl net.link.fake.vlan_tagging */
static int if_fake_vlan_tagging = 1;
SYSCTL_INT(_net_link_fake, OID_AUTO, vlan_tagging, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_vlan_tagging, 0, "Fake interface VLAN tagging");

/* sysctl net.link.fake.nxattach */
static int if_fake_nxattach = 0;
SYSCTL_INT(_net_link_fake, OID_AUTO, nxattach, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_nxattach, 0, "Fake interface auto-attach nexus");

/* sysctl net.link.fake.bsd_mode */
static int if_fake_bsd_mode = 1;
SYSCTL_INT(_net_link_fake, OID_AUTO, bsd_mode, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_bsd_mode, 0, "Fake interface attach as BSD interface");

/* sysctl net.link.fake.log_level; see FAKE_LOG above */
static int if_fake_log_level = LOG_NOTICE;
SYSCTL_INT(_net_link_fake, OID_AUTO, log_level, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_log_level, 0, "Fake interface log level");

SYSCTL_INT(_net_link_fake, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_debug, 0, "Fake interface debug flags");

/* sysctl net.link.fake.wmm_mode */
static int if_fake_wmm_mode = 0;
SYSCTL_INT(_net_link_fake, OID_AUTO, wmm_mode, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_wmm_mode, 0, "Fake interface in 802.11 WMM mode");

/* sysctl net.link.fake.multibuflet */
static int if_fake_multibuflet = 0;
SYSCTL_INT(_net_link_fake, OID_AUTO, multibuflet, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_multibuflet, 0, "Fake interface using multi-buflet packets");

/* sysctl net.link.fake.low_latency */
static int if_fake_low_latency = 0;
SYSCTL_INT(_net_link_fake, OID_AUTO, low_latency, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_low_latency, 0, "Fake interface with a low latency qset");

/* sysctl net.link.fake.switch_combined_mode */
static int if_fake_switch_combined_mode = 0;
SYSCTL_INT(_net_link_fake, OID_AUTO, switch_combined_mode,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_fake_switch_combined_mode, 0,
    "Switch a qset between combined and separate mode during dequeues");

/* sysctl net.link.fake.switch_mode_frequency */
static int if_fake_switch_mode_frequency = 10;
SYSCTL_INT(_net_link_fake, OID_AUTO, switch_mode_frequency,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_fake_switch_mode_frequency, 0,
    "The number of dequeues before we switch between the combined and separated mode");

/* sysctl net.link.fake.tso_support */
static int if_fake_tso_support = 0;
SYSCTL_INT(_net_link_fake, OID_AUTO, tso_support, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_tso_support, 0, "Fake interface with support for TSO offload");

/* sysctl net.link.fake.expiration_threshold */
#define DEFAULT_EXPIRATION_THRESHOLD 500 /* usec */
static int if_fake_expiration_threshold_us = DEFAULT_EXPIRATION_THRESHOLD;
SYSCTL_INT(_net_link_fake, OID_AUTO, expiration_threshold, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_expiration_threshold_us, DEFAULT_EXPIRATION_THRESHOLD,
    "Expiration threshold (usec) for expiration testing");

/* sysctl net.link.fake.lro */
static int if_fake_lro = 0;
SYSCTL_INT(_net_link_fake, OID_AUTO, lro, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_lro, 0, "Fake interface report LRO capability");

/* sysctl net.link.fake.separate_frame_header */
static int if_fake_separate_frame_header = 0;
SYSCTL_INT(_net_link_fake, OID_AUTO, separate_frame_header,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_separate_frame_header, 0, "Put frame header in separate mbuf");

/* sysctl net.link.fake.pktpool_mode */
typedef enum {
	IFF_PP_MODE_GLOBAL = 0,         /* share a global pool */
	IFF_PP_MODE_PRIVATE = 1,        /* creates its own rx/tx pool */
	IFF_PP_MODE_PRIVATE_SPLIT = 2,  /* creates its own split rx & tx pool */
} iff_pktpool_mode_t;
static iff_pktpool_mode_t if_fake_pktpool_mode = IFF_PP_MODE_GLOBAL;
SYSCTL_INT(_net_link_fake, OID_AUTO, pktpool_mode, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_pktpool_mode, IFF_PP_MODE_GLOBAL,
    "Fake interface packet pool mode (0 global, 1 private, 2 private split");
223 
224 #define FETH_LINK_LAYER_AGGRETATION_FACTOR_MAX 512
225 #define FETH_LINK_LAYER_AGGRETATION_FACTOR_DEF 96
226 static int if_fake_link_layer_aggregation_factor =
227     FETH_LINK_LAYER_AGGRETATION_FACTOR_DEF;
228 static int
229 feth_link_layer_aggregation_factor_sysctl SYSCTL_HANDLER_ARGS
230 {
231 #pragma unused(oidp, arg1, arg2)
232 	unsigned int new_value;
233 	int changed;
234 	int error;
235 
236 	error = sysctl_io_number(req, if_fake_link_layer_aggregation_factor,
237 	    sizeof(if_fake_link_layer_aggregation_factor), &new_value,
238 	    &changed);
239 	if (error == 0 && changed != 0) {
240 		if (new_value <= 0 ||
241 		    new_value > FETH_LINK_LAYER_AGGRETATION_FACTOR_MAX) {
242 			return EINVAL;
243 		}
244 		if_fake_link_layer_aggregation_factor = new_value;
245 	}
246 	return error;
247 }
248 
249 SYSCTL_PROC(_net_link_fake, OID_AUTO, link_layer_aggregation_factor,
250     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
251     0, 0, feth_link_layer_aggregation_factor_sysctl, "IU",
252     "Fake interface link layer aggregation factor");
253 
254 #define FETH_TX_HEADROOM_MAX      32
255 static unsigned int if_fake_tx_headroom = FETH_TX_HEADROOM_MAX;
256 static int
257 feth_tx_headroom_sysctl SYSCTL_HANDLER_ARGS
258 {
259 #pragma unused(oidp, arg1, arg2)
260 	unsigned int new_value;
261 	int changed;
262 	int error;
263 
264 	error = sysctl_io_number(req, if_fake_tx_headroom,
265 	    sizeof(if_fake_tx_headroom), &new_value, &changed);
266 	if (error == 0 && changed != 0) {
267 		if (new_value > FETH_TX_HEADROOM_MAX ||
268 		    (new_value % 8) != 0) {
269 			return EINVAL;
270 		}
271 		if_fake_tx_headroom = new_value;
272 	}
273 	return 0;
274 }
275 
276 SYSCTL_PROC(_net_link_fake, OID_AUTO, tx_headroom,
277     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
278     0, 0, feth_tx_headroom_sysctl, "IU", "Fake ethernet Tx headroom");
279 
/* sysctl net.link.fake.fcs: append a frame check sequence to frames */
static int if_fake_fcs = 0;
SYSCTL_INT(_net_link_fake, OID_AUTO, fcs, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_fcs, 0, "Fake interface using frame check sequence");
283 
284 #define FETH_TRAILER_LENGTH_MAX 28
285 char feth_trailer[FETH_TRAILER_LENGTH_MAX + 1] = "trailertrailertrailertrailer";
286 static unsigned int if_fake_trailer_length = 0;
287 static int
288 feth_trailer_length_sysctl SYSCTL_HANDLER_ARGS
289 {
290 #pragma unused(oidp, arg1, arg2)
291 	unsigned int new_value;
292 	int changed;
293 	int error;
294 
295 	error = sysctl_io_number(req, if_fake_trailer_length,
296 	    sizeof(if_fake_trailer_length), &new_value, &changed);
297 	if (error == 0 && changed != 0) {
298 		if (new_value > FETH_TRAILER_LENGTH_MAX) {
299 			return EINVAL;
300 		}
301 		if_fake_trailer_length = new_value;
302 	}
303 	return 0;
304 }
305 
306 SYSCTL_PROC(_net_link_fake, OID_AUTO, trailer_length,
307     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
308     feth_trailer_length_sysctl, "IU", "Fake interface frame trailer length");
309 
/* sysctl net.link.fake.max_mtu */
#define FETH_MAX_MTU_DEFAULT    2048
#define FETH_MAX_MTU_MAX        ((16 * 1024) - ETHER_HDR_LEN)

static unsigned int if_fake_max_mtu = FETH_MAX_MTU_DEFAULT;

/* sysctl net.link.fake.buflet_size */
#define FETH_BUFLET_SIZE_MIN            512
#define FETH_BUFLET_SIZE_MAX            (32 * 1024)
#define FETH_TSO_BUFLET_SIZE            (16 * 1024)

/* invariant enforced by the sysctl handlers: buflet_size < max_mtu */
static unsigned int if_fake_buflet_size = FETH_BUFLET_SIZE_MIN;
static unsigned int if_fake_tso_buffer_size = FETH_TSO_BUFLET_SIZE;
323 
324 static int
325 feth_tso_buffer_size_sysctl SYSCTL_HANDLER_ARGS
326 {
327 #pragma unused(oidp, arg1, arg2)
328 	unsigned int new_value;
329 	int changed;
330 	int error;
331 
332 	error = sysctl_io_number(req, if_fake_tso_buffer_size,
333 	    sizeof(if_fake_tso_buffer_size), &new_value, &changed);
334 	if (error == 0 && changed != 0) {
335 		/* must be a power of 2 between min and max */
336 		if (new_value > FETH_BUFLET_SIZE_MAX ||
337 		    new_value < FETH_BUFLET_SIZE_MIN ||
338 		    !is_power_of_two(new_value)) {
339 			return EINVAL;
340 		}
341 		if_fake_tso_buffer_size = new_value;
342 	}
343 	return 0;
344 }
345 
346 SYSCTL_PROC(_net_link_fake, OID_AUTO, tso_buf_size,
347     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
348     0, 0, feth_tso_buffer_size_sysctl, "IU", "Fake interface TSO buffer size");
349 
350 static int
351 feth_max_mtu_sysctl SYSCTL_HANDLER_ARGS
352 {
353 #pragma unused(oidp, arg1, arg2)
354 	unsigned int new_value;
355 	int changed;
356 	int error;
357 
358 	error = sysctl_io_number(req, if_fake_max_mtu,
359 	    sizeof(if_fake_max_mtu), &new_value, &changed);
360 	if (error == 0 && changed != 0) {
361 		if (new_value > FETH_MAX_MTU_MAX ||
362 		    new_value < ETHERMTU ||
363 		    new_value <= if_fake_buflet_size) {
364 			return EINVAL;
365 		}
366 		if_fake_max_mtu = new_value;
367 	}
368 	return 0;
369 }
370 
371 SYSCTL_PROC(_net_link_fake, OID_AUTO, max_mtu,
372     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
373     0, 0, feth_max_mtu_sysctl, "IU", "Fake interface maximum MTU");
374 
375 static int
376 feth_buflet_size_sysctl SYSCTL_HANDLER_ARGS
377 {
378 #pragma unused(oidp, arg1, arg2)
379 	unsigned int new_value;
380 	int changed;
381 	int error;
382 
383 	error = sysctl_io_number(req, if_fake_buflet_size,
384 	    sizeof(if_fake_buflet_size), &new_value, &changed);
385 	if (error == 0 && changed != 0) {
386 		/* must be a power of 2 between min and max */
387 		if (new_value > FETH_BUFLET_SIZE_MAX ||
388 		    new_value < FETH_BUFLET_SIZE_MIN ||
389 		    !is_power_of_two(new_value) ||
390 		    new_value >= if_fake_max_mtu) {
391 			return EINVAL;
392 		}
393 		if_fake_buflet_size = new_value;
394 	}
395 	return 0;
396 }
397 
398 SYSCTL_PROC(_net_link_fake, OID_AUTO, buflet_size,
399     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
400     0, 0, feth_buflet_size_sysctl, "IU", "Fake interface buflet size");
401 
402 static unsigned int if_fake_user_access = 0;
403 
404 static int
405 feth_user_access_sysctl SYSCTL_HANDLER_ARGS
406 {
407 #pragma unused(oidp, arg1, arg2)
408 	unsigned int new_value;
409 	int changed;
410 	int error;
411 
412 	error = sysctl_io_number(req, if_fake_user_access,
413 	    sizeof(if_fake_user_access), &new_value, &changed);
414 	if (error == 0 && changed != 0) {
415 		if (new_value != 0) {
416 			if (new_value != 1) {
417 				return EINVAL;
418 			}
419 		}
420 		if_fake_user_access = new_value;
421 	}
422 	return 0;
423 }
424 
425 SYSCTL_PROC(_net_link_fake, OID_AUTO, user_access,
426     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
427     0, 0, feth_user_access_sysctl, "IU", "Fake interface user access");
428 
429 /* sysctl net.link.fake.if_adv_intvl (unit: millisecond) */
430 #define FETH_IF_ADV_INTVL_MIN            10
431 #define FETH_IF_ADV_INTVL_MAX            INT_MAX
432 
433 static int if_fake_if_adv_interval = 0; /* no interface advisory */
434 static int
435 feth_if_adv_interval_sysctl SYSCTL_HANDLER_ARGS
436 {
437 #pragma unused(oidp, arg1, arg2)
438 	unsigned int new_value;
439 	int changed;
440 	int error;
441 
442 	error = sysctl_io_number(req, if_fake_if_adv_interval,
443 	    sizeof(if_fake_if_adv_interval), &new_value, &changed);
444 	if (error == 0 && changed != 0) {
445 		if ((new_value != 0) && (new_value > FETH_IF_ADV_INTVL_MAX ||
446 		    new_value < FETH_IF_ADV_INTVL_MIN)) {
447 			return EINVAL;
448 		}
449 		if_fake_if_adv_interval = new_value;
450 	}
451 	return 0;
452 }
453 
454 SYSCTL_PROC(_net_link_fake, OID_AUTO, if_adv_intvl,
455     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
456     feth_if_adv_interval_sysctl, "IU",
457     "Fake interface will generate interface advisories reports at the specified interval in ms");
458 
459 /* sysctl net.link.fake.tx_drops */
460 /*
461  * Fake ethernet will drop packet on the transmit path at the specified
462  * rate, i.e drop one in every if_fake_tx_drops number of packets.
463  */
464 #define FETH_TX_DROPS_MIN            0
465 #define FETH_TX_DROPS_MAX            INT_MAX
466 static int if_fake_tx_drops = 0; /* no packets are dropped */
467 static int
468 feth_fake_tx_drops_sysctl SYSCTL_HANDLER_ARGS
469 {
470 #pragma unused(oidp, arg1, arg2)
471 	unsigned int new_value;
472 	int changed;
473 	int error;
474 
475 	error = sysctl_io_number(req, if_fake_tx_drops,
476 	    sizeof(if_fake_tx_drops), &new_value, &changed);
477 	if (error == 0 && changed != 0) {
478 		if (new_value > FETH_TX_DROPS_MAX ||
479 		    new_value < FETH_TX_DROPS_MIN) {
480 			return EINVAL;
481 		}
482 		if_fake_tx_drops = new_value;
483 	}
484 	return 0;
485 }
486 
487 SYSCTL_PROC(_net_link_fake, OID_AUTO, tx_drops,
488     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
489     feth_fake_tx_drops_sysctl, "IU",
490     "Fake interface will intermittently drop packets on Tx path");
491 
492 /* sysctl.net.link.fake.tx_exp_policy */
493 
494 typedef enum {
495 	IFF_TX_EXP_POLICY_DISABLED = 0,          /* Expiry notification disabled */
496 	IFF_TX_EXP_POLICY_DROP_AND_NOTIFY = 1,   /* Expiry notification enabled; drop + notify mode */
497 	IFF_TX_EXP_POLICY_NOTIFY_ONLY = 2,       /* Expiry notification enabled; notify only mode */
498 	IFF_TX_EXP_POLICY_METADATA = 3,          /* Expiry notification enabled; use packet metadata */
499 } iff_tx_exp_policy_t;
500 static iff_tx_exp_policy_t if_fake_tx_exp_policy = IFF_TX_EXP_POLICY_DISABLED;
501 
502 static int
503 feth_fake_tx_exp_policy_sysctl SYSCTL_HANDLER_ARGS
504 {
505 #pragma unused(oidp, arg1, arg2)
506 	unsigned int new_value;
507 	int changed;
508 	int error;
509 
510 	error = sysctl_io_number(req, if_fake_tx_exp_policy,
511 	    sizeof(if_fake_tx_exp_policy), &new_value, &changed);
512 	FAKE_LOG(LOG_DEBUG, FE_DBGF_CONTROL,
513 	    "if_fake_tx_exp_policy: %u -> %u (%d)",
514 	    if_fake_tx_exp_policy, new_value, changed);
515 	if (error == 0 && changed != 0) {
516 		if (new_value > IFF_TX_EXP_POLICY_METADATA ||
517 		    new_value < IFF_TX_EXP_POLICY_DISABLED) {
518 			return EINVAL;
519 		}
520 		if_fake_tx_exp_policy = new_value;
521 	}
522 	return 0;
523 }
524 SYSCTL_PROC(_net_link_fake, OID_AUTO, tx_exp_policy,
525     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
526     feth_fake_tx_exp_policy_sysctl, "IU",
527     "Fake interface handling policy for expired TX attempts "
528     "(0 disabled, 1 drop and notify, 2 notify only, 3 packet metadata)");
529 
530 /* sysctl net.link.fake.tx_completion_mode */
531 typedef enum {
532 	IFF_TX_COMPL_MODE_SYNC = 0,
533 	IFF_TX_COMPL_MODE_ASYNC = 1,
534 } iff_tx_completion_mode_t;
535 static iff_tx_completion_mode_t if_tx_completion_mode = IFF_TX_COMPL_MODE_SYNC;
536 static int
537 feth_fake_tx_completion_mode_sysctl SYSCTL_HANDLER_ARGS
538 {
539 #pragma unused(oidp, arg1, arg2)
540 	unsigned int new_value;
541 	int changed;
542 	int error;
543 
544 	error = sysctl_io_number(req, if_tx_completion_mode,
545 	    sizeof(if_tx_completion_mode), &new_value, &changed);
546 	if (error == 0 && changed != 0) {
547 		if (new_value > IFF_TX_COMPL_MODE_ASYNC ||
548 		    new_value < IFF_TX_COMPL_MODE_SYNC) {
549 			return EINVAL;
550 		}
551 		if_tx_completion_mode = new_value;
552 	}
553 	return 0;
554 }
555 SYSCTL_PROC(_net_link_fake, OID_AUTO, tx_completion_mode,
556     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
557     feth_fake_tx_completion_mode_sysctl, "IU",
558     "Fake interface tx completion mode (0 synchronous, 1 asynchronous)");
559 
560 /* sysctl net.link.fake.llink_cnt */
561 
562 /* The maximum number of logical links (including default link) */
563 #define FETH_MAX_LLINKS 16
564 /*
565  * The default number of logical links (including default link).
566  * Zero means logical link mode is disabled.
567  */
568 #define FETH_DEF_LLINKS 0
569 
570 static uint32_t if_fake_llink_cnt = FETH_DEF_LLINKS;
571 static int
572 feth_fake_llink_cnt_sysctl SYSCTL_HANDLER_ARGS
573 {
574 #pragma unused(oidp, arg1, arg2)
575 	unsigned int new_value;
576 	int changed;
577 	int error;
578 
579 	error = sysctl_io_number(req, if_fake_llink_cnt,
580 	    sizeof(if_fake_llink_cnt), &new_value, &changed);
581 	if (error == 0 && changed != 0) {
582 		if (new_value > FETH_MAX_LLINKS) {
583 			return EINVAL;
584 		}
585 		if_fake_llink_cnt = new_value;
586 	}
587 	return 0;
588 }
589 
590 SYSCTL_PROC(_net_link_fake, OID_AUTO, llink_cnt,
591     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
592     feth_fake_llink_cnt_sysctl, "IU",
593     "Fake interface logical link count");
594 
595 /* sysctl net.link.fake.qset_cnt */
596 
597 /* The maximum number of qsets for each logical link */
598 #define FETH_MAX_QSETS  16
599 /* The default number of qsets for each logical link */
600 #define FETH_DEF_QSETS  4
601 
602 static uint32_t if_fake_qset_cnt = FETH_DEF_QSETS;
603 static int
604 feth_fake_qset_cnt_sysctl SYSCTL_HANDLER_ARGS
605 {
606 #pragma unused(oidp, arg1, arg2)
607 	unsigned int new_value;
608 	int changed;
609 	int error;
610 
611 	error = sysctl_io_number(req, if_fake_qset_cnt,
612 	    sizeof(if_fake_qset_cnt), &new_value, &changed);
613 	if (error == 0 && changed != 0) {
614 		if (new_value == 0 ||
615 		    new_value > FETH_MAX_QSETS) {
616 			return EINVAL;
617 		}
618 		if_fake_qset_cnt = new_value;
619 	}
620 	return 0;
621 }
622 
623 SYSCTL_PROC(_net_link_fake, OID_AUTO, qset_cnt,
624     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
625     feth_fake_qset_cnt_sysctl, "IU",
626     "Fake interface queue set count");
627 
628 
/*
 * _mbuf_adjust_pkthdr_and_data
 * - advances the mbuf data pointer by 'len' bytes and shrinks both the
 *   mbuf length and the packet header length by the same amount
 * - a negative 'len' moves the data pointer back and grows both lengths;
 *   the caller is responsible for keeping the adjustment within the
 *   underlying buffer
 */
static void
_mbuf_adjust_pkthdr_and_data(mbuf_t m, int len)
{
	mbuf_setdata(m, mtod(m, char *) + len, mbuf_len(m) - len);
	mbuf_pkthdr_adjustlen(m, -len);
}
635 
636 static inline void *__indexable
get_bpf_header(mbuf_t m,struct ether_header * eh_p,struct ether_vlan_header * evl_p,size_t * header_len)637 get_bpf_header(mbuf_t m, struct ether_header * eh_p,
638     struct ether_vlan_header * evl_p, size_t * header_len)
639 {
640 	void * header;
641 
642 	/* no VLAN tag, just use the ethernet header */
643 	if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
644 		header = (struct ether_header *__bidi_indexable)eh_p;
645 		*header_len = sizeof(*eh_p);
646 		goto done;
647 	}
648 
649 	/* has VLAN tag, populate the ether VLAN header */
650 	bcopy(eh_p, evl_p,
651 	    offsetof(struct ether_header, ether_type));   /* dst+src ether */
652 	evl_p->evl_encap_proto = htons(ETHERTYPE_VLAN);   /* VLAN encap */
653 	evl_p->evl_tag = htons(m->m_pkthdr.vlan_tag);     /* tag */
654 	evl_p->evl_proto = eh_p->ether_type;              /* proto */
655 	*header_len = sizeof(*evl_p);
656 	header = (struct ether_vlan_header *__bidi_indexable)evl_p;
657 
658 done:
659 	return header;
660 }
661 
662 typedef void (*_tap_func)(ifnet_t interface, u_int32_t dlt, mbuf_t packet,
663     void *__sized_by(header_len) header, size_t header_len);
664 
665 static void
fake_bpf_tap_common(ifnet_t ifp,mbuf_t m,struct ether_header * eh_p,_tap_func func)666 fake_bpf_tap_common(ifnet_t ifp, mbuf_t m, struct ether_header * eh_p,
667     _tap_func func)
668 {
669 	struct ether_vlan_header        evl;
670 	void *                          header;
671 	size_t                          header_len;
672 
673 	header = get_bpf_header(m, eh_p, &evl, &header_len);
674 	(*func)(ifp, DLT_EN10MB, m, header, header_len);
675 }
676 
/* taps 'm' to BPF listeners on the input path */
static inline void
fake_bpf_tap_in(ifnet_t ifp, mbuf_t m, struct ether_header * eh_p)
{
	fake_bpf_tap_common(ifp, m, eh_p, bpf_tap_in);
}
682 
683 
/* taps 'm' to BPF listeners on the output path */
static inline void
fake_bpf_tap_out(ifnet_t ifp, mbuf_t m, struct ether_header * eh_p)
{
	fake_bpf_tap_common(ifp, m, eh_p, bpf_tap_out);
}
689 
/**
** virtual ethernet structures, types
**/

/* ring/queue counts; the WMM-mode values are also the maximums */
#define IFF_NUM_TX_RINGS_WMM_MODE       4
#define IFF_NUM_RX_RINGS_WMM_MODE       1
#define IFF_MAX_TX_RINGS        IFF_NUM_TX_RINGS_WMM_MODE
#define IFF_MAX_RX_RINGS        IFF_NUM_RX_RINGS_WMM_MODE
#define IFF_NUM_TX_QUEUES_WMM_MODE      4
#define IFF_NUM_RX_QUEUES_WMM_MODE      1
#define IFF_MAX_TX_QUEUES       IFF_NUM_TX_QUEUES_WMM_MODE
#define IFF_MAX_RX_QUEUES       IFF_NUM_RX_QUEUES_WMM_MODE

#define IFF_MAX_BATCH_SIZE 32

/* iff_flags_t: state/capability bits held in if_fake.iff_flags */
typedef uint16_t        iff_flags_t;
#define IFF_FLAGS_HWCSUM                0x0001
#define IFF_FLAGS_BSD_MODE              0x0002
#define IFF_FLAGS_DETACHING             0x0004
#define IFF_FLAGS_WMM_MODE              0x0008
#define IFF_FLAGS_MULTIBUFLETS          0x0010
#define IFF_FLAGS_TSO_SUPPORT           0x0020
#define IFF_FLAGS_LRO                   0x0040
#define IFF_FLAGS_VLAN_MTU              0x0080
#define IFF_FLAGS_VLAN_TAGGING          0x0100
#define IFF_FLAGS_SEPARATE_FRAME_HEADER 0x0200
#define IFF_FLAGS_NX_ATTACHED           0x0400
717 
#if SKYWALK

/* provider/instance UUIDs identifying the netif nexus for an interface */
typedef struct {
	uuid_t                  fnx_provider;
	uuid_t                  fnx_instance;
} fake_nx, *fake_nx_t;

/* wrapper around a single netif kernel queue */
typedef struct {
	kern_netif_queue_t      fq_queue;
} fake_queue;

/* one queue set (a group of rx/tx queues) belonging to a logical link */
typedef struct {
	kern_netif_qset_t       fqs_qset; /* provided by xnu */
	fake_queue              fqs_rx_queue[IFF_MAX_RX_QUEUES];
	fake_queue              fqs_tx_queue[IFF_MAX_TX_QUEUES];
	uint32_t                fqs_rx_queue_cnt;
	uint32_t                fqs_tx_queue_cnt;
	uint32_t                fqs_llink_idx;  /* owning logical link index */
	uint32_t                fqs_idx;        /* index within the llink */
	uint32_t                fqs_dequeue_cnt;
	uint64_t                fqs_id;
	boolean_t               fqs_combined_mode;
} fake_qset;

/* a logical link and its queue sets */
typedef struct {
	uint64_t                fl_id;
	uint32_t                fl_idx;
	uint32_t                fl_qset_cnt;
	fake_qset               fl_qset[FETH_MAX_QSETS];
} fake_llink, * fake_llink_t;

/* packet buffer pool used in IFF_PP_MODE_GLOBAL (shared) mode */
static kern_pbufpool_t         S_pp;

/* sysctl net.link.fake.trace_tag */
#define IFF_TT_OUTPUT   0x01 /* generate trace_tag on output */
#define IFF_TT_INPUT    0x02 /* generate trace_tag on input */
static int if_fake_trace_tag_flags = 0;
SYSCTL_INT(_net_link_fake, OID_AUTO, trace_tag, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_trace_tag_flags, 0, "Fake interface generate trace_tag");
static packet_trace_tag_t if_fake_trace_tag_current = 1;

#endif /* SKYWALK */
759 
/*
 * struct if_fake
 * - per-interface soft state for a fake ("feth") ethernet interface
 */
struct if_fake {
	char                    iff_name[IFNAMSIZ]; /* our unique id */
	ifnet_t                 iff_ifp;
	iff_flags_t             iff_flags;          /* IFF_FLAGS_* bits */
	uint32_t                iff_retain_count;
	ifnet_t                 iff_peer;       /* the other end */
	int                     iff_media_current;
	int                     iff_media_active;
	uint32_t                iff_media_count;
	int                     iff_media_list[IF_FAKE_MEDIA_LIST_MAX];
	boolean_t               iff_start_busy;
	unsigned int            iff_max_mtu;
	uint32_t                iff_fcs;            /* frame check sequence on/off */
	uint32_t                iff_trailer_length;
#if SKYWALK
	fake_nx                 iff_nx;             /* attached netif nexus */
	struct netif_stats      *iff_nifs;
	uint32_t                iff_nifs_ref;
	uint32_t                iff_llink_cnt;
	kern_channel_ring_t     iff_rx_ring[IFF_MAX_RX_RINGS];
	kern_channel_ring_t     iff_tx_ring[IFF_MAX_TX_RINGS];
	fake_llink_t            iff_llink __counted_by_or_null(FETH_MAX_LLINKS);
	thread_call_t           iff_doorbell_tcall;
	thread_call_t           iff_if_adv_tcall;
	boolean_t               iff_doorbell_tcall_active;
	boolean_t               iff_waiting_for_tcall;
	boolean_t               iff_channel_connected;
	iff_pktpool_mode_t      iff_pp_mode;
	kern_pbufpool_t         iff_rx_pp;
	kern_pbufpool_t         iff_tx_pp;
	uint32_t                iff_tx_headroom;
	unsigned int            iff_adv_interval;   /* 0 = no advisories */
	uint32_t                iff_tx_drop_rate;
	uint32_t                iff_tx_pkts_count;
	iff_tx_completion_mode_t iff_tx_completion_mode;
	bool                    iff_intf_adv_enabled;
	void                    *iff_intf_adv_kern_ctx;
	kern_nexus_capab_interface_advisory_notify_fn_t iff_intf_adv_notify;
	iff_tx_exp_policy_t     iff_tx_exp_policy;
#endif /* SKYWALK */
};

typedef struct if_fake * __single if_fake_ref;

static if_fake_ref
ifnet_get_if_fake(ifnet_t ifp);
806 
807 static inline boolean_t
feth_in_bsd_mode(if_fake_ref fakeif)808 feth_in_bsd_mode(if_fake_ref fakeif)
809 {
810 	return (fakeif->iff_flags & IFF_FLAGS_BSD_MODE) != 0;
811 }
812 
/* marks the interface as detaching so other paths can bail out */
static inline void
feth_set_detaching(if_fake_ref fakeif)
{
	fakeif->iff_flags |= IFF_FLAGS_DETACHING;
}
818 
819 static inline boolean_t
feth_is_detaching(if_fake_ref fakeif)820 feth_is_detaching(if_fake_ref fakeif)
821 {
822 	return (fakeif->iff_flags & IFF_FLAGS_DETACHING) != 0;
823 }
824 
825 static int
feth_enable_dequeue_stall(ifnet_t ifp,uint32_t enable)826 feth_enable_dequeue_stall(ifnet_t ifp, uint32_t enable)
827 {
828 	int error;
829 
830 	if (enable != 0) {
831 		error = ifnet_disable_output(ifp);
832 	} else {
833 		error = ifnet_enable_output(ifp);
834 	}
835 
836 	return error;
837 }
838 
839 #if SKYWALK
840 static inline boolean_t
feth_in_wmm_mode(if_fake_ref fakeif)841 feth_in_wmm_mode(if_fake_ref fakeif)
842 {
843 	return (fakeif->iff_flags & IFF_FLAGS_WMM_MODE) != 0;
844 }
845 
846 static inline boolean_t
feth_using_multibuflets(if_fake_ref fakeif)847 feth_using_multibuflets(if_fake_ref fakeif)
848 {
849 	return (fakeif->iff_flags & IFF_FLAGS_MULTIBUFLETS) != 0;
850 }
851 static void feth_detach_netif_nexus(if_fake_ref fakeif);
852 
853 static inline boolean_t
feth_has_intf_advisory_configured(if_fake_ref fakeif)854 feth_has_intf_advisory_configured(if_fake_ref fakeif)
855 {
856 	return fakeif->iff_adv_interval > 0;
857 }
858 #endif /* SKYWALK */
859 
860 static inline bool
feth_supports_tso(if_fake_ref fakeif)861 feth_supports_tso(if_fake_ref fakeif)
862 {
863 	return (fakeif->iff_flags & IFF_FLAGS_TSO_SUPPORT) != 0;
864 }
865 
/* marks the interface as supporting TSO offload */
static inline void
feth_set_supports_tso(if_fake_ref fakeif)
{
	fakeif->iff_flags |= IFF_FLAGS_TSO_SUPPORT;
}
871 
872 static inline bool
feth_supports_vlan_mtu(if_fake_ref fakeif)873 feth_supports_vlan_mtu(if_fake_ref fakeif)
874 {
875 	return (fakeif->iff_flags & IFF_FLAGS_VLAN_MTU) != 0;
876 }
877 
/* marks the interface as supporting a VLAN-sized MTU */
static inline void
feth_set_supports_vlan_mtu(if_fake_ref fakeif)
{
	fakeif->iff_flags |= IFF_FLAGS_VLAN_MTU;
}
883 
884 static inline bool
feth_supports_vlan_tagging(if_fake_ref fakeif)885 feth_supports_vlan_tagging(if_fake_ref fakeif)
886 {
887 	return (fakeif->iff_flags & IFF_FLAGS_VLAN_TAGGING) != 0;
888 }
889 
/* Record that this fake interface performs VLAN tagging. */
static inline void
feth_set_supports_vlan_tagging(if_fake_ref fakeif)
{
	fakeif->iff_flags |= IFF_FLAGS_VLAN_TAGGING;
}
895 
896 
897 #define FETH_MAXUNIT    IF_MAXUNIT
898 #define FETH_ZONE_MAX_ELEM      MIN(IFNETS_MAX, FETH_MAXUNIT)
899 
900 static  int feth_clone_create(struct if_clone *, u_int32_t, void *);
901 static  int feth_clone_destroy(ifnet_t);
902 static  int feth_output(ifnet_t ifp, struct mbuf *m);
903 static  void feth_start(ifnet_t ifp);
904 static  int feth_ioctl(ifnet_t ifp, u_long cmd, void * addr);
905 static  int feth_config(ifnet_t ifp, ifnet_t peer);
906 static  void feth_if_free(ifnet_t ifp);
907 static  void feth_ifnet_set_attrs(if_fake_ref fakeif, ifnet_t ifp);
908 static  void feth_free(if_fake_ref fakeif);
909 
910 static struct if_clone
911     feth_cloner = IF_CLONE_INITIALIZER(FAKE_ETHER_NAME,
912     feth_clone_create,
913     feth_clone_destroy,
914     0,
915     FETH_MAXUNIT);
916 static  void interface_link_event(ifnet_t ifp, u_int32_t event_code);
917 
918 /* some media words to pretend to be ethernet */
919 #define FAKE_DEFAULT_MEDIA      IFM_MAKEWORD(IFM_ETHER, IFM_10G_T, IFM_FDX, 0)
/*
 * Media words advertised by the fake interface so that media ioctls
 * see a plausible ethernet media list; FAKE_DEFAULT_MEDIA (10G-T FDX)
 * is the default entry.
 */
static int default_media_words[] = {
	IFM_MAKEWORD(IFM_ETHER, 0, 0, 0),
	FAKE_DEFAULT_MEDIA,
	IFM_MAKEWORD(IFM_ETHER, IFM_2500_T, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_5000_T, IFM_FDX, 0),

	IFM_MAKEWORD(IFM_ETHER, IFM_10G_KX4, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_20G_KR2, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_2500_SX, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_25G_KR, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_40G_SR4, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_50G_CR2, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_56G_R4, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_100G_CR4, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_400G_AUI8, IFM_FDX, 0),
};
/* number of entries in default_media_words */
#define default_media_words_count (sizeof(default_media_words)          \
	                           / sizeof (default_media_words[0]))
938 
/**
** feth locks
**/
942 
943 static LCK_GRP_DECLARE(feth_lck_grp, "fake");
944 static LCK_MTX_DECLARE(feth_lck_mtx, &feth_lck_grp);
945 
/* Acquire the global mutex serializing fake-interface state. */
static inline void
feth_lock(void)
{
	lck_mtx_lock(&feth_lck_mtx);
}
951 
/* Release the global mutex taken by feth_lock(). */
static inline void
feth_unlock(void)
{
	lck_mtx_unlock(&feth_lck_mtx);
}
957 
958 static inline int
get_max_mtu(int bsd_mode,unsigned int max_mtu)959 get_max_mtu(int bsd_mode, unsigned int max_mtu)
960 {
961 	unsigned int    mtu;
962 
963 	if (bsd_mode != 0) {
964 		mtu = (njcl > 0) ? (M16KCLBYTES - ETHER_HDR_LEN)
965 		    : MBIGCLBYTES - ETHER_HDR_LEN;
966 		if (mtu > max_mtu) {
967 			mtu = max_mtu;
968 		}
969 	} else {
970 		mtu = max_mtu;
971 	}
972 	return mtu;
973 }
974 
975 static inline unsigned int
feth_max_mtu(ifnet_t ifp)976 feth_max_mtu(ifnet_t ifp)
977 {
978 	if_fake_ref     fakeif;
979 	unsigned int    max_mtu = ETHERMTU;
980 
981 	feth_lock();
982 	fakeif = ifnet_get_if_fake(ifp);
983 	if (fakeif != NULL) {
984 		max_mtu = fakeif->iff_max_mtu;
985 	}
986 	feth_unlock();
987 	return max_mtu;
988 }
989 
/*
 * Final teardown of a fake-interface instance; reached when the last
 * reference is released.  Drops this instance's packet-pool references
 * (and, for the shared pool, its creation reference once no instance
 * still uses it), frees the logical-link array, then the instance.
 */
static void
feth_free(if_fake_ref fakeif)
{
	VERIFY(fakeif->iff_retain_count == 0);
#if SKYWALK
	if (!feth_in_bsd_mode(fakeif)) {
		/* native mode: drop this instance's pool references */
		if (fakeif->iff_pp_mode == IFF_PP_MODE_GLOBAL) {
			VERIFY(fakeif->iff_rx_pp == S_pp);
			VERIFY(fakeif->iff_tx_pp == S_pp);
			pp_release(fakeif->iff_rx_pp);
			fakeif->iff_rx_pp = NULL;
			pp_release(fakeif->iff_tx_pp);
			fakeif->iff_tx_pp = NULL;
			feth_lock();
			/*
			 * Only the creation reference remains: no other
			 * instance uses the shared pool, so drop it too.
			 */
			if (S_pp != NULL && S_pp->pp_refcnt == 1) {
				pp_release(S_pp);
				S_pp = NULL;
			}
			feth_unlock();
		} else {
			/* private (possibly split RX/TX) pools */
			if (fakeif->iff_rx_pp != NULL) {
				pp_release(fakeif->iff_rx_pp);
				fakeif->iff_rx_pp = NULL;
			}
			if (fakeif->iff_tx_pp != NULL) {
				pp_release(fakeif->iff_tx_pp);
				fakeif->iff_tx_pp = NULL;
			}
		}
	}
#endif /* SKYWALK */

	FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE, "%s", fakeif->iff_name);
	if (fakeif->iff_llink != NULL) {
		fake_llink_t    llink;
		llink = fakeif->iff_llink;
		fakeif->iff_llink = NULL;
		kfree_type(fake_llink, FETH_MAX_LLINKS, llink);
	}
	kfree_type(struct if_fake, fakeif);
}
1031 
1032 static void
feth_release(if_fake_ref fakeif)1033 feth_release(if_fake_ref fakeif)
1034 {
1035 	u_int32_t               old_retain_count;
1036 
1037 	old_retain_count = OSDecrementAtomic(&fakeif->iff_retain_count);
1038 	switch (old_retain_count) {
1039 	case 0:
1040 		VERIFY(old_retain_count != 0);
1041 		break;
1042 	case 1:
1043 		feth_free(fakeif);
1044 		break;
1045 	default:
1046 		break;
1047 	}
1048 	return;
1049 }
1050 
1051 #if SKYWALK
1052 
/* Take an additional reference on `fakeif'. */
static void
feth_retain(if_fake_ref fakeif)
{
	OSIncrementAtomic(&fakeif->iff_retain_count);
}
1058 
/*
 * Populate `pp_init' with the packet-pool parameters for `fakeif'
 * (buflet size, packet/buflet counts, flags) ahead of
 * kern_pbufpool_create().
 */
static void
feth_packet_pool_init_prepare(if_fake_ref fakeif,
    struct kern_pbufpool_init *pp_init)
{
	uint32_t max_mtu = fakeif->iff_max_mtu;
	uint32_t buflet_size = if_fake_buflet_size;

	bzero(pp_init, sizeof(*pp_init));
	pp_init->kbi_version = KERN_PBUFPOOL_CURRENT_VERSION;
	pp_init->kbi_flags |= KBIF_VIRTUAL_DEVICE;
	pp_init->kbi_packets = 1024; /* TBD configurable */
	if (feth_supports_tso(fakeif)) {
		/* TSO packets use the larger TSO buffer size */
		buflet_size = if_fake_tso_buffer_size;
	}
	if (feth_using_multibuflets(fakeif)) {
		/* enough fixed-size buflets per packet to cover the MTU */
		pp_init->kbi_bufsize = buflet_size;
		pp_init->kbi_max_frags = howmany(max_mtu, buflet_size);
		pp_init->kbi_buflets = pp_init->kbi_packets *
		    pp_init->kbi_max_frags;
		pp_init->kbi_flags |= KBIF_BUFFER_ON_DEMAND;
	} else {
		/* one buflet large enough for a full MTU frame */
		pp_init->kbi_bufsize = max(max_mtu, buflet_size);
		pp_init->kbi_max_frags = 1;
		pp_init->kbi_buflets = pp_init->kbi_packets;
	}
	pp_init->kbi_buf_seg_size = skmem_usr_buf_seg_size;
	if (if_fake_user_access != 0) {
		pp_init->kbi_flags |= KBIF_USER_ACCESS;
	}
	pp_init->kbi_ctx = NULL;
	pp_init->kbi_ctx_retain = NULL;
	pp_init->kbi_ctx_release = NULL;
}
1092 
1093 static errno_t
feth_packet_pool_make(if_fake_ref fakeif)1094 feth_packet_pool_make(if_fake_ref fakeif)
1095 {
1096 	struct kern_pbufpool_init pp_init;
1097 	errno_t err;
1098 
1099 	feth_packet_pool_init_prepare(fakeif, &pp_init);
1100 
1101 	switch (fakeif->iff_pp_mode) {
1102 	case IFF_PP_MODE_GLOBAL:
1103 		feth_lock();
1104 		if (S_pp == NULL) {
1105 			(void)snprintf((char *)pp_init.kbi_name,
1106 			    sizeof(pp_init.kbi_name), "%s", "feth shared pp");
1107 			err = kern_pbufpool_create(&pp_init, &S_pp, NULL);
1108 		}
1109 		pp_retain(S_pp);
1110 		feth_unlock();
1111 		fakeif->iff_rx_pp = S_pp;
1112 		pp_retain(S_pp);
1113 		fakeif->iff_tx_pp = S_pp;
1114 		break;
1115 	case IFF_PP_MODE_PRIVATE:
1116 		(void)snprintf((char *)pp_init.kbi_name,
1117 		    sizeof(pp_init.kbi_name), "%s pp", fakeif->iff_name);
1118 		err = kern_pbufpool_create(&pp_init, &fakeif->iff_rx_pp, NULL);
1119 		pp_retain(fakeif->iff_rx_pp);
1120 		fakeif->iff_tx_pp = fakeif->iff_rx_pp;
1121 		break;
1122 	case IFF_PP_MODE_PRIVATE_SPLIT:
1123 		(void)snprintf((char *)pp_init.kbi_name,
1124 		    sizeof(pp_init.kbi_name), "%s rx pp", fakeif->iff_name);
1125 		pp_init.kbi_flags &= ~(KBIF_IODIR_IN | KBIF_IODIR_OUT |
1126 		    KBIF_BUFFER_ON_DEMAND | KBIF_KERNEL_READONLY);
1127 		pp_init.kbi_flags |= (KBIF_IODIR_IN | KBIF_BUFFER_ON_DEMAND);
1128 		pp_init.kbi_packets = 1024;
1129 		pp_init.kbi_bufsize = if_fake_link_layer_aggregation_factor * 1024;
1130 		err = kern_pbufpool_create(&pp_init, &fakeif->iff_rx_pp, NULL);
1131 		if (err != 0) {
1132 			FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
1133 			    "rx pp create failed %d", err);
1134 			return err;
1135 		}
1136 		pp_init.kbi_flags &= ~(KBIF_IODIR_IN | KBIF_IODIR_OUT |
1137 		    KBIF_BUFFER_ON_DEMAND | KBIF_KERNEL_READONLY);
1138 		pp_init.kbi_flags |= KBIF_IODIR_OUT;
1139 		pp_init.kbi_packets = 1024;            /* TBD configurable */
1140 		pp_init.kbi_bufsize = fakeif->iff_max_mtu;
1141 		(void)snprintf((char *)pp_init.kbi_name,
1142 		    sizeof(pp_init.kbi_name), "%s tx pp", fakeif->iff_name);
1143 		err = kern_pbufpool_create(&pp_init, &fakeif->iff_tx_pp, NULL);
1144 		if (err != 0) {
1145 			FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
1146 			    "tx pp create failed %d", err);
1147 			pp_release(fakeif->iff_rx_pp);
1148 			return err;
1149 		}
1150 		break;
1151 	default:
1152 		VERIFY(0);
1153 		__builtin_unreachable();
1154 	}
1155 
1156 	return 0;
1157 }
1158 
1159 static void
feth_packet_set_trace_tag(kern_packet_t ph,int flag)1160 feth_packet_set_trace_tag(kern_packet_t ph, int flag)
1161 {
1162 	if (if_fake_trace_tag_flags & flag) {
1163 		if (++if_fake_trace_tag_current == 0) {
1164 			if_fake_trace_tag_current = 1;
1165 		}
1166 		kern_packet_set_trace_tag(ph, if_fake_trace_tag_current);
1167 	}
1168 }
1169 
/*
 * Produce an RX copy of single-buflet packet `sph' for `dif'
 * (private-split pool mode).  If *pdph holds a previously built packet
 * whose backing buffer has spare room past its data limit, the new
 * data is placed there via a light clone sharing that buffer;
 * otherwise a fresh packet is allocated from the RX pool.  On success
 * *pdph is replaced with the finalized destination packet.
 */
static errno_t
feth_clone_packet(if_fake_ref dif, kern_packet_t sph, kern_packet_t *pdph)
{
	errno_t err = 0;
	kern_pbufpool_t pp = dif->iff_rx_pp;
	kern_packet_t dph = 0, dph0 = 0;
	kern_buflet_t sbuf, dbuf0 = NULL, dbuf;
	caddr_t saddr, daddr;
	uint32_t soff, doff;
	uint32_t slen, dlen;
	uint32_t dlim0, dlim;

	sbuf = kern_packet_get_next_buflet(sph, NULL);
	saddr = __unsafe_forge_bidi_indexable(caddr_t,
	    kern_buflet_get_data_address(sbuf),
	    kern_buflet_get_data_limit(sbuf));
	doff = soff = kern_buflet_get_data_offset(sbuf);
	dlen = slen = kern_buflet_get_data_length(sbuf);

	/* packet clone is only supported for single-buflet */
	ASSERT(kern_packet_get_buflet_count(sph) == 1);
	ASSERT(soff == kern_packet_get_headroom(sph));
	ASSERT(slen == kern_packet_get_data_length(sph));

	dph0 = *pdph;
	if (dph0 == 0) {
		/* no previous packet to share a buffer with */
		dlim0 = 0;
	} else {
		/* room remaining past the previous packet's data limit */
		dbuf0 = kern_packet_get_next_buflet(dph0, NULL);
		ASSERT(kern_buflet_get_object_limit(dbuf0) ==
		    PP_BUF_OBJ_SIZE_DEF(pp));
		ASSERT(kern_buflet_get_data_limit(dbuf0) % 16 == 0);
		dlim0 = ((size_t)kern_buflet_get_object_address(dbuf0) +
		    kern_buflet_get_object_limit(dbuf0)) -
		    ((size_t)kern_buflet_get_data_address(dbuf0) +
		    kern_buflet_get_data_limit(dbuf0));
	}

	if (doff + dlen > dlim0) {
		/* not enough spare room: allocate a fresh packet */
		err = kern_pbufpool_alloc_nosleep(pp, 1, &dph);
		if (err != 0) {
			STATS_INC(dif->iff_nifs, NETIF_STATS_DROP);
			STATS_INC(dif->iff_nifs, NETIF_STATS_DROP_NOMEM_PKT);
			return err;
		}
		dbuf = kern_packet_get_next_buflet(dph, NULL);
		ASSERT(kern_buflet_get_data_address(dbuf) ==
		    kern_buflet_get_object_address(dbuf));
		daddr = __unsafe_forge_bidi_indexable(caddr_t,
		    kern_buflet_get_data_address(dbuf),
		    kern_buflet_get_data_limit(dbuf));
		dlim = kern_buflet_get_object_limit(dbuf);
		ASSERT(dlim == PP_BUF_OBJ_SIZE_DEF(pp));
	} else {
		/* share the previous buffer; place data past its limit */
		err = kern_packet_clone_nosleep(dph0, &dph, KPKT_COPY_LIGHT);
		if (err != 0) {
			FAKE_LOG(LOG_INFO, FE_DBGF_OUTPUT,
			    "packet clone err %d", err);
			return err;
		}
		dbuf = kern_packet_get_next_buflet(dph, NULL);
		ASSERT(kern_buflet_get_object_address(dbuf) ==
		    kern_buflet_get_object_address(dbuf0));
		daddr = __unsafe_forge_bidi_indexable(caddr_t,
		    kern_buflet_get_data_address(dbuf0),
		    kern_buflet_get_object_limit(dbuf0)) + kern_buflet_get_data_limit(dbuf0);
		dlim = dlim0;
	}

	ASSERT(doff + dlen <= dlim);

	ASSERT((uintptr_t)daddr % 16 == 0);
	bcopy(saddr + soff, daddr + doff, slen);

	/* keep the data limit 16-byte aligned for a later clone */
	dlim = MIN(dlim, P2ROUNDUP(doff + dlen, 16));
	err = kern_buflet_set_data_address(dbuf, daddr);
	VERIFY(err == 0);
	err = kern_buflet_set_data_limit(dbuf, dlim);
	VERIFY(err == 0);
	err = kern_buflet_set_data_length(dbuf, dlen);
	VERIFY(err == 0);
	err = kern_buflet_set_data_offset(dbuf, doff);
	VERIFY(err == 0);
	err = kern_packet_set_headroom(dph, doff);
	VERIFY(err == 0);
	err = kern_packet_set_link_header_length(dph,
	    kern_packet_get_link_header_length(sph));
	VERIFY(err == 0);
	err = kern_packet_set_service_class(dph,
	    kern_packet_get_service_class(sph));
	VERIFY(err == 0);
	err = kern_packet_finalize(dph);
	VERIFY(err == 0);
	*pdph = dph;

	return err;
}
1267 
1268 static inline void
feth_copy_buflet(kern_buflet_t sbuf,kern_buflet_t dbuf)1269 feth_copy_buflet(kern_buflet_t sbuf, kern_buflet_t dbuf)
1270 {
1271 	errno_t err;
1272 	uint32_t off, len;
1273 	caddr_t saddr, daddr;
1274 
1275 	saddr = __unsafe_forge_bidi_indexable(caddr_t,
1276 	    kern_buflet_get_data_address(sbuf),
1277 	    kern_buflet_get_data_limit(sbuf));
1278 	off = kern_buflet_get_data_offset(sbuf);
1279 	len = kern_buflet_get_data_length(sbuf);
1280 	daddr = __unsafe_forge_bidi_indexable(caddr_t,
1281 	    kern_buflet_get_data_address(dbuf),
1282 	    kern_buflet_get_data_limit(dbuf));
1283 	bcopy(saddr + off, daddr + off, len);
1284 	err = kern_buflet_set_data_offset(dbuf, off);
1285 	VERIFY(err == 0);
1286 	err = kern_buflet_set_data_length(dbuf, len);
1287 	VERIFY(err == 0);
1288 }
1289 
/*
 * Append `trailer_len' bytes of `trailer' after the data in the last
 * buflet of `ph' and re-finalize the packet.
 * Returns 0 on success, ERANGE when the last buflet lacks room.
 */
static int
feth_add_packet_trailer(kern_packet_t ph, void * __sized_by(trailer_len) trailer, size_t trailer_len)
{
	errno_t err = 0;

	ASSERT(trailer_len <= FETH_TRAILER_LENGTH_MAX);

	/* walk to the last buflet of the packet */
	kern_buflet_t buf = NULL, iter = NULL;
	while ((iter = kern_packet_get_next_buflet(ph, iter)) != NULL) {
		buf = iter;
	}
	ASSERT(buf != NULL);

	uint32_t dlim = kern_buflet_get_data_limit(buf);
	uint32_t doff = kern_buflet_get_data_offset(buf);
	uint32_t dlen = kern_buflet_get_data_length(buf);

	/* NOTE(review): assumes doff + dlen <= dlim; the subtraction
	 * would wrap otherwise (then the room check below rejects) */
	size_t trailer_room = dlim - doff - dlen;

	if (trailer_room < trailer_len) {
		FAKE_LOG(LOG_INFO, FE_DBGF_OUTPUT, "not enough room");
		return ERANGE;
	}

	/* write the trailer just past the existing data */
	void *data = __unsafe_forge_bidi_indexable(caddr_t,
	    kern_buflet_get_data_address(buf),
	    kern_buflet_get_data_limit(buf)) + doff + dlen;
	memcpy(data, trailer, trailer_len);

	err = kern_buflet_set_data_length(buf, dlen + trailer_len);
	VERIFY(err == 0);

	err = kern_packet_finalize(ph);
	VERIFY(err == 0);

	FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT, "%zuB trailer added", trailer_len);

	return 0;
}
1329 
1330 static int
feth_add_packet_fcs(kern_packet_t ph)1331 feth_add_packet_fcs(kern_packet_t ph)
1332 {
1333 	uint32_t crc = 0;
1334 	int err;
1335 
1336 	ASSERT(sizeof(crc) == ETHER_CRC_LEN);
1337 
1338 	kern_buflet_t buf = NULL;
1339 	while ((buf = kern_packet_get_next_buflet(ph, buf)) != NULL) {
1340 		uint32_t doff = kern_buflet_get_data_offset(buf);
1341 		uint32_t dlen = kern_buflet_get_data_length(buf);
1342 		void *data = __unsafe_forge_bidi_indexable(caddr_t,
1343 		    kern_buflet_get_data_address(buf),
1344 		    kern_buflet_get_data_limit(buf)) + doff;
1345 		crc = crc32(crc, data, dlen);
1346 	}
1347 
1348 	err = feth_add_packet_trailer(ph, &crc, ETHER_CRC_LEN);
1349 	if (!err) {
1350 		return err;
1351 	}
1352 
1353 	err = kern_packet_set_link_ethfcs(ph);
1354 	VERIFY(err == 0);
1355 
1356 	return 0;
1357 }
1358 
/*
 * Deep-copy `sph' into a freshly allocated packet from `dif''s RX
 * pool, buflet by buflet (private packet-pool mode, where source and
 * destination interfaces do not share buffers).  On success *pdph
 * holds the finalized copy; on failure the partially built packet is
 * freed and an error returned.
 */
static errno_t
feth_copy_packet(if_fake_ref dif, kern_packet_t sph, kern_packet_t *pdph)
{
	errno_t err = 0;
	uint16_t i, bufcnt;
	mach_vm_address_t baddr;
	kern_buflet_t sbuf = NULL, dbuf = NULL;
	kern_pbufpool_t pp = dif->iff_rx_pp;
	kern_packet_t dph;
	boolean_t multi_buflet = feth_using_multibuflets(dif);

	bufcnt = kern_packet_get_buflet_count(sph);
	ASSERT((bufcnt == 1) || multi_buflet);
	*pdph = 0;

	err = kern_pbufpool_alloc_nosleep(pp, 1, &dph);
	if (err != 0) {
		STATS_INC(dif->iff_nifs, NETIF_STATS_DROP);
		STATS_INC(dif->iff_nifs, NETIF_STATS_DROP_NOMEM_PKT);
		return err;
	}

	/* pre-constructed single buflet packet copy */
	sbuf = kern_packet_get_next_buflet(sph, NULL);
	dbuf = kern_packet_get_next_buflet(dph, NULL);
	feth_copy_buflet(sbuf, dbuf);

	if (!multi_buflet) {
		goto done;
	}

	/* un-constructed multi-buflet packet copy */
	for (i = 1; i < bufcnt; i++) {
		kern_buflet_t __single dbuf_next = NULL;

		sbuf = kern_packet_get_next_buflet(sph, sbuf);
		VERIFY(sbuf != NULL);
		err = kern_pbufpool_alloc_buflet_nosleep(pp, &dbuf_next);
		if (err != 0) {
			STATS_INC(dif->iff_nifs, NETIF_STATS_DROP);
			STATS_INC(dif->iff_nifs, NETIF_STATS_DROP_NOMEM_BUF);
			break;
		}
		ASSERT(dbuf_next != NULL);
		feth_copy_buflet(sbuf, dbuf_next);
		err = kern_packet_add_buflet(dph, dbuf, dbuf_next);
		VERIFY(err == 0);
		dbuf = dbuf_next;
	}
	if (__improbable(err != 0)) {
		/*
		 * Buflet allocation failed mid-copy: sanity-check the
		 * buflets already attached, then free the partial packet.
		 */
		dbuf = NULL;
		while (i-- != 0) {
			dbuf = kern_packet_get_next_buflet(dph, dbuf);
			VERIFY(dbuf != NULL);
			baddr = (mach_vm_address_t)
			    kern_buflet_get_data_address(dbuf);
			VERIFY(baddr != 0);
		}
		kern_pbufpool_free(pp, dph);
		dph = 0;
	}

done:
	if (__probable(err == 0)) {
		/* mirror metadata from the source and finalize the copy */
		err = kern_packet_set_headroom(dph,
		    kern_packet_get_headroom(sph));
		VERIFY(err == 0);
		err = kern_packet_set_link_header_length(dph,
		    kern_packet_get_link_header_length(sph));
		VERIFY(err == 0);
		err = kern_packet_set_service_class(dph,
		    kern_packet_get_service_class(sph));
		VERIFY(err == 0);
		err = kern_packet_finalize(dph);
		VERIFY(err == 0);
		VERIFY(bufcnt == kern_packet_get_buflet_count(dph));
		*pdph = dph;
	}
	return err;
}
1439 
1440 static inline void
feth_update_pkt_tso_metadata_for_rx(kern_packet_t ph)1441 feth_update_pkt_tso_metadata_for_rx(kern_packet_t ph)
1442 {
1443 	/*
1444 	 * Nothing to do if not a TSO offloaded packet.
1445 	 */
1446 	uint16_t seg_sz = 0;
1447 	seg_sz = kern_packet_get_protocol_segment_size(ph);
1448 	if (seg_sz == 0) {
1449 		return;
1450 	}
1451 	/*
1452 	 * For RX, make the packet appear as a fully validated LRO packet.
1453 	 */
1454 	packet_csum_flags_t csum_flags = PACKET_CSUM_IP_CHECKED |
1455 	    PACKET_CSUM_IP_VALID | PACKET_CSUM_DATA_VALID |
1456 	    PACKET_CSUM_PSEUDO_HDR;
1457 	(void) kern_packet_set_inet_checksum(ph, csum_flags, 0, 0xFFFF, FALSE);
1458 	return;
1459 }
1460 
1461 static void
feth_rx_submit(if_fake_ref sif,if_fake_ref dif,kern_packet_t * __counted_by (n_pkts)sphs,uint32_t n_pkts)1462 feth_rx_submit(if_fake_ref sif, if_fake_ref dif, kern_packet_t * __counted_by(n_pkts) sphs,
1463     uint32_t n_pkts)
1464 {
1465 	errno_t err = 0;
1466 	struct kern_channel_ring_stat_increment stats;
1467 	kern_channel_ring_t rx_ring = NULL;
1468 	kern_channel_slot_t rx_slot = NULL, last_rx_slot = NULL;
1469 	kern_packet_t sph = 0, dph = 0;
1470 
1471 	memset(&stats, 0, sizeof(stats));
1472 
1473 	rx_ring = dif->iff_rx_ring[0];
1474 	if (rx_ring == NULL) {
1475 		return;
1476 	}
1477 
1478 	kr_enter(rx_ring, TRUE);
1479 	kern_channel_reclaim(rx_ring);
1480 	rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);
1481 
1482 	for (uint32_t i = 0; i < n_pkts && rx_slot != NULL; i++) {
1483 		sph = sphs[i];
1484 
1485 		switch (dif->iff_pp_mode) {
1486 		case IFF_PP_MODE_GLOBAL:
1487 			sphs[i] = 0;
1488 			dph = sph;
1489 			feth_update_pkt_tso_metadata_for_rx(dph);
1490 			err = kern_packet_finalize(dph);
1491 			VERIFY(err == 0);
1492 			break;
1493 		case IFF_PP_MODE_PRIVATE:
1494 			err = feth_copy_packet(dif, sph, &dph);
1495 			break;
1496 		case IFF_PP_MODE_PRIVATE_SPLIT:
1497 			err = feth_clone_packet(dif, sph, &dph);
1498 			break;
1499 		default:
1500 			VERIFY(0);
1501 			__builtin_unreachable();
1502 		}
1503 		if (__improbable(err != 0)) {
1504 			continue;
1505 		}
1506 
1507 		if (sif->iff_trailer_length != 0) {
1508 			feth_add_packet_trailer(dph, feth_trailer,
1509 			    sif->iff_trailer_length);
1510 		}
1511 		if (sif->iff_fcs != 0) {
1512 			feth_add_packet_fcs(dph);
1513 		}
1514 		feth_packet_set_trace_tag(dph, IFF_TT_INPUT);
1515 		bpf_tap_packet_in(dif->iff_ifp, DLT_EN10MB, dph, NULL, 0);
1516 		stats.kcrsi_slots_transferred++;
1517 		stats.kcrsi_bytes_transferred
1518 		        += kern_packet_get_data_length(dph);
1519 
1520 		/* attach the packet to the RX ring */
1521 		err = kern_channel_slot_attach_packet(rx_ring, rx_slot, dph);
1522 		VERIFY(err == 0);
1523 		last_rx_slot = rx_slot;
1524 		rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
1525 	}
1526 
1527 	if (last_rx_slot != NULL) {
1528 		kern_channel_advance_slot(rx_ring, last_rx_slot);
1529 		kern_channel_increment_ring_net_stats(rx_ring, dif->iff_ifp,
1530 		    &stats);
1531 	}
1532 
1533 	if (rx_ring != NULL) {
1534 		kr_exit(rx_ring);
1535 		kern_channel_notify(rx_ring, 0);
1536 	}
1537 }
1538 
/*
 * Deliver `n_pkts' TX packets from `sif' into a specific RX queue of
 * its peer `dif', selected by logical-link index `llink_idx' and
 * queue-set index `qset_idx' (logical-link counterpart of
 * feth_rx_submit()).  Invalid indices or a missing default queue drop
 * the whole batch.
 */
static void
feth_rx_queue_submit(if_fake_ref sif, if_fake_ref dif, uint32_t llink_idx,
    uint32_t qset_idx, kern_packet_t * __counted_by(n_pkts) sphs, uint32_t n_pkts)
{
	errno_t err = 0;
	kern_netif_queue_t queue;
	kern_packet_t sph = 0, dph = 0;
	fake_llink *llink;
	fake_qset *qset;

	/* validate the destination llink/qset indices */
	if (llink_idx >= dif->iff_llink_cnt) {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_INPUT,
		    "invalid llink_idx idx %d (max %d) on peer %s",
		    llink_idx, dif->iff_llink_cnt, dif->iff_name);
		return;
	}
	llink = &dif->iff_llink[llink_idx];
	if (qset_idx >= llink->fl_qset_cnt) {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_INPUT,
		    "invalid qset_idx %d (max %d) on peer %s",
		    qset_idx, llink->fl_qset_cnt, dif->iff_name);
		return;
	}
	qset = &dif->iff_llink[llink_idx].fl_qset[qset_idx];
	queue = qset->fqs_rx_queue[0].fq_queue;
	if (queue == NULL) {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_INPUT,
		    "NULL default queue (llink_idx %d, qset_idx %d) on peer %s",
		    llink_idx, qset_idx, dif->iff_name);
		return;
	}
	for (uint32_t i = 0; i < n_pkts; i++) {
		uint32_t flags;

		sph = sphs[i];

		switch (dif->iff_pp_mode) {
		case IFF_PP_MODE_GLOBAL:
			/* shared pool: hand the packet over directly */
			sphs[i] = 0;
			dph = sph;
			feth_update_pkt_tso_metadata_for_rx(dph);
			break;
		case IFF_PP_MODE_PRIVATE:
			err = feth_copy_packet(dif, sph, &dph);
			break;
		case IFF_PP_MODE_PRIVATE_SPLIT:
			err = feth_clone_packet(dif, sph, &dph);
			break;
		default:
			VERIFY(0);
			__builtin_unreachable();
		}
		if (__improbable(err != 0)) {
			/* copy/clone failed; skip this packet */
			continue;
		}

		/* optional trailer / FCS simulation (best effort) */
		if (sif->iff_trailer_length != 0) {
			feth_add_packet_trailer(dph, feth_trailer,
			    sif->iff_trailer_length);
		}
		if (sif->iff_fcs != 0) {
			feth_add_packet_fcs(dph);
		}
		feth_packet_set_trace_tag(dph, IFF_TT_INPUT);
		bpf_tap_packet_in(dif->iff_ifp, DLT_EN10MB, dph, NULL, 0);

		/* flush only on the final packet of the batch */
		flags = (i == n_pkts - 1) ?
		    KERN_NETIF_QUEUE_RX_ENQUEUE_FLAG_FLUSH : 0;
		kern_netif_queue_rx_enqueue(queue, dph, 1, flags);
	}
}
1610 
1611 static void
feth_tx_complete(if_fake_ref fakeif,kern_packet_t * __counted_by (nphs)phs,uint32_t nphs)1612 feth_tx_complete(if_fake_ref fakeif, kern_packet_t * __counted_by(nphs) phs, uint32_t nphs)
1613 {
1614 	for (uint32_t i = 0; i < nphs; i++) {
1615 		kern_packet_t ph = phs[i];
1616 		if (ph == 0) {
1617 			continue;
1618 		}
1619 		int err = kern_packet_set_tx_completion_status(ph, 0);
1620 		VERIFY(err == 0);
1621 		kern_packet_tx_completion(ph, fakeif->iff_ifp);
1622 		kern_pbufpool_free(fakeif->iff_tx_pp, phs[i]);
1623 		phs[i] = 0;
1624 	}
1625 }
1626 
1627 #define NSEC_PER_USEC 1000ull
/*
 * Decide whether the TX packet `ph' should be treated as expired,
 * based on its expire-time metadata and the configured threshold.
 */
1632 static bool
feth_packet_has_expired(if_fake_ref __unused fakeif,kern_packet_t ph,uint64_t * out_deadline)1633 feth_packet_has_expired(if_fake_ref __unused fakeif, kern_packet_t ph,
1634     uint64_t *out_deadline)
1635 {
1636 	uint64_t now;
1637 	uint64_t packet_expire_time_mach;
1638 	int64_t time_until_expiration;
1639 	errno_t err;
1640 	bool expired = false;
1641 
1642 	static mach_timebase_info_data_t clock_timebase = {0, 0};
1643 
1644 	if (clock_timebase.denom == 0) {
1645 		clock_timebase_info(&clock_timebase);
1646 		VERIFY(clock_timebase.denom != 0);
1647 	}
1648 
1649 	err = kern_packet_get_expire_time(ph, &packet_expire_time_mach);
1650 	if (err) {
1651 		goto out;
1652 	}
1653 
1654 	now = mach_absolute_time();
1655 	time_until_expiration = packet_expire_time_mach - now;
1656 	if (time_until_expiration < 0) {
1657 		/* The packet had expired */
1658 		expired = true;
1659 		goto out;
1660 	}
1661 
1662 	/* Convert the time_delta from mach ticks to nanoseconds */
1663 	time_until_expiration *= clock_timebase.numer;
1664 	time_until_expiration /= clock_timebase.denom;
1665 	/* convert from nanoseconds to microseconds */
1666 	time_until_expiration /= 1000ull;
1667 
1668 	if (if_fake_expiration_threshold_us < time_until_expiration) {
1669 		/* packet has some life ahead of it */
1670 		FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
1671 		    "Packet has %llu usec until expiration",
1672 		    time_until_expiration);
1673 		goto out;
1674 	}
1675 
1676 out:
1677 	if (expired && out_deadline) {
1678 		*out_deadline = packet_expire_time_mach;
1679 	}
1680 
1681 	return expired;
1682 }
1683 
1684 static errno_t
feth_get_packet_notification_details(if_fake_ref fakeif,kern_packet_t ph,packet_id_t * pkt_id,uint32_t * nx_port_id)1685 feth_get_packet_notification_details(if_fake_ref fakeif, kern_packet_t ph,
1686     packet_id_t *pkt_id, uint32_t *nx_port_id)
1687 {
1688 	errno_t err = 0;
1689 
1690 	err = kern_packet_get_packetid(ph, pkt_id);
1691 	if (err != 0) {
1692 		FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
1693 		    "%s err=%d getting packetid", fakeif->iff_name, err);
1694 		return err;
1695 	}
1696 
1697 	err = kern_packet_get_tx_nexus_port_id(ph, nx_port_id);
1698 	if (err != 0) {
1699 		FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
1700 		    "%s err=%d getting nx_port_id", fakeif->iff_name, err);
1701 		return err;
1702 	}
1703 
1704 	return 0;
1705 }
1706 
/*
 * Map the interface's TX expiration policy (the
 * "net.link.fake.tx_exp_policy" setting) to the expiry action to apply
 * to `ph'.  In metadata mode the action comes from the packet itself;
 * absent or unreadable metadata yields no action.
 */
static packet_expiry_action_t
feth_get_effective_expn_action(if_fake_ref fakeif, kern_packet_t ph)
{
	errno_t err;
	packet_expiry_action_t expiry_action;

	switch (fakeif->iff_tx_exp_policy) {
	case IFF_TX_EXP_POLICY_DISABLED:
		expiry_action = PACKET_EXPIRY_ACTION_NONE;
		break;
	case IFF_TX_EXP_POLICY_NOTIFY_ONLY:
		expiry_action = PACKET_EXPIRY_ACTION_NOTIFY;
		break;
	case IFF_TX_EXP_POLICY_DROP_AND_NOTIFY:
		expiry_action = PACKET_EXPIRY_ACTION_DROP;
		break;
	case IFF_TX_EXP_POLICY_METADATA:
		/* honor the per-packet expiry action, when present */
		err = kern_packet_get_expiry_action(ph, &expiry_action);
		if (err != 0) {
			/* ENOENT just means no metadata; stay quiet */
			if (err != ENOENT) {
				FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
				    "Error %d when getting expiry action",
				    err);
			}
			expiry_action = PACKET_EXPIRY_ACTION_NONE;
		}
		break;
	default:
		FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
		    "Unrecognized value %d for \"net.link.fake.tx_exp_policy\"",
		    fakeif->iff_tx_exp_policy);
		expiry_action = PACKET_EXPIRY_ACTION_NONE;
	}

	return expiry_action;
}
1743 
/* returns true if the packet is selected for expiration and should be dropped */
static bool
feth_tx_expired_error(if_fake_ref fakeif, kern_packet_t ph)
{
	int err = 0;
	uint32_t nx_port_id = 0;
	os_channel_event_packet_transmit_expired_t expn = {0};
	packet_expiry_action_t expiry_action = PACKET_EXPIRY_ACTION_NONE;

	FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC, "%s", fakeif->iff_name);

	/* policy is consulted only for packets that actually expired */
	if (feth_packet_has_expired(fakeif, ph, &expn.packet_tx_expiration_deadline)) {
		expiry_action = feth_get_effective_expn_action(fakeif, ph);
	}

	bool drop_packet = (expiry_action == PACKET_EXPIRY_ACTION_DROP);
	if (expiry_action != PACKET_EXPIRY_ACTION_NONE) {
		/* set the expiration status code */
		expn.packet_tx_expiration_status = drop_packet ?
		    CHANNEL_EVENT_PKT_TRANSMIT_EXPIRED_ERR_EXPIRED_DROPPED :
		    CHANNEL_EVENT_PKT_TRANSMIT_EXPIRED_ERR_EXPIRED_NOT_DROPPED;

		/* Mark the expiration timestamp */
		expn.packet_tx_expiration_timestamp = mach_absolute_time();

		err = feth_get_packet_notification_details(fakeif, ph,
		    &expn.packet_id, &nx_port_id);

		if (err == 0) {
			/* post the expiry event to the packet's nexus port */
			err = kern_channel_event_transmit_expired(
				fakeif->iff_ifp, &expn, nx_port_id);
			FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
			    "%s sent expiry notification on nexus port "
			    "%u notif code %u",
			    fakeif->iff_name, nx_port_id,
			    expn.packet_tx_expiration_status);
		}
		if (err != 0) {
			FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
			    "%s err=%d, nx_port_id: 0x%x",
			    fakeif->iff_name, err, nx_port_id);
		}
	}

	return drop_packet;
}
1790 
1791 /* returns true if the packet is selected for TX error & dropped */
static bool
feth_tx_complete_error(if_fake_ref fakeif, kern_packet_t ph)
{
	int err;

	/*
	 * Only the packet whose running count equals the configured drop
	 * rate gets a simulated error; rate 0 disables the simulation.
	 */
	if (fakeif->iff_tx_drop_rate == 0 ||
	    fakeif->iff_tx_pkts_count != fakeif->iff_tx_drop_rate) {
		return false;
	}
	/* simulate TX completion error on the packet */
	if (fakeif->iff_tx_completion_mode == IFF_TX_COMPL_MODE_SYNC) {
		/* synchronous mode: report via the packet completion path */
		err = kern_packet_set_tx_completion_status(ph,
		    CHANNEL_EVENT_PKT_TRANSMIT_STATUS_ERR_RETRY_FAILED);
		VERIFY(err == 0);
		kern_packet_tx_completion(ph, fakeif->iff_ifp);
	} else {
		/* async mode: post a channel event to the nexus port */
		uint32_t nx_port_id = 0;
		os_channel_event_packet_transmit_status_t pkt_tx_status = {0};

		pkt_tx_status.packet_status =
		    CHANNEL_EVENT_PKT_TRANSMIT_STATUS_ERR_RETRY_FAILED;
		err = feth_get_packet_notification_details(fakeif, ph,
		    &pkt_tx_status.packet_id, &nx_port_id);
		if (err == 0) {
			err = kern_channel_event_transmit_status(
				fakeif->iff_ifp, &pkt_tx_status, nx_port_id);
		}
		if (err != 0) {
			FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
			    "%s err=%d, nx_port_id: 0x%x",
			    fakeif->iff_name, err, nx_port_id);
		}
	}

	return true;
}
1828 
/*
 * Thread-call worker that periodically posts a synthetic interface
 * advisory report for the fake interface, then re-arms itself while
 * the channel stays connected.
 *
 * NOTE(review): iff_intf_adv_enabled and the advisory callback fields
 * are read after feth_lck_mtx is dropped — presumably benign for a
 * test interface, but racy against detach; verify against callers.
 */
static void
feth_if_adv(thread_call_param_t arg0, thread_call_param_t arg1)
{
#pragma unused(arg1)
	errno_t                            error;
	if_fake_ref                        fakeif = (if_fake_ref)arg0;
	struct ifnet_interface_advisory    if_adv;
	struct ifnet_stats_param           if_stat;

	feth_lock();
	if (feth_is_detaching(fakeif) || !fakeif->iff_channel_connected) {
		feth_unlock();
		return;
	}
	feth_unlock();

	if (!fakeif->iff_intf_adv_enabled) {
		goto done;
	}

	error = ifnet_stat(fakeif->iff_ifp, &if_stat);
	if (error != 0) {
		FAKE_LOG(LOG_NOTICE, 0, "%s: ifnet_stat() failed %d",
		    fakeif->iff_name, error);
		goto done;
	}
	/* fabricate a plausible WiFi TX capacity report */
	if_adv.header.version = IF_INTERFACE_ADVISORY_VERSION_CURRENT;
	if_adv.header.direction = IF_INTERFACE_ADVISORY_DIRECTION_TX;
	if_adv.header.interface_type =
	    IF_INTERFACE_ADVISORY_INTERFACE_TYPE_WIFI;
	if_adv.capacity.timestamp = mach_absolute_time();
	if_adv.capacity.rate_trend_suggestion =
	    IF_INTERFACE_ADVISORY_RATE_SUGGESTION_RAMP_NEUTRAL;
	if_adv.capacity.max_bandwidth = 1000 * 1000 * 1000; /* 1Gbps */
	if_adv.capacity.total_byte_count = if_stat.packets_out;
	if_adv.capacity.average_throughput = 1000 * 1000 * 1000; /* 1Gbps */
	if_adv.capacity.flushable_queue_size = UINT32_MAX;
	if_adv.capacity.non_flushable_queue_size = UINT32_MAX;
	if_adv.capacity.average_delay = 1; /* ms */

	error = fakeif->iff_intf_adv_notify(fakeif->iff_intf_adv_kern_ctx,
	    &if_adv);
	if (error != 0) {
		FAKE_LOG(LOG_NOTICE, 0,
		    "%s: interface advisory report failed %d",
		    fakeif->iff_name, error);
	}

done:
	/* re-arm the thread call unless the interface is tearing down */
	feth_lock();
	if (!feth_is_detaching(fakeif) && fakeif->iff_channel_connected) {
		uint64_t deadline;
		clock_interval_to_deadline(fakeif->iff_adv_interval,
		    NSEC_PER_MSEC, &deadline);
		thread_call_enter_delayed(fakeif->iff_if_adv_tcall, deadline);
	}
	feth_unlock();
}
1887 
/*
 * Allocate and arm the interface-advisory thread call for a connected
 * channel (called from feth_nx_connected()).  Takes a fakeif reference
 * on behalf of the thread call; the matching release happens in
 * feth_nx_pre_disconnect() after the call is cancelled and freed.
 * Returns 0 on success, ENXIO if the thread call cannot be allocated.
 */
static int
feth_if_adv_tcall_create(if_fake_ref fakeif)
{
	uint64_t deadline;

	feth_lock();
	ASSERT(fakeif->iff_if_adv_tcall == NULL);
	ASSERT(fakeif->iff_adv_interval > 0);
	ASSERT(fakeif->iff_channel_connected);
	fakeif->iff_if_adv_tcall =
	    thread_call_allocate_with_options(feth_if_adv,
	    (thread_call_param_t)fakeif, THREAD_CALL_PRIORITY_KERNEL,
	    THREAD_CALL_OPTIONS_ONCE);
	if (fakeif->iff_if_adv_tcall == NULL) {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
		    "%s if_adv tcall alloc failed",
		    fakeif->iff_name);
		feth_unlock();
		return ENXIO;
	}
	/* retain for the interface advisory thread call */
	feth_retain(fakeif);
	/* first firing after iff_adv_interval milliseconds */
	clock_interval_to_deadline(fakeif->iff_adv_interval,
	    NSEC_PER_MSEC, &deadline);
	thread_call_enter_delayed(fakeif->iff_if_adv_tcall, deadline);
	feth_unlock();
	return 0;
}
1916 
1917 /**
1918 ** nexus netif domain provider
1919 **/
1920 static errno_t
feth_nxdp_init(kern_nexus_domain_provider_t domprov)1921 feth_nxdp_init(kern_nexus_domain_provider_t domprov)
1922 {
1923 #pragma unused(domprov)
1924 	return 0;
1925 }
1926 
1927 static void
feth_nxdp_fini(kern_nexus_domain_provider_t domprov)1928 feth_nxdp_fini(kern_nexus_domain_provider_t domprov)
1929 {
1930 #pragma unused(domprov)
1931 }
1932 
/* UUID of the feth netif nexus domain provider, filled in at registration */
static uuid_t                   feth_nx_dom_prov;
1934 
1935 static errno_t
feth_register_nexus_domain_provider(void)1936 feth_register_nexus_domain_provider(void)
1937 {
1938 	const struct kern_nexus_domain_provider_init dp_init = {
1939 		.nxdpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
1940 		.nxdpi_flags = 0,
1941 		.nxdpi_init = feth_nxdp_init,
1942 		.nxdpi_fini = feth_nxdp_fini
1943 	};
1944 	errno_t                         err = 0;
1945 
1946 	nexus_domain_provider_name_t feth_provider_name = "com.apple.feth";
1947 
1948 	/* feth_nxdp_init() is called before this function returns */
1949 	err = kern_nexus_register_domain_provider(NEXUS_TYPE_NET_IF,
1950 	    feth_provider_name,
1951 	    &dp_init, sizeof(dp_init),
1952 	    &feth_nx_dom_prov);
1953 	if (err != 0) {
1954 		FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
1955 		    "failed to register domain provider");
1956 		return err;
1957 	}
1958 	return 0;
1959 }
1960 
1961 /**
1962 ** netif nexus routines
1963 **/
1964 static if_fake_ref
feth_nexus_context(kern_nexus_t nexus)1965 feth_nexus_context(kern_nexus_t nexus)
1966 {
1967 	if_fake_ref fakeif;
1968 
1969 	fakeif = (if_fake_ref)kern_nexus_get_context(nexus);
1970 	assert(fakeif != NULL);
1971 	return fakeif;
1972 }
1973 
1974 static uint8_t
feth_find_tx_ring_by_svc(kern_packet_svc_class_t svc_class)1975 feth_find_tx_ring_by_svc(kern_packet_svc_class_t svc_class)
1976 {
1977 	switch (svc_class) {
1978 	case KPKT_SC_VO:
1979 		return 0;
1980 	case KPKT_SC_VI:
1981 		return 1;
1982 	case KPKT_SC_BE:
1983 		return 2;
1984 	case KPKT_SC_BK:
1985 		return 3;
1986 	default:
1987 		VERIFY(0);
1988 		return 0;
1989 	}
1990 }
1991 
/*
 * Ring-init callback: record the new TX/RX ring in the fakeif.  In WMM
 * mode each TX ring is slotted by its service class; otherwise there is
 * a single TX ring at index 0.  Also caches the netif stats pointer.
 */
static errno_t
feth_nx_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_t channel, kern_channel_ring_t ring, boolean_t is_tx_ring,
    void **ring_ctx)
{
	if_fake_ref     fakeif;
	int             err;
#pragma unused(nxprov, channel, ring_ctx)
	feth_lock();
	fakeif = feth_nexus_context(nexus);
	if (feth_is_detaching(fakeif)) {
		/* interface going away: silently accept, record nothing */
		feth_unlock();
		return 0;
	}
	if (is_tx_ring) {
		if (feth_in_wmm_mode(fakeif)) {
			kern_packet_svc_class_t svc_class;
			uint8_t ring_idx;

			/* place the ring in the slot for its service class */
			err = kern_channel_get_service_class(ring, &svc_class);
			VERIFY(err == 0);
			ring_idx = feth_find_tx_ring_by_svc(svc_class);
			VERIFY(ring_idx < IFF_NUM_TX_RINGS_WMM_MODE);
			VERIFY(fakeif->iff_tx_ring[ring_idx] == NULL);
			fakeif->iff_tx_ring[ring_idx] = ring;
		} else {
			VERIFY(fakeif->iff_tx_ring[0] == NULL);
			fakeif->iff_tx_ring[0] = ring;
		}
	} else {
		/* only one RX ring is supported (IFF_MAX_RX_RINGS == 1) */
		VERIFY(fakeif->iff_rx_ring[0] == NULL);
		fakeif->iff_rx_ring[0] = ring;
	}
	fakeif->iff_nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
	feth_unlock();
	FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE, "%s: %s ring init",
	    fakeif->iff_name, is_tx_ring ? "TX" : "RX");
	return 0;
}
2031 
/*
 * Ring-fini callback: drop the ring from the fakeif bookkeeping.  When
 * the last TX ring goes away, take ownership of the async-doorbell
 * thread call and tear it down outside the lock: cancel it, and if it
 * is currently executing, sleep until it signals completion before
 * freeing it and dropping the reference taken at allocation.
 */
static void
feth_nx_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t ring)
{
#pragma unused(nxprov, ring)
	if_fake_ref       fakeif;
	thread_call_t   __single tcall = NULL;

	feth_lock();
	fakeif = feth_nexus_context(nexus);
	if (fakeif->iff_rx_ring[0] == ring) {
		fakeif->iff_rx_ring[0] = NULL;
		FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
		    "%s: RX ring fini", fakeif->iff_name);
	} else if (feth_in_wmm_mode(fakeif)) {
		int i;
		/* clear the matching TX ring slot */
		for (i = 0; i < IFF_MAX_TX_RINGS; i++) {
			if (fakeif->iff_tx_ring[i] == ring) {
				fakeif->iff_tx_ring[i] = NULL;
				break;
			}
		}
		/* check whether any TX ring remains */
		for (i = 0; i < IFF_MAX_TX_RINGS; i++) {
			if (fakeif->iff_tx_ring[i] != NULL) {
				break;
			}
		}
		if (i == IFF_MAX_TX_RINGS) {
			/* last TX ring gone: claim the doorbell tcall */
			tcall = fakeif->iff_doorbell_tcall;
			fakeif->iff_doorbell_tcall = NULL;
		}
		FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
		    "%s: TX ring fini", fakeif->iff_name);
	} else if (fakeif->iff_tx_ring[0] == ring) {
		tcall = fakeif->iff_doorbell_tcall;
		fakeif->iff_doorbell_tcall = NULL;
		fakeif->iff_tx_ring[0] = NULL;
	}
	fakeif->iff_nifs = NULL;
	feth_unlock();
	if (tcall != NULL) {
		boolean_t       success;

		success = thread_call_cancel_wait(tcall);
		FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
		    "%s: thread_call_cancel %s", fakeif->iff_name,
		    success ? "SUCCESS" : "FAILURE");
		if (!success) {
			/*
			 * Cancel failed: the call may be running right now.
			 * feth_async_doorbell() clears
			 * iff_doorbell_tcall_active and wakes us when done.
			 */
			feth_lock();
			if (fakeif->iff_doorbell_tcall_active) {
				fakeif->iff_waiting_for_tcall = TRUE;
				FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
				    "%s: *waiting for threadcall",
				    fakeif->iff_name);
				do {
					msleep(fakeif, &feth_lck_mtx,
					    PZERO, "feth threadcall", 0);
				} while (fakeif->iff_doorbell_tcall_active);
				FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
				    "%s: ^threadcall done",
				    fakeif->iff_name);
				fakeif->iff_waiting_for_tcall = FALSE;
			}
			feth_unlock();
		}
		success = thread_call_free(tcall);
		FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
		    "%s: thread_call_free %s",
		    fakeif->iff_name,
		    success ? "SUCCESS" : "FAILURE");
		/* release the reference taken when the tcall was allocated */
		feth_release(fakeif);
		VERIFY(success == TRUE);
	}
}
2106 
2107 static errno_t
feth_nx_pre_connect(kern_nexus_provider_t nxprov,proc_t proc,kern_nexus_t nexus,nexus_port_t port,kern_channel_t channel,void ** channel_context)2108 feth_nx_pre_connect(kern_nexus_provider_t nxprov,
2109     proc_t proc, kern_nexus_t nexus, nexus_port_t port, kern_channel_t channel,
2110     void **channel_context)
2111 {
2112 #pragma unused(nxprov, proc, nexus, port, channel, channel_context)
2113 	return 0;
2114 }
2115 
/*
 * Channel-connected callback: take a fakeif reference on behalf of the
 * connected channel (released in feth_nx_pre_disconnect()) and, if
 * interface-advisory reporting is configured, start the advisory timer.
 * Returns EBUSY if the interface is detaching.
 */
static errno_t
feth_nx_connected(kern_nexus_provider_t nxprov,
    kern_nexus_t nexus, kern_channel_t channel)
{
#pragma unused(nxprov, channel)
	int err;
	if_fake_ref fakeif;

	fakeif = feth_nexus_context(nexus);
	feth_lock();
	if (feth_is_detaching(fakeif)) {
		feth_unlock();
		return EBUSY;
	}
	feth_retain(fakeif);
	fakeif->iff_channel_connected = TRUE;
	feth_unlock();
	if (feth_has_intf_advisory_configured(fakeif)) {
		err = feth_if_adv_tcall_create(fakeif);
		if (err != 0) {
			/*
			 * NOTE(review): on failure, the channel reference and
			 * iff_channel_connected stay set; presumably
			 * pre-disconnect still runs and undoes both — verify.
			 */
			return err;
		}
	}
	FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE, "%s: connected channel %p",
	    fakeif->iff_name, channel);
	return 0;
}
2143 
/*
 * Pre-disconnect callback: mark the channel disconnected, stop and free
 * the interface-advisory thread call (if any), and drop the references
 * taken for the tcall and for the connected channel.
 */
static void
feth_nx_pre_disconnect(kern_nexus_provider_t nxprov,
    kern_nexus_t nexus, kern_channel_t channel)
{
#pragma unused(nxprov, channel)
	if_fake_ref fakeif;
	thread_call_t __single tcall;
	boolean_t connected;

	fakeif = feth_nexus_context(nexus);
	FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
	    "%s: pre-disconnect channel %p",
	    fakeif->iff_name, channel);
	/* Quiesce the interface and flush any pending outbound packets. */
	if_down(fakeif->iff_ifp);
	feth_lock();
	connected = fakeif->iff_channel_connected;
	fakeif->iff_channel_connected = FALSE;
	/* claim the advisory tcall so it can be torn down outside the lock */
	tcall = fakeif->iff_if_adv_tcall;
	fakeif->iff_if_adv_tcall = NULL;
	feth_unlock();
	if (tcall != NULL) {
		(void) thread_call_cancel_wait(tcall);
		if (!thread_call_free(tcall)) {
			/*
			 * The call re-armed itself between cancel and free
			 * (see feth_if_adv()); cancel and free once more.
			 */
			boolean_t freed;
			(void) thread_call_cancel_wait(tcall);
			freed = thread_call_free(tcall);
			VERIFY(freed);
		}
		/* release for the interface advisory thread call */
		feth_release(fakeif);
	}
	if (connected) {
		/* release the reference taken in feth_nx_connected() */
		feth_release(fakeif);
	}
}
2180 
2181 static void
feth_nx_disconnected(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel)2182 feth_nx_disconnected(kern_nexus_provider_t nxprov,
2183     kern_nexus_t nexus, kern_channel_t channel)
2184 {
2185 #pragma unused(nxprov, channel)
2186 	if_fake_ref fakeif;
2187 
2188 	fakeif = feth_nexus_context(nexus);
2189 	FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE, "%s: disconnected channel %p",
2190 	    fakeif->iff_name, channel);
2191 }
2192 
2193 static errno_t
feth_nx_slot_init(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring,kern_channel_slot_t slot,uint32_t slot_index,struct kern_slot_prop ** slot_prop_addr,void ** slot_context)2194 feth_nx_slot_init(kern_nexus_provider_t nxprov,
2195     kern_nexus_t nexus, kern_channel_ring_t ring, kern_channel_slot_t slot,
2196     uint32_t slot_index, struct kern_slot_prop **slot_prop_addr,
2197     void **slot_context)
2198 {
2199 #pragma unused(nxprov, nexus, ring, slot, slot_index, slot_prop_addr, slot_context)
2200 	return 0;
2201 }
2202 
2203 static void
feth_nx_slot_fini(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring,kern_channel_slot_t slot,uint32_t slot_index)2204 feth_nx_slot_fini(kern_nexus_provider_t nxprov,
2205     kern_nexus_t nexus, kern_channel_ring_t ring, kern_channel_slot_t slot,
2206     uint32_t slot_index)
2207 {
2208 #pragma unused(nxprov, nexus, ring, slot, slot_index)
2209 }
2210 
/*
 * TX sync callback: drain the TX ring and "transmit" by handing each
 * packet to the peer feth interface's RX path (feth_rx_submit), batching
 * up to IFF_MAX_BATCH_SIZE packets at a time.  Packets are dropped when
 * TX-expiration or TX-completion error simulation selects them, or when
 * the peer's channel is not connected.  Runs entirely under feth_lock.
 */
static errno_t
feth_nx_sync_tx(kern_nexus_provider_t nxprov,
    kern_nexus_t nexus, kern_channel_ring_t tx_ring, uint32_t flags)
{
#pragma unused(nxprov)
	if_fake_ref             fakeif;
	ifnet_t                 ifp;
	kern_channel_slot_t     last_tx_slot = NULL;
	ifnet_t                 peer_ifp;
	if_fake_ref             peer_fakeif = NULL;
	struct kern_channel_ring_stat_increment stats;
	kern_channel_slot_t     tx_slot;
	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
	kern_packet_t           pkts[IFF_MAX_BATCH_SIZE];
	uint32_t                n_pkts = 0;

	memset(&stats, 0, sizeof(stats));

	STATS_INC(nifs, NETIF_STATS_TX_SYNC);
	fakeif = feth_nexus_context(nexus);
	FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
	    "%s ring %d flags 0x%x", fakeif->iff_name,
	    tx_ring->ckr_ring_id, flags);
	(void)flags;
	feth_lock();
	if (feth_is_detaching(fakeif) || !fakeif->iff_channel_connected) {
		feth_unlock();
		return 0;
	}
	ifp = fakeif->iff_ifp;
	/* locate the crossed-over peer; without one there is nowhere to TX */
	peer_ifp = fakeif->iff_peer;
	if (peer_ifp != NULL) {
		peer_fakeif = ifnet_get_if_fake(peer_ifp);
		if (peer_fakeif != NULL) {
			if (feth_is_detaching(peer_fakeif)) {
				FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
				    "%s peer fakeif %s is detaching",
				    fakeif->iff_name, peer_fakeif->iff_name);
				goto done;
			}
			if (!peer_fakeif->iff_channel_connected) {
				/*
				 * With TX expiration disabled we bail out;
				 * otherwise fall through so expiration/drop
				 * simulation can still run per packet.
				 */
				if (fakeif->iff_tx_exp_policy ==
				    IFF_TX_EXP_POLICY_DISABLED) {
					FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
					    "%s peer fakeif %s channel not connected, expn: %d",
					    fakeif->iff_name, peer_fakeif->iff_name,
					    fakeif->iff_tx_exp_policy);
					goto done;
				}
			}
		} else {
			FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
			    "%s no peer fakeif (peer %p)",
			    fakeif->iff_name, peer_ifp);
			goto done;
		}
	} else {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
		    "%s no peer", fakeif->iff_name);
		goto done;
	}
	tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
	while (tx_slot != NULL) {
		uint16_t off;
		kern_packet_t sph;

		/* detach the packet from the TX ring */
		sph = kern_channel_slot_get_packet(tx_ring, tx_slot);
		VERIFY(sph != 0);
		kern_channel_slot_detach_packet(tx_ring, tx_slot, sph);

		/* bpf tap output */
		off = kern_packet_get_headroom(sph);
		VERIFY(off >= fakeif->iff_tx_headroom);
		kern_packet_set_link_header_length(sph, ETHER_HDR_LEN);
		feth_packet_set_trace_tag(sph, IFF_TT_OUTPUT);
		bpf_tap_packet_out(ifp, DLT_EN10MB, sph, NULL, 0);

		/* drop packets, if requested */
		fakeif->iff_tx_pkts_count++;
		if (feth_tx_expired_error(fakeif, sph) ||
		    feth_tx_complete_error(fakeif, sph) ||
		    !peer_fakeif->iff_channel_connected) {
			/* counter reset restarts the drop-rate window */
			fakeif->iff_tx_pkts_count = 0;
			kern_pbufpool_free(fakeif->iff_tx_pp, sph);
			STATS_INC(nifs, NETIF_STATS_DROP);
			goto next_tx_slot;
		}

		ASSERT(sph != 0);
		STATS_INC(nifs, NETIF_STATS_TX_COPY_DIRECT);
		STATS_INC(nifs, NETIF_STATS_TX_PACKETS);

		stats.kcrsi_slots_transferred++;
		stats.kcrsi_bytes_transferred
		        += kern_packet_get_data_length(sph);

		/* prepare batch for receiver */
		pkts[n_pkts++] = sph;
		if (n_pkts == IFF_MAX_BATCH_SIZE) {
			/* full batch: hand to peer RX and complete TX */
			feth_rx_submit(fakeif, peer_fakeif, pkts, n_pkts);
			feth_tx_complete(fakeif, pkts, n_pkts);
			n_pkts = 0;
		}

next_tx_slot:
		last_tx_slot = tx_slot;
		tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
	}

	/* catch last batch for receiver */
	if (n_pkts != 0) {
		feth_rx_submit(fakeif, peer_fakeif, pkts, n_pkts);
		feth_tx_complete(fakeif, pkts, n_pkts);
		n_pkts = 0;
	}

	/* advance the ring head past everything we consumed */
	if (last_tx_slot != NULL) {
		kern_channel_advance_slot(tx_ring, last_tx_slot);
		kern_channel_increment_ring_net_stats(tx_ring, ifp, &stats);
	}
done:
	feth_unlock();
	return 0;
}
2336 
2337 static errno_t
feth_nx_sync_rx(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring,uint32_t flags)2338 feth_nx_sync_rx(kern_nexus_provider_t nxprov,
2339     kern_nexus_t nexus, kern_channel_ring_t ring, uint32_t flags)
2340 {
2341 #pragma unused(nxprov, ring, flags)
2342 	if_fake_ref           fakeif;
2343 	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
2344 
2345 	STATS_INC(nifs, NETIF_STATS_RX_SYNC);
2346 	fakeif = feth_nexus_context(nexus);
2347 	FAKE_LOG(LOG_DEBUG, FE_DBGF_INPUT, "%s", fakeif->iff_name);
2348 	return 0;
2349 }
2350 
2351 static errno_t
feth_nx_tx_dequeue_driver_managed(if_fake_ref fakeif,boolean_t doorbell_ctxt)2352 feth_nx_tx_dequeue_driver_managed(if_fake_ref fakeif, boolean_t doorbell_ctxt)
2353 {
2354 	int i;
2355 	errno_t error = 0;
2356 	boolean_t more;
2357 
2358 	for (i = 0; i < IFF_NUM_TX_RINGS_WMM_MODE; i++) {
2359 		kern_channel_ring_t ring = fakeif->iff_tx_ring[i];
2360 		if (ring != NULL) {
2361 			error = kern_channel_tx_refill(ring, UINT32_MAX,
2362 			    UINT32_MAX, doorbell_ctxt, &more);
2363 		}
2364 		if (error != 0) {
2365 			FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
2366 			    "%s: TX refill ring %d (%s) %d",
2367 			    fakeif->iff_name, ring->ckr_ring_id,
2368 			    doorbell_ctxt ? "sync" : "async", error);
2369 			if (!((error == EAGAIN) || (error == EBUSY))) {
2370 				break;
2371 			}
2372 		} else {
2373 			FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
2374 			    "%s: TX refilled ring %d (%s)",
2375 			    fakeif->iff_name, ring->ckr_ring_id,
2376 			    doorbell_ctxt ? "sync" : "async");
2377 		}
2378 	}
2379 	return error;
2380 }
2381 
/*
 * Async-doorbell thread call: perform the TX refill that was deferred
 * by feth_nx_tx_doorbell().  On exit, clears iff_doorbell_tcall_active
 * and wakes feth_nx_ring_fini() if it is sleeping in msleep() waiting
 * for this call to finish.
 */
static void
feth_async_doorbell(thread_call_param_t arg0, thread_call_param_t arg1)
{
#pragma unused(arg1)
	errno_t                 error;
	if_fake_ref             fakeif = (if_fake_ref)arg0;
	kern_channel_ring_t     ring;
	boolean_t               more;

	feth_lock();
	ring = fakeif->iff_tx_ring[0];
	if (feth_is_detaching(fakeif) ||
	    !fakeif->iff_channel_connected ||
	    ring == NULL) {
		/* note: jumps past the refill with the lock still held */
		goto done;
	}
	/* mark active so ring-fini can wait for us; refill runs unlocked */
	fakeif->iff_doorbell_tcall_active = TRUE;
	feth_unlock();
	if (feth_in_wmm_mode(fakeif)) {
		error = feth_nx_tx_dequeue_driver_managed(fakeif, FALSE);
	} else {
		error = kern_channel_tx_refill(ring, UINT32_MAX,
		    UINT32_MAX, FALSE, &more);
	}
	if (error != 0) {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT, "%s: TX refill failed %d",
		    fakeif->iff_name, error);
	} else {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT, "%s: TX refilled",
		    fakeif->iff_name);
	}

	feth_lock();
done:
	/* reached with feth_lock held on both paths */
	fakeif->iff_doorbell_tcall_active = FALSE;
	if (fakeif->iff_waiting_for_tcall) {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
		    "%s: threadcall waking up waiter", fakeif->iff_name);
		wakeup((caddr_t)fakeif);
	}
	feth_unlock();
}
2424 
/*
 * Schedule the async TX-refill thread call, lazily allocating it on
 * first use.  The allocation takes a fakeif reference that is released
 * when the tcall is freed in feth_nx_ring_fini().
 */
static void
feth_schedule_async_doorbell(if_fake_ref fakeif)
{
	thread_call_t  __single tcall;

	feth_lock();
	if (feth_is_detaching(fakeif) || !fakeif->iff_channel_connected) {
		feth_unlock();
		return;
	}
	tcall = fakeif->iff_doorbell_tcall;
	if (tcall != NULL) {
		thread_call_enter(tcall);
	} else {
		/* first doorbell: allocate the one-shot thread call */
		tcall = thread_call_allocate_with_options(feth_async_doorbell,
		    (thread_call_param_t)fakeif,
		    THREAD_CALL_PRIORITY_KERNEL,
		    THREAD_CALL_OPTIONS_ONCE);
		if (tcall == NULL) {
			/* best-effort: the doorbell is simply dropped */
			FAKE_LOG(LOG_NOTICE, FE_DBGF_OUTPUT,
			    "%s tcall alloc failed", fakeif->iff_name);
		} else {
			fakeif->iff_doorbell_tcall = tcall;
			feth_retain(fakeif);
			thread_call_enter(tcall);
		}
	}
	feth_unlock();
}
2454 
2455 static errno_t
feth_nx_tx_doorbell(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring,uint32_t flags)2456 feth_nx_tx_doorbell(kern_nexus_provider_t nxprov,
2457     kern_nexus_t nexus, kern_channel_ring_t ring, uint32_t flags)
2458 {
2459 #pragma unused(nxprov, ring, flags)
2460 	errno_t         error;
2461 	if_fake_ref     fakeif;
2462 
2463 	fakeif = feth_nexus_context(nexus);
2464 	FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT, "%s", fakeif->iff_name);
2465 
2466 	if ((flags & KERN_NEXUS_TXDOORBELLF_ASYNC_REFILL) == 0) {
2467 		boolean_t       more;
2468 		/* synchronous tx refill */
2469 		if (feth_in_wmm_mode(fakeif)) {
2470 			error = feth_nx_tx_dequeue_driver_managed(fakeif, TRUE);
2471 		} else {
2472 			error = kern_channel_tx_refill(ring, UINT32_MAX,
2473 			    UINT32_MAX, TRUE, &more);
2474 		}
2475 		if (error != 0) {
2476 			FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
2477 			    "%s: TX refill (sync) %d", fakeif->iff_name, error);
2478 		} else {
2479 			FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
2480 			    "%s: TX refilled (sync)", fakeif->iff_name);
2481 		}
2482 	} else {
2483 		FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
2484 		    "%s: schedule async refill", fakeif->iff_name);
2485 		feth_schedule_async_doorbell(fakeif);
2486 	}
2487 	return 0;
2488 }
2489 
2490 static errno_t
feth_netif_prepare(kern_nexus_t nexus,ifnet_t ifp)2491 feth_netif_prepare(kern_nexus_t nexus, ifnet_t ifp)
2492 {
2493 	if_fake_ref fakeif;
2494 
2495 	fakeif = (if_fake_ref)kern_nexus_get_context(nexus);
2496 	feth_ifnet_set_attrs(fakeif, ifp);
2497 	return 0;
2498 }
2499 
2500 static errno_t
feth_nx_intf_adv_config(void * prov_ctx,bool enable)2501 feth_nx_intf_adv_config(void *prov_ctx, bool enable)
2502 {
2503 	if_fake_ref fakeif = prov_ctx;
2504 
2505 	feth_lock();
2506 	fakeif->iff_intf_adv_enabled = enable;
2507 	feth_unlock();
2508 	FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
2509 	    "%s enable %d", fakeif->iff_name, enable);
2510 	return 0;
2511 }
2512 
2513 static errno_t
fill_capab_interface_advisory(if_fake_ref fakeif,void * contents,uint32_t * len)2514 fill_capab_interface_advisory(if_fake_ref fakeif, void *contents, uint32_t *len)
2515 {
2516 	struct kern_nexus_capab_interface_advisory * __single capab = contents;
2517 
2518 	if (*len != sizeof(*capab)) {
2519 		return EINVAL;
2520 	}
2521 	if (capab->kncia_version !=
2522 	    KERN_NEXUS_CAPAB_INTERFACE_ADVISORY_VERSION_1) {
2523 		return EINVAL;
2524 	}
2525 	if (!feth_has_intf_advisory_configured(fakeif)) {
2526 		return ENOTSUP;
2527 	}
2528 	VERIFY(capab->kncia_notify != NULL);
2529 	fakeif->iff_intf_adv_kern_ctx = capab->kncia_kern_context;
2530 	fakeif->iff_intf_adv_notify = capab->kncia_notify;
2531 	capab->kncia_provider_context = fakeif;
2532 	capab->kncia_config = feth_nx_intf_adv_config;
2533 	return 0;
2534 }
2535 
2536 static errno_t
feth_notify_steering_info(void * prov_ctx,void * qset_ctx,struct ifnet_traffic_descriptor_common * td,bool add)2537 feth_notify_steering_info(void *prov_ctx, void *qset_ctx,
2538     struct ifnet_traffic_descriptor_common *td, bool add)
2539 {
2540 #pragma unused(td)
2541 	if_fake_ref fakeif = prov_ctx;
2542 	fake_qset * __single qset = qset_ctx;
2543 
2544 	FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
2545 	    "%s: notify_steering_info: qset_id 0x%llx, %s",
2546 	    fakeif->iff_name, qset->fqs_id, add ? "add" : "remove");
2547 	return 0;
2548 }
2549 
2550 static errno_t
fill_capab_qset_extensions(if_fake_ref fakeif,void * contents,uint32_t * len)2551 fill_capab_qset_extensions(if_fake_ref fakeif, void *contents, uint32_t *len)
2552 {
2553 	struct kern_nexus_capab_qset_extensions * __single capab = contents;
2554 
2555 	if (*len != sizeof(*capab)) {
2556 		return EINVAL;
2557 	}
2558 	if (capab->cqe_version !=
2559 	    KERN_NEXUS_CAPAB_QSET_EXTENSIONS_VERSION_1) {
2560 		return EINVAL;
2561 	}
2562 	capab->cqe_prov_ctx = fakeif;
2563 	capab->cqe_notify_steering_info = feth_notify_steering_info;
2564 	return 0;
2565 }
2566 
2567 static errno_t
feth_nx_capab_config(kern_nexus_provider_t nxprov,kern_nexus_t nx,kern_nexus_capab_t capab,void * contents,uint32_t * len)2568 feth_nx_capab_config(kern_nexus_provider_t nxprov, kern_nexus_t nx,
2569     kern_nexus_capab_t capab, void *contents, uint32_t *len)
2570 {
2571 #pragma unused(nxprov)
2572 	errno_t error;
2573 	if_fake_ref fakeif;
2574 
2575 	fakeif = feth_nexus_context(nx);
2576 	FAKE_LOG(LOG_DEBUG, FE_DBGF_CONTROL, "%s", fakeif->iff_name);
2577 
2578 	switch (capab) {
2579 	case KERN_NEXUS_CAPAB_INTERFACE_ADVISORY:
2580 		error = fill_capab_interface_advisory(fakeif, contents, len);
2581 		break;
2582 	case KERN_NEXUS_CAPAB_QSET_EXTENSIONS:
2583 		error = fill_capab_qset_extensions(fakeif, contents, len);
2584 		break;
2585 	default:
2586 		error = ENOTSUP;
2587 		break;
2588 	}
2589 	return error;
2590 }
2591 
2592 static int
feth_set_tso_mtu(ifnet_t ifp,uint32_t tso_v4_mtu,uint32_t tso_v6_mtu)2593 feth_set_tso_mtu(ifnet_t ifp, uint32_t tso_v4_mtu, uint32_t tso_v6_mtu)
2594 {
2595 	int     error;
2596 
2597 	error = ifnet_set_tso_mtu(ifp, AF_INET, tso_v4_mtu);
2598 	if (error != 0) {
2599 		FAKE_LOG(LOG_NOTICE, FE_DBGF_CONTROL,
2600 		    "set TSO MTU IPv4 failed on %s, err %d",
2601 		    if_name(ifp), error);
2602 		return error;
2603 	}
2604 	error = ifnet_set_tso_mtu(ifp, AF_INET6, tso_v6_mtu);
2605 	if (error != 0) {
2606 		FAKE_LOG(LOG_NOTICE, FE_DBGF_CONTROL,
2607 		    "set TSO MTU IPv6 failed on %s, err %d",
2608 		    if_name(ifp), error);
2609 		return error;
2610 	}
2611 	return 0;
2612 }
2613 
2614 static int
feth_set_tso_offload(ifnet_t ifp)2615 feth_set_tso_offload(ifnet_t ifp)
2616 {
2617 	ifnet_offload_t offload;
2618 	int error;
2619 
2620 	offload = IFNET_TSO_IPV4 | IFNET_TSO_IPV6;
2621 	error = ifnet_set_offload(ifp, offload);
2622 	if (error != 0) {
2623 		FAKE_LOG(LOG_NOTICE, FE_DBGF_CONTROL,
2624 		    "set TSO offload failed on %s, err %d",
2625 		    if_name(ifp), error);
2626 		goto done;
2627 	}
2628 	error = feth_set_tso_mtu(ifp, if_fake_tso_buffer_size,
2629 	    if_fake_tso_buffer_size);
2630 done:
2631 	return error;
2632 }
2633 
/*
 * Register a per-interface netif nexus provider ("com.apple.netif.<name>")
 * and allocate its provider instance, which creates the ifnet.  In WMM
 * mode the nexus is configured with the WMM ring counts and qmap.  On
 * success *provider/*instance hold the new UUIDs and *ifp the interface;
 * on failure everything allocated here is unwound (but see NOTE below).
 */
static errno_t
create_netif_provider_and_instance(if_fake_ref fakeif,
    struct ifnet_init_eparams * init_params, ifnet_t *ifp,
    uuid_t * provider, uuid_t * instance)
{
	errno_t                 err;
	nexus_controller_t      controller = kern_nexus_shared_controller();
	struct kern_nexus_net_init net_init;
	nexus_name_t            provider_name;
	nexus_attr_t            __single nexus_attr = NULL;
	struct kern_nexus_provider_init prov_init = {
		.nxpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
		.nxpi_flags = NXPIF_VIRTUAL_DEVICE,
		.nxpi_pre_connect = feth_nx_pre_connect,
		.nxpi_connected = feth_nx_connected,
		.nxpi_pre_disconnect = feth_nx_pre_disconnect,
		.nxpi_disconnected = feth_nx_disconnected,
		.nxpi_ring_init = feth_nx_ring_init,
		.nxpi_ring_fini = feth_nx_ring_fini,
		.nxpi_slot_init = feth_nx_slot_init,
		.nxpi_slot_fini = feth_nx_slot_fini,
		.nxpi_sync_tx = feth_nx_sync_tx,
		.nxpi_sync_rx = feth_nx_sync_rx,
		.nxpi_tx_doorbell = feth_nx_tx_doorbell,
		.nxpi_config_capab = feth_nx_capab_config,
	};

	_CASSERT(IFF_MAX_RX_RINGS == 1);
	err = kern_nexus_attr_create(&nexus_attr);
	if (err != 0) {
		FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
		    "nexus attribute creation failed, error %d", err);
		goto failed;
	}
	if (feth_in_wmm_mode(fakeif)) {
		/* four TX rings mapped by service class, WMM queue map */
		err = kern_nexus_attr_set(nexus_attr, NEXUS_ATTR_TX_RINGS,
		    IFF_NUM_TX_RINGS_WMM_MODE);
		VERIFY(err == 0);
		err = kern_nexus_attr_set(nexus_attr, NEXUS_ATTR_RX_RINGS,
		    IFF_NUM_RX_RINGS_WMM_MODE);
		VERIFY(err == 0);
		err = kern_nexus_attr_set(nexus_attr, NEXUS_ATTR_QMAP,
		    NEXUS_QMAP_TYPE_WMM);
		VERIFY(err == 0);
	}

	err = kern_nexus_attr_set(nexus_attr, NEXUS_ATTR_ANONYMOUS, 1);
	VERIFY(err == 0);
	snprintf((char *)provider_name, sizeof(provider_name),
	    "com.apple.netif.%s", fakeif->iff_name);
	err = kern_nexus_controller_register_provider(controller,
	    feth_nx_dom_prov,
	    provider_name,
	    &prov_init,
	    sizeof(prov_init),
	    nexus_attr,
	    provider);
	if (err != 0) {
		FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
		    "register provider failed, error %d", err);
		goto failed;
	}
	/* instance creation also creates the ifnet via feth_netif_prepare */
	bzero(&net_init, sizeof(net_init));
	net_init.nxneti_version = KERN_NEXUS_NET_CURRENT_VERSION;
	net_init.nxneti_flags = 0;
	net_init.nxneti_eparams = init_params;
	net_init.nxneti_lladdr = NULL;
	net_init.nxneti_prepare = feth_netif_prepare;
	net_init.nxneti_rx_pbufpool = fakeif->iff_rx_pp;
	net_init.nxneti_tx_pbufpool = fakeif->iff_tx_pp;
	err = kern_nexus_controller_alloc_net_provider_instance(controller,
	    *provider,
	    fakeif,
	    NULL,
	    instance,
	    &net_init,
	    ifp);
	if (err != 0) {
		/* undo the provider registration on instance failure */
		FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
		    "alloc_net_provider_instance failed, %d", err);
		kern_nexus_controller_deregister_provider(controller,
		    *provider);
		uuid_clear(*provider);
		goto failed;
	}
	if (feth_supports_tso(fakeif)) {
		/*
		 * NOTE(review): on TSO setup failure the provider instance
		 * allocated above is not torn down here — presumably the
		 * caller unwinds it; verify.
		 */
		if ((err = feth_set_tso_offload(*ifp)) != 0) {
			goto failed;
		}
	}

failed:
	/* common exit: the attr object is only needed for registration */
	if (nexus_attr != NULL) {
		kern_nexus_attr_destroy(nexus_attr);
	}
	return err;
}
2731 
2732 /*
2733  * The nif_stats need to be referenced because we don't want it set
2734  * to NULL until the last llink is removed.
2735  */
2736 static void
get_nexus_stats(if_fake_ref fakeif,kern_nexus_t nexus)2737 get_nexus_stats(if_fake_ref fakeif, kern_nexus_t nexus)
2738 {
2739 	if (++fakeif->iff_nifs_ref == 1) {
2740 		ASSERT(fakeif->iff_nifs == NULL);
2741 		fakeif->iff_nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
2742 	}
2743 }
2744 
2745 static void
clear_nexus_stats(if_fake_ref fakeif)2746 clear_nexus_stats(if_fake_ref fakeif)
2747 {
2748 	if (--fakeif->iff_nifs_ref == 0) {
2749 		ASSERT(fakeif->iff_nifs != NULL);
2750 		fakeif->iff_nifs = NULL;
2751 	}
2752 }
2753 
/*
 * Qset-init callback: bind the new kernel qset to the fake_qset slot at
 * qset_idx within the llink, record its id, and hand the slot back as
 * the qset context.  Also takes a nif_stats reference.  Returns ENXIO
 * when the interface is detaching, EINVAL on a bad qset index.
 */
static errno_t
feth_nx_qset_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    void *llink_ctx, uint8_t qset_idx, uint64_t qset_id, kern_netif_qset_t qset,
    void **qset_ctx)
{
#pragma unused(nxprov)
	if_fake_ref fakeif;
	fake_llink * __single fl = llink_ctx;
	fake_qset *fqs;

	feth_lock();
	fakeif = feth_nexus_context(nexus);
	if (feth_is_detaching(fakeif)) {
		feth_unlock();
		FAKE_LOG(LOG_DEBUG, FE_DBGF_CONTROL,
		    "%s: detaching", fakeif->iff_name);
		return ENXIO;
	}
	if (qset_idx >= fl->fl_qset_cnt) {
		feth_unlock();
		FAKE_LOG(LOG_DEBUG, FE_DBGF_CONTROL,
		    "%s: invalid qset_idx %d", fakeif->iff_name, qset_idx);
		return EINVAL;
	}
	fqs = &fl->fl_qset[qset_idx];
	ASSERT(fqs->fqs_qset == NULL);
	fqs->fqs_qset = qset;
	fqs->fqs_id = qset_id;
	*qset_ctx = fqs;

	/* XXX This should really be done during registration */
	get_nexus_stats(fakeif, nexus);
	feth_unlock();
	return 0;
}
2789 
2790 static void
feth_nx_qset_fini(kern_nexus_provider_t nxprov,kern_nexus_t nexus,void * qset_ctx)2791 feth_nx_qset_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
2792     void *qset_ctx)
2793 {
2794 #pragma unused(nxprov)
2795 	if_fake_ref fakeif;
2796 	fake_qset * __single fqs = qset_ctx;
2797 
2798 	feth_lock();
2799 	fakeif = feth_nexus_context(nexus);
2800 	clear_nexus_stats(fakeif);
2801 	ASSERT(fqs->fqs_qset != NULL);
2802 	fqs->fqs_qset = NULL;
2803 	fqs->fqs_id = 0;
2804 	feth_unlock();
2805 }
2806 
2807 static errno_t
feth_nx_queue_init(kern_nexus_provider_t nxprov,kern_nexus_t nexus,void * qset_ctx,uint8_t qidx,bool tx,kern_netif_queue_t queue,void ** queue_ctx)2808 feth_nx_queue_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
2809     void *qset_ctx, uint8_t qidx, bool tx, kern_netif_queue_t queue,
2810     void **queue_ctx)
2811 {
2812 #pragma unused(nxprov)
2813 	if_fake_ref fakeif;
2814 	fake_qset *__single fqs = qset_ctx;
2815 	fake_queue *fq;
2816 
2817 	feth_lock();
2818 	fakeif = feth_nexus_context(nexus);
2819 	if (feth_is_detaching(fakeif)) {
2820 		FAKE_LOG(LOG_DEBUG, FE_DBGF_CONTROL,
2821 		    "%s: detaching", fakeif->iff_name);
2822 		feth_unlock();
2823 		return ENXIO;
2824 	}
2825 	if (tx) {
2826 		if (qidx >= fqs->fqs_tx_queue_cnt) {
2827 			FAKE_LOG(LOG_DEBUG, FE_DBGF_CONTROL,
2828 			    "%s: invalid tx qidx %d", fakeif->iff_name, qidx);
2829 			feth_unlock();
2830 			return EINVAL;
2831 		}
2832 		fq = &fqs->fqs_tx_queue[qidx];
2833 	} else {
2834 		if (qidx >= fqs->fqs_rx_queue_cnt) {
2835 			FAKE_LOG(LOG_DEBUG, FE_DBGF_CONTROL,
2836 			    "%s: invalid rx qidx %d", fakeif->iff_name, qidx);
2837 			feth_unlock();
2838 			return EINVAL;
2839 		}
2840 		fq = &fqs->fqs_rx_queue[qidx];
2841 	}
2842 	ASSERT(fq->fq_queue == NULL);
2843 	fq->fq_queue = queue;
2844 	*queue_ctx = fq;
2845 	feth_unlock();
2846 	return 0;
2847 }
2848 
2849 static void
feth_nx_queue_fini(kern_nexus_provider_t nxprov,kern_nexus_t nexus,void * queue_ctx)2850 feth_nx_queue_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
2851     void *queue_ctx)
2852 {
2853 #pragma unused(nxprov, nexus)
2854 	fake_queue *__single fq = queue_ctx;
2855 
2856 	feth_lock();
2857 	ASSERT(fq->fq_queue != NULL);
2858 	fq->fq_queue = NULL;
2859 	feth_unlock();
2860 }
2861 
/*
 * feth_nx_tx_queue_deliver_pkt_chain
 * Walk a chain of tx packets (linked via the packet "next" pointer),
 * tap each one for bpf, optionally drop it to simulate tx errors, and
 * hand the survivors to the peer interface's rx queue in batches of at
 * most IFF_MAX_BATCH_SIZE.
 * Callers invoke this with the feth lock held (see
 * feth_nx_tx_qset_notify() / feth_nx_queue_tx_push()); `sph` may be 0
 * for an empty chain.
 */
static void
feth_nx_tx_queue_deliver_pkt_chain(if_fake_ref fakeif, kern_packet_t sph,
    struct netif_stats *nifs, if_fake_ref peer_fakeif,
    uint32_t llink_idx, uint32_t qset_idx)
{
	kern_packet_t pkts[IFF_MAX_BATCH_SIZE];
	uint32_t n_pkts = 0;

	FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
	    "%s -> %s", fakeif->iff_name, peer_fakeif->iff_name);

	while (sph != 0) {
		uint16_t off;
		kern_packet_t next;

		/* unlink the head packet so it can be handled singly */
		next = kern_packet_get_next(sph);
		kern_packet_set_next(sph, 0);

		/* bpf tap output */
		off = kern_packet_get_headroom(sph);
		VERIFY(off >= fakeif->iff_tx_headroom);
		kern_packet_set_link_header_length(sph, ETHER_HDR_LEN);
		feth_packet_set_trace_tag(sph, IFF_TT_OUTPUT);
		bpf_tap_packet_out(fakeif->iff_ifp, DLT_EN10MB, sph, NULL, 0);

		/* drop packets, if requested */
		fakeif->iff_tx_pkts_count++;
		if (feth_tx_expired_error(fakeif, sph) ||
		    feth_tx_complete_error(fakeif, sph)) {
			/* simulated tx error: free the packet, count a drop */
			fakeif->iff_tx_pkts_count = 0;
			kern_pbufpool_free(fakeif->iff_tx_pp, sph);
			STATS_INC(nifs, NETIF_STATS_DROP);
			goto next_pkt;
		}
		ASSERT(sph != 0);
		STATS_INC(nifs, NETIF_STATS_TX_COPY_DIRECT);
		STATS_INC(nifs, NETIF_STATS_TX_PACKETS);

		/* prepare batch for receiver */
		pkts[n_pkts++] = sph;
		if (n_pkts == IFF_MAX_BATCH_SIZE) {
			/* batch is full: push it to the peer and complete tx */
			feth_rx_queue_submit(fakeif, peer_fakeif, llink_idx,
			    qset_idx, pkts, n_pkts);
			feth_tx_complete(fakeif, pkts, n_pkts);
			n_pkts = 0;
		}
next_pkt:
		sph = next;
	}
	/* catch last batch for receiver */
	if (n_pkts != 0) {
		feth_rx_queue_submit(fakeif, peer_fakeif, llink_idx, qset_idx,
		    pkts, n_pkts);
		feth_tx_complete(fakeif, pkts, n_pkts);
		n_pkts = 0;
	}
}
2919 
/*
 * feth_nx_tx_qset_notify
 * Netif tx-notify callback for a qset: dequeue pending tx packets from
 * each tx queue in the qset and deliver them to the peer interface's
 * rx queue. Bails out (still returning 0) if either endpoint is
 * detaching or not channel-connected, or if there is no usable peer.
 * When if_fake_switch_combined_mode is enabled, the qset is toggled
 * between combined and separate mode once its dequeue count reaches
 * if_fake_switch_mode_frequency.
 * Always returns 0.
 */
static errno_t
feth_nx_tx_qset_notify(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    void *qset_ctx, uint32_t flags)
{
#pragma unused(nxprov)
	if_fake_ref             fakeif;
	ifnet_t                 ifp;
	ifnet_t                 peer_ifp;
	if_fake_ref             peer_fakeif = NULL;
	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
	fake_qset               * __single qset = qset_ctx;
	boolean_t               detaching, connected;
	uint32_t                i;
	errno_t                 err;

	STATS_INC(nifs, NETIF_STATS_TX_SYNC);
	fakeif = feth_nexus_context(nexus);
	FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
	    "%s qset %p, idx %d, flags 0x%x", fakeif->iff_name, qset,
	    qset->fqs_idx, flags);

	feth_lock();
	/* verify the local side is alive and connected */
	detaching = feth_is_detaching(fakeif);
	connected = fakeif->iff_channel_connected;
	if (detaching || !connected) {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
		    "%s: detaching %s, channel connected %s",
		    fakeif->iff_name,
		    (detaching ? "true" : "false"),
		    (connected ? "true" : "false"));
		feth_unlock();
		return 0;
	}
	ifp = fakeif->iff_ifp; /* NOTE: fetched but not used below */
	peer_ifp = fakeif->iff_peer;
	/* verify the peer exists and is alive and connected */
	if (peer_ifp != NULL) {
		peer_fakeif = ifnet_get_if_fake(peer_ifp);
		if (peer_fakeif != NULL) {
			detaching = feth_is_detaching(peer_fakeif);
			connected = peer_fakeif->iff_channel_connected;
			if (detaching || !connected) {
				FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
				    "peer %s: detaching %s, "
				    "channel connected %s",
				    peer_fakeif->iff_name,
				    (detaching ? "true" : "false"),
				    (connected ? "true" : "false"));
				goto done;
			}
		} else {
			FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
			    "peer_fakeif is NULL");
			goto done;
		}
	} else {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT, "peer_ifp is NULL");
		goto done;
	}

	/* periodically flip combined/separate mode for test coverage */
	if (if_fake_switch_combined_mode &&
	    qset->fqs_dequeue_cnt >= if_fake_switch_mode_frequency) {
		if (qset->fqs_combined_mode) {
			kern_netif_set_qset_separate(qset->fqs_qset);
		} else {
			kern_netif_set_qset_combined(qset->fqs_qset);
		}
		qset->fqs_combined_mode = !qset->fqs_combined_mode;
		qset->fqs_dequeue_cnt = 0;
	}

	/* drain every tx queue in the qset and forward to the peer */
	for (i = 0; i < qset->fqs_tx_queue_cnt; i++) {
		kern_packet_t sph = 0;
		kern_netif_queue_t queue = qset->fqs_tx_queue[i].fq_queue;
		boolean_t more = FALSE;

		err = kern_netif_queue_tx_dequeue(queue, UINT32_MAX, UINT32_MAX,
		    &more, &sph);
		if (err != 0 && err != EAGAIN) {
			/* dequeue errors are logged; delivery still proceeds
			 * with whatever (possibly empty) chain was returned */
			FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
			    "%s queue %p dequeue failed: err "
			    "%d", fakeif->iff_name, queue, err);
		}
		feth_nx_tx_queue_deliver_pkt_chain(fakeif, sph, nifs,
		    peer_fakeif, qset->fqs_llink_idx, qset->fqs_idx);
	}

done:
	feth_unlock();
	return 0;
}
3010 
3011 
3012 static errno_t
feth_nx_queue_tx_push(kern_nexus_provider_t nxprov,kern_nexus_t nexus,void * queue_ctx,kern_packet_t * ph,uint32_t * packetCount,uint32_t * byteCount)3013 feth_nx_queue_tx_push(kern_nexus_provider_t nxprov,
3014     kern_nexus_t nexus, void *queue_ctx, kern_packet_t *ph,
3015     uint32_t *packetCount, uint32_t *byteCount)
3016 {
3017 #pragma unused(nxprov)
3018 	if_fake_ref             fakeif;
3019 	ifnet_t                 ifp;
3020 	ifnet_t                 peer_ifp;
3021 	if_fake_ref             peer_fakeif = NULL;
3022 	struct netif_stats      *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
3023 	fake_queue              *__single fq = queue_ctx;
3024 	boolean_t               detaching, connected;
3025 
3026 	STATS_INC(nifs, NETIF_STATS_TX_SYNC);
3027 	fakeif = feth_nexus_context(nexus);
3028 	FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT, "%s queue %p", fakeif->iff_name, fq);
3029 
3030 	feth_lock();
3031 
3032 	detaching = feth_is_detaching(fakeif);
3033 	connected = fakeif->iff_channel_connected;
3034 	if (detaching || !connected) {
3035 		FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
3036 		    "%s: detaching %s, channel connected %s",
3037 		    fakeif->iff_name,
3038 		    (detaching ? "true" : "false"),
3039 		    (connected ? "true" : "false"));
3040 		goto done;
3041 	}
3042 	ifp = fakeif->iff_ifp;
3043 	peer_ifp = fakeif->iff_peer;
3044 	if (peer_ifp != NULL) {
3045 		peer_fakeif = ifnet_get_if_fake(peer_ifp);
3046 		if (peer_fakeif != NULL) {
3047 			detaching = feth_is_detaching(peer_fakeif);
3048 			connected = peer_fakeif->iff_channel_connected;
3049 			if (detaching || !connected) {
3050 				FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
3051 				    "peer %s: detaching %s, "
3052 				    "channel connected %s",
3053 				    peer_fakeif->iff_name,
3054 				    (detaching ? "true" : "false"),
3055 				    (connected ? "true" : "false"));
3056 				goto done;
3057 			}
3058 		} else {
3059 			FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
3060 			    "peer_fakeif is NULL");
3061 			goto done;
3062 		}
3063 	} else {
3064 		FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT, "peer_ifp is NULL");
3065 		goto done;
3066 	}
3067 
3068 	*packetCount = *byteCount = 0;
3069 
3070 	kern_packet_t sph = *ph;
3071 	while (sph != 0) {
3072 		(*packetCount)++;
3073 		*byteCount += kern_packet_get_data_length(sph);
3074 		sph = kern_packet_get_next(sph);
3075 	}
3076 
3077 	feth_nx_tx_queue_deliver_pkt_chain(fakeif, *ph, nifs,
3078 	    peer_fakeif, 0, 0);
3079 
3080 	*ph = 0;
3081 
3082 done:
3083 	feth_unlock();
3084 	return 0;
3085 }
3086 
3087 
3088 static void
fill_qset_info_and_params(if_fake_ref fakeif,fake_llink * llink_info,uint32_t qset_idx,struct kern_nexus_netif_llink_qset_init * qset_init,bool is_def,bool is_low_latency)3089 fill_qset_info_and_params(if_fake_ref fakeif, fake_llink *llink_info,
3090     uint32_t qset_idx, struct kern_nexus_netif_llink_qset_init *qset_init,
3091     bool is_def, bool is_low_latency)
3092 {
3093 	fake_qset *qset_info = &llink_info->fl_qset[qset_idx];
3094 
3095 	qset_init->nlqi_flags =
3096 	    (is_def ? KERN_NEXUS_NET_LLINK_QSET_DEFAULT : 0) |
3097 	    (is_low_latency ? KERN_NEXUS_NET_LLINK_QSET_LOW_LATENCY : 0) |
3098 	    KERN_NEXUS_NET_LLINK_QSET_AQM;
3099 
3100 	if (feth_in_wmm_mode(fakeif)) {
3101 		qset_init->nlqi_flags |= KERN_NEXUS_NET_LLINK_QSET_WMM_MODE;
3102 		qset_init->nlqi_num_txqs = IFF_NUM_TX_QUEUES_WMM_MODE;
3103 		qset_init->nlqi_num_rxqs = IFF_NUM_RX_QUEUES_WMM_MODE;
3104 	} else {
3105 		qset_init->nlqi_num_txqs = 1;
3106 		qset_init->nlqi_num_rxqs = 1;
3107 	}
3108 	qset_info->fqs_tx_queue_cnt = qset_init->nlqi_num_txqs;
3109 	qset_info->fqs_rx_queue_cnt = qset_init->nlqi_num_rxqs;
3110 
3111 	/* These are needed for locating the peer qset */
3112 	qset_info->fqs_llink_idx = llink_info->fl_idx;
3113 	qset_info->fqs_idx = qset_idx;
3114 }
3115 
3116 static void
fill_llink_info_and_params(if_fake_ref fakeif,uint32_t llink_idx,struct kern_nexus_netif_llink_init * llink_init,uint32_t llink_id,struct kern_nexus_netif_llink_qset_init * __counted_by (qset_cnt)qset_init,uint32_t qset_cnt,uint32_t flags)3117 fill_llink_info_and_params(if_fake_ref fakeif, uint32_t llink_idx,
3118     struct kern_nexus_netif_llink_init *llink_init, uint32_t llink_id,
3119     struct kern_nexus_netif_llink_qset_init * __counted_by(qset_cnt) qset_init, uint32_t qset_cnt,
3120     uint32_t flags)
3121 {
3122 	fake_llink *llink_info = &fakeif->iff_llink[llink_idx];
3123 	uint32_t i;
3124 	bool create_ll_qset = if_fake_low_latency && (llink_idx != 0);
3125 
3126 	for (i = 0; i < qset_cnt; i++) {
3127 		fill_qset_info_and_params(fakeif, llink_info, i,
3128 		    &qset_init[i], i == 0, create_ll_qset && i == 1);
3129 	}
3130 	llink_info->fl_idx = llink_idx;
3131 
3132 	/* This doesn't have to be the same as llink_idx */
3133 	llink_info->fl_id = llink_id;
3134 	llink_info->fl_qset_cnt = qset_cnt;
3135 
3136 	llink_init->nli_link_id = llink_id;
3137 	llink_init->nli_num_qsets = qset_cnt;
3138 	llink_init->nli_qsets = qset_init;
3139 	llink_init->nli_flags = flags;
3140 	llink_init->nli_ctx = llink_info;
3141 }
3142 
/*
 * create_non_default_llinks
 * Add the remaining (non-default) logical links to the netif nexus.
 * The default llink at index 0 was created along with the provider
 * instance; this adds llinks 1..if_fake_llink_cnt-1, reusing one pair
 * of init structures for each add and bumping iff_llink_cnt per
 * success. On failure, all llinks recorded so far are removed and the
 * count is reset to 0.
 */
static errno_t
create_non_default_llinks(if_fake_ref fakeif)
{
	struct kern_nexus *nx;
	fake_nx_t fnx = &fakeif->iff_nx;
	struct kern_nexus_netif_llink_init llink_init;
	struct kern_nexus_netif_llink_qset_init qset_init[FETH_MAX_QSETS];
	errno_t err;
	uint64_t llink_id;
	uint32_t i;

	nx = nx_find(fnx->fnx_instance, FALSE);
	if (nx == NULL) {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
		    "%s: nx not found", fakeif->iff_name);
		return ENXIO;
	}
	/* Default llink starts at index 0 */
	for (i = 1; i < if_fake_llink_cnt; i++) {
		llink_id = (uint64_t)i;

		/*
		 * The llink_init and qset_init structures are reused for
		 * each llink creation.
		 */
		fill_llink_info_and_params(fakeif, i, &llink_init,
		    llink_id, qset_init, if_fake_qset_cnt, 0);
		err = kern_nexus_netif_llink_add(nx, &llink_init);
		if (err != 0) {
			FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
			    "%s: llink add failed, error %d",
			    fakeif->iff_name, err);
			goto fail;
		}
		fakeif->iff_llink_cnt++;
	}
	nx_release(nx);
	return 0;

fail:
	/*
	 * NOTE(review): this cleanup loop starts at index 0, which is the
	 * default llink created by the caller, whereas
	 * remove_non_default_llinks() deliberately skips index 0 —
	 * confirm that removing the default llink here is intended.
	 */
	for (i = 0; i < fakeif->iff_llink_cnt; i++) {
		int                     error;
		fake_llink * __single   ll = &fakeif->iff_llink[i];

		error = kern_nexus_netif_llink_remove(nx, ll->fl_id);
		if (error != 0) {
			FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
			    "%s: llink remove failed, llink_id 0x%llx, "
			    "error %d", fakeif->iff_name,
			    ll->fl_id, error);
		}
		ll->fl_id = 0;
	}
	fakeif->iff_llink_cnt = 0;
	nx_release(nx);
	return err;
}
3200 
/*
 * create_netif_llink_provider_and_instance
 * Register a netif nexus provider for this fakeif (with logical-link
 * callbacks), allocate a provider instance — which creates the ifnet
 * and the default llink (id NETIF_LLINK_ID_DEFAULT) — then add any
 * additional llinks and enable TSO offload when configured.
 * On success, execution falls through the `failed:` label, which only
 * destroys the temporary nexus attributes and returns err (0).
 * Returns 0 on success or an errno; on failure the provider is
 * deregistered and *provider cleared.
 */
static errno_t
create_netif_llink_provider_and_instance(if_fake_ref fakeif,
    struct ifnet_init_eparams * init_params, ifnet_t *ifp,
    uuid_t * provider, uuid_t * instance)
{
	errno_t                 err;
	nexus_controller_t      controller = kern_nexus_shared_controller();
	struct kern_nexus_net_init net_init;
	struct kern_nexus_netif_llink_init llink_init;
	struct kern_nexus_netif_llink_qset_init qsets[FETH_MAX_QSETS];

	nexus_name_t            provider_name;
	nexus_attr_t            __single nexus_attr = NULL;
	/* callback table handed to the netif domain provider */
	struct kern_nexus_netif_provider_init prov_init = {
		.nxnpi_version = KERN_NEXUS_DOMAIN_PROVIDER_NETIF,
		.nxnpi_flags = NXPIF_VIRTUAL_DEVICE,
		.nxnpi_pre_connect = feth_nx_pre_connect,
		.nxnpi_connected = feth_nx_connected,
		.nxnpi_pre_disconnect = feth_nx_pre_disconnect,
		.nxnpi_disconnected = feth_nx_disconnected,
		.nxnpi_qset_init = feth_nx_qset_init,
		.nxnpi_qset_fini = feth_nx_qset_fini,
		.nxnpi_queue_init = feth_nx_queue_init,
		.nxnpi_queue_fini = feth_nx_queue_fini,
		.nxnpi_tx_qset_notify = feth_nx_tx_qset_notify,
		.nxnpi_config_capab = feth_nx_capab_config,
		.nxnpi_queue_tx_push = feth_nx_queue_tx_push
	};

	err = kern_nexus_attr_create(&nexus_attr);
	if (err != 0) {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
		    "nexus attribute creation failed, error %d", err);
		goto failed;
	}

	err = kern_nexus_attr_set(nexus_attr, NEXUS_ATTR_ANONYMOUS, 1);
	VERIFY(err == 0);

	/* provider is named after the interface, e.g. com.apple.netif.feth0 */
	snprintf((char *)provider_name, sizeof(provider_name),
	    "com.apple.netif.%s", fakeif->iff_name);
	err = kern_nexus_controller_register_provider(controller,
	    feth_nx_dom_prov,
	    provider_name,
	    (struct kern_nexus_provider_init *)&prov_init,
	    sizeof(prov_init),
	    nexus_attr,
	    provider);
	if (err != 0) {
		FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
		    "register provider failed, error %d", err);
		goto failed;
	}
	bzero(&net_init, sizeof(net_init));
	net_init.nxneti_version = KERN_NEXUS_NET_CURRENT_VERSION;
	net_init.nxneti_flags = 0;
	net_init.nxneti_eparams = init_params;
	net_init.nxneti_lladdr = NULL;
	net_init.nxneti_prepare = feth_netif_prepare;
	net_init.nxneti_rx_pbufpool = fakeif->iff_rx_pp;
	net_init.nxneti_tx_pbufpool = fakeif->iff_tx_pp;

	/*
	 * Assume llink id is same as the index for if_fake.
	 * This is not required for other drivers.
	 */
	_CASSERT(NETIF_LLINK_ID_DEFAULT == 0);
	fill_llink_info_and_params(fakeif, 0, &llink_init,
	    NETIF_LLINK_ID_DEFAULT, qsets, if_fake_qset_cnt,
	    KERN_NEXUS_NET_LLINK_DEFAULT);

	net_init.nxneti_llink = &llink_init;

	err = kern_nexus_controller_alloc_net_provider_instance(controller,
	    *provider, fakeif, NULL, instance, &net_init, ifp);
	if (err != 0) {
		FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
		    "alloc_net_provider_instance failed, %d", err);
		kern_nexus_controller_deregister_provider(controller,
		    *provider);
		uuid_clear(*provider);
		goto failed;
	}
	/* account for the default llink created above */
	fakeif->iff_llink_cnt++;

	if (if_fake_llink_cnt > 1) {
		err = create_non_default_llinks(fakeif);
		if (err != 0) {
			FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
			    "create_non_default_llinks failed, %d", err);
			feth_detach_netif_nexus(fakeif);
			goto failed;
		}
	}
	if (feth_supports_tso(fakeif)) {
		if ((err = feth_set_tso_offload(*ifp)) != 0) {
			goto failed;
		}
	}
failed:
	/* reached on success too; only frees the temporary attributes */
	if (nexus_attr != NULL) {
		kern_nexus_attr_destroy(nexus_attr);
	}
	return err;
}
3306 
3307 static errno_t
feth_attach_netif_nexus(if_fake_ref fakeif,struct ifnet_init_eparams * init_params,ifnet_t * ifp)3308 feth_attach_netif_nexus(if_fake_ref fakeif,
3309     struct ifnet_init_eparams * init_params, ifnet_t *ifp)
3310 {
3311 	errno_t                 error;
3312 	fake_nx_t               nx = &fakeif->iff_nx;
3313 
3314 	error = feth_packet_pool_make(fakeif);
3315 	if (error != 0) {
3316 		return error;
3317 	}
3318 	if (if_fake_llink_cnt == 0) {
3319 		return create_netif_provider_and_instance(fakeif, init_params,
3320 		           ifp, &nx->fnx_provider, &nx->fnx_instance);
3321 	} else {
3322 		return create_netif_llink_provider_and_instance(fakeif,
3323 		           init_params, ifp, &nx->fnx_provider,
3324 		           &nx->fnx_instance);
3325 	}
3326 }
3327 
3328 static void
remove_non_default_llinks(const char * name,fake_nx_t fnx,fake_llink_t llink __counted_by (FETH_MAX_LLINKS),uint32_t llink_cnt)3329 remove_non_default_llinks(const char * name, fake_nx_t fnx,
3330     fake_llink_t llink __counted_by(FETH_MAX_LLINKS),
3331     uint32_t llink_cnt)
3332 {
3333 	struct kern_nexus *nx;
3334 	uint32_t i;
3335 
3336 	if (llink_cnt <= 1) {
3337 		goto done;
3338 	}
3339 	nx = nx_find(fnx->fnx_instance, FALSE);
3340 	if (nx == NULL) {
3341 		FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
3342 		    "%s: nx not found", name);
3343 		goto done;
3344 	}
3345 	/* Default llink (at index 0) is freed separately */
3346 	for (i = 1; i < llink_cnt; i++) {
3347 		int err;
3348 
3349 		err = kern_nexus_netif_llink_remove(nx, llink[i].fl_id);
3350 		if (err != 0) {
3351 			FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
3352 			    "%s: llink remove failed, llink_id 0x%llx, "
3353 			    "error %d", name,
3354 			    llink[i].fl_id, err);
3355 		}
3356 	}
3357 	nx_release(nx);
3358 done:
3359 	return;
3360 }
3361 
3362 static void
detach_provider_and_instance(uuid_t provider,uuid_t instance)3363 detach_provider_and_instance(uuid_t provider, uuid_t instance)
3364 {
3365 	nexus_controller_t controller = kern_nexus_shared_controller();
3366 	errno_t err;
3367 
3368 	if (!uuid_is_null(instance)) {
3369 		err = kern_nexus_controller_free_provider_instance(controller,
3370 		    instance);
3371 		if (err != 0) {
3372 			FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
3373 			    "free_provider_instance failed %d", err);
3374 		} else {
3375 			FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
3376 			    "deregister_instance");
3377 		}
3378 	}
3379 	if (!uuid_is_null(provider)) {
3380 		err = kern_nexus_controller_deregister_provider(controller,
3381 		    provider);
3382 		if (err != 0) {
3383 			FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
3384 			    "deregister_provider %d", err);
3385 		} else {
3386 			FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
3387 			    "deregister_provider");
3388 		}
3389 	}
3390 	return;
3391 }
3392 
3393 static void
feth_detach_netif_nexus(if_fake_ref fakeif)3394 feth_detach_netif_nexus(if_fake_ref fakeif)
3395 {
3396 	fake_nx         fnx;
3397 	fake_llink_t    llink;
3398 	uint32_t        llink_cnt;
3399 
3400 	feth_lock();
3401 	fnx = fakeif->iff_nx;
3402 	bzero(&fakeif->iff_nx, sizeof(fakeif->iff_nx));
3403 	llink = fakeif->iff_llink;
3404 	fakeif->iff_llink = NULL;
3405 	llink_cnt = fakeif->iff_llink_cnt;
3406 	fakeif->iff_llink_cnt = 0;
3407 	feth_unlock();
3408 	remove_non_default_llinks(__unsafe_null_terminated_from_indexable(fakeif->iff_name), &fnx, llink, llink_cnt);
3409 	detach_provider_and_instance(fnx.fnx_provider, fnx.fnx_instance);
3410 	if (llink != NULL) {
3411 		kfree_type(fake_llink, FETH_MAX_LLINKS, llink);
3412 	}
3413 	return;
3414 }
3415 #endif /* SKYWALK */
3416 
3417 /**
3418 ** feth interface routines
3419 **/
3420 static void
feth_ifnet_set_attrs(if_fake_ref fakeif,ifnet_t ifp)3421 feth_ifnet_set_attrs(if_fake_ref fakeif, ifnet_t ifp)
3422 {
3423 	errno_t         error;
3424 	ifnet_offload_t offload = 0;
3425 
3426 	ifnet_set_addrlen(ifp, ETHER_ADDR_LEN);
3427 	ifnet_set_baudrate(ifp, 0);
3428 	ifnet_set_mtu(ifp, ETHERMTU);
3429 	ifnet_set_flags(ifp,
3430 	    IFF_BROADCAST | IFF_MULTICAST | IFF_SIMPLEX,
3431 	    0xffff);
3432 	ifnet_set_hdrlen(ifp, sizeof(struct ether_header));
3433 	if ((fakeif->iff_flags & IFF_FLAGS_LRO) != 0) {
3434 		offload |= IFNET_LRO;
3435 	}
3436 	if ((fakeif->iff_flags & IFF_FLAGS_HWCSUM) != 0) {
3437 		offload |= IFNET_CSUM_IP | IFNET_CSUM_TCP | IFNET_CSUM_UDP |
3438 		    IFNET_CSUM_TCPIPV6 | IFNET_CSUM_UDPIPV6;
3439 	}
3440 	if (feth_supports_tso(fakeif)) {
3441 		offload |= IFNET_TSO_IPV4 | IFNET_TSO_IPV6;
3442 	}
3443 	if (feth_supports_vlan_tagging(fakeif)) {
3444 		offload |= IFNET_VLAN_TAGGING;
3445 	} else if (feth_supports_vlan_mtu(fakeif)) {
3446 		offload |= IFNET_VLAN_MTU;
3447 	}
3448 	error = ifnet_set_offload(ifp, offload);
3449 	if (error != 0) {
3450 		FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
3451 		    "ifnet_set_offload(%s, 0x%x) failed, %d",
3452 		    ifp->if_xname, offload, error);
3453 	} else {
3454 		FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
3455 		    "ifnet_set_offload(%s, 0x%x) succeeded",
3456 		    ifp->if_xname, offload);
3457 	}
3458 }
3459 
3460 static void
interface_link_event(ifnet_t ifp,u_int32_t event_code)3461 interface_link_event(ifnet_t ifp, u_int32_t event_code)
3462 {
3463 	struct event {
3464 		u_int32_t ifnet_family;
3465 		u_int32_t unit;
3466 		char if_name[IFNAMSIZ];
3467 	};
3468 	_Alignas(struct kern_event_msg) char message[sizeof(struct kern_event_msg) + sizeof(struct event)] = { 0 };
3469 	struct kern_event_msg *__single header = (struct kern_event_msg*)message;
3470 	struct event *data = (struct event *)(message + offsetof(struct kern_event_msg, event_data));
3471 
3472 	header->total_size   = sizeof(message);
3473 	header->vendor_code  = KEV_VENDOR_APPLE;
3474 	header->kev_class    = KEV_NETWORK_CLASS;
3475 	header->kev_subclass = KEV_DL_SUBCLASS;
3476 	header->event_code   = event_code;
3477 	data->ifnet_family   = ifnet_family(ifp);
3478 	data->unit           = (u_int32_t)ifnet_unit(ifp);
3479 	strlcpy(data->if_name, ifnet_name(ifp), IFNAMSIZ);
3480 	ifnet_event(ifp, header);
3481 }
3482 
3483 static if_fake_ref
ifnet_get_if_fake(ifnet_t ifp)3484 ifnet_get_if_fake(ifnet_t ifp)
3485 {
3486 	return (if_fake_ref)ifnet_softc(ifp);
3487 }
3488 
/*
 * feth_clone_create
 * Handle "ifconfig fethN create": allocate and initialize the if_fake
 * softc, then create and attach the ifnet either as a BSD-style
 * interface or as a Skywalk-native netif, depending on the
 * if_fake_bsd_mode sysctl. Returns 0 on success or an errno.
 */
static int
feth_clone_create(struct if_clone *ifc, u_int32_t unit, __unused void *params)
{
	bool                            bsd_mode;
	int                             error;
	if_fake_ref                   fakeif;
	struct ifnet_init_eparams       feth_init;
	fake_llink_t                    iff_llink __counted_by_or_null(FETH_MAX_LLINKS) = NULL;
	ifnet_t                         __single ifp;
	char                            mac_address[ETHER_ADDR_LEN];
	bool                            multi_buflet;
	iff_pktpool_mode_t              pktpool_mode;
	bool                            tso_support;

	/* make local copy of globals needed to make consistency checks below */
	bsd_mode = (if_fake_bsd_mode != 0);
	multi_buflet = (if_fake_multibuflet != 0);
	tso_support = (if_fake_tso_support != 0);
	pktpool_mode = if_fake_pktpool_mode;

	if (!bsd_mode) {
		/* consistency checks */
		if (if_fake_llink_cnt == 0 &&
		    strbufcmp(sk_ll_prefix, FAKE_ETHER_NAME) == 0) {
			FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
			    "feth used as ifname prefix but logical link "
			    "support in feth is disabled.");
			return EINVAL;
		}
		if (tso_support && pktpool_mode != IFF_PP_MODE_GLOBAL) {
			FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
			    "TSO mode requires global packet pool mode");
			return EINVAL;
		}
		if (multi_buflet && pktpool_mode == IFF_PP_MODE_PRIVATE_SPLIT) {
			FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
			    "multi-buflet not supported for split rx & tx pool");
			return EINVAL;
		}
		/* the llink array is only needed for skywalk-native mode */
		iff_llink = kalloc_type(fake_llink,
		    FETH_MAX_LLINKS, Z_WAITOK_ZERO);
		if (iff_llink == NULL) {
			return ENOBUFS;
		}
	}
	fakeif = kalloc_type(struct if_fake, Z_WAITOK_ZERO_NOFAIL);
	fakeif->iff_llink = iff_llink;
	fakeif->iff_retain_count = 1;
	/* MAC address: the 4 ASCII bytes "feth" followed by the 16-bit unit */
#define FAKE_ETHER_NAME_LEN     (sizeof(FAKE_ETHER_NAME) - 1)
	_CASSERT(FAKE_ETHER_NAME_LEN == 4);
	strbufcpy(mac_address, FAKE_ETHER_NAME);
	mac_address[ETHER_ADDR_LEN - 2] = (unit & 0xff00) >> 8;
	mac_address[ETHER_ADDR_LEN - 1] = unit & 0xff;
	/* translate the relevant sysctls into iff_flags */
	if (bsd_mode) {
		fakeif->iff_flags |= IFF_FLAGS_BSD_MODE;
	}
	if (if_fake_hwcsum != 0) {
		fakeif->iff_flags |= IFF_FLAGS_HWCSUM;
	}
	if (if_fake_lro != 0) {
		fakeif->iff_flags |= IFF_FLAGS_LRO;
	}
	if (if_fake_vlan_tagging != 0) {
		/* support VLAN tagging in hardware */
		feth_set_supports_vlan_tagging(fakeif);
	} else {
		/* support VLAN mtu-sized packets */
		feth_set_supports_vlan_mtu(fakeif);
	}
	if (if_fake_separate_frame_header != 0) {
		fakeif->iff_flags |= IFF_FLAGS_SEPARATE_FRAME_HEADER;
	}
	fakeif->iff_max_mtu = get_max_mtu(bsd_mode, if_fake_max_mtu);
	fakeif->iff_fcs = if_fake_fcs;
	fakeif->iff_trailer_length = if_fake_trailer_length;

	/* use the interface name as the unique id for ifp recycle */
	if ((unsigned int)
	    snprintf(fakeif->iff_name, sizeof(fakeif->iff_name), "%s%d",
	    ifc->ifc_name, unit) >= sizeof(fakeif->iff_name)) {
		/* name would be truncated: bail out */
		feth_release(fakeif);
		return EINVAL;
	}
	bzero(&feth_init, sizeof(feth_init));
	feth_init.ver = IFNET_INIT_CURRENT_VERSION;
	feth_init.len = sizeof(feth_init);
	if (feth_in_bsd_mode(fakeif)) {
		/* BSD mode: txstart model or legacy direct-output model */
		if (if_fake_txstart != 0) {
			feth_init.start = feth_start;
		} else {
			feth_init.flags |= IFNET_INIT_LEGACY;
			feth_init.output = feth_output;
		}
		if (tso_support) {
			feth_set_supports_tso(fakeif);
		}
	}
#if SKYWALK
	else {
		feth_init.flags |= IFNET_INIT_SKYWALK_NATIVE;
		/*
		 * Currently we support WMM mode only for Skywalk native
		 * interface.
		 */
		if (if_fake_wmm_mode != 0) {
			fakeif->iff_flags |= IFF_FLAGS_WMM_MODE;
		}

		if (multi_buflet) {
			fakeif->iff_flags |= IFF_FLAGS_MULTIBUFLETS;
		}

		fakeif->iff_pp_mode = pktpool_mode;
		if (tso_support) {
			feth_set_supports_tso(fakeif);
		}

		fakeif->iff_tx_headroom = if_fake_tx_headroom;
		fakeif->iff_adv_interval = if_fake_if_adv_interval;
		if (fakeif->iff_adv_interval > 0) {
			feth_init.flags |= IFNET_INIT_IF_ADV;
		}
		/* simulated tx error behavior, from sysctls */
		fakeif->iff_tx_drop_rate = if_fake_tx_drops;
		fakeif->iff_tx_completion_mode = if_tx_completion_mode;
		fakeif->iff_tx_exp_policy = if_fake_tx_exp_policy;
	}
	feth_init.tx_headroom = fakeif->iff_tx_headroom;
#endif /* SKYWALK */
	if (if_fake_nxattach == 0) {
		feth_init.flags |= IFNET_INIT_NX_NOAUTO;
	}
	feth_init.uniqueid_len = (uint32_t)strbuflen(fakeif->iff_name);
	feth_init.uniqueid = fakeif->iff_name;
	feth_init.name = __unsafe_null_terminated_from_indexable(ifc->ifc_name);
	feth_init.unit = unit;
	feth_init.family = IFNET_FAMILY_ETHERNET;
	feth_init.type = IFT_ETHER;
	feth_init.demux = ether_demux;
	feth_init.add_proto = ether_add_proto;
	feth_init.del_proto = ether_del_proto;
	feth_init.check_multi = ether_check_multi;
	feth_init.framer_extended = ether_frameout_extended;
	feth_init.softc = fakeif;
	feth_init.ioctl = feth_ioctl;
	feth_init.set_bpf_tap = NULL;
	feth_init.detach = feth_if_free;
	feth_init.broadcast_addr = etherbroadcastaddr;
	feth_init.broadcast_len = ETHER_ADDR_LEN;
	if (feth_in_bsd_mode(fakeif)) {
		/* BSD mode: allocate the ifnet directly */
		error = ifnet_allocate_extended(&feth_init, &ifp);
		if (error) {
			feth_release(fakeif);
			return error;
		}
		feth_ifnet_set_attrs(fakeif, ifp);
		if (feth_supports_tso(fakeif)) {
			feth_set_tso_mtu(ifp, IP_MAXPACKET, IP_MAXPACKET);
		}
	}
#if SKYWALK
	else {
		/* skywalk-native mode: the netif nexus creates the ifnet */
		if (feth_in_wmm_mode(fakeif)) {
			feth_init.output_sched_model =
			    IFNET_SCHED_MODEL_DRIVER_MANAGED;
		}
		error = feth_attach_netif_nexus(fakeif, &feth_init, &ifp);
		if (error != 0) {
			feth_release(fakeif);
			return error;
		}
		/* take an additional reference to ensure that it doesn't go away */
		feth_retain(fakeif);
		fakeif->iff_flags |= IFF_FLAGS_NX_ATTACHED;
		fakeif->iff_ifp = ifp;
	}
#endif /* SKYWALK */
	/* seed the media list from the default media words */
	fakeif->iff_media_count = MIN(default_media_words_count, IF_FAKE_MEDIA_LIST_MAX);
	bcopy(default_media_words, fakeif->iff_media_list,
	    fakeif->iff_media_count * sizeof(fakeif->iff_media_list[0]));
	if (feth_in_bsd_mode(fakeif)) {
		error = ifnet_attach(ifp, NULL);
		if (error) {
			ifnet_release(ifp);
			feth_release(fakeif);
			return error;
		}
		fakeif->iff_ifp = ifp;
	}

	ifnet_set_lladdr(ifp, mac_address, sizeof(mac_address));

	/* attach as ethernet */
	bpfattach(ifp, DLT_EN10MB, sizeof(struct ether_header));
	return 0;
}
3684 
/*
 * feth_clone_destroy
 * - if_clone destroy callback: tear down one feth interface
 * - marks the instance detaching under feth_lock so that a concurrent
 *   destroy becomes a no-op, disconnects it from any peer, detaches the
 *   netif nexus when one was attached (SKYWALK), then detaches the ifnet
 * - returns 0 even when the interface is already being destroyed
 */
static int
feth_clone_destroy(ifnet_t ifp)
{
	if_fake_ref     fakeif;
#if SKYWALK
	boolean_t       nx_attached = FALSE;
#endif /* SKYWALK */

	feth_lock();
	fakeif = ifnet_get_if_fake(ifp);
	if (fakeif == NULL || feth_is_detaching(fakeif)) {
		/* already gone, or another destroy is in progress */
		feth_unlock();
		return 0;
	}
	/* claim the teardown: later destroy attempts bail out above */
	feth_set_detaching(fakeif);
#if SKYWALK
	/* snapshot under the lock; the nexus is torn down after unlock */
	nx_attached = (fakeif->iff_flags & IFF_FLAGS_NX_ATTACHED) != 0;
#endif /* SKYWALK */
	feth_unlock();
	/* disconnect from the peer (posts link-down events) */
	feth_config(ifp, NULL);
#if SKYWALK
	if (nx_attached) {
		feth_detach_netif_nexus(fakeif);
		/* drop the extra reference taken when the nexus attached */
		feth_release(fakeif);
	}
#endif /* SKYWALK */
	ifnet_detach(ifp);
	return 0;
}
3714 
3715 static void
feth_enqueue_input(ifnet_t ifp,struct mbuf * m)3716 feth_enqueue_input(ifnet_t ifp, struct mbuf * m)
3717 {
3718 	struct ifnet_stat_increment_param stats = {};
3719 
3720 	stats.packets_in = 1;
3721 	stats.bytes_in = (uint32_t)mbuf_pkthdr_len(m) + ETHER_HDR_LEN;
3722 	ifnet_input(ifp, m, &stats);
3723 }
3724 
3725 
3726 static int
feth_add_mbuf_trailer(struct mbuf * m,void * trailer __sized_by (trailer_len),size_t trailer_len)3727 feth_add_mbuf_trailer(struct mbuf *m, void *trailer __sized_by(trailer_len), size_t trailer_len)
3728 {
3729 	int ret;
3730 	ASSERT(trailer_len <= FETH_TRAILER_LENGTH_MAX);
3731 
3732 	ret = m_append(m, trailer_len, (caddr_t)trailer);
3733 	if (ret == 1) {
3734 		FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
3735 		    "%zuB trailer added", trailer_len);
3736 		return 0;
3737 	}
3738 	FAKE_LOG(LOG_NOTICE, FE_DBGF_OUTPUT, "m_append failed");
3739 	return ENOTSUP;
3740 }
3741 
3742 static int
feth_add_mbuf_fcs(struct mbuf * m)3743 feth_add_mbuf_fcs(struct mbuf *m)
3744 {
3745 	uint32_t pkt_len, offset = 0;
3746 	uint32_t crc = 0;
3747 	int err = 0;
3748 
3749 	ASSERT(sizeof(crc) == ETHER_CRC_LEN);
3750 
3751 	pkt_len = m->m_pkthdr.len;
3752 	struct mbuf *iter = m;
3753 	while (iter != NULL && offset < pkt_len) {
3754 		uint32_t frag_len = iter->m_len;
3755 		ASSERT(frag_len <= (pkt_len - offset));
3756 		crc = crc32(crc, mtod(iter, void *), frag_len);
3757 		offset += frag_len;
3758 		iter = iter->m_next;
3759 	}
3760 
3761 	err = feth_add_mbuf_trailer(m, &crc, ETHER_CRC_LEN);
3762 	if (err != 0) {
3763 		return err;
3764 	}
3765 
3766 	m->m_flags |= M_HASFCS;
3767 
3768 	return 0;
3769 }
3770 
/*
 * feth_output_common
 * - deliver an outbound mbuf from `ifp' directly into the input path of
 *   its crossed-over `peer'
 * - emulates transmit-side hardware: fakes checksum offload results
 *   (IFF_FLAGS_HWCSUM), optionally appends a trailer and/or an FCS, and
 *   either separates the ethernet header from the data
 *   (IFF_FLAGS_SEPARATE_FRAME_HEADER) or simply advances past it
 * - taps bpf on both the sending and receiving interface before handing
 *   the packet to the peer
 */
static void
feth_output_common(ifnet_t ifp, struct mbuf * m, ifnet_t peer,
    iff_flags_t flags, bool fcs, void *trailer __sized_by(trailer_len), size_t trailer_len)
{
	void *                  frame_header;

	if ((flags & IFF_FLAGS_HWCSUM) != 0) {
		/* pretend the "hardware" validated the IP and L4 checksums */
		m->m_pkthdr.csum_data = 0xffff;
		m->m_pkthdr.csum_flags =
		    CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
		    CSUM_IP_CHECKED | CSUM_IP_VALID;
	}

	(void)ifnet_stat_increment_out(ifp, 1, m->m_pkthdr.len, 0);
	/* trailer/FCS additions are best-effort; failures are ignored here */
	if (trailer_len != 0 && trailer != NULL) {
		feth_add_mbuf_trailer(m, trailer, trailer_len);
	}
	if (fcs) {
		feth_add_mbuf_fcs(m);
	}
	if ((flags & IFF_FLAGS_SEPARATE_FRAME_HEADER) != 0) {
		/* pull the ethernet header contiguous, point the pkthdr's
		 * frame header at it, then strip it from the data area */
		m = m_copyup(m, ETHER_HDR_LEN, 0);
		if (m == NULL) {
			/* NOTE(review): presumably m_copyup() freed the
			 * chain on failure — confirm against mbuf KPI */
			FAKE_LOG(LOG_NOTICE, FE_DBGF_OUTPUT, "m_copyup failed");
			goto done;
		}
		frame_header = mtod(m, void *);
		mbuf_pkthdr_setheader(m, frame_header);
		m_adj(m, ETHER_HDR_LEN);
		FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
		    "%s: frame 0x%llx data 0x%llx len %ld",
		    ifp->if_xname,
		    (uint64_t)VM_KERNEL_ADDRPERM(frame_header),
		    (uint64_t)VM_KERNEL_ADDRPERM(mtod(m, void *)),
		    mbuf_len(m));
	} else {
		/* frame header stays in the data; advance past it */
		frame_header = mtod(m, void *);
		mbuf_pkthdr_setheader(m, frame_header);
		_mbuf_adjust_pkthdr_and_data(m, ETHER_HDR_LEN);
	}

	/* tap it out */
	if (ifp->if_bpf != NULL) {
		fake_bpf_tap_out(ifp, m, frame_header);
	}

	/* tap it in */
	if (peer->if_bpf != NULL) {
		fake_bpf_tap_in(peer, m, frame_header);
	}

	/* the packet now "arrives" on the peer */
	(void)mbuf_pkthdr_setrcvif(m, peer);
	feth_enqueue_input(peer, m);
done:
	return;
}
3827 
/*
 * feth_start
 * - ifnet start callback: drain the interface output queue, forwarding
 *   each packet to the peer via feth_output_common()
 * - iff_start_busy serializes drains: if another thread is already in
 *   the dequeue loop, return and let it pick up the new work
 * - peer/flags/fcs/trailer_len are snapshotted under feth_lock before
 *   the loop runs unlocked; packets dequeued with no peer are dropped
 */
static void
feth_start(ifnet_t ifp)
{
	if_fake_ref     fakeif;
	iff_flags_t     flags = 0;
	bool            fcs;
	struct mbuf *   __single m;
	ifnet_t         peer = NULL;
	size_t          trailer_len;

	feth_lock();
	fakeif = ifnet_get_if_fake(ifp);
	if (fakeif == NULL) {
		feth_unlock();
		return;
	}

	if (fakeif->iff_start_busy) {
		/* another thread is already draining the queue */
		feth_unlock();
		return;
	}

	/* snapshot the configuration for the unlocked loop below */
	peer = fakeif->iff_peer;
	flags = fakeif->iff_flags;
	fcs = fakeif->iff_fcs;
	trailer_len = fakeif->iff_trailer_length;

	fakeif->iff_start_busy = TRUE;
	feth_unlock();
	for (;;) {
		if (ifnet_dequeue(ifp, &m) != 0) {
			/* queue empty (or not dequeue-able); stop */
			break;
		}
		if (peer == NULL) {
			/* no peer: drop outbound traffic */
			m_freem(m);
			continue;
		}
		if (m != NULL) {
			feth_output_common(ifp, m, peer, flags, fcs,
			    feth_trailer, trailer_len);
		}
	}
	/* re-validate fakeif: it may have detached while unlocked */
	feth_lock();
	fakeif = ifnet_get_if_fake(ifp);
	if (fakeif != NULL) {
		fakeif->iff_start_busy = FALSE;
	}
	feth_unlock();
}
3877 
3878 static int
feth_output(ifnet_t ifp,struct mbuf * m)3879 feth_output(ifnet_t ifp, struct mbuf * m)
3880 {
3881 	if_fake_ref             fakeif;
3882 	iff_flags_t             flags;
3883 	bool                    fcs;
3884 	size_t                  trailer_len;
3885 	ifnet_t                 peer = NULL;
3886 
3887 	if (m == NULL) {
3888 		return 0;
3889 	}
3890 	feth_lock();
3891 	fakeif = ifnet_get_if_fake(ifp);
3892 	if (fakeif != NULL) {
3893 		peer = fakeif->iff_peer;
3894 		flags = fakeif->iff_flags;
3895 		fcs = fakeif->iff_fcs;
3896 		trailer_len = fakeif->iff_trailer_length;
3897 	}
3898 	feth_unlock();
3899 	if (peer == NULL) {
3900 		m_freem(m);
3901 		ifnet_stat_increment_out(ifp, 0, 0, 1);
3902 		return 0;
3903 	}
3904 	feth_output_common(ifp, m, peer, flags, fcs, feth_trailer, trailer_len);
3905 	return 0;
3906 }
3907 
/*
 * feth_config
 * - connect `ifp' to `peer' (peer != NULL), or disconnect it from its
 *   current peer (peer == NULL)
 * - to connect, both endpoints must not be detaching and must both be
 *   unconnected; under SKYWALK their packet-pool modes must also match
 * - link up/down events are posted to both interfaces after dropping
 *   feth_lock
 * - returns 0 on success, EINVAL/EBUSY on failure
 */
static int
feth_config(ifnet_t ifp, ifnet_t peer)
{
	int             connected = FALSE;
	int             disconnected = FALSE;
	int             error = 0;
	if_fake_ref     fakeif = NULL;

	feth_lock();
	fakeif = ifnet_get_if_fake(ifp);
	if (fakeif == NULL) {
		error = EINVAL;
		goto done;
	}
	if (peer != NULL) {
		/* connect to peer */
		if_fake_ref   peer_fakeif;

		peer_fakeif = ifnet_get_if_fake(peer);
		if (peer_fakeif == NULL) {
			error = EINVAL;
			goto done;
		}
		/* both sides must be alive and currently unconnected */
		if (feth_is_detaching(fakeif) ||
		    feth_is_detaching(peer_fakeif) ||
		    peer_fakeif->iff_peer != NULL ||
		    fakeif->iff_peer != NULL) {
			error = EBUSY;
			goto done;
		}
#if SKYWALK
		/* crossed-over interfaces must share a packet-pool mode */
		if (fakeif->iff_pp_mode !=
		    peer_fakeif->iff_pp_mode) {
			error = EINVAL;
			goto done;
		}
#endif /* SKYWALK */
		fakeif->iff_peer = peer;
		peer_fakeif->iff_peer = ifp;
		connected = TRUE;
	} else if (fakeif->iff_peer != NULL) {
		/* disconnect from peer */
		if_fake_ref   peer_fakeif;

		peer = fakeif->iff_peer;
		peer_fakeif = ifnet_get_if_fake(peer);
		if (peer_fakeif == NULL) {
			/* should not happen */
			error = EINVAL;
			goto done;
		}
		fakeif->iff_peer = NULL;
		peer_fakeif->iff_peer = NULL;
		disconnected = TRUE;
	}

done:
	feth_unlock();

	/* generate link status event if we connect or disconnect */
	if (connected) {
		interface_link_event(ifp, KEV_DL_LINK_ON);
		interface_link_event(peer, KEV_DL_LINK_ON);
	} else if (disconnected) {
		interface_link_event(ifp, KEV_DL_LINK_OFF);
		interface_link_event(peer, KEV_DL_LINK_OFF);
	}
	return error;
}
3977 
/*
 * feth_set_media
 * - install the caller-supplied media word list on the interface
 *   (IF_FAKE_S_CMD_SET_MEDIA)
 * - the list length is bounds-checked against IF_FAKE_MEDIA_LIST_MAX
 *   before anything is copied
 * - returns 0 on success, EINVAL on a too-long list or a vanished
 *   interface
 */
static int
feth_set_media(ifnet_t ifp, struct if_fake_request * iffr)
{
	if_fake_ref     fakeif;
	int             error;

	if (iffr->iffr_media.iffm_count > IF_FAKE_MEDIA_LIST_MAX) {
		/* list is too long */
		return EINVAL;
	}
	feth_lock();
	fakeif = ifnet_get_if_fake(ifp);
	if (fakeif == NULL) {
		error = EINVAL;
		goto done;
	}
	fakeif->iff_media_count = iffr->iffr_media.iffm_count;
	bcopy(iffr->iffr_media.iffm_list, fakeif->iff_media_list,
	    iffr->iffr_media.iffm_count * sizeof(fakeif->iff_media_list[0]));
#if 0
	/* XXX: "auto-negotiate" active with peer? */
	/* generate link status event? */
	fakeif->iff_media_current = iffr->iffr_media.iffm_current;
#endif
	error = 0;
done:
	feth_unlock();
	return error;
}
4007 
4008 static int
if_fake_request_copyin(user_addr_t user_addr,struct if_fake_request * iffr,u_int32_t len)4009 if_fake_request_copyin(user_addr_t user_addr,
4010     struct if_fake_request *iffr, u_int32_t len)
4011 {
4012 	int     error;
4013 
4014 	if (user_addr == USER_ADDR_NULL || len < sizeof(*iffr)) {
4015 		error = EINVAL;
4016 		goto done;
4017 	}
4018 	error = copyin(user_addr, iffr, sizeof(*iffr));
4019 	if (error != 0) {
4020 		goto done;
4021 	}
4022 	if (iffr->iffr_reserved[0] != 0 || iffr->iffr_reserved[1] != 0 ||
4023 	    iffr->iffr_reserved[2] != 0 || iffr->iffr_reserved[3] != 0) {
4024 		error = EINVAL;
4025 		goto done;
4026 	}
4027 done:
4028 	return error;
4029 }
4030 
/*
 * feth_set_drvspec
 * - handle the SIOCSDRVSPEC "set" sub-commands: set/clear the peer,
 *   install a media list, or enable/disable the dequeue-stall test mode
 * - `user_addr'/`len' describe the user-space struct if_fake_request
 * - returns 0 on success, EOPNOTSUPP for unknown commands, or the
 *   sub-handler's error
 */
static int
feth_set_drvspec(ifnet_t ifp, uint32_t cmd, u_int32_t len,
    user_addr_t user_addr)
{
	int                     error;
	struct if_fake_request  iffr;
	ifnet_t                 peer;

	switch (cmd) {
	case IF_FAKE_S_CMD_SET_PEER:
		error = if_fake_request_copyin(user_addr, &iffr, len);
		if (error != 0) {
			break;
		}
		/* an empty peer name means "disconnect" */
		if (iffr.iffr_peer_name[0] == '\0') {
			error = feth_config(ifp, NULL);
			break;
		}

		/* ensure nul termination */
		iffr.iffr_peer_name[IFNAMSIZ - 1] = '\0';
		peer = ifunit(__unsafe_null_terminated_from_indexable(iffr.iffr_peer_name));
		if (peer == NULL) {
			error = ENXIO;
			break;
		}
		/* the peer must itself be a feth interface */
		if (ifnet_type(peer) != IFT_ETHER) {
			error = EINVAL;
			break;
		}
		if (strcmp(ifnet_name(peer), FAKE_ETHER_NAME) != 0) {
			error = EINVAL;
			break;
		}
		error = feth_config(ifp, peer);
		break;
	case IF_FAKE_S_CMD_SET_MEDIA:
		error = if_fake_request_copyin(user_addr, &iffr, len);
		if (error != 0) {
			break;
		}
		error = feth_set_media(ifp, &iffr);
		break;
	case IF_FAKE_S_CMD_SET_DEQUEUE_STALL:
		error = if_fake_request_copyin(user_addr, &iffr, len);
		if (error != 0) {
			break;
		}
		error = feth_enable_dequeue_stall(ifp,
		    iffr.iffr_dequeue_stall);
		break;
	default:
		error = EOPNOTSUPP;
		break;
	}
	return error;
}
4088 
/*
 * feth_get_drvspec
 * - handle the SIOCGDRVSPEC "get" sub-commands; currently only
 *   IF_FAKE_G_CMD_GET_PEER, which copies out a struct if_fake_request
 *   whose iffr_peer_name is the peer's interface name (empty when
 *   unconnected)
 * - returns 0 on success, EINVAL on a short buffer, EOPNOTSUPP for
 *   unknown commands, or the copyout() error
 */
static int
feth_get_drvspec(ifnet_t ifp, u_int32_t cmd, u_int32_t len,
    user_addr_t user_addr)
{
	int                     error = EOPNOTSUPP;
	if_fake_ref             fakeif;
	struct if_fake_request  iffr;
	ifnet_t                 peer;

	switch (cmd) {
	case IF_FAKE_G_CMD_GET_PEER:
		if (len < sizeof(iffr)) {
			error = EINVAL;
			break;
		}
		feth_lock();
		fakeif = ifnet_get_if_fake(ifp);
		if (fakeif == NULL) {
			feth_unlock();
			error = EOPNOTSUPP;
			break;
		}
		/* snapshot the peer pointer before dropping the lock */
		peer = fakeif->iff_peer;
		feth_unlock();
		bzero(&iffr, sizeof(iffr));
		if (peer != NULL) {
			strlcpy(iffr.iffr_peer_name,
			    if_name(peer),
			    sizeof(iffr.iffr_peer_name));
		}
		error = copyout(&iffr, user_addr, sizeof(iffr));
		break;
	default:
		break;
	}
	return error;
}
4126 
/*
 * Overlay for the 32-bit and 64-bit layouts of struct ifdrv handed in by
 * SIOC[SG]DRVSPEC{32,64}; feth_ioctl() selects the matching view based
 * on the ioctl command.
 */
union ifdrvu {
	struct ifdrv32  *ifdrvu_32;
	struct ifdrv64  *ifdrvu_64;
	void            *ifdrvu_p;
};
4132 
/*
 * feth_ioctl
 * - ioctl handler for feth interfaces
 * - supports: address assignment (marks the interface up), media
 *   queries (link reported active iff a peer is attached), device MTU
 *   get/set, 32/64-bit driver-specific get/set commands (dispatched to
 *   feth_get_drvspec()/feth_set_drvspec(); sets require superuser),
 *   link-layer address changes, IFF_RUNNING tracking of IFF_UP,
 *   multicast membership (no-op), and LRO capability toggling
 * - returns 0 on success or an errno; EOPNOTSUPP for anything else
 */
static int
feth_ioctl(ifnet_t ifp, u_long cmd, void * data)
{
	unsigned int            count;
	struct ifdevmtu *       devmtu_p;
	union ifdrvu            drv;
	uint32_t                drv_cmd;
	uint32_t                drv_len;
	boolean_t               drv_set_command = FALSE;
	int                     error = 0;
	struct ifmediareq32 *   ifmr;
	struct ifreq *          ifr;
	if_fake_ref             fakeif;
	int                     status;
	user_addr_t             user_addr;

	ifr = (struct ifreq *)data;
	switch (cmd) {
	case SIOCSIFADDR:
		ifnet_set_flags(ifp, IFF_UP, IFF_UP);
		break;

	case SIOCGIFMEDIA32:
	case SIOCGIFMEDIA64:
		feth_lock();
		fakeif = ifnet_get_if_fake(ifp);
		if (fakeif == NULL) {
			feth_unlock();
			return EOPNOTSUPP;
		}
		/* link is "active" only while connected to a peer */
		status = (fakeif->iff_peer != NULL)
		    ? (IFM_AVALID | IFM_ACTIVE) : IFM_AVALID;
		/* the 32/64-bit requests differ only past ifmu_ulist, so
		 * the common fields are accessed through the 32-bit view */
		ifmr = (struct ifmediareq32 *)data;
		user_addr = (cmd == SIOCGIFMEDIA64) ?
		    ((struct ifmediareq64 *)data)->ifmu_ulist :
		    CAST_USER_ADDR_T(((struct ifmediareq32 *)data)->ifmu_ulist);
		count = ifmr->ifm_count;
		ifmr->ifm_active = (fakeif->iff_peer != NULL)
		    ? FAKE_DEFAULT_MEDIA : IFM_ETHER;
		ifmr->ifm_current = IFM_ETHER;
		ifmr->ifm_mask = 0;
		ifmr->ifm_status = status;
		if (user_addr == USER_ADDR_NULL) {
			/* size query: report how many media words exist */
			ifmr->ifm_count = fakeif->iff_media_count;
		} else if (count > 0) {
			/* copy out at most the available media words */
			if (count > fakeif->iff_media_count) {
				count = fakeif->iff_media_count;
			}
			ifmr->ifm_count = count;
			error = copyout(&fakeif->iff_media_list, user_addr,
			    count * sizeof(int));
		}
		feth_unlock();
		break;

	case SIOCGIFDEVMTU:
		devmtu_p = &ifr->ifr_devmtu;
		devmtu_p->ifdm_current = ifnet_mtu(ifp);
		devmtu_p->ifdm_max = feth_max_mtu(ifp);
		devmtu_p->ifdm_min = IF_MINMTU;
		break;

	case SIOCSIFMTU:
		if ((unsigned int)ifr->ifr_mtu > feth_max_mtu(ifp) ||
		    ifr->ifr_mtu < IF_MINMTU) {
			error = EINVAL;
		} else {
			error = ifnet_set_mtu(ifp, ifr->ifr_mtu);
		}
		break;

	case SIOCSDRVSPEC32:
	case SIOCSDRVSPEC64:
		/* set commands are privileged */
		error = proc_suser(current_proc());
		if (error != 0) {
			break;
		}
		drv_set_command = TRUE;
		OS_FALLTHROUGH;
	case SIOCGDRVSPEC32:
	case SIOCGDRVSPEC64:
		/* unpack the 32- or 64-bit struct ifdrv */
		drv.ifdrvu_p = data;
		if (cmd == SIOCGDRVSPEC32 || cmd == SIOCSDRVSPEC32) {
			drv_cmd = drv.ifdrvu_32->ifd_cmd;
			drv_len = drv.ifdrvu_32->ifd_len;
			user_addr = CAST_USER_ADDR_T(drv.ifdrvu_32->ifd_data);
		} else {
			drv_cmd = drv.ifdrvu_64->ifd_cmd;
			drv_len = drv.ifdrvu_64->ifd_len;
			user_addr = drv.ifdrvu_64->ifd_data;
		}
		if (drv_set_command) {
			error = feth_set_drvspec(ifp, drv_cmd, drv_len,
			    user_addr);
		} else {
			error = feth_get_drvspec(ifp, drv_cmd, drv_len,
			    user_addr);
		}
		break;

	case SIOCSIFLLADDR:
		error = ifnet_set_lladdr(ifp, ifr->ifr_addr.sa_data,
		    ifr->ifr_addr.sa_len);
		break;

	case SIOCSIFFLAGS:
		if ((ifp->if_flags & IFF_UP) != 0) {
			/* marked up, set running if not already set */
			if ((ifp->if_flags & IFF_RUNNING) == 0) {
				/* set running */
				error = ifnet_set_flags(ifp, IFF_RUNNING,
				    IFF_RUNNING);
			}
		} else if ((ifp->if_flags & IFF_RUNNING) != 0) {
			/* marked down, clear running */
			error = ifnet_set_flags(ifp, 0, IFF_RUNNING);
		}
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		error = 0;
		break;
	case SIOCSIFCAP: {
		uint32_t        cap;

		/* only interfaces created with LRO support honor this */
		feth_lock();
		fakeif = ifnet_get_if_fake(ifp);
		if (fakeif == NULL ||
		    (fakeif->iff_flags & IFF_FLAGS_LRO) == 0) {
			feth_unlock();
			return EOPNOTSUPP;
		}
		feth_unlock();
		cap = (ifr->ifr_reqcap & IFCAP_LRO) != 0 ? IFCAP_LRO : 0;
		error = ifnet_set_capabilities_enabled(ifp, cap, IFCAP_LRO);
		break;
	}
	default:
		error = EOPNOTSUPP;
		break;
	}
	return error;
}
4277 
/*
 * feth_if_free
 * - ifnet detach callback (feth_init.detach): final teardown once the
 *   ifnet has detached
 * - clears the ifnet's softc back-pointer, then drops the if_fake
 *   reference held on behalf of the ifnet and releases the ifnet itself
 */
static void
feth_if_free(ifnet_t ifp)
{
	if_fake_ref           fakeif;

	if (ifp == NULL) {
		return;
	}
	feth_lock();
	fakeif = ifnet_get_if_fake(ifp);
	if (fakeif == NULL) {
		feth_unlock();
		return;
	}
	/* break the ifnet -> if_fake linkage */
	ifp->if_softc = NULL;
#if SKYWALK
	/* the doorbell thread-call must already have been torn down */
	VERIFY(fakeif->iff_doorbell_tcall == NULL);
#endif /* SKYWALK */
	feth_unlock();
	feth_release(fakeif);
	ifnet_release(ifp);
	return;
}
4301 
4302 __private_extern__ void
if_fake_init(void)4303 if_fake_init(void)
4304 {
4305 	int error;
4306 
4307 #if SKYWALK
4308 	(void)feth_register_nexus_domain_provider();
4309 #endif /* SKYWALK */
4310 	error = if_clone_attach(&feth_cloner);
4311 	if (error != 0) {
4312 		return;
4313 	}
4314 	return;
4315 }
4316