xref: /xnu-12377.61.12/bsd/net/if_fake.c (revision 4d495c6e23c53686cf65f45067f79024cf5dcee8)
1 /*
2  * Copyright (c) 2015-2025 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 /*
30  * if_fake.c
31  * - fake network interface used for testing
32  * - "feth" (e.g. "feth0", "feth1") is a virtual ethernet interface that allows
33  *   two instances to have their output/input paths "crossed-over" so that
34  *   output on one is input on the other
35  */
36 
37 /*
38  * Modification History:
39  *
40  * September 9, 2015	Dieter Siegmund ([email protected])
41  * - created
42  */
43 
44 #include <sys/param.h>
45 #include <sys/kernel.h>
46 #include <sys/malloc.h>
47 #include <sys/mbuf.h>
48 #include <sys/queue.h>
49 #include <sys/socket.h>
50 #include <sys/sockio.h>
51 #include <sys/sysctl.h>
52 #include <sys/systm.h>
53 #include <sys/kern_event.h>
54 #include <sys/mcache.h>
55 #include <sys/syslog.h>
56 
57 #include <net/bpf.h>
58 #include <net/ethernet.h>
59 #include <net/if.h>
60 #include <net/if_vlan_var.h>
61 #include <net/if_fake_var.h>
62 #include <net/if_arp.h>
63 #include <net/if_dl.h>
64 #include <net/if_ether.h>
65 #include <net/if_types.h>
66 #include <libkern/OSAtomic.h>
67 
68 #include <net/dlil.h>
69 
70 #include <net/kpi_interface.h>
71 #include <net/kpi_protocol.h>
72 
73 #include <kern/locks.h>
74 #include <kern/zalloc.h>
75 
76 #include <mach/mach_time.h>
77 
78 #include <os/log.h>
79 
80 #ifdef INET
81 #include <netinet/in.h>
82 #include <netinet/if_ether.h>
83 #endif
84 
85 #include <net/if_media.h>
86 #include <net/ether_if_module.h>
87 #if SKYWALK
88 #include <skywalk/os_skywalk_private.h>
89 #include <skywalk/nexus/netif/nx_netif.h>
90 #include <skywalk/channel/channel_var.h>
91 #endif /* SKYWALK */
92 
93 /*
94  * if_fake_debug, FE_DBGF_*
95  * - 'if_fake_debug' is a bitmask of FE_DBGF_* flags that can be set
96  *   to enable additional logs for the corresponding fake function
97  * - "sysctl net.link.fake.debug" controls the value of
98  *   'if_fake_debug'
99  */
100 static uint32_t if_fake_debug = 0;
101 
102 #define FE_DBGF_LIFECYCLE               0x0001
103 #define FE_DBGF_INPUT                   0x0002
104 #define FE_DBGF_OUTPUT                  0x0004
105 #define FE_DBGF_CONTROL                 0x0008
106 #define FE_DBGF_MISC                    0x0010
107 
108 /*
109  * if_fake_log_level
110  * - 'if_fake_log_level' ensures that by default important logs are
111  *   logged regardless of if_fake_debug by comparing the log level
112  *   in FAKE_LOG to if_fake_log_level
 * - "sysctl net.link.fake.log_level" controls the value of
 *   'if_fake_log_level'
115  * - the default value of 'if_fake_log_level' is LOG_NOTICE; important
116  *   logs must use LOG_NOTICE to ensure they appear by default
117  */
118 #define FAKE_DBGF_ENABLED(__flag)     ((if_fake_debug & __flag) != 0)
119 
120 /*
121  * FAKE_LOG
122  * - macro to generate the specified log conditionally based on
123  *   the specified log level and debug flags
124  */
125 #define FAKE_LOG(__level, __dbgf, __string, ...)              \
126 	do {                                                            \
127 	        if (__level <= if_fake_log_level ||                   \
128 	            FAKE_DBGF_ENABLED(__dbgf)) {                      \
129 	                os_log(OS_LOG_DEFAULT, "%s: " __string, \
130 	                       __func__, ## __VA_ARGS__);       \
131 	        }                                                       \
132 	} while (0)
133 
134 static boolean_t
is_power_of_two(unsigned int val)135 is_power_of_two(unsigned int val)
136 {
137 	return (val & (val - 1)) == 0;
138 }
139 
140 #define FAKE_ETHER_NAME         "feth"
141 
142 SYSCTL_DECL(_net_link);
143 SYSCTL_NODE(_net_link, OID_AUTO, fake, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
144     "Fake interface");
145 
146 static int if_fake_txstart = 1;
147 SYSCTL_INT(_net_link_fake, OID_AUTO, txstart, CTLFLAG_RW | CTLFLAG_LOCKED,
148     &if_fake_txstart, 0, "Fake interface TXSTART mode");
149 
150 static int if_fake_hwcsum = 0;
151 SYSCTL_INT(_net_link_fake, OID_AUTO, hwcsum, CTLFLAG_RW | CTLFLAG_LOCKED,
152     &if_fake_hwcsum, 0, "Fake interface simulate hardware checksum");
153 
154 static int if_fake_vlan_tagging = 1;
155 SYSCTL_INT(_net_link_fake, OID_AUTO, vlan_tagging, CTLFLAG_RW | CTLFLAG_LOCKED,
156     &if_fake_vlan_tagging, 0, "Fake interface VLAN tagging");
157 
158 static int if_fake_nxattach = 0;
159 SYSCTL_INT(_net_link_fake, OID_AUTO, nxattach, CTLFLAG_RW | CTLFLAG_LOCKED,
160     &if_fake_nxattach, 0, "Fake interface auto-attach nexus");
161 
162 static int if_fake_bsd_mode = 1;
163 SYSCTL_INT(_net_link_fake, OID_AUTO, bsd_mode, CTLFLAG_RW | CTLFLAG_LOCKED,
164     &if_fake_bsd_mode, 0, "Fake interface attach as BSD interface");
165 
166 static int if_fake_log_level = LOG_NOTICE;
167 SYSCTL_INT(_net_link_fake, OID_AUTO, log_level, CTLFLAG_RW | CTLFLAG_LOCKED,
168     &if_fake_log_level, 0, "Fake interface log level");
169 
170 SYSCTL_INT(_net_link_fake, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
171     &if_fake_debug, 0, "Fake interface debug flags");
172 
173 static int if_fake_wmm_mode = 0;
174 SYSCTL_INT(_net_link_fake, OID_AUTO, wmm_mode, CTLFLAG_RW | CTLFLAG_LOCKED,
175     &if_fake_wmm_mode, 0, "Fake interface in 802.11 WMM mode");
176 
177 static int if_fake_multibuflet = 0;
178 SYSCTL_INT(_net_link_fake, OID_AUTO, multibuflet, CTLFLAG_RW | CTLFLAG_LOCKED,
179     &if_fake_multibuflet, 0, "Fake interface using multi-buflet packets");
180 
181 static int if_fake_low_latency = 0;
182 SYSCTL_INT(_net_link_fake, OID_AUTO, low_latency, CTLFLAG_RW | CTLFLAG_LOCKED,
183     &if_fake_low_latency, 0, "Fake interface with a low latency qset");
184 
185 static int if_fake_tso_support = 0;
186 SYSCTL_INT(_net_link_fake, OID_AUTO, tso_support, CTLFLAG_RW | CTLFLAG_LOCKED,
187     &if_fake_tso_support, 0, "Fake interface with support for TSO offload");
188 
189 #define DEFAULT_EXPIRATION_THRESHOLD 500 /* usec */
190 static int if_fake_expiration_threshold_us = DEFAULT_EXPIRATION_THRESHOLD;
191 SYSCTL_INT(_net_link_fake, OID_AUTO, expiration_threshold, CTLFLAG_RW | CTLFLAG_LOCKED,
192     &if_fake_expiration_threshold_us, DEFAULT_EXPIRATION_THRESHOLD,
193     "Expiration threshold (usec) for expiration testing");
194 
195 static int if_fake_lro = 0;
196 SYSCTL_INT(_net_link_fake, OID_AUTO, lro, CTLFLAG_RW | CTLFLAG_LOCKED,
197     &if_fake_lro, 0, "Fake interface report LRO capability");
198 
199 static int if_fake_separate_frame_header = 0;
200 SYSCTL_INT(_net_link_fake, OID_AUTO, separate_frame_header,
201     CTLFLAG_RW | CTLFLAG_LOCKED,
202     &if_fake_separate_frame_header, 0, "Put frame header in separate mbuf");
203 
204 static int if_fake_fail_ioctl = 0;
205 SYSCTL_INT(_net_link_fake, OID_AUTO, fail_ioctl, CTLFLAG_RW | CTLFLAG_LOCKED,
206     &if_fake_fail_ioctl, 0, "Fake interface fail ioctl");
207 
208 typedef enum {
209 	IFF_PP_MODE_GLOBAL = 0,         /* share a global pool */
210 	IFF_PP_MODE_PRIVATE = 1,        /* creates its own rx/tx pool */
211 	IFF_PP_MODE_PRIVATE_SPLIT = 2,  /* creates its own split rx & tx pool */
212 } iff_pktpool_mode_t;
213 static iff_pktpool_mode_t if_fake_pktpool_mode = IFF_PP_MODE_GLOBAL;
214 SYSCTL_INT(_net_link_fake, OID_AUTO, pktpool_mode, CTLFLAG_RW | CTLFLAG_LOCKED,
215     &if_fake_pktpool_mode, IFF_PP_MODE_GLOBAL,
216     "Fake interface packet pool mode (0 global, 1 private, 2 private split");
217 
218 static int if_fake_rx_flow_steering_support = 0;
219 SYSCTL_INT(_net_link_fake, OID_AUTO, rx_flow_steering_support, CTLFLAG_RW | CTLFLAG_LOCKED,
220     &if_fake_rx_flow_steering_support, 0, "Fake interface with support for Rx flow steering");
221 
222 #define FETH_LINK_LAYER_AGGRETATION_FACTOR_MAX 512
223 #define FETH_LINK_LAYER_AGGRETATION_FACTOR_DEF 96
224 static int if_fake_link_layer_aggregation_factor =
225     FETH_LINK_LAYER_AGGRETATION_FACTOR_DEF;
226 static int
227 feth_link_layer_aggregation_factor_sysctl SYSCTL_HANDLER_ARGS
228 {
229 #pragma unused(oidp, arg1, arg2)
230 	unsigned int new_value;
231 	int changed;
232 	int error;
233 
234 	error = sysctl_io_number(req, if_fake_link_layer_aggregation_factor,
235 	    sizeof(if_fake_link_layer_aggregation_factor), &new_value,
236 	    &changed);
237 	if (error == 0 && changed != 0) {
238 		if (new_value <= 0 ||
239 		    new_value > FETH_LINK_LAYER_AGGRETATION_FACTOR_MAX) {
240 			return EINVAL;
241 		}
242 		if_fake_link_layer_aggregation_factor = new_value;
243 	}
244 	return error;
245 }
246 
247 SYSCTL_PROC(_net_link_fake, OID_AUTO, link_layer_aggregation_factor,
248     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
249     0, 0, feth_link_layer_aggregation_factor_sysctl, "IU",
250     "Fake interface link layer aggregation factor");
251 
252 #define FETH_TX_HEADROOM_MAX      32
253 static unsigned int if_fake_tx_headroom = FETH_TX_HEADROOM_MAX;
254 static int
255 feth_tx_headroom_sysctl SYSCTL_HANDLER_ARGS
256 {
257 #pragma unused(oidp, arg1, arg2)
258 	unsigned int new_value;
259 	int changed;
260 	int error;
261 
262 	error = sysctl_io_number(req, if_fake_tx_headroom,
263 	    sizeof(if_fake_tx_headroom), &new_value, &changed);
264 	if (error == 0 && changed != 0) {
265 		if (new_value > FETH_TX_HEADROOM_MAX ||
266 		    (new_value % 8) != 0) {
267 			return EINVAL;
268 		}
269 		if_fake_tx_headroom = new_value;
270 	}
271 	return 0;
272 }
273 
274 SYSCTL_PROC(_net_link_fake, OID_AUTO, tx_headroom,
275     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
276     0, 0, feth_tx_headroom_sysctl, "IU", "Fake ethernet Tx headroom");
277 
278 static int if_fake_fcs = 0;
279 SYSCTL_INT(_net_link_fake, OID_AUTO, fcs, CTLFLAG_RW | CTLFLAG_LOCKED,
280     &if_fake_fcs, 0, "Fake interface using frame check sequence");
281 
282 #define FETH_TRAILER_LENGTH_MAX 28
283 char feth_trailer[FETH_TRAILER_LENGTH_MAX + 1] = "trailertrailertrailertrailer";
284 static unsigned int if_fake_trailer_length = 0;
285 static int
286 feth_trailer_length_sysctl SYSCTL_HANDLER_ARGS
287 {
288 #pragma unused(oidp, arg1, arg2)
289 	unsigned int new_value;
290 	int changed;
291 	int error;
292 
293 	error = sysctl_io_number(req, if_fake_trailer_length,
294 	    sizeof(if_fake_trailer_length), &new_value, &changed);
295 	if (error == 0 && changed != 0) {
296 		if (new_value > FETH_TRAILER_LENGTH_MAX) {
297 			return EINVAL;
298 		}
299 		if_fake_trailer_length = new_value;
300 	}
301 	return 0;
302 }
303 
304 SYSCTL_PROC(_net_link_fake, OID_AUTO, trailer_length,
305     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
306     feth_trailer_length_sysctl, "IU", "Fake interface frame trailer length");
307 
308 /* sysctl net.link.fake.max_mtu */
309 #define FETH_MAX_MTU_DEFAULT    2048
310 #define FETH_MAX_MTU_MAX        ((16 * 1024) - ETHER_HDR_LEN)
311 
312 static unsigned int if_fake_max_mtu = FETH_MAX_MTU_DEFAULT;
313 
314 /* sysctl net.link.fake.buflet_size */
315 #define FETH_BUFLET_SIZE_MIN            512
316 #define FETH_BUFLET_SIZE_MAX            (32 * 1024)
317 #define FETH_TSO_BUFLET_SIZE            (16 * 1024)
318 
319 static unsigned int if_fake_buflet_size = FETH_BUFLET_SIZE_MIN;
320 static unsigned int if_fake_tso_buffer_size = FETH_TSO_BUFLET_SIZE;
321 
322 static int
323 feth_tso_buffer_size_sysctl SYSCTL_HANDLER_ARGS
324 {
325 #pragma unused(oidp, arg1, arg2)
326 	unsigned int new_value;
327 	int changed;
328 	int error;
329 
330 	error = sysctl_io_number(req, if_fake_tso_buffer_size,
331 	    sizeof(if_fake_tso_buffer_size), &new_value, &changed);
332 	if (error == 0 && changed != 0) {
333 		/* must be a power of 2 between min and max */
334 		if (new_value > FETH_BUFLET_SIZE_MAX ||
335 		    new_value < FETH_BUFLET_SIZE_MIN ||
336 		    !is_power_of_two(new_value)) {
337 			return EINVAL;
338 		}
339 		if_fake_tso_buffer_size = new_value;
340 	}
341 	return 0;
342 }
343 
344 SYSCTL_PROC(_net_link_fake, OID_AUTO, tso_buf_size,
345     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
346     0, 0, feth_tso_buffer_size_sysctl, "IU", "Fake interface TSO buffer size");
347 
348 static int
349 feth_max_mtu_sysctl SYSCTL_HANDLER_ARGS
350 {
351 #pragma unused(oidp, arg1, arg2)
352 	unsigned int new_value;
353 	int changed;
354 	int error;
355 
356 	error = sysctl_io_number(req, if_fake_max_mtu,
357 	    sizeof(if_fake_max_mtu), &new_value, &changed);
358 	if (error == 0 && changed != 0) {
359 		if (new_value > FETH_MAX_MTU_MAX ||
360 		    new_value < ETHERMTU ||
361 		    new_value <= if_fake_buflet_size) {
362 			return EINVAL;
363 		}
364 		if_fake_max_mtu = new_value;
365 	}
366 	return 0;
367 }
368 
369 SYSCTL_PROC(_net_link_fake, OID_AUTO, max_mtu,
370     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
371     0, 0, feth_max_mtu_sysctl, "IU", "Fake interface maximum MTU");
372 
373 static int
374 feth_buflet_size_sysctl SYSCTL_HANDLER_ARGS
375 {
376 #pragma unused(oidp, arg1, arg2)
377 	unsigned int new_value;
378 	int changed;
379 	int error;
380 
381 	error = sysctl_io_number(req, if_fake_buflet_size,
382 	    sizeof(if_fake_buflet_size), &new_value, &changed);
383 	if (error == 0 && changed != 0) {
384 		/* must be a power of 2 between min and max */
385 		if (new_value > FETH_BUFLET_SIZE_MAX ||
386 		    new_value < FETH_BUFLET_SIZE_MIN ||
387 		    !is_power_of_two(new_value) ||
388 		    new_value >= if_fake_max_mtu) {
389 			return EINVAL;
390 		}
391 		if_fake_buflet_size = new_value;
392 	}
393 	return 0;
394 }
395 
396 SYSCTL_PROC(_net_link_fake, OID_AUTO, buflet_size,
397     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
398     0, 0, feth_buflet_size_sysctl, "IU", "Fake interface buflet size");
399 
400 static unsigned int if_fake_user_access = 0;
401 
402 static int
403 feth_user_access_sysctl SYSCTL_HANDLER_ARGS
404 {
405 #pragma unused(oidp, arg1, arg2)
406 	unsigned int new_value;
407 	int changed;
408 	int error;
409 
410 	error = sysctl_io_number(req, if_fake_user_access,
411 	    sizeof(if_fake_user_access), &new_value, &changed);
412 	if (error == 0 && changed != 0) {
413 		if (new_value != 0) {
414 			if (new_value != 1) {
415 				return EINVAL;
416 			}
417 		}
418 		if_fake_user_access = new_value;
419 	}
420 	return 0;
421 }
422 
423 SYSCTL_PROC(_net_link_fake, OID_AUTO, user_access,
424     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
425     0, 0, feth_user_access_sysctl, "IU", "Fake interface user access");
426 
427 /* sysctl net.link.fake.if_adv_intvl (unit: millisecond) */
428 #define FETH_IF_ADV_INTVL_MIN            10
429 #define FETH_IF_ADV_INTVL_MAX            INT_MAX
430 
431 static int if_fake_if_adv_interval = 0; /* no interface advisory */
432 static int
433 feth_if_adv_interval_sysctl SYSCTL_HANDLER_ARGS
434 {
435 #pragma unused(oidp, arg1, arg2)
436 	unsigned int new_value;
437 	int changed;
438 	int error;
439 
440 	error = sysctl_io_number(req, if_fake_if_adv_interval,
441 	    sizeof(if_fake_if_adv_interval), &new_value, &changed);
442 	if (error == 0 && changed != 0) {
443 		if ((new_value != 0) && (new_value > FETH_IF_ADV_INTVL_MAX ||
444 		    new_value < FETH_IF_ADV_INTVL_MIN)) {
445 			return EINVAL;
446 		}
447 		if_fake_if_adv_interval = new_value;
448 	}
449 	return 0;
450 }
451 
452 SYSCTL_PROC(_net_link_fake, OID_AUTO, if_adv_intvl,
453     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
454     feth_if_adv_interval_sysctl, "IU",
455     "Fake interface will generate interface advisories reports at the specified interval in ms");
456 
457 /* sysctl net.link.fake.tx_drops */
458 /*
459  * Fake ethernet will drop packet on the transmit path at the specified
460  * rate, i.e drop one in every if_fake_tx_drops number of packets.
461  */
462 #define FETH_TX_DROPS_MIN            0
463 #define FETH_TX_DROPS_MAX            INT_MAX
464 static int if_fake_tx_drops = 0; /* no packets are dropped */
465 static int
466 feth_fake_tx_drops_sysctl SYSCTL_HANDLER_ARGS
467 {
468 #pragma unused(oidp, arg1, arg2)
469 	unsigned int new_value;
470 	int changed;
471 	int error;
472 
473 	error = sysctl_io_number(req, if_fake_tx_drops,
474 	    sizeof(if_fake_tx_drops), &new_value, &changed);
475 	if (error == 0 && changed != 0) {
476 		if (new_value > FETH_TX_DROPS_MAX ||
477 		    new_value < FETH_TX_DROPS_MIN) {
478 			return EINVAL;
479 		}
480 		if_fake_tx_drops = new_value;
481 	}
482 	return 0;
483 }
484 
485 SYSCTL_PROC(_net_link_fake, OID_AUTO, tx_drops,
486     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
487     feth_fake_tx_drops_sysctl, "IU",
488     "Fake interface will intermittently drop packets on Tx path");
489 
490 /* sysctl.net.link.fake.tx_exp_policy */
491 
492 typedef enum {
493 	IFF_TX_EXP_POLICY_DISABLED = 0,          /* Expiry notification disabled */
494 	IFF_TX_EXP_POLICY_DROP_AND_NOTIFY = 1,   /* Expiry notification enabled; drop + notify mode */
495 	IFF_TX_EXP_POLICY_NOTIFY_ONLY = 2,       /* Expiry notification enabled; notify only mode */
496 	IFF_TX_EXP_POLICY_METADATA = 3,          /* Expiry notification enabled; use packet metadata */
497 } iff_tx_exp_policy_t;
498 static iff_tx_exp_policy_t if_fake_tx_exp_policy = IFF_TX_EXP_POLICY_DISABLED;
499 
500 static int
501 feth_fake_tx_exp_policy_sysctl SYSCTL_HANDLER_ARGS
502 {
503 #pragma unused(oidp, arg1, arg2)
504 	unsigned int new_value;
505 	int changed;
506 	int error;
507 
508 	error = sysctl_io_number(req, if_fake_tx_exp_policy,
509 	    sizeof(if_fake_tx_exp_policy), &new_value, &changed);
510 	FAKE_LOG(LOG_DEBUG, FE_DBGF_CONTROL,
511 	    "if_fake_tx_exp_policy: %u -> %u (%d)",
512 	    if_fake_tx_exp_policy, new_value, changed);
513 	if (error == 0 && changed != 0) {
514 		if (new_value > IFF_TX_EXP_POLICY_METADATA ||
515 		    new_value < IFF_TX_EXP_POLICY_DISABLED) {
516 			return EINVAL;
517 		}
518 		if_fake_tx_exp_policy = new_value;
519 	}
520 	return 0;
521 }
522 SYSCTL_PROC(_net_link_fake, OID_AUTO, tx_exp_policy,
523     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
524     feth_fake_tx_exp_policy_sysctl, "IU",
525     "Fake interface handling policy for expired TX attempts "
526     "(0 disabled, 1 drop and notify, 2 notify only, 3 packet metadata)");
527 
528 /* sysctl net.link.fake.tx_completion_mode */
529 typedef enum {
530 	IFF_TX_COMPL_MODE_SYNC = 0,
531 	IFF_TX_COMPL_MODE_ASYNC = 1,
532 } iff_tx_completion_mode_t;
533 static iff_tx_completion_mode_t if_tx_completion_mode = IFF_TX_COMPL_MODE_SYNC;
534 static int
535 feth_fake_tx_completion_mode_sysctl SYSCTL_HANDLER_ARGS
536 {
537 #pragma unused(oidp, arg1, arg2)
538 	unsigned int new_value;
539 	int changed;
540 	int error;
541 
542 	error = sysctl_io_number(req, if_tx_completion_mode,
543 	    sizeof(if_tx_completion_mode), &new_value, &changed);
544 	if (error == 0 && changed != 0) {
545 		if (new_value > IFF_TX_COMPL_MODE_ASYNC ||
546 		    new_value < IFF_TX_COMPL_MODE_SYNC) {
547 			return EINVAL;
548 		}
549 		if_tx_completion_mode = new_value;
550 	}
551 	return 0;
552 }
553 SYSCTL_PROC(_net_link_fake, OID_AUTO, tx_completion_mode,
554     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
555     feth_fake_tx_completion_mode_sysctl, "IU",
556     "Fake interface tx completion mode (0 synchronous, 1 asynchronous)");
557 
558 /* sysctl net.link.fake.llink_cnt */
559 
560 /* The maximum number of logical links (including default link) */
561 #define FETH_MAX_LLINKS 16
562 /*
563  * The default number of logical links (including default link).
564  * Zero means logical link mode is disabled.
565  */
566 #define FETH_DEF_LLINKS 0
567 
568 static uint32_t if_fake_llink_cnt = FETH_DEF_LLINKS;
569 static int
570 feth_fake_llink_cnt_sysctl SYSCTL_HANDLER_ARGS
571 {
572 #pragma unused(oidp, arg1, arg2)
573 	unsigned int new_value;
574 	int changed;
575 	int error;
576 
577 	error = sysctl_io_number(req, if_fake_llink_cnt,
578 	    sizeof(if_fake_llink_cnt), &new_value, &changed);
579 	if (error == 0 && changed != 0) {
580 		if (new_value > FETH_MAX_LLINKS) {
581 			return EINVAL;
582 		}
583 		if_fake_llink_cnt = new_value;
584 	}
585 	return 0;
586 }
587 
588 SYSCTL_PROC(_net_link_fake, OID_AUTO, llink_cnt,
589     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
590     feth_fake_llink_cnt_sysctl, "IU",
591     "Fake interface logical link count");
592 
593 /* sysctl net.link.fake.qset_cnt */
594 
595 /* The maximum number of qsets for each logical link */
596 #define FETH_MAX_QSETS  16
597 /* The default number of qsets for each logical link */
598 #define FETH_DEF_QSETS  4
599 
600 static uint32_t if_fake_qset_cnt = FETH_DEF_QSETS;
601 static int
602 feth_fake_qset_cnt_sysctl SYSCTL_HANDLER_ARGS
603 {
604 #pragma unused(oidp, arg1, arg2)
605 	unsigned int new_value;
606 	int changed;
607 	int error;
608 
609 	error = sysctl_io_number(req, if_fake_qset_cnt,
610 	    sizeof(if_fake_qset_cnt), &new_value, &changed);
611 	if (error == 0 && changed != 0) {
612 		if (new_value == 0 ||
613 		    new_value > FETH_MAX_QSETS) {
614 			return EINVAL;
615 		}
616 		if_fake_qset_cnt = new_value;
617 	}
618 	return 0;
619 }
620 
621 SYSCTL_PROC(_net_link_fake, OID_AUTO, qset_cnt,
622     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
623     feth_fake_qset_cnt_sysctl, "IU",
624     "Fake interface queue set count");
625 
626 
/*
 * _mbuf_adjust_pkthdr_and_data
 * - advance the mbuf data pointer by 'len' bytes and shrink both the
 *   mbuf data length and the packet header length by the same amount
 * - a negative 'len' moves the data pointer back toward the headroom
 *   and grows the lengths instead
 */
static void
_mbuf_adjust_pkthdr_and_data(mbuf_t m, int len)
{
	mbuf_setdata(m, mtod(m, char *) + len, mbuf_len(m) - len);
	mbuf_pkthdr_adjustlen(m, -len);
}
633 
/*
 * get_bpf_header
 * - select the frame header to hand to BPF for mbuf 'm'
 * - if the mbuf carries an offloaded VLAN tag (CSUM_VLAN_TAG_VALID),
 *   build an ether_vlan_header in caller-supplied 'evl_p' so the tag
 *   appears in-line in the BPF capture; otherwise the plain ethernet
 *   header 'eh_p' is used as-is
 * - returns the header pointer and stores its size in *header_len
 */
static inline void *__indexable
get_bpf_header(mbuf_t m, struct ether_header * eh_p,
    struct ether_vlan_header * evl_p, size_t * header_len)
{
	void * header;

	/* no VLAN tag, just use the ethernet header */
	if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
		header = (struct ether_header *__bidi_indexable)eh_p;
		*header_len = sizeof(*eh_p);
		goto done;
	}

	/* has VLAN tag, populate the ether VLAN header */
	bcopy(eh_p, evl_p,
	    offsetof(struct ether_header, ether_type));   /* dst+src ether */
	evl_p->evl_encap_proto = htons(ETHERTYPE_VLAN);   /* VLAN encap */
	evl_p->evl_tag = htons(m->m_pkthdr.vlan_tag);     /* tag */
	evl_p->evl_proto = eh_p->ether_type;              /* proto */
	*header_len = sizeof(*evl_p);
	header = (struct ether_vlan_header *__bidi_indexable)evl_p;

done:
	return header;
}
659 
660 typedef void (*_tap_func)(ifnet_t interface, u_int32_t dlt, mbuf_t packet,
661     void *__sized_by(header_len) header, size_t header_len);
662 
663 static void
fake_bpf_tap_common(ifnet_t ifp,mbuf_t m,struct ether_header * eh_p,_tap_func func)664 fake_bpf_tap_common(ifnet_t ifp, mbuf_t m, struct ether_header * eh_p,
665     _tap_func func)
666 {
667 	struct ether_vlan_header        evl;
668 	void *                          header;
669 	size_t                          header_len;
670 
671 	header = get_bpf_header(m, eh_p, &evl, &header_len);
672 	(*func)(ifp, DLT_EN10MB, m, header, header_len);
673 }
674 
/* tap an inbound packet to BPF listeners on 'ifp' */
static inline void
fake_bpf_tap_in(ifnet_t ifp, mbuf_t m, struct ether_header * eh_p)
{
	fake_bpf_tap_common(ifp, m, eh_p, bpf_tap_in);
}
680 
681 
/* tap an outbound packet to BPF listeners on 'ifp' */
static inline void
fake_bpf_tap_out(ifnet_t ifp, mbuf_t m, struct ether_header * eh_p)
{
	fake_bpf_tap_common(ifp, m, eh_p, bpf_tap_out);
}
687 
688 /**
689 ** virtual ethernet structures, types
690 **/
691 
692 #define IFF_NUM_TX_RINGS_WMM_MODE       4
693 #define IFF_NUM_RX_RINGS_WMM_MODE       1
694 #define IFF_MAX_TX_RINGS        IFF_NUM_TX_RINGS_WMM_MODE
695 #define IFF_MAX_RX_RINGS        IFF_NUM_RX_RINGS_WMM_MODE
696 #define IFF_NUM_TX_QUEUES_WMM_MODE      4
697 #define IFF_NUM_RX_QUEUES_WMM_MODE      1
698 #define IFF_MAX_TX_QUEUES       IFF_NUM_TX_QUEUES_WMM_MODE
699 #define IFF_MAX_RX_QUEUES       IFF_NUM_RX_QUEUES_WMM_MODE
700 
701 #define IFF_MAX_BATCH_SIZE 32
702 
703 typedef uint16_t        iff_flags_t;
704 #define IFF_FLAGS_HWCSUM                0x0001
705 #define IFF_FLAGS_BSD_MODE              0x0002
706 #define IFF_FLAGS_DETACHING             0x0004
707 #define IFF_FLAGS_WMM_MODE              0x0008
708 #define IFF_FLAGS_MULTIBUFLETS          0x0010
709 #define IFF_FLAGS_TSO_SUPPORT           0x0020
710 #define IFF_FLAGS_LRO                   0x0040
711 #define IFF_FLAGS_VLAN_MTU              0x0080
712 #define IFF_FLAGS_VLAN_TAGGING          0x0100
713 #define IFF_FLAGS_SEPARATE_FRAME_HEADER 0x0200
714 #define IFF_FLAGS_NX_ATTACHED           0x0400
715 #define IFF_FLAGS_RX_FLOW_STEERING      0x0800
716 
#if SKYWALK

/* identity of the netif nexus backing a fake interface */
typedef struct {
	uuid_t                  fnx_provider;   /* nexus provider uuid */
	uuid_t                  fnx_instance;   /* nexus instance uuid */
} fake_nx, *fake_nx_t;

/* a single netif queue */
typedef struct {
	kern_netif_queue_t      fq_queue;
} fake_queue;

/* a queue set: the rx/tx queues belonging to one logical link */
typedef struct {
	kern_netif_qset_t       fqs_qset; /* provided by xnu */
	fake_queue              fqs_rx_queue[IFF_MAX_RX_QUEUES];
	fake_queue              fqs_tx_queue[IFF_MAX_TX_QUEUES];
	uint32_t                fqs_rx_queue_cnt;
	uint32_t                fqs_tx_queue_cnt;
	uint32_t                fqs_llink_idx;  /* index of the owning llink */
	uint32_t                fqs_idx;        /* index within the llink */
	uint32_t                fqs_dequeue_cnt;
	uint64_t                fqs_id;
} fake_qset;

/* a logical link and its queue sets */
typedef struct {
	uint64_t                fl_id;
	uint32_t                fl_idx;
	uint32_t                fl_qset_cnt;    /* valid entries in fl_qset */
	fake_qset               fl_qset[FETH_MAX_QSETS];
} fake_llink, * fake_llink_t;

/* shared packet pool — presumably used in IFF_PP_MODE_GLOBAL; verify */
static kern_pbufpool_t         S_pp;

#define IFF_TT_OUTPUT   0x01 /* generate trace_tag on output */
#define IFF_TT_INPUT    0x02 /* generate trace_tag on input */
static int if_fake_trace_tag_flags = 0;
SYSCTL_INT(_net_link_fake, OID_AUTO, trace_tag, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_trace_tag_flags, 0, "Fake interface generate trace_tag");
/* next trace tag value to hand out */
static packet_trace_tag_t if_fake_trace_tag_current = 1;

#endif /* SKYWALK */
757 
/*
 * struct if_fake
 * - per-instance state of one fake ethernet ("feth") interface
 * - reference counted via iff_retain_count
 */
struct if_fake {
	char                    iff_name[IFNAMSIZ]; /* our unique id */
	ifnet_t                 iff_ifp;        /* the ifnet we are attached to */
	iff_flags_t             iff_flags;      /* IFF_FLAGS_* bits */
	uint32_t                iff_retain_count;
	ifnet_t                 iff_peer;       /* the other end */
	int                     iff_media_current;
	int                     iff_media_active;
	uint32_t                iff_media_count; /* valid entries in iff_media_list */
	int                     iff_media_list[IF_FAKE_MEDIA_LIST_MAX];
	boolean_t               iff_start_busy; /* presumably set while feth_start() runs — verify */
	unsigned int            iff_max_mtu;
	uint32_t                iff_fcs;        /* nonzero: append frame check sequence */
	uint32_t                iff_trailer_length; /* bytes of feth_trailer to append */
#if SKYWALK
	fake_nx                 iff_nx;         /* netif nexus provider/instance uuids */
	struct netif_stats      *iff_nifs;
	uint32_t                iff_nifs_ref;
	uint32_t                iff_llink_cnt;  /* number of logical links */
	kern_channel_ring_t     iff_rx_ring[IFF_MAX_RX_RINGS];
	kern_channel_ring_t     iff_tx_ring[IFF_MAX_TX_RINGS];
	fake_llink_t            iff_llink __counted_by_or_null(FETH_MAX_LLINKS);
	thread_call_t           iff_doorbell_tcall; /* deferred tx doorbell thread call */
	thread_call_t           iff_if_adv_tcall;   /* interface advisory thread call */
	boolean_t               iff_doorbell_tcall_active;
	boolean_t               iff_waiting_for_tcall;
	boolean_t               iff_channel_connected;
	iff_pktpool_mode_t      iff_pp_mode;    /* packet pool sharing mode */
	kern_pbufpool_t         iff_rx_pp;      /* rx pool (private modes) */
	kern_pbufpool_t         iff_tx_pp;      /* tx pool (private modes) */
	uint32_t                iff_tx_headroom;
	unsigned int            iff_adv_interval; /* advisory interval in ms; 0 = disabled */
	uint32_t                iff_tx_drop_rate; /* drop one in every N tx packets; 0 = off */
	uint32_t                iff_tx_pkts_count;
	iff_tx_completion_mode_t iff_tx_completion_mode;
	bool                    iff_intf_adv_enabled;
	void                    *iff_intf_adv_kern_ctx;
	kern_nexus_capab_interface_advisory_notify_fn_t iff_intf_adv_notify;
	iff_tx_exp_policy_t     iff_tx_exp_policy;
#endif /* SKYWALK */
};
799 
800 typedef struct if_fake * __single if_fake_ref;
801 
802 static if_fake_ref
803 ifnet_get_if_fake(ifnet_t ifp);
804 
805 static inline boolean_t
feth_in_bsd_mode(if_fake_ref fakeif)806 feth_in_bsd_mode(if_fake_ref fakeif)
807 {
808 	return (fakeif->iff_flags & IFF_FLAGS_BSD_MODE) != 0;
809 }
810 
/* mark the interface as detaching; observed via feth_is_detaching() */
static inline void
feth_set_detaching(if_fake_ref fakeif)
{
	fakeif->iff_flags |= IFF_FLAGS_DETACHING;
}
816 
817 static inline boolean_t
feth_is_detaching(if_fake_ref fakeif)818 feth_is_detaching(if_fake_ref fakeif)
819 {
820 	return (fakeif->iff_flags & IFF_FLAGS_DETACHING) != 0;
821 }
822 
823 static int
feth_enable_dequeue_stall(ifnet_t ifp,uint32_t enable)824 feth_enable_dequeue_stall(ifnet_t ifp, uint32_t enable)
825 {
826 	int error;
827 
828 	if (enable != 0) {
829 		error = ifnet_disable_output(ifp);
830 	} else {
831 		error = ifnet_enable_output(ifp);
832 	}
833 
834 	return error;
835 }
836 
837 #if SKYWALK
838 static inline boolean_t
feth_in_wmm_mode(if_fake_ref fakeif)839 feth_in_wmm_mode(if_fake_ref fakeif)
840 {
841 	return (fakeif->iff_flags & IFF_FLAGS_WMM_MODE) != 0;
842 }
843 
844 static inline boolean_t
feth_using_multibuflets(if_fake_ref fakeif)845 feth_using_multibuflets(if_fake_ref fakeif)
846 {
847 	return (fakeif->iff_flags & IFF_FLAGS_MULTIBUFLETS) != 0;
848 }
849 static void feth_detach_netif_nexus(if_fake_ref fakeif);
850 
851 static inline boolean_t
feth_has_intf_advisory_configured(if_fake_ref fakeif)852 feth_has_intf_advisory_configured(if_fake_ref fakeif)
853 {
854 	return fakeif->iff_adv_interval > 0;
855 }
856 #endif /* SKYWALK */
857 
858 static inline bool
feth_supports_tso(if_fake_ref fakeif)859 feth_supports_tso(if_fake_ref fakeif)
860 {
861 	return (fakeif->iff_flags & IFF_FLAGS_TSO_SUPPORT) != 0;
862 }
863 
/* enable TSO offload support; observed via feth_supports_tso() */
static inline void
feth_set_supports_tso(if_fake_ref fakeif)
{
	fakeif->iff_flags |= IFF_FLAGS_TSO_SUPPORT;
}
869 
870 static inline bool
feth_supports_vlan_mtu(if_fake_ref fakeif)871 feth_supports_vlan_mtu(if_fake_ref fakeif)
872 {
873 	return (fakeif->iff_flags & IFF_FLAGS_VLAN_MTU) != 0;
874 }
875 
/* enable VLAN MTU support; observed via feth_supports_vlan_mtu() */
static inline void
feth_set_supports_vlan_mtu(if_fake_ref fakeif)
{
	fakeif->iff_flags |= IFF_FLAGS_VLAN_MTU;
}
881 
882 static inline bool
feth_supports_vlan_tagging(if_fake_ref fakeif)883 feth_supports_vlan_tagging(if_fake_ref fakeif)
884 {
885 	return (fakeif->iff_flags & IFF_FLAGS_VLAN_TAGGING) != 0;
886 }
887 
/* enable VLAN tagging; observed via feth_supports_vlan_tagging() */
static inline void
feth_set_supports_vlan_tagging(if_fake_ref fakeif)
{
	fakeif->iff_flags |= IFF_FLAGS_VLAN_TAGGING;
}
893 
894 static inline void
feth_set_supports_rx_flow_steering(if_fake_ref fakeif)895 feth_set_supports_rx_flow_steering(if_fake_ref fakeif)
896 {
897 	fakeif->iff_flags |= IFF_FLAGS_RX_FLOW_STEERING;
898 }
899 
900 static inline bool
feth_supports_rx_flow_steering(if_fake_ref fakeif)901 feth_supports_rx_flow_steering(if_fake_ref fakeif)
902 {
903 	return (fakeif->iff_flags & IFF_FLAGS_RX_FLOW_STEERING) != 0;
904 }
905 
906 #define FETH_MAXUNIT    IF_MAXUNIT
907 #define FETH_ZONE_MAX_ELEM      MIN(IFNETS_MAX, FETH_MAXUNIT)
908 
909 static  int feth_clone_create(struct if_clone *, u_int32_t, void *);
910 static  int feth_clone_destroy(ifnet_t);
911 static  int feth_output(ifnet_t ifp, struct mbuf *m);
912 static  void feth_start(ifnet_t ifp);
913 static  int feth_ioctl(ifnet_t ifp, u_long cmd, void * addr);
914 static  int feth_config(ifnet_t ifp, ifnet_t peer);
915 static  void feth_if_free(ifnet_t ifp);
916 static  void feth_ifnet_set_attrs(if_fake_ref fakeif, ifnet_t ifp);
917 static  void feth_free(if_fake_ref fakeif);
918 
919 static struct if_clone
920     feth_cloner = IF_CLONE_INITIALIZER(FAKE_ETHER_NAME,
921     feth_clone_create,
922     feth_clone_destroy,
923     0,
924     FETH_MAXUNIT);
925 static  void interface_link_event(ifnet_t ifp, u_int32_t event_code);
926 
927 /* some media words to pretend to be ethernet */
928 #define FAKE_DEFAULT_MEDIA      IFM_MAKEWORD(IFM_ETHER, IFM_10G_T, IFM_FDX, 0)
929 static int default_media_words[] = {
930 	IFM_MAKEWORD(IFM_ETHER, 0, 0, 0),
931 	FAKE_DEFAULT_MEDIA,
932 	IFM_MAKEWORD(IFM_ETHER, IFM_2500_T, IFM_FDX, 0),
933 	IFM_MAKEWORD(IFM_ETHER, IFM_5000_T, IFM_FDX, 0),
934 
935 	IFM_MAKEWORD(IFM_ETHER, IFM_10G_KX4, IFM_FDX, 0),
936 	IFM_MAKEWORD(IFM_ETHER, IFM_20G_KR2, IFM_FDX, 0),
937 	IFM_MAKEWORD(IFM_ETHER, IFM_2500_SX, IFM_FDX, 0),
938 	IFM_MAKEWORD(IFM_ETHER, IFM_25G_KR, IFM_FDX, 0),
939 	IFM_MAKEWORD(IFM_ETHER, IFM_40G_SR4, IFM_FDX, 0),
940 	IFM_MAKEWORD(IFM_ETHER, IFM_50G_CR2, IFM_FDX, 0),
941 	IFM_MAKEWORD(IFM_ETHER, IFM_56G_R4, IFM_FDX, 0),
942 	IFM_MAKEWORD(IFM_ETHER, IFM_100G_CR4, IFM_FDX, 0),
943 	IFM_MAKEWORD(IFM_ETHER, IFM_400G_AUI8, IFM_FDX, 0),
944 };
945 #define default_media_words_count (sizeof(default_media_words)          \
946 	                           / sizeof (default_media_words[0]))
947 
948 /**
949 ** veth locks
950 **/
951 
952 static LCK_GRP_DECLARE(feth_lck_grp, "fake");
953 static LCK_MTX_DECLARE(feth_lck_mtx, &feth_lck_grp);
954 
/*
 * feth_lock
 * - acquire the global feth mutex serializing access to module state
 *   (e.g. the shared packet pool S_pp and per-interface if_fake lookup)
 */
static inline void
feth_lock(void)
{
	lck_mtx_lock(&feth_lck_mtx);
}
960 
/*
 * feth_unlock
 * - release the global feth mutex acquired via feth_lock()
 */
static inline void
feth_unlock(void)
{
	lck_mtx_unlock(&feth_lck_mtx);
}
966 
967 static inline int
get_max_mtu(int bsd_mode,unsigned int max_mtu)968 get_max_mtu(int bsd_mode, unsigned int max_mtu)
969 {
970 	unsigned int    mtu;
971 
972 	if (bsd_mode != 0) {
973 		mtu = M16KCLBYTES - ETHER_HDR_LEN;
974 		if (mtu > max_mtu) {
975 			mtu = max_mtu;
976 		}
977 	} else {
978 		mtu = max_mtu;
979 	}
980 	return mtu;
981 }
982 
/*
 * feth_max_mtu
 * - return the configured maximum MTU for `ifp'
 * - falls back to ETHERMTU when the interface has no attached if_fake
 *   state (e.g. while it is being torn down)
 */
static inline unsigned int
feth_max_mtu(ifnet_t ifp)
{
	if_fake_ref     fakeif;
	unsigned int    max_mtu = ETHERMTU;

	/* the feth lock protects the ifp -> if_fake association */
	feth_lock();
	fakeif = ifnet_get_if_fake(ifp);
	if (fakeif != NULL) {
		max_mtu = fakeif->iff_max_mtu;
	}
	feth_unlock();
	return max_mtu;
}
997 
/*
 * feth_free
 * - final teardown of an if_fake, called once its retain count drops to
 *   zero (see feth_release())
 * - in native (skywalk) mode, releases the RX/TX packet pool references;
 *   for the shared global pool, also drops the global S_pp reference
 *   when this interface was its last user
 * - frees the logical-link array and the if_fake structure itself
 */
static void
feth_free(if_fake_ref fakeif)
{
	VERIFY(fakeif->iff_retain_count == 0);
#if SKYWALK
	if (!feth_in_bsd_mode(fakeif)) {
		if (fakeif->iff_pp_mode == IFF_PP_MODE_GLOBAL) {
			/* both RX and TX must reference the shared pool */
			VERIFY(fakeif->iff_rx_pp == S_pp);
			VERIFY(fakeif->iff_tx_pp == S_pp);
			pp_release(fakeif->iff_rx_pp);
			fakeif->iff_rx_pp = NULL;
			pp_release(fakeif->iff_tx_pp);
			fakeif->iff_tx_pp = NULL;
			feth_lock();
			/*
			 * pp_refcnt == 1 means only the global S_pp
			 * reference itself remains; drop it so the shared
			 * pool is destroyed when no interface uses it
			 */
			if (S_pp != NULL && S_pp->pp_refcnt == 1) {
				pp_release(S_pp);
				S_pp = NULL;
			}
			feth_unlock();
		} else {
			/* private pool(s); RX and TX may be the same pool */
			if (fakeif->iff_rx_pp != NULL) {
				pp_release(fakeif->iff_rx_pp);
				fakeif->iff_rx_pp = NULL;
			}
			if (fakeif->iff_tx_pp != NULL) {
				pp_release(fakeif->iff_tx_pp);
				fakeif->iff_tx_pp = NULL;
			}
		}
	}
#endif /* SKYWALK */

	FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE, "%s", fakeif->iff_name);
	if (fakeif->iff_llink != NULL) {
		fake_llink_t    llink;
		llink = fakeif->iff_llink;
		fakeif->iff_llink = NULL;
		kfree_type(fake_llink, FETH_MAX_LLINKS, llink);
	}
	kfree_type(struct if_fake, fakeif);
}
1039 
1040 static void
feth_release(if_fake_ref fakeif)1041 feth_release(if_fake_ref fakeif)
1042 {
1043 	u_int32_t               old_retain_count;
1044 
1045 	old_retain_count = OSDecrementAtomic(&fakeif->iff_retain_count);
1046 	switch (old_retain_count) {
1047 	case 0:
1048 		VERIFY(old_retain_count != 0);
1049 		break;
1050 	case 1:
1051 		feth_free(fakeif);
1052 		break;
1053 	default:
1054 		break;
1055 	}
1056 	return;
1057 }
1058 
1059 #if SKYWALK
1060 
/*
 * feth_retain
 * - take an additional reference on `fakeif'; paired with feth_release()
 */
static void
feth_retain(if_fake_ref fakeif)
{
	OSIncrementAtomic(&fakeif->iff_retain_count);
}
1066 
/*
 * feth_packet_pool_init_prepare
 * - fill in `*pp_init' describing the packet buffer pool(s) for
 *   `fakeif', based on its configured max MTU and its TSO and
 *   multi-buflet settings
 * - multi-buflet mode chains enough on-demand buflets to cover the MTU;
 *   otherwise a single buflet must be large enough for the whole MTU
 */
static void
feth_packet_pool_init_prepare(if_fake_ref fakeif,
    struct kern_pbufpool_init *pp_init)
{
	uint32_t max_mtu = fakeif->iff_max_mtu;
	uint32_t buflet_size = if_fake_buflet_size;

	bzero(pp_init, sizeof(*pp_init));
	pp_init->kbi_version = KERN_PBUFPOOL_CURRENT_VERSION;
	pp_init->kbi_flags |= KBIF_VIRTUAL_DEVICE;
	pp_init->kbi_packets = 1024; /* TBD configurable */
	if (feth_supports_tso(fakeif)) {
		/* TSO packets need larger buffers */
		buflet_size = if_fake_tso_buffer_size;
	}
	if (feth_using_multibuflets(fakeif)) {
		/* enough buflets per packet to cover the MTU */
		pp_init->kbi_bufsize = buflet_size;
		pp_init->kbi_max_frags = howmany(max_mtu, buflet_size);
		pp_init->kbi_buflets = pp_init->kbi_packets *
		    pp_init->kbi_max_frags;
		pp_init->kbi_flags |= KBIF_BUFFER_ON_DEMAND;
	} else {
		/* one buflet per packet, sized for the full MTU */
		pp_init->kbi_bufsize = max(max_mtu, buflet_size);
		pp_init->kbi_max_frags = 1;
		pp_init->kbi_buflets = pp_init->kbi_packets;
	}
	pp_init->kbi_buf_seg_size = skmem_usr_buf_seg_size;
	if (if_fake_user_access != 0) {
		pp_init->kbi_flags |= KBIF_USER_ACCESS;
	}
	/* no pool context or context retain/release callbacks needed */
	pp_init->kbi_ctx = NULL;
	pp_init->kbi_ctx_retain = NULL;
	pp_init->kbi_ctx_release = NULL;
}
1100 
1101 static errno_t
feth_packet_pool_make(if_fake_ref fakeif)1102 feth_packet_pool_make(if_fake_ref fakeif)
1103 {
1104 	struct kern_pbufpool_init pp_init;
1105 	errno_t err;
1106 
1107 	feth_packet_pool_init_prepare(fakeif, &pp_init);
1108 
1109 	switch (fakeif->iff_pp_mode) {
1110 	case IFF_PP_MODE_GLOBAL:
1111 		feth_lock();
1112 		if (S_pp == NULL) {
1113 			(void)snprintf((char *)pp_init.kbi_name,
1114 			    sizeof(pp_init.kbi_name), "%s", "feth shared pp");
1115 			err = kern_pbufpool_create(&pp_init, &S_pp, NULL);
1116 		}
1117 		pp_retain(S_pp);
1118 		feth_unlock();
1119 		fakeif->iff_rx_pp = S_pp;
1120 		pp_retain(S_pp);
1121 		fakeif->iff_tx_pp = S_pp;
1122 		break;
1123 	case IFF_PP_MODE_PRIVATE:
1124 		(void)snprintf((char *)pp_init.kbi_name,
1125 		    sizeof(pp_init.kbi_name), "%s pp", fakeif->iff_name);
1126 		err = kern_pbufpool_create(&pp_init, &fakeif->iff_rx_pp, NULL);
1127 		pp_retain(fakeif->iff_rx_pp);
1128 		fakeif->iff_tx_pp = fakeif->iff_rx_pp;
1129 		break;
1130 	case IFF_PP_MODE_PRIVATE_SPLIT:
1131 		(void)snprintf((char *)pp_init.kbi_name,
1132 		    sizeof(pp_init.kbi_name), "%s rx pp", fakeif->iff_name);
1133 		pp_init.kbi_flags &= ~(KBIF_IODIR_IN | KBIF_IODIR_OUT |
1134 		    KBIF_BUFFER_ON_DEMAND | KBIF_KERNEL_READONLY);
1135 		pp_init.kbi_flags |= (KBIF_IODIR_IN | KBIF_BUFFER_ON_DEMAND);
1136 		pp_init.kbi_packets = 1024;
1137 		pp_init.kbi_bufsize = if_fake_link_layer_aggregation_factor * 1024;
1138 		err = kern_pbufpool_create(&pp_init, &fakeif->iff_rx_pp, NULL);
1139 		if (err != 0) {
1140 			FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
1141 			    "rx pp create failed %d", err);
1142 			return err;
1143 		}
1144 		pp_init.kbi_flags &= ~(KBIF_IODIR_IN | KBIF_IODIR_OUT |
1145 		    KBIF_BUFFER_ON_DEMAND | KBIF_KERNEL_READONLY);
1146 		pp_init.kbi_flags |= KBIF_IODIR_OUT;
1147 		pp_init.kbi_packets = 1024;            /* TBD configurable */
1148 		pp_init.kbi_bufsize = fakeif->iff_max_mtu;
1149 		(void)snprintf((char *)pp_init.kbi_name,
1150 		    sizeof(pp_init.kbi_name), "%s tx pp", fakeif->iff_name);
1151 		err = kern_pbufpool_create(&pp_init, &fakeif->iff_tx_pp, NULL);
1152 		if (err != 0) {
1153 			FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
1154 			    "tx pp create failed %d", err);
1155 			pp_release(fakeif->iff_rx_pp);
1156 			return err;
1157 		}
1158 		break;
1159 	default:
1160 		VERIFY(0);
1161 		__builtin_unreachable();
1162 	}
1163 
1164 	return 0;
1165 }
1166 
/*
 * feth_packet_set_trace_tag
 * - stamp `ph' with the next trace-tag value when tagging is enabled for
 *   the given direction flag (e.g. IFF_TT_INPUT)
 * - the counter skips 0 when wrapping, so 0 means "untagged"
 * - NOTE(review): if_fake_trace_tag_current is incremented without
 *   synchronization; concurrent callers may observe duplicate tags --
 *   presumably acceptable for a test-only tag, confirm if relied upon
 */
static void
feth_packet_set_trace_tag(kern_packet_t ph, int flag)
{
	if (if_fake_trace_tag_flags & flag) {
		if (++if_fake_trace_tag_current == 0) {
			if_fake_trace_tag_current = 1;
		}
		kern_packet_set_trace_tag(ph, if_fake_trace_tag_current);
	}
}
1177 
/*
 * feth_clone_packet
 * - produce an RX-side copy of the single-buflet packet `sph' for the
 *   destination interface `dif' (used in IFF_PP_MODE_PRIVATE_SPLIT)
 * - if the previously produced packet `*pdph' still has room left past
 *   its data limit within the same underlying buffer object, the new
 *   copy is packed there via a light clone; otherwise a fresh packet is
 *   allocated from dif's RX pool
 * - copies the data bytes plus offset/limit/length, headroom, link
 *   header length and service class metadata, then finalizes
 * - on success *pdph is replaced with the new packet handle; on failure
 *   the error is returned (drop stats are bumped on allocation failure)
 */
static errno_t
feth_clone_packet(if_fake_ref dif, kern_packet_t sph, kern_packet_t *pdph)
{
	errno_t err = 0;
	kern_pbufpool_t pp = dif->iff_rx_pp;
	kern_packet_t dph = 0, dph0 = 0;
	kern_buflet_t sbuf, dbuf0 = NULL, dbuf;
	caddr_t saddr, daddr;
	uint32_t soff, doff;
	uint32_t slen, dlen;
	uint32_t dlim0, dlim;

	/* source data region: same offset/length will be used for the copy */
	sbuf = kern_packet_get_next_buflet(sph, NULL);
	saddr = __unsafe_forge_bidi_indexable(caddr_t,
	    kern_buflet_get_data_address(sbuf),
	    kern_buflet_get_data_limit(sbuf));
	doff = soff = kern_buflet_get_data_offset(sbuf);
	dlen = slen = kern_buflet_get_data_length(sbuf);

	/* packet clone is only supported for single-buflet */
	ASSERT(kern_packet_get_buflet_count(sph) == 1);
	ASSERT(soff == kern_packet_get_headroom(sph));
	ASSERT(slen == kern_packet_get_data_length(sph));

	/*
	 * dlim0 = bytes still unused in the previous packet's buffer
	 * object past its current data limit (0 when there is no
	 * previous packet)
	 */
	dph0 = *pdph;
	if (dph0 == 0) {
		dlim0 = 0;
	} else {
		dbuf0 = kern_packet_get_next_buflet(dph0, NULL);
		ASSERT(kern_buflet_get_object_limit(dbuf0) ==
		    PP_BUF_OBJ_SIZE_DEF(pp));
		ASSERT(kern_buflet_get_data_limit(dbuf0) % 16 == 0);
		dlim0 = ((size_t)kern_buflet_get_object_address(dbuf0) +
		    kern_buflet_get_object_limit(dbuf0)) -
		    ((size_t)kern_buflet_get_data_address(dbuf0) +
		    kern_buflet_get_data_limit(dbuf0));
	}

	if (doff + dlen > dlim0) {
		/* not enough room left: allocate a fresh packet */
		err = kern_pbufpool_alloc_nosleep(pp, 1, &dph);
		if (err != 0) {
			STATS_INC(dif->iff_nifs, NETIF_STATS_DROP);
			STATS_INC(dif->iff_nifs, NETIF_STATS_DROP_NOMEM_PKT);
			return err;
		}
		dbuf = kern_packet_get_next_buflet(dph, NULL);
		ASSERT(kern_buflet_get_data_address(dbuf) ==
		    kern_buflet_get_object_address(dbuf));
		daddr = __unsafe_forge_bidi_indexable(caddr_t,
		    kern_buflet_get_data_address(dbuf),
		    kern_buflet_get_data_limit(dbuf));
		dlim = kern_buflet_get_object_limit(dbuf);
		ASSERT(dlim == PP_BUF_OBJ_SIZE_DEF(pp));
	} else {
		/* pack into the tail of the previous packet's buffer */
		err = kern_packet_clone_nosleep(dph0, &dph, KPKT_COPY_LIGHT);
		if (err != 0) {
			FAKE_LOG(LOG_INFO, FE_DBGF_OUTPUT,
			    "packet clone err %d", err);
			return err;
		}
		dbuf = kern_packet_get_next_buflet(dph, NULL);
		ASSERT(kern_buflet_get_object_address(dbuf) ==
		    kern_buflet_get_object_address(dbuf0));
		/* destination starts right after dph0's data limit */
		daddr = __unsafe_forge_bidi_indexable(caddr_t,
		    kern_buflet_get_data_address(dbuf0),
		    kern_buflet_get_object_limit(dbuf0)) + kern_buflet_get_data_limit(dbuf0);
		dlim = dlim0;
	}

	ASSERT(doff + dlen <= dlim);

	/* copy the payload; destination must keep 16-byte alignment */
	ASSERT((uintptr_t)daddr % 16 == 0);
	bcopy(saddr + soff, daddr + doff, slen);

	/* round the limit up to 16 so the next clone stays aligned */
	dlim = MIN(dlim, P2ROUNDUP(doff + dlen, 16));
	err = kern_buflet_set_data_address(dbuf, daddr);
	VERIFY(err == 0);
	err = kern_buflet_set_data_limit(dbuf, dlim);
	VERIFY(err == 0);
	err = kern_buflet_set_data_length(dbuf, dlen);
	VERIFY(err == 0);
	err = kern_buflet_set_data_offset(dbuf, doff);
	VERIFY(err == 0);
	err = kern_packet_set_headroom(dph, doff);
	VERIFY(err == 0);
	err = kern_packet_set_link_header_length(dph,
	    kern_packet_get_link_header_length(sph));
	VERIFY(err == 0);
	err = kern_packet_set_service_class(dph,
	    kern_packet_get_service_class(sph));
	VERIFY(err == 0);
	err = kern_packet_finalize(dph);
	VERIFY(err == 0);
	*pdph = dph;

	return err;
}
1275 
/*
 * feth_copy_buflet
 * - copy the data region (at the source's offset, for its length) of
 *   `sbuf' into `dbuf', preserving the same offset in the destination
 * - mirrors the offset/length metadata onto the destination buflet
 * - assumes dbuf's buffer is large enough for off + len (not checked)
 */
static inline void
feth_copy_buflet(kern_buflet_t sbuf, kern_buflet_t dbuf)
{
	errno_t err;
	uint32_t off, len;
	caddr_t saddr, daddr;

	saddr = __unsafe_forge_bidi_indexable(caddr_t,
	    kern_buflet_get_data_address(sbuf),
	    kern_buflet_get_data_limit(sbuf));
	off = kern_buflet_get_data_offset(sbuf);
	len = kern_buflet_get_data_length(sbuf);
	daddr = __unsafe_forge_bidi_indexable(caddr_t,
	    kern_buflet_get_data_address(dbuf),
	    kern_buflet_get_data_limit(dbuf));
	/* same offset in source and destination */
	bcopy(saddr + off, daddr + off, len);
	err = kern_buflet_set_data_offset(dbuf, off);
	VERIFY(err == 0);
	err = kern_buflet_set_data_length(dbuf, len);
	VERIFY(err == 0);
}
1297 
/*
 * feth_add_packet_trailer
 * - append `trailer_len' bytes of `trailer' after the existing data in
 *   the LAST buflet of packet `ph', then re-finalize the packet
 * - returns 0 on success, ERANGE when the last buflet has insufficient
 *   room between its data end and its data limit
 */
static int
feth_add_packet_trailer(kern_packet_t ph, void * __sized_by(trailer_len) trailer, size_t trailer_len)
{
	errno_t err = 0;

	ASSERT(trailer_len <= FETH_TRAILER_LENGTH_MAX);

	/* walk to the last buflet of the packet */
	kern_buflet_t buf = NULL, iter = NULL;
	while ((iter = kern_packet_get_next_buflet(ph, iter)) != NULL) {
		buf = iter;
	}
	ASSERT(buf != NULL);

	uint32_t dlim = kern_buflet_get_data_limit(buf);
	uint32_t doff = kern_buflet_get_data_offset(buf);
	uint32_t dlen = kern_buflet_get_data_length(buf);

	/* free space between end of data and the buflet's limit */
	size_t trailer_room = dlim - doff - dlen;

	if (trailer_room < trailer_len) {
		FAKE_LOG(LOG_INFO, FE_DBGF_OUTPUT, "not enough room");
		return ERANGE;
	}

	/* copy the trailer just past the current data */
	void *data = __unsafe_forge_bidi_indexable(caddr_t,
	    kern_buflet_get_data_address(buf),
	    kern_buflet_get_data_limit(buf)) + doff + dlen;
	memcpy(data, trailer, trailer_len);

	err = kern_buflet_set_data_length(buf, dlen + trailer_len);
	VERIFY(err == 0);

	/* metadata changed; finalize again */
	err = kern_packet_finalize(ph);
	VERIFY(err == 0);

	FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT, "%zuB trailer added", trailer_len);

	return 0;
}
1337 
1338 static int
feth_add_packet_fcs(kern_packet_t ph)1339 feth_add_packet_fcs(kern_packet_t ph)
1340 {
1341 	uint32_t crc = 0;
1342 	int err;
1343 
1344 	ASSERT(sizeof(crc) == ETHER_CRC_LEN);
1345 
1346 	kern_buflet_t buf = NULL;
1347 	while ((buf = kern_packet_get_next_buflet(ph, buf)) != NULL) {
1348 		uint32_t doff = kern_buflet_get_data_offset(buf);
1349 		uint32_t dlen = kern_buflet_get_data_length(buf);
1350 		void *data = __unsafe_forge_bidi_indexable(caddr_t,
1351 		    kern_buflet_get_data_address(buf),
1352 		    kern_buflet_get_data_limit(buf)) + doff;
1353 		crc = crc32(crc, data, dlen);
1354 	}
1355 
1356 	err = feth_add_packet_trailer(ph, &crc, ETHER_CRC_LEN);
1357 	if (!err) {
1358 		return err;
1359 	}
1360 
1361 	err = kern_packet_set_link_ethfcs(ph);
1362 	VERIFY(err == 0);
1363 
1364 	return 0;
1365 }
1366 
/*
 * feth_copy_packet
 * - deep-copy packet `sph' into a new packet allocated from dif's RX
 *   pool (used in IFF_PP_MODE_PRIVATE)
 * - single-buflet packets copy into the pre-attached buflet of the new
 *   packet; multi-buflet packets additionally allocate and chain one
 *   destination buflet per remaining source buflet
 * - copies headroom, link header length and service class metadata and
 *   finalizes the new packet
 * - on success *pdph holds the new packet; on failure the partially
 *   built packet is freed, drop stats are bumped and *pdph stays 0
 */
static errno_t
feth_copy_packet(if_fake_ref dif, kern_packet_t sph, kern_packet_t *pdph)
{
	errno_t err = 0;
	uint16_t i, bufcnt;
	mach_vm_address_t baddr;
	kern_buflet_t sbuf = NULL, dbuf = NULL;
	kern_pbufpool_t pp = dif->iff_rx_pp;
	kern_packet_t dph;
	boolean_t multi_buflet = feth_using_multibuflets(dif);

	bufcnt = kern_packet_get_buflet_count(sph);
	ASSERT((bufcnt == 1) || multi_buflet);
	*pdph = 0;

	err = kern_pbufpool_alloc_nosleep(pp, 1, &dph);
	if (err != 0) {
		STATS_INC(dif->iff_nifs, NETIF_STATS_DROP);
		STATS_INC(dif->iff_nifs, NETIF_STATS_DROP_NOMEM_PKT);
		return err;
	}

	/* pre-constructed single buflet packet copy */
	sbuf = kern_packet_get_next_buflet(sph, NULL);
	dbuf = kern_packet_get_next_buflet(dph, NULL);
	feth_copy_buflet(sbuf, dbuf);

	if (!multi_buflet) {
		goto done;
	}

	/* un-constructed multi-buflet packet copy */
	for (i = 1; i < bufcnt; i++) {
		kern_buflet_t __single dbuf_next = NULL;

		sbuf = kern_packet_get_next_buflet(sph, sbuf);
		VERIFY(sbuf != NULL);
		err = kern_pbufpool_alloc_buflet_nosleep(pp, &dbuf_next);
		if (err != 0) {
			STATS_INC(dif->iff_nifs, NETIF_STATS_DROP);
			STATS_INC(dif->iff_nifs, NETIF_STATS_DROP_NOMEM_BUF);
			break;
		}
		ASSERT(dbuf_next != NULL);
		feth_copy_buflet(sbuf, dbuf_next);
		/* chain the new buflet after the current tail */
		err = kern_packet_add_buflet(dph, dbuf, dbuf_next);
		VERIFY(err == 0);
		dbuf = dbuf_next;
	}
	if (__improbable(err != 0)) {
		/*
		 * buflet allocation failed mid-copy: walk the chain built
		 * so far (sanity-checking each buffer address) and free
		 * the whole partially constructed packet
		 */
		dbuf = NULL;
		while (i-- != 0) {
			dbuf = kern_packet_get_next_buflet(dph, dbuf);
			VERIFY(dbuf != NULL);
			baddr = (mach_vm_address_t)
			    kern_buflet_get_data_address(dbuf);
			VERIFY(baddr != 0);
		}
		kern_pbufpool_free(pp, dph);
		dph = 0;
	}

done:
	if (__probable(err == 0)) {
		err = kern_packet_set_headroom(dph,
		    kern_packet_get_headroom(sph));
		VERIFY(err == 0);
		err = kern_packet_set_link_header_length(dph,
		    kern_packet_get_link_header_length(sph));
		VERIFY(err == 0);
		err = kern_packet_set_service_class(dph,
		    kern_packet_get_service_class(sph));
		VERIFY(err == 0);
		err = kern_packet_finalize(dph);
		VERIFY(err == 0);
		VERIFY(bufcnt == kern_packet_get_buflet_count(dph));
		*pdph = dph;
	}
	return err;
}
1447 
1448 static inline void
feth_update_pkt_tso_metadata_for_rx(kern_packet_t ph)1449 feth_update_pkt_tso_metadata_for_rx(kern_packet_t ph)
1450 {
1451 	/*
1452 	 * Nothing to do if not a TSO offloaded packet.
1453 	 */
1454 	uint16_t seg_sz = 0;
1455 	seg_sz = kern_packet_get_protocol_segment_size(ph);
1456 	if (seg_sz == 0) {
1457 		return;
1458 	}
1459 	/*
1460 	 * For RX, make the packet appear as a fully validated LRO packet.
1461 	 */
1462 	packet_csum_flags_t csum_flags = PACKET_CSUM_IP_CHECKED |
1463 	    PACKET_CSUM_IP_VALID | PACKET_CSUM_DATA_VALID |
1464 	    PACKET_CSUM_PSEUDO_HDR;
1465 	(void) kern_packet_set_inet_checksum(ph, csum_flags, 0, 0xFFFF, FALSE);
1466 	return;
1467 }
1468 
/*
 * feth_rx_submit
 * - deliver `n_pkts' packets transmitted by source interface `sif' into
 *   the default RX ring of destination interface `dif'
 * - each packet is transferred according to dif's pool mode: moved
 *   as-is (global shared pool; the slot in sphs is cleared to transfer
 *   ownership), deep-copied (private pool), or clone-packed (split pool)
 * - applies sif's configured trailer/FCS, trace-tags, BPF-taps, attaches
 *   each packet to an RX slot, then advances the ring, updates its net
 *   stats and notifies the channel
 * - stops early (remaining packets are not delivered) when the RX ring
 *   runs out of slots
 */
static void
feth_rx_submit(if_fake_ref sif, if_fake_ref dif, kern_packet_t * __counted_by(n_pkts) sphs,
    uint32_t n_pkts)
{
	errno_t err = 0;
	struct kern_channel_ring_stat_increment stats;
	kern_channel_ring_t rx_ring = NULL;
	kern_channel_slot_t rx_slot = NULL, last_rx_slot = NULL;
	kern_packet_t sph = 0, dph = 0;

	memset(&stats, 0, sizeof(stats));

	/* only the default (first) RX ring is used */
	rx_ring = dif->iff_rx_ring[0];
	if (rx_ring == NULL) {
		return;
	}

	kr_enter(rx_ring, TRUE);
	kern_channel_reclaim(rx_ring);
	rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);

	for (uint32_t i = 0; i < n_pkts && rx_slot != NULL; i++) {
		sph = sphs[i];

		switch (dif->iff_pp_mode) {
		case IFF_PP_MODE_GLOBAL:
			/* shared pool: hand the packet over directly */
			sphs[i] = 0;
			dph = sph;
			feth_update_pkt_tso_metadata_for_rx(dph);
			err = kern_packet_finalize(dph);
			VERIFY(err == 0);
			break;
		case IFF_PP_MODE_PRIVATE:
			err = feth_copy_packet(dif, sph, &dph);
			break;
		case IFF_PP_MODE_PRIVATE_SPLIT:
			err = feth_clone_packet(dif, sph, &dph);
			break;
		default:
			VERIFY(0);
			__builtin_unreachable();
		}
		if (__improbable(err != 0)) {
			/* copy/clone failed; skip this packet */
			continue;
		}

		/* apply the source interface's trailer/FCS simulation */
		if (sif->iff_trailer_length != 0) {
			feth_add_packet_trailer(dph, feth_trailer,
			    sif->iff_trailer_length);
		}
		if (sif->iff_fcs != 0) {
			feth_add_packet_fcs(dph);
		}
		feth_packet_set_trace_tag(dph, IFF_TT_INPUT);
		bpf_tap_packet_in(dif->iff_ifp, DLT_EN10MB, dph, NULL, 0);
		stats.kcrsi_slots_transferred++;
		stats.kcrsi_bytes_transferred
		        += kern_packet_get_data_length(dph);

		/* attach the packet to the RX ring */
		err = kern_channel_slot_attach_packet(rx_ring, rx_slot, dph);
		VERIFY(err == 0);
		last_rx_slot = rx_slot;
		rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
	}

	/* publish the attached packets and account them */
	if (last_rx_slot != NULL) {
		kern_channel_advance_slot(rx_ring, last_rx_slot);
		kern_channel_increment_ring_net_stats(rx_ring, dif->iff_ifp,
		    &stats);
	}

	if (rx_ring != NULL) {
		kr_exit(rx_ring);
		kern_channel_notify(rx_ring, 0);
	}
}
1546 
/*
 * feth_rx_queue_submit
 * - deliver `n_pkts' packets from source interface `sif' into the
 *   default RX queue of the (llink_idx, qset_idx) queue set on the
 *   destination interface `dif' (logical-link / qset variant of
 *   feth_rx_submit())
 * - silently drops the whole batch when the llink index, qset index or
 *   default queue is invalid (debug-logged)
 * - per-packet transfer follows dif's pool mode: move (global pool;
 *   sphs slot cleared to transfer ownership), deep copy (private), or
 *   clone-pack (split); then applies sif's trailer/FCS, trace tag and
 *   BPF tap before enqueueing; the last packet requests a queue flush
 */
static void
feth_rx_queue_submit(if_fake_ref sif, if_fake_ref dif, uint32_t llink_idx,
    uint32_t qset_idx, kern_packet_t * __counted_by(n_pkts) sphs, uint32_t n_pkts)
{
	errno_t err = 0;
	kern_netif_queue_t queue;
	kern_packet_t sph = 0, dph = 0;
	fake_llink *llink;
	fake_qset *qset;

	if (llink_idx >= dif->iff_llink_cnt) {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_INPUT,
		    "invalid llink_idx idx %d (max %d) on peer %s",
		    llink_idx, dif->iff_llink_cnt, dif->iff_name);
		return;
	}
	llink = &dif->iff_llink[llink_idx];
	if (qset_idx >= llink->fl_qset_cnt) {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_INPUT,
		    "invalid qset_idx %d (max %d) on peer %s",
		    qset_idx, llink->fl_qset_cnt, dif->iff_name);
		return;
	}
	/* deliver to the qset's default (first) RX queue */
	qset = &dif->iff_llink[llink_idx].fl_qset[qset_idx];
	queue = qset->fqs_rx_queue[0].fq_queue;
	if (queue == NULL) {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_INPUT,
		    "NULL default queue (llink_idx %d, qset_idx %d) on peer %s",
		    llink_idx, qset_idx, dif->iff_name);
		return;
	}
	for (uint32_t i = 0; i < n_pkts; i++) {
		uint32_t flags;

		sph = sphs[i];

		switch (dif->iff_pp_mode) {
		case IFF_PP_MODE_GLOBAL:
			/* shared pool: hand the packet over directly */
			sphs[i] = 0;
			dph = sph;
			feth_update_pkt_tso_metadata_for_rx(dph);
			break;
		case IFF_PP_MODE_PRIVATE:
			err = feth_copy_packet(dif, sph, &dph);
			break;
		case IFF_PP_MODE_PRIVATE_SPLIT:
			err = feth_clone_packet(dif, sph, &dph);
			break;
		default:
			VERIFY(0);
			__builtin_unreachable();
		}
		if (__improbable(err != 0)) {
			/* copy/clone failed; skip this packet */
			continue;
		}

		/* apply the source interface's trailer/FCS simulation */
		if (sif->iff_trailer_length != 0) {
			feth_add_packet_trailer(dph, feth_trailer,
			    sif->iff_trailer_length);
		}
		if (sif->iff_fcs != 0) {
			feth_add_packet_fcs(dph);
		}
		feth_packet_set_trace_tag(dph, IFF_TT_INPUT);
		bpf_tap_packet_in(dif->iff_ifp, DLT_EN10MB, dph, NULL, 0);

		/* flush only on the final packet of the batch */
		flags = (i == n_pkts - 1) ?
		    KERN_NETIF_QUEUE_RX_ENQUEUE_FLAG_FLUSH : 0;
		kern_netif_queue_rx_enqueue(queue, dph, 1, flags);
	}
}
1618 
1619 static void
feth_tx_complete(if_fake_ref fakeif,kern_packet_t * __counted_by (nphs)phs,uint32_t nphs)1620 feth_tx_complete(if_fake_ref fakeif, kern_packet_t * __counted_by(nphs) phs, uint32_t nphs)
1621 {
1622 	for (uint32_t i = 0; i < nphs; i++) {
1623 		kern_packet_t ph = phs[i];
1624 		if (ph == 0) {
1625 			continue;
1626 		}
1627 		int err = kern_packet_set_tx_completion_status(ph, 0);
1628 		VERIFY(err == 0);
1629 		kern_packet_tx_completion(ph, fakeif->iff_ifp);
1630 		kern_pbufpool_free(fakeif->iff_tx_pp, phs[i]);
1631 		phs[i] = 0;
1632 	}
1633 }
1634 
1635 #define NSEC_PER_USEC 1000ull
1636 /*
1637  * Calculate the time delta that passed from `since' to `until'.
1638  * If `until' happens before `since', returns negative value.
1639  */
/*
 * feth_packet_has_expired
 * - returns true when `ph' carries an expire time (mach absolute time)
 *   that is already in the past; in that case *out_deadline (if
 *   non-NULL) is set to that expire time
 * - packets with no expire-time metadata are reported as not expired
 * - NOTE(review): when the packet is within
 *   if_fake_expiration_threshold_us of expiring (but not yet past it),
 *   the function still returns false -- the threshold branch and the
 *   fall-through do the same thing; confirm whether near-expiry was
 *   meant to count as expired
 */
static bool
feth_packet_has_expired(if_fake_ref __unused fakeif, kern_packet_t ph,
    uint64_t *out_deadline)
{
	uint64_t now;
	uint64_t packet_expire_time_mach;
	int64_t time_until_expiration;
	errno_t err;
	bool expired = false;

	/* lazily captured timebase for mach-ticks -> nanoseconds */
	static mach_timebase_info_data_t clock_timebase = {0, 0};

	if (clock_timebase.denom == 0) {
		clock_timebase_info(&clock_timebase);
		VERIFY(clock_timebase.denom != 0);
	}

	err = kern_packet_get_expire_time(ph, &packet_expire_time_mach);
	if (err) {
		/* no expire-time metadata: not expired */
		goto out;
	}

	now = mach_absolute_time();
	time_until_expiration = packet_expire_time_mach - now;
	if (time_until_expiration < 0) {
		/* The packet had expired */
		expired = true;
		goto out;
	}

	/* Convert the time_delta from mach ticks to nanoseconds */
	time_until_expiration *= clock_timebase.numer;
	time_until_expiration /= clock_timebase.denom;
	/* convert from nanoseconds to microseconds */
	time_until_expiration /= 1000ull;

	if (if_fake_expiration_threshold_us < time_until_expiration) {
		/* packet has some life ahead of it */
		FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
		    "Packet has %llu usec until expiration",
		    time_until_expiration);
		goto out;
	}

out:
	/* deadline is only valid when the packet actually expired */
	if (expired && out_deadline) {
		*out_deadline = packet_expire_time_mach;
	}

	return expired;
}
1691 
1692 static errno_t
feth_get_packet_notification_details(if_fake_ref fakeif,kern_packet_t ph,packet_id_t * pkt_id,uint32_t * nx_port_id)1693 feth_get_packet_notification_details(if_fake_ref fakeif, kern_packet_t ph,
1694     packet_id_t *pkt_id, uint32_t *nx_port_id)
1695 {
1696 	errno_t err = 0;
1697 
1698 	err = kern_packet_get_packetid(ph, pkt_id);
1699 	if (err != 0) {
1700 		FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
1701 		    "%s err=%d getting packetid", fakeif->iff_name, err);
1702 		return err;
1703 	}
1704 
1705 	err = kern_packet_get_tx_nexus_port_id(ph, nx_port_id);
1706 	if (err != 0) {
1707 		FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
1708 		    "%s err=%d getting nx_port_id", fakeif->iff_name, err);
1709 		return err;
1710 	}
1711 
1712 	return 0;
1713 }
1714 
1715 static packet_expiry_action_t
feth_get_effective_expn_action(if_fake_ref fakeif,kern_packet_t ph)1716 feth_get_effective_expn_action(if_fake_ref fakeif, kern_packet_t ph)
1717 {
1718 	errno_t err;
1719 	packet_expiry_action_t expiry_action;
1720 
1721 	switch (fakeif->iff_tx_exp_policy) {
1722 	case IFF_TX_EXP_POLICY_DISABLED:
1723 		expiry_action = PACKET_EXPIRY_ACTION_NONE;
1724 		break;
1725 	case IFF_TX_EXP_POLICY_NOTIFY_ONLY:
1726 		expiry_action = PACKET_EXPIRY_ACTION_NOTIFY;
1727 		break;
1728 	case IFF_TX_EXP_POLICY_DROP_AND_NOTIFY:
1729 		expiry_action = PACKET_EXPIRY_ACTION_DROP;
1730 		break;
1731 	case IFF_TX_EXP_POLICY_METADATA:
1732 		err = kern_packet_get_expiry_action(ph, &expiry_action);
1733 		if (err != 0) {
1734 			if (err != ENOENT) {
1735 				FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
1736 				    "Error %d when getting expiry action",
1737 				    err);
1738 			}
1739 			expiry_action = PACKET_EXPIRY_ACTION_NONE;
1740 		}
1741 		break;
1742 	default:
1743 		FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
1744 		    "Unrecognized value %d for \"net.link.fake.tx_exp_policy\"",
1745 		    fakeif->iff_tx_exp_policy);
1746 		expiry_action = PACKET_EXPIRY_ACTION_NONE;
1747 	}
1748 
1749 	return expiry_action;
1750 }
1751 
/* returns true if the packet is selected for expiration and should be dropped */
/*
 * feth_tx_expired_error
 * - check packet `ph' for expiry and, per the interface's effective
 *   expiry action, post a "transmit expired" channel event (dropped or
 *   not-dropped status) to the packet's TX nexus port
 * - returns true only when the action is PACKET_EXPIRY_ACTION_DROP,
 *   telling the caller to drop the packet
 */
static bool
feth_tx_expired_error(if_fake_ref fakeif, kern_packet_t ph)
{
	int err = 0;
	uint32_t nx_port_id = 0;
	os_channel_event_packet_transmit_expired_t expn = {0};
	packet_expiry_action_t expiry_action = PACKET_EXPIRY_ACTION_NONE;

	FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC, "%s", fakeif->iff_name);

	/* only expired packets get a non-NONE action */
	if (feth_packet_has_expired(fakeif, ph, &expn.packet_tx_expiration_deadline)) {
		expiry_action = feth_get_effective_expn_action(fakeif, ph);
	}

	bool drop_packet = (expiry_action == PACKET_EXPIRY_ACTION_DROP);
	if (expiry_action != PACKET_EXPIRY_ACTION_NONE) {
		/* set the expiration status code */
		expn.packet_tx_expiration_status = drop_packet ?
		    CHANNEL_EVENT_PKT_TRANSMIT_EXPIRED_ERR_EXPIRED_DROPPED :
		    CHANNEL_EVENT_PKT_TRANSMIT_EXPIRED_ERR_EXPIRED_NOT_DROPPED;

		/* Mark the expiration timestamp */
		expn.packet_tx_expiration_timestamp = mach_absolute_time();

		err = feth_get_packet_notification_details(fakeif, ph,
		    &expn.packet_id, &nx_port_id);

		if (err == 0) {
			err = kern_channel_event_transmit_expired(
				fakeif->iff_ifp, &expn, nx_port_id);
			FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
			    "%s sent expiry notification on nexus port "
			    "%u notif code %u",
			    fakeif->iff_name, nx_port_id,
			    expn.packet_tx_expiration_status);
		}
		if (err != 0) {
			/* notification failed; packet fate is unchanged */
			FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
			    "%s err=%d, nx_port_id: 0x%x",
			    fakeif->iff_name, err, nx_port_id);
		}
	}

	return drop_packet;
}
1798 
1799 /* returns true if the packet is selected for TX error & dropped */
/*
 * feth_tx_complete_error
 * - simulate a TX completion failure when the interface's packet count
 *   has reached the configured drop rate (no-op when the rate is 0 or
 *   not yet reached)
 * - sync completion mode reports the RETRY_FAILED status directly on
 *   the packet; otherwise an async transmit-status channel event is
 *   posted to the packet's TX nexus port
 * - returns true when the packet was selected for the simulated error
 *   (caller drops it), false otherwise
 */
static bool
feth_tx_complete_error(if_fake_ref fakeif, kern_packet_t ph)
{
	int err;

	if (fakeif->iff_tx_drop_rate == 0 ||
	    fakeif->iff_tx_pkts_count != fakeif->iff_tx_drop_rate) {
		return false;
	}
	/* simulate TX completion error on the packet */
	if (fakeif->iff_tx_completion_mode == IFF_TX_COMPL_MODE_SYNC) {
		err = kern_packet_set_tx_completion_status(ph,
		    CHANNEL_EVENT_PKT_TRANSMIT_STATUS_ERR_RETRY_FAILED);
		VERIFY(err == 0);
		kern_packet_tx_completion(ph, fakeif->iff_ifp);
	} else {
		uint32_t nx_port_id = 0;
		os_channel_event_packet_transmit_status_t pkt_tx_status = {0};

		pkt_tx_status.packet_status =
		    CHANNEL_EVENT_PKT_TRANSMIT_STATUS_ERR_RETRY_FAILED;
		err = feth_get_packet_notification_details(fakeif, ph,
		    &pkt_tx_status.packet_id, &nx_port_id);
		if (err == 0) {
			err = kern_channel_event_transmit_status(
				fakeif->iff_ifp, &pkt_tx_status, nx_port_id);
		}
		if (err != 0) {
			/* event could not be posted; log and continue */
			FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
			    "%s err=%d, nx_port_id: 0x%x",
			    fakeif->iff_name, err, nx_port_id);
		}
	}

	return true;
}
1836 
/*
 * Periodic interface-advisory thread call: reports a synthetic TX
 * capacity advisory to the registered kernel notify callback and
 * re-arms itself while the channel remains connected.
 */
static void
feth_if_adv(thread_call_param_t arg0, thread_call_param_t arg1)
{
#pragma unused(arg1)
	errno_t                            error;
	if_fake_ref                        fakeif = (if_fake_ref)arg0;
	struct ifnet_interface_advisory    if_adv;
	struct ifnet_stats_param           if_stat;

	/* bail out if the interface is detaching or the channel is gone */
	feth_lock();
	if (feth_is_detaching(fakeif) || !fakeif->iff_channel_connected) {
		feth_unlock();
		return;
	}
	feth_unlock();

	/*
	 * NOTE(review): iff_intf_adv_enabled is read without the feth lock
	 * here; presumably a stale read is harmless because the call is
	 * re-armed every interval — confirm.
	 */
	if (!fakeif->iff_intf_adv_enabled) {
		goto done;
	}

	error = ifnet_stat(fakeif->iff_ifp, &if_stat);
	if (error != 0) {
		FAKE_LOG(LOG_NOTICE, 0, "%s: ifnet_stat() failed %d",
		    fakeif->iff_name, error);
		goto done;
	}
	/*
	 * NOTE(review): if_adv is not zeroed; only the fields below are
	 * set, so any other members are passed as stack garbage — confirm
	 * the notify callback reads only these fields.
	 */
	if_adv.header.version = IF_INTERFACE_ADVISORY_VERSION_CURRENT;
	if_adv.header.direction = IF_INTERFACE_ADVISORY_DIRECTION_TX;
	if_adv.header.interface_type =
	    IF_INTERFACE_ADVISORY_INTERFACE_TYPE_WIFI;
	if_adv.capacity.timestamp = mach_absolute_time();
	if_adv.capacity.rate_trend_suggestion =
	    IF_INTERFACE_ADVISORY_RATE_SUGGESTION_RAMP_NEUTRAL;
	if_adv.capacity.max_bandwidth = 1000 * 1000 * 1000; /* 1Gbps */
	if_adv.capacity.total_byte_count = if_stat.packets_out;
	if_adv.capacity.average_throughput = 1000 * 1000 * 1000; /* 1Gbps */
	if_adv.capacity.flushable_queue_size = UINT32_MAX;
	if_adv.capacity.non_flushable_queue_size = UINT32_MAX;
	if_adv.capacity.average_delay = 1; /* ms */

	error = fakeif->iff_intf_adv_notify(fakeif->iff_intf_adv_kern_ctx,
	    &if_adv);
	if (error != 0) {
		FAKE_LOG(LOG_NOTICE, 0,
		    "%s: interface advisory report failed %d",
		    fakeif->iff_name, error);
	}

done:
	/* re-arm the advisory timer unless the interface went away */
	feth_lock();
	if (!feth_is_detaching(fakeif) && fakeif->iff_channel_connected) {
		uint64_t deadline;
		clock_interval_to_deadline(fakeif->iff_adv_interval,
		    NSEC_PER_MSEC, &deadline);
		thread_call_enter_delayed(fakeif->iff_if_adv_tcall, deadline);
	}
	feth_unlock();
}
1895 
1896 static int
feth_if_adv_tcall_create(if_fake_ref fakeif)1897 feth_if_adv_tcall_create(if_fake_ref fakeif)
1898 {
1899 	uint64_t deadline;
1900 
1901 	feth_lock();
1902 	ASSERT(fakeif->iff_if_adv_tcall == NULL);
1903 	ASSERT(fakeif->iff_adv_interval > 0);
1904 	ASSERT(fakeif->iff_channel_connected);
1905 	fakeif->iff_if_adv_tcall =
1906 	    thread_call_allocate_with_options(feth_if_adv,
1907 	    (thread_call_param_t)fakeif, THREAD_CALL_PRIORITY_KERNEL,
1908 	    THREAD_CALL_OPTIONS_ONCE);
1909 	if (fakeif->iff_if_adv_tcall == NULL) {
1910 		FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
1911 		    "%s if_adv tcall alloc failed",
1912 		    fakeif->iff_name);
1913 		feth_unlock();
1914 		return ENXIO;
1915 	}
1916 	/* retain for the interface advisory thread call */
1917 	feth_retain(fakeif);
1918 	clock_interval_to_deadline(fakeif->iff_adv_interval,
1919 	    NSEC_PER_MSEC, &deadline);
1920 	thread_call_enter_delayed(fakeif->iff_if_adv_tcall, deadline);
1921 	feth_unlock();
1922 	return 0;
1923 }
1924 
1925 /**
1926 ** nexus netif domain provider
1927 **/
/*
 * Domain provider init callback: the feth provider keeps no per-domain
 * state, so this is a no-op that reports success.
 */
static errno_t
feth_nxdp_init(kern_nexus_domain_provider_t domprov)
{
#pragma unused(domprov)
	return 0;
}
1934 
/*
 * Domain provider fini callback: nothing was allocated in
 * feth_nxdp_init(), so there is nothing to tear down.
 */
static void
feth_nxdp_fini(kern_nexus_domain_provider_t domprov)
{
#pragma unused(domprov)
}
1940 
/* UUID of the registered feth netif domain provider */
static uuid_t                   feth_nx_dom_prov;
1942 
1943 static errno_t
feth_register_nexus_domain_provider(void)1944 feth_register_nexus_domain_provider(void)
1945 {
1946 	const struct kern_nexus_domain_provider_init dp_init = {
1947 		.nxdpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
1948 		.nxdpi_flags = 0,
1949 		.nxdpi_init = feth_nxdp_init,
1950 		.nxdpi_fini = feth_nxdp_fini
1951 	};
1952 	errno_t                         err = 0;
1953 
1954 	nexus_domain_provider_name_t feth_provider_name = "com.apple.feth";
1955 
1956 	/* feth_nxdp_init() is called before this function returns */
1957 	err = kern_nexus_register_domain_provider(NEXUS_TYPE_NET_IF,
1958 	    feth_provider_name,
1959 	    &dp_init, sizeof(dp_init),
1960 	    &feth_nx_dom_prov);
1961 	if (err != 0) {
1962 		FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
1963 		    "failed to register domain provider");
1964 		return err;
1965 	}
1966 	return 0;
1967 }
1968 
1969 /**
1970 ** netif nexus routines
1971 **/
1972 static if_fake_ref
feth_nexus_context(kern_nexus_t nexus)1973 feth_nexus_context(kern_nexus_t nexus)
1974 {
1975 	if_fake_ref fakeif;
1976 
1977 	fakeif = (if_fake_ref)kern_nexus_get_context(nexus);
1978 	assert(fakeif != NULL);
1979 	return fakeif;
1980 }
1981 
1982 static uint8_t
feth_find_tx_ring_by_svc(kern_packet_svc_class_t svc_class)1983 feth_find_tx_ring_by_svc(kern_packet_svc_class_t svc_class)
1984 {
1985 	switch (svc_class) {
1986 	case KPKT_SC_VO:
1987 		return 0;
1988 	case KPKT_SC_VI:
1989 		return 1;
1990 	case KPKT_SC_BE:
1991 		return 2;
1992 	case KPKT_SC_BK:
1993 		return 3;
1994 	default:
1995 		VERIFY(0);
1996 		return 0;
1997 	}
1998 }
1999 
/*
 * Ring init callback: record the new TX or RX ring in the fakeif so
 * the doorbell/sync paths can find it. In WMM mode the TX ring slot
 * is chosen by the ring's service class; otherwise slot 0 is used.
 */
static errno_t
feth_nx_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_t channel, kern_channel_ring_t ring, boolean_t is_tx_ring,
    void **ring_ctx)
{
	if_fake_ref     fakeif;
	int             err;
#pragma unused(nxprov, channel, ring_ctx)
	feth_lock();
	fakeif = feth_nexus_context(nexus);
	if (feth_is_detaching(fakeif)) {
		/* interface going away: silently succeed without recording */
		feth_unlock();
		return 0;
	}
	if (is_tx_ring) {
		if (feth_in_wmm_mode(fakeif)) {
			kern_packet_svc_class_t svc_class;
			uint8_t ring_idx;

			/* pick the TX slot matching the ring's service class */
			err = kern_channel_get_service_class(ring, &svc_class);
			VERIFY(err == 0);
			ring_idx = feth_find_tx_ring_by_svc(svc_class);
			VERIFY(ring_idx < IFF_NUM_TX_RINGS_WMM_MODE);
			VERIFY(fakeif->iff_tx_ring[ring_idx] == NULL);
			fakeif->iff_tx_ring[ring_idx] = ring;
		} else {
			VERIFY(fakeif->iff_tx_ring[0] == NULL);
			fakeif->iff_tx_ring[0] = ring;
		}
	} else {
		VERIFY(fakeif->iff_rx_ring[0] == NULL);
		fakeif->iff_rx_ring[0] = ring;
	}
	/* cache the netif stats pointer for the STATS_INC macros */
	fakeif->iff_nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
	feth_unlock();
	FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE, "%s: %s ring init",
	    fakeif->iff_name, is_tx_ring ? "TX" : "RX");
	return 0;
}
2039 
/*
 * Ring fini callback: forget the ring, and when the last TX ring goes
 * away, take ownership of the async doorbell thread call, cancel it
 * (waiting for an in-flight invocation if needed), free it, and drop
 * the reference that was taken when it was allocated.
 */
static void
feth_nx_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t ring)
{
#pragma unused(nxprov, ring)
	if_fake_ref       fakeif;
	thread_call_t   __single tcall = NULL;

	feth_lock();
	fakeif = feth_nexus_context(nexus);
	if (fakeif->iff_rx_ring[0] == ring) {
		/* RX ring: nothing else to tear down */
		fakeif->iff_rx_ring[0] = NULL;
		FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
		    "%s: RX ring fini", fakeif->iff_name);
	} else if (feth_in_wmm_mode(fakeif)) {
		int i;
		/* clear the matching TX slot */
		for (i = 0; i < IFF_MAX_TX_RINGS; i++) {
			if (fakeif->iff_tx_ring[i] == ring) {
				fakeif->iff_tx_ring[i] = NULL;
				break;
			}
		}
		/* scan for any TX ring still attached */
		for (i = 0; i < IFF_MAX_TX_RINGS; i++) {
			if (fakeif->iff_tx_ring[i] != NULL) {
				break;
			}
		}
		if (i == IFF_MAX_TX_RINGS) {
			/* last TX ring gone: claim the doorbell tcall */
			tcall = fakeif->iff_doorbell_tcall;
			fakeif->iff_doorbell_tcall = NULL;
		}
		FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
		    "%s: TX ring fini", fakeif->iff_name);
	} else if (fakeif->iff_tx_ring[0] == ring) {
		/* single-ring mode: detach ring and doorbell tcall together */
		tcall = fakeif->iff_doorbell_tcall;
		fakeif->iff_doorbell_tcall = NULL;
		fakeif->iff_tx_ring[0] = NULL;
	}
	fakeif->iff_nifs = NULL;
	feth_unlock();
	if (tcall != NULL) {
		boolean_t       success;

		/* cancel outside the lock: the tcall itself takes feth_lock */
		success = thread_call_cancel_wait(tcall);
		FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
		    "%s: thread_call_cancel %s", fakeif->iff_name,
		    success ? "SUCCESS" : "FAILURE");
		if (!success) {
			/*
			 * The call is currently executing; wait for it to
			 * clear iff_doorbell_tcall_active and wake us.
			 */
			feth_lock();
			if (fakeif->iff_doorbell_tcall_active) {
				fakeif->iff_waiting_for_tcall = TRUE;
				FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
				    "%s: *waiting for threadcall",
				    fakeif->iff_name);
				do {
					msleep(fakeif, &feth_lck_mtx,
					    PZERO, "feth threadcall", 0);
				} while (fakeif->iff_doorbell_tcall_active);
				FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
				    "%s: ^threadcall done",
				    fakeif->iff_name);
				fakeif->iff_waiting_for_tcall = FALSE;
			}
			feth_unlock();
		}
		success = thread_call_free(tcall);
		FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
		    "%s: thread_call_free %s",
		    fakeif->iff_name,
		    success ? "SUCCESS" : "FAILURE");
		/* drop the reference taken when the tcall was allocated */
		feth_release(fakeif);
		VERIFY(success == TRUE);
	}
}
2114 
/*
 * Channel pre-connect callback: feth has nothing to validate or
 * allocate per-channel, so every connection request is accepted.
 */
static errno_t
feth_nx_pre_connect(kern_nexus_provider_t nxprov,
    proc_t proc, kern_nexus_t nexus, nexus_port_t port, kern_channel_t channel,
    void **channel_context)
{
#pragma unused(nxprov, proc, nexus, port, channel, channel_context)
	return 0;
}
2123 
/*
 * Channel connected callback: mark the channel up, take a reference
 * for it (released in feth_nx_pre_disconnect), and start the periodic
 * interface-advisory thread call if advisory is configured.
 * Returns EBUSY if the interface is detaching.
 */
static errno_t
feth_nx_connected(kern_nexus_provider_t nxprov,
    kern_nexus_t nexus, kern_channel_t channel)
{
#pragma unused(nxprov, channel)
	int err;
	if_fake_ref fakeif;

	fakeif = feth_nexus_context(nexus);
	feth_lock();
	if (feth_is_detaching(fakeif)) {
		feth_unlock();
		return EBUSY;
	}
	/* reference held on behalf of the connected channel */
	feth_retain(fakeif);
	fakeif->iff_channel_connected = TRUE;
	feth_unlock();
	if (feth_has_intf_advisory_configured(fakeif)) {
		err = feth_if_adv_tcall_create(fakeif);
		if (err != 0) {
			/*
			 * NOTE(review): on this error path the channel is
			 * left marked connected with its reference held;
			 * presumably pre-disconnect still runs and cleans
			 * up — confirm.
			 */
			return err;
		}
	}
	FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE, "%s: connected channel %p",
	    fakeif->iff_name, channel);
	return 0;
}
2151 
/*
 * Channel pre-disconnect callback: bring the interface down, mark the
 * channel disconnected, stop and free the interface-advisory thread
 * call, and drop the references taken for the tcall and the channel.
 */
static void
feth_nx_pre_disconnect(kern_nexus_provider_t nxprov,
    kern_nexus_t nexus, kern_channel_t channel)
{
#pragma unused(nxprov, channel)
	if_fake_ref fakeif;
	thread_call_t __single tcall;
	boolean_t connected;

	fakeif = feth_nexus_context(nexus);
	FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
	    "%s: pre-disconnect channel %p",
	    fakeif->iff_name, channel);
	/* Quiesce the interface and flush any pending outbound packets. */
	if_down(fakeif->iff_ifp);
	feth_lock();
	connected = fakeif->iff_channel_connected;
	fakeif->iff_channel_connected = FALSE;
	/* claim the advisory tcall so no one else can re-arm it */
	tcall = fakeif->iff_if_adv_tcall;
	fakeif->iff_if_adv_tcall = NULL;
	feth_unlock();
	if (tcall != NULL) {
		(void) thread_call_cancel_wait(tcall);
		if (!thread_call_free(tcall)) {
			/*
			 * Free failed (the tcall re-armed itself in the
			 * window before we cancelled): cancel again and
			 * the free must now succeed since iff_if_adv_tcall
			 * is NULL and it cannot be re-entered.
			 */
			boolean_t freed;
			(void) thread_call_cancel_wait(tcall);
			freed = thread_call_free(tcall);
			VERIFY(freed);
		}
		/* release for the interface advisory thread call */
		feth_release(fakeif);
	}
	if (connected) {
		/* release the reference taken in feth_nx_connected() */
		feth_release(fakeif);
	}
}
2188 
2189 static void
feth_nx_disconnected(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel)2190 feth_nx_disconnected(kern_nexus_provider_t nxprov,
2191     kern_nexus_t nexus, kern_channel_t channel)
2192 {
2193 #pragma unused(nxprov, channel)
2194 	if_fake_ref fakeif;
2195 
2196 	fakeif = feth_nexus_context(nexus);
2197 	FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE, "%s: disconnected channel %p",
2198 	    fakeif->iff_name, channel);
2199 }
2200 
/*
 * Slot init callback: feth keeps no per-slot state or properties,
 * so this is a no-op that reports success.
 */
static errno_t
feth_nx_slot_init(kern_nexus_provider_t nxprov,
    kern_nexus_t nexus, kern_channel_ring_t ring, kern_channel_slot_t slot,
    uint32_t slot_index, struct kern_slot_prop **slot_prop_addr,
    void **slot_context)
{
#pragma unused(nxprov, nexus, ring, slot, slot_index, slot_prop_addr, slot_context)
	return 0;
}
2210 
/*
 * Slot fini callback: nothing was set up in feth_nx_slot_init(),
 * so there is nothing to tear down.
 */
static void
feth_nx_slot_fini(kern_nexus_provider_t nxprov,
    kern_nexus_t nexus, kern_channel_ring_t ring, kern_channel_slot_t slot,
    uint32_t slot_index)
{
#pragma unused(nxprov, nexus, ring, slot, slot_index)
}
2218 
/*
 * TX sync callback: drain the TX ring, tap each packet to bpf,
 * optionally simulate expiry/completion errors, and hand surviving
 * packets to the peer interface's RX path in batches of up to
 * IFF_MAX_BATCH_SIZE. The feth lock is held across the entire drain.
 */
static errno_t
feth_nx_sync_tx(kern_nexus_provider_t nxprov,
    kern_nexus_t nexus, kern_channel_ring_t tx_ring, uint32_t flags)
{
#pragma unused(nxprov)
	if_fake_ref             fakeif;
	ifnet_t                 ifp;
	kern_channel_slot_t     last_tx_slot = NULL;
	ifnet_t                 peer_ifp;
	if_fake_ref             peer_fakeif = NULL;
	struct kern_channel_ring_stat_increment stats;
	kern_channel_slot_t     tx_slot;
	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
	kern_packet_t           pkts[IFF_MAX_BATCH_SIZE];
	uint32_t                n_pkts = 0;

	memset(&stats, 0, sizeof(stats));

	STATS_INC(nifs, NETIF_STATS_TX_SYNC);
	fakeif = feth_nexus_context(nexus);
	FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
	    "%s ring %d flags 0x%x", fakeif->iff_name,
	    tx_ring->ckr_ring_id, flags);
	(void)flags;
	feth_lock();
	if (feth_is_detaching(fakeif) || !fakeif->iff_channel_connected) {
		feth_unlock();
		return 0;
	}
	ifp = fakeif->iff_ifp;
	/* resolve the crossed-over peer; without one there is nowhere to send */
	peer_ifp = fakeif->iff_peer;
	if (peer_ifp != NULL) {
		peer_fakeif = ifnet_get_if_fake(peer_ifp);
		if (peer_fakeif != NULL) {
			if (feth_is_detaching(peer_fakeif)) {
				FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
				    "%s peer fakeif %s is detaching",
				    fakeif->iff_name, peer_fakeif->iff_name);
				goto done;
			}
			if (!peer_fakeif->iff_channel_connected) {
				/*
				 * Peer channel down: only proceed when TX
				 * expiry is enabled, so expiry notifications
				 * can still be generated for these packets.
				 */
				if (fakeif->iff_tx_exp_policy ==
				    IFF_TX_EXP_POLICY_DISABLED) {
					FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
					    "%s peer fakeif %s channel not connected, expn: %d",
					    fakeif->iff_name, peer_fakeif->iff_name,
					    fakeif->iff_tx_exp_policy);
					goto done;
				}
			}
		} else {
			FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
			    "%s no peer fakeif (peer %p)",
			    fakeif->iff_name, peer_ifp);
			goto done;
		}
	} else {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
		    "%s no peer", fakeif->iff_name);
		goto done;
	}
	/* walk every occupied slot in the TX ring */
	tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
	while (tx_slot != NULL) {
		uint16_t off;
		kern_packet_t sph;

		/* detach the packet from the TX ring */
		sph = kern_channel_slot_get_packet(tx_ring, tx_slot);
		VERIFY(sph != 0);
		kern_channel_slot_detach_packet(tx_ring, tx_slot, sph);

		/* bpf tap output */
		off = kern_packet_get_headroom(sph);
		VERIFY(off >= fakeif->iff_tx_headroom);
		kern_packet_set_link_header_length(sph, ETHER_HDR_LEN);
		feth_packet_set_trace_tag(sph, IFF_TT_OUTPUT);
		bpf_tap_packet_out(ifp, DLT_EN10MB, sph, NULL, 0);

		/* drop packets, if requested */
		fakeif->iff_tx_pkts_count++;
		if (feth_tx_expired_error(fakeif, sph) ||
		    feth_tx_complete_error(fakeif, sph) ||
		    !peer_fakeif->iff_channel_connected) {
			/* counter resets so the next drop is one full rate away */
			fakeif->iff_tx_pkts_count = 0;
			kern_pbufpool_free(fakeif->iff_tx_pp, sph);
			STATS_INC(nifs, NETIF_STATS_DROP);
			goto next_tx_slot;
		}

		ASSERT(sph != 0);
		STATS_INC(nifs, NETIF_STATS_TX_COPY_DIRECT);
		STATS_INC(nifs, NETIF_STATS_TX_PACKETS);

		stats.kcrsi_slots_transferred++;
		stats.kcrsi_bytes_transferred
		        += kern_packet_get_data_length(sph);

		/* prepare batch for receiver */
		pkts[n_pkts++] = sph;
		if (n_pkts == IFF_MAX_BATCH_SIZE) {
			/* full batch: hand to the peer and complete TX */
			feth_rx_submit(fakeif, peer_fakeif, pkts, n_pkts);
			feth_tx_complete(fakeif, pkts, n_pkts);
			n_pkts = 0;
		}

next_tx_slot:
		last_tx_slot = tx_slot;
		tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
	}

	/* catch last batch for receiver */
	if (n_pkts != 0) {
		feth_rx_submit(fakeif, peer_fakeif, pkts, n_pkts);
		feth_tx_complete(fakeif, pkts, n_pkts);
		n_pkts = 0;
	}

	if (last_tx_slot != NULL) {
		/* reclaim the consumed slots and account ring statistics */
		kern_channel_advance_slot(tx_ring, last_tx_slot);
		kern_channel_increment_ring_net_stats(tx_ring, ifp, &stats);
	}
done:
	feth_unlock();
	return 0;
}
2344 
2345 static errno_t
feth_nx_sync_rx(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring,uint32_t flags)2346 feth_nx_sync_rx(kern_nexus_provider_t nxprov,
2347     kern_nexus_t nexus, kern_channel_ring_t ring, uint32_t flags)
2348 {
2349 #pragma unused(nxprov, ring, flags)
2350 	if_fake_ref           fakeif;
2351 	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
2352 
2353 	STATS_INC(nifs, NETIF_STATS_RX_SYNC);
2354 	fakeif = feth_nexus_context(nexus);
2355 	FAKE_LOG(LOG_DEBUG, FE_DBGF_INPUT, "%s", fakeif->iff_name);
2356 	return 0;
2357 }
2358 
2359 static errno_t
feth_nx_tx_dequeue_driver_managed(if_fake_ref fakeif,boolean_t doorbell_ctxt)2360 feth_nx_tx_dequeue_driver_managed(if_fake_ref fakeif, boolean_t doorbell_ctxt)
2361 {
2362 	int i;
2363 	errno_t error = 0;
2364 	boolean_t more;
2365 
2366 	for (i = 0; i < IFF_NUM_TX_RINGS_WMM_MODE; i++) {
2367 		kern_channel_ring_t ring = fakeif->iff_tx_ring[i];
2368 		if (ring != NULL) {
2369 			error = kern_channel_tx_refill(ring, UINT32_MAX,
2370 			    UINT32_MAX, doorbell_ctxt, &more);
2371 		}
2372 		if (error != 0) {
2373 			FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
2374 			    "%s: TX refill ring %d (%s) %d",
2375 			    fakeif->iff_name, ring->ckr_ring_id,
2376 			    doorbell_ctxt ? "sync" : "async", error);
2377 			if (!((error == EAGAIN) || (error == EBUSY))) {
2378 				break;
2379 			}
2380 		} else {
2381 			FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
2382 			    "%s: TX refilled ring %d (%s)",
2383 			    fakeif->iff_name, ring->ckr_ring_id,
2384 			    doorbell_ctxt ? "sync" : "async");
2385 		}
2386 	}
2387 	return error;
2388 }
2389 
/*
 * Async doorbell thread call: perform the deferred TX refill, then
 * clear iff_doorbell_tcall_active and wake anyone waiting in
 * feth_nx_ring_fini() for this call to finish.
 */
static void
feth_async_doorbell(thread_call_param_t arg0, thread_call_param_t arg1)
{
#pragma unused(arg1)
	errno_t                 error;
	if_fake_ref             fakeif = (if_fake_ref)arg0;
	kern_channel_ring_t     ring;
	boolean_t               more;

	feth_lock();
	ring = fakeif->iff_tx_ring[0];
	if (feth_is_detaching(fakeif) ||
	    !fakeif->iff_channel_connected ||
	    ring == NULL) {
		/*
		 * Lock is intentionally still held here: the done label
		 * runs under feth_lock on both paths.
		 */
		goto done;
	}
	/* mark active so ring fini knows to wait for us */
	fakeif->iff_doorbell_tcall_active = TRUE;
	feth_unlock();
	if (feth_in_wmm_mode(fakeif)) {
		error = feth_nx_tx_dequeue_driver_managed(fakeif, FALSE);
	} else {
		error = kern_channel_tx_refill(ring, UINT32_MAX,
		    UINT32_MAX, FALSE, &more);
	}
	if (error != 0) {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT, "%s: TX refill failed %d",
		    fakeif->iff_name, error);
	} else {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT, "%s: TX refilled",
		    fakeif->iff_name);
	}

	feth_lock();
done:
	fakeif->iff_doorbell_tcall_active = FALSE;
	if (fakeif->iff_waiting_for_tcall) {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
		    "%s: threadcall waking up waiter", fakeif->iff_name);
		wakeup((caddr_t)fakeif);
	}
	feth_unlock();
}
2432 
2433 static void
feth_schedule_async_doorbell(if_fake_ref fakeif)2434 feth_schedule_async_doorbell(if_fake_ref fakeif)
2435 {
2436 	thread_call_t  __single tcall;
2437 
2438 	feth_lock();
2439 	if (feth_is_detaching(fakeif) || !fakeif->iff_channel_connected) {
2440 		feth_unlock();
2441 		return;
2442 	}
2443 	tcall = fakeif->iff_doorbell_tcall;
2444 	if (tcall != NULL) {
2445 		thread_call_enter(tcall);
2446 	} else {
2447 		tcall = thread_call_allocate_with_options(feth_async_doorbell,
2448 		    (thread_call_param_t)fakeif,
2449 		    THREAD_CALL_PRIORITY_KERNEL,
2450 		    THREAD_CALL_OPTIONS_ONCE);
2451 		if (tcall == NULL) {
2452 			FAKE_LOG(LOG_NOTICE, FE_DBGF_OUTPUT,
2453 			    "%s tcall alloc failed", fakeif->iff_name);
2454 		} else {
2455 			fakeif->iff_doorbell_tcall = tcall;
2456 			feth_retain(fakeif);
2457 			thread_call_enter(tcall);
2458 		}
2459 	}
2460 	feth_unlock();
2461 }
2462 
2463 static errno_t
feth_nx_tx_doorbell(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring,uint32_t flags)2464 feth_nx_tx_doorbell(kern_nexus_provider_t nxprov,
2465     kern_nexus_t nexus, kern_channel_ring_t ring, uint32_t flags)
2466 {
2467 #pragma unused(nxprov, ring, flags)
2468 	errno_t         error;
2469 	if_fake_ref     fakeif;
2470 
2471 	fakeif = feth_nexus_context(nexus);
2472 	FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT, "%s", fakeif->iff_name);
2473 
2474 	if ((flags & KERN_NEXUS_TXDOORBELLF_ASYNC_REFILL) == 0) {
2475 		boolean_t       more;
2476 		/* synchronous tx refill */
2477 		if (feth_in_wmm_mode(fakeif)) {
2478 			error = feth_nx_tx_dequeue_driver_managed(fakeif, TRUE);
2479 		} else {
2480 			error = kern_channel_tx_refill(ring, UINT32_MAX,
2481 			    UINT32_MAX, TRUE, &more);
2482 		}
2483 		if (error != 0) {
2484 			FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
2485 			    "%s: TX refill (sync) %d", fakeif->iff_name, error);
2486 		} else {
2487 			FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
2488 			    "%s: TX refilled (sync)", fakeif->iff_name);
2489 		}
2490 	} else {
2491 		FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
2492 		    "%s: schedule async refill", fakeif->iff_name);
2493 		feth_schedule_async_doorbell(fakeif);
2494 	}
2495 	return 0;
2496 }
2497 
2498 static errno_t
feth_netif_prepare(kern_nexus_t nexus,ifnet_t ifp)2499 feth_netif_prepare(kern_nexus_t nexus, ifnet_t ifp)
2500 {
2501 	if_fake_ref fakeif;
2502 
2503 	fakeif = (if_fake_ref)kern_nexus_get_context(nexus);
2504 	feth_ifnet_set_attrs(fakeif, ifp);
2505 	return 0;
2506 }
2507 
2508 static errno_t
feth_nx_intf_adv_config(void * prov_ctx,bool enable)2509 feth_nx_intf_adv_config(void *prov_ctx, bool enable)
2510 {
2511 	if_fake_ref fakeif = prov_ctx;
2512 
2513 	feth_lock();
2514 	fakeif->iff_intf_adv_enabled = enable;
2515 	feth_unlock();
2516 	FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
2517 	    "%s enable %d", fakeif->iff_name, enable);
2518 	return 0;
2519 }
2520 
2521 static errno_t
fill_capab_interface_advisory(if_fake_ref fakeif,void * contents,uint32_t * len)2522 fill_capab_interface_advisory(if_fake_ref fakeif, void *contents, uint32_t *len)
2523 {
2524 	struct kern_nexus_capab_interface_advisory * __single capab = contents;
2525 
2526 	if (*len != sizeof(*capab)) {
2527 		return EINVAL;
2528 	}
2529 	if (capab->kncia_version !=
2530 	    KERN_NEXUS_CAPAB_INTERFACE_ADVISORY_VERSION_1) {
2531 		return EINVAL;
2532 	}
2533 	if (!feth_has_intf_advisory_configured(fakeif)) {
2534 		return ENOTSUP;
2535 	}
2536 	VERIFY(capab->kncia_notify != NULL);
2537 	fakeif->iff_intf_adv_kern_ctx = capab->kncia_kern_context;
2538 	fakeif->iff_intf_adv_notify = capab->kncia_notify;
2539 	capab->kncia_provider_context = fakeif;
2540 	capab->kncia_config = feth_nx_intf_adv_config;
2541 	return 0;
2542 }
2543 
2544 static errno_t
feth_notify_steering_info(void * prov_ctx,void * qset_ctx,struct ifnet_traffic_descriptor_common * td,bool add)2545 feth_notify_steering_info(void *prov_ctx, void *qset_ctx,
2546     struct ifnet_traffic_descriptor_common *td, bool add)
2547 {
2548 #pragma unused(td)
2549 	if_fake_ref fakeif = prov_ctx;
2550 	fake_qset * __single qset = qset_ctx;
2551 
2552 	FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
2553 	    "%s: notify_steering_info: qset_id 0x%llx, %s",
2554 	    fakeif->iff_name, qset->fqs_id, add ? "add" : "remove");
2555 	return 0;
2556 }
2557 
2558 static errno_t
fill_capab_qset_extensions(if_fake_ref fakeif,void * contents,uint32_t * len)2559 fill_capab_qset_extensions(if_fake_ref fakeif, void *contents, uint32_t *len)
2560 {
2561 	struct kern_nexus_capab_qset_extensions * __single capab = contents;
2562 
2563 	if (*len != sizeof(*capab)) {
2564 		return EINVAL;
2565 	}
2566 	if (capab->cqe_version !=
2567 	    KERN_NEXUS_CAPAB_QSET_EXTENSIONS_VERSION_1) {
2568 		return EINVAL;
2569 	}
2570 	capab->cqe_prov_ctx = fakeif;
2571 	capab->cqe_notify_steering_info = feth_notify_steering_info;
2572 	return 0;
2573 }
2574 
2575 static errno_t
feth_nx_rx_flow_steering_config(void * prov_ctx,uint32_t id,struct ifnet_traffic_descriptor_common * td,uint32_t action)2576 feth_nx_rx_flow_steering_config(void *prov_ctx, uint32_t id,
2577     struct ifnet_traffic_descriptor_common *td, uint32_t action)
2578 {
2579 #pragma unused(td)
2580 	if_fake_ref fakeif = prov_ctx;
2581 
2582 	FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
2583 	    "%s: nx_rx_flow_steering_config: id 0x%x, action %u",
2584 	    fakeif->iff_name, id, action);
2585 	return 0;
2586 }
2587 
2588 static errno_t
fill_capab_rx_flow_steering(if_fake_ref fakeif,void * contents,uint32_t * len)2589 fill_capab_rx_flow_steering(if_fake_ref fakeif, void *contents, uint32_t *len)
2590 {
2591 	struct kern_nexus_capab_rx_flow_steering * __single capab = contents;
2592 
2593 	if (*len != sizeof(*capab)) {
2594 		return EINVAL;
2595 	}
2596 	if (capab->kncrxfs_version !=
2597 	    KERN_NEXUS_CAPAB_RX_FLOW_STEERING_VERSION_1) {
2598 		return EINVAL;
2599 	}
2600 
2601 	capab->kncrxfs_prov_ctx = fakeif;
2602 	capab->kncrxfs_config = feth_nx_rx_flow_steering_config;
2603 	return 0;
2604 }
2605 
2606 static errno_t
feth_nx_capab_config(kern_nexus_provider_t nxprov,kern_nexus_t nx,kern_nexus_capab_t capab,void * contents,uint32_t * len)2607 feth_nx_capab_config(kern_nexus_provider_t nxprov, kern_nexus_t nx,
2608     kern_nexus_capab_t capab, void *contents, uint32_t *len)
2609 {
2610 #pragma unused(nxprov)
2611 	errno_t error;
2612 	if_fake_ref fakeif;
2613 
2614 	fakeif = feth_nexus_context(nx);
2615 	FAKE_LOG(LOG_DEBUG, FE_DBGF_CONTROL, "%s", fakeif->iff_name);
2616 
2617 	switch (capab) {
2618 	case KERN_NEXUS_CAPAB_INTERFACE_ADVISORY:
2619 		error = fill_capab_interface_advisory(fakeif, contents, len);
2620 		break;
2621 	case KERN_NEXUS_CAPAB_QSET_EXTENSIONS:
2622 		error = fill_capab_qset_extensions(fakeif, contents, len);
2623 		break;
2624 	case KERN_NEXUS_CAPAB_RX_FLOW_STEERING:
2625 		error = fill_capab_rx_flow_steering(fakeif, contents, len);
2626 		break;
2627 	default:
2628 		error = ENOTSUP;
2629 		break;
2630 	}
2631 	return error;
2632 }
2633 
2634 static int
feth_set_tso_mtu(ifnet_t ifp,uint32_t tso_v4_mtu,uint32_t tso_v6_mtu)2635 feth_set_tso_mtu(ifnet_t ifp, uint32_t tso_v4_mtu, uint32_t tso_v6_mtu)
2636 {
2637 	int     error;
2638 
2639 	error = ifnet_set_tso_mtu(ifp, AF_INET, tso_v4_mtu);
2640 	if (error != 0) {
2641 		FAKE_LOG(LOG_NOTICE, FE_DBGF_CONTROL,
2642 		    "set TSO MTU IPv4 failed on %s, err %d",
2643 		    if_name(ifp), error);
2644 		return error;
2645 	}
2646 	error = ifnet_set_tso_mtu(ifp, AF_INET6, tso_v6_mtu);
2647 	if (error != 0) {
2648 		FAKE_LOG(LOG_NOTICE, FE_DBGF_CONTROL,
2649 		    "set TSO MTU IPv6 failed on %s, err %d",
2650 		    if_name(ifp), error);
2651 		return error;
2652 	}
2653 	return 0;
2654 }
2655 
2656 static int
feth_set_tso_offload(ifnet_t ifp)2657 feth_set_tso_offload(ifnet_t ifp)
2658 {
2659 	ifnet_offload_t offload;
2660 	int error;
2661 
2662 	offload = IFNET_TSO_IPV4 | IFNET_TSO_IPV6;
2663 	error = ifnet_set_offload(ifp, offload);
2664 	if (error != 0) {
2665 		FAKE_LOG(LOG_NOTICE, FE_DBGF_CONTROL,
2666 		    "set TSO offload failed on %s, err %d",
2667 		    if_name(ifp), error);
2668 		goto done;
2669 	}
2670 	error = feth_set_tso_mtu(ifp, if_fake_tso_buffer_size,
2671 	    if_fake_tso_buffer_size);
2672 done:
2673 	return error;
2674 }
2675 
/*
 * Register a netif nexus provider named after the interface and
 * allocate a net provider instance (creating the ifnet) for fakeif.
 * On success *provider and *instance hold the new UUIDs and *ifp the
 * new interface; on failure any partially registered provider is
 * deregistered. Returns 0 or an errno.
 */
static errno_t
create_netif_provider_and_instance(if_fake_ref fakeif,
    struct ifnet_init_eparams * init_params, ifnet_t *ifp,
    uuid_t * provider, uuid_t * instance)
{
	errno_t                 err;
	nexus_controller_t      controller = kern_nexus_shared_controller();
	struct kern_nexus_net_init net_init;
	nexus_name_t            provider_name;
	nexus_attr_t            __single nexus_attr = NULL;
	struct kern_nexus_provider_init prov_init = {
		.nxpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
		.nxpi_flags = NXPIF_VIRTUAL_DEVICE,
		.nxpi_pre_connect = feth_nx_pre_connect,
		.nxpi_connected = feth_nx_connected,
		.nxpi_pre_disconnect = feth_nx_pre_disconnect,
		.nxpi_disconnected = feth_nx_disconnected,
		.nxpi_ring_init = feth_nx_ring_init,
		.nxpi_ring_fini = feth_nx_ring_fini,
		.nxpi_slot_init = feth_nx_slot_init,
		.nxpi_slot_fini = feth_nx_slot_fini,
		.nxpi_sync_tx = feth_nx_sync_tx,
		.nxpi_sync_rx = feth_nx_sync_rx,
		.nxpi_tx_doorbell = feth_nx_tx_doorbell,
		.nxpi_config_capab = feth_nx_capab_config,
	};

	static_assert(IFF_MAX_RX_RINGS == 1);
	err = kern_nexus_attr_create(&nexus_attr);
	if (err != 0) {
		FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
		    "nexus attribute creation failed, error %d", err);
		goto failed;
	}
	if (feth_in_wmm_mode(fakeif)) {
		/* four TX rings with WMM (AC) queue mapping */
		err = kern_nexus_attr_set(nexus_attr, NEXUS_ATTR_TX_RINGS,
		    IFF_NUM_TX_RINGS_WMM_MODE);
		VERIFY(err == 0);
		err = kern_nexus_attr_set(nexus_attr, NEXUS_ATTR_RX_RINGS,
		    IFF_NUM_RX_RINGS_WMM_MODE);
		VERIFY(err == 0);
		err = kern_nexus_attr_set(nexus_attr, NEXUS_ATTR_QMAP,
		    NEXUS_QMAP_TYPE_WMM);
		VERIFY(err == 0);
	}

	err = kern_nexus_attr_set(nexus_attr, NEXUS_ATTR_ANONYMOUS, 1);
	VERIFY(err == 0);
	snprintf((char *)provider_name, sizeof(provider_name),
	    "com.apple.netif.%s", fakeif->iff_name);
	err = kern_nexus_controller_register_provider(controller,
	    feth_nx_dom_prov,
	    provider_name,
	    &prov_init,
	    sizeof(prov_init),
	    nexus_attr,
	    provider);
	if (err != 0) {
		FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
		    "register provider failed, error %d", err);
		goto failed;
	}
	bzero(&net_init, sizeof(net_init));
	net_init.nxneti_version = KERN_NEXUS_NET_CURRENT_VERSION;
	net_init.nxneti_flags = 0;
	net_init.nxneti_eparams = init_params;
	net_init.nxneti_lladdr = NULL;
	net_init.nxneti_prepare = feth_netif_prepare;
	net_init.nxneti_rx_pbufpool = fakeif->iff_rx_pp;
	net_init.nxneti_tx_pbufpool = fakeif->iff_tx_pp;
	err = kern_nexus_controller_alloc_net_provider_instance(controller,
	    *provider,
	    fakeif,
	    NULL,
	    instance,
	    &net_init,
	    ifp);
	if (err != 0) {
		/* unwind the provider registration */
		FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
		    "alloc_net_provider_instance failed, %d", err);
		kern_nexus_controller_deregister_provider(controller,
		    *provider);
		uuid_clear(*provider);
		goto failed;
	}
	if (feth_supports_tso(fakeif)) {
		if ((err = feth_set_tso_offload(*ifp)) != 0) {
			goto failed;
		}
	}

	/* success also falls through: the label is shared cleanup */
failed:
	if (nexus_attr != NULL) {
		kern_nexus_attr_destroy(nexus_attr);
	}
	return err;
}
2773 
/*
 * The nif_stats pointer is reference-counted because we don't want it
 * set to NULL until the last llink is removed.
 */
2778 static void
get_nexus_stats(if_fake_ref fakeif,kern_nexus_t nexus)2779 get_nexus_stats(if_fake_ref fakeif, kern_nexus_t nexus)
2780 {
2781 	if (++fakeif->iff_nifs_ref == 1) {
2782 		ASSERT(fakeif->iff_nifs == NULL);
2783 		fakeif->iff_nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
2784 	}
2785 }
2786 
2787 static void
clear_nexus_stats(if_fake_ref fakeif)2788 clear_nexus_stats(if_fake_ref fakeif)
2789 {
2790 	if (--fakeif->iff_nifs_ref == 0) {
2791 		ASSERT(fakeif->iff_nifs != NULL);
2792 		fakeif->iff_nifs = NULL;
2793 	}
2794 }
2795 
/*
 * Nexus callback: initialize the qset at qset_idx of the llink whose
 * driver context (fake_llink) is llink_ctx.  Records the kernel qset
 * handle and id into the matching fake_qset slot and returns that slot
 * through *qset_ctx.  Returns ENXIO while detaching, EINVAL for an
 * out-of-range qset_idx.
 */
static errno_t
feth_nx_qset_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    void *llink_ctx, uint8_t qset_idx, uint64_t qset_id, kern_netif_qset_t qset,
    void **qset_ctx)
{
#pragma unused(nxprov)
	if_fake_ref fakeif;
	fake_llink * __single fl = llink_ctx;
	fake_qset *fqs;

	feth_lock();
	fakeif = feth_nexus_context(nexus);
	if (feth_is_detaching(fakeif)) {
		/* interface is tearing down; refuse new qset setup */
		feth_unlock();
		FAKE_LOG(LOG_DEBUG, FE_DBGF_CONTROL,
		    "%s: detaching", fakeif->iff_name);
		return ENXIO;
	}
	if (qset_idx >= fl->fl_qset_cnt) {
		feth_unlock();
		FAKE_LOG(LOG_DEBUG, FE_DBGF_CONTROL,
		    "%s: invalid qset_idx %d", fakeif->iff_name, qset_idx);
		return EINVAL;
	}
	fqs = &fl->fl_qset[qset_idx];
	/* the slot must not already be bound to a kernel qset */
	ASSERT(fqs->fqs_qset == NULL);
	fqs->fqs_qset = qset;
	fqs->fqs_id = qset_id;
	*qset_ctx = fqs;

	/* XXX This should really be done during registration */
	get_nexus_stats(fakeif, nexus);
	feth_unlock();
	return 0;
}
2831 
2832 static void
feth_nx_qset_fini(kern_nexus_provider_t nxprov,kern_nexus_t nexus,void * qset_ctx)2833 feth_nx_qset_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
2834     void *qset_ctx)
2835 {
2836 #pragma unused(nxprov)
2837 	if_fake_ref fakeif;
2838 	fake_qset * __single fqs = qset_ctx;
2839 
2840 	feth_lock();
2841 	fakeif = feth_nexus_context(nexus);
2842 	clear_nexus_stats(fakeif);
2843 	ASSERT(fqs->fqs_qset != NULL);
2844 	fqs->fqs_qset = NULL;
2845 	fqs->fqs_id = 0;
2846 	feth_unlock();
2847 }
2848 
2849 static errno_t
feth_nx_queue_init(kern_nexus_provider_t nxprov,kern_nexus_t nexus,void * qset_ctx,uint8_t qidx,bool tx,kern_netif_queue_t queue,void ** queue_ctx)2850 feth_nx_queue_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
2851     void *qset_ctx, uint8_t qidx, bool tx, kern_netif_queue_t queue,
2852     void **queue_ctx)
2853 {
2854 #pragma unused(nxprov)
2855 	if_fake_ref fakeif;
2856 	fake_qset *__single fqs = qset_ctx;
2857 	fake_queue *fq;
2858 
2859 	feth_lock();
2860 	fakeif = feth_nexus_context(nexus);
2861 	if (feth_is_detaching(fakeif)) {
2862 		FAKE_LOG(LOG_DEBUG, FE_DBGF_CONTROL,
2863 		    "%s: detaching", fakeif->iff_name);
2864 		feth_unlock();
2865 		return ENXIO;
2866 	}
2867 	if (tx) {
2868 		if (qidx >= fqs->fqs_tx_queue_cnt) {
2869 			FAKE_LOG(LOG_DEBUG, FE_DBGF_CONTROL,
2870 			    "%s: invalid tx qidx %d", fakeif->iff_name, qidx);
2871 			feth_unlock();
2872 			return EINVAL;
2873 		}
2874 		fq = &fqs->fqs_tx_queue[qidx];
2875 	} else {
2876 		if (qidx >= fqs->fqs_rx_queue_cnt) {
2877 			FAKE_LOG(LOG_DEBUG, FE_DBGF_CONTROL,
2878 			    "%s: invalid rx qidx %d", fakeif->iff_name, qidx);
2879 			feth_unlock();
2880 			return EINVAL;
2881 		}
2882 		fq = &fqs->fqs_rx_queue[qidx];
2883 	}
2884 	ASSERT(fq->fq_queue == NULL);
2885 	fq->fq_queue = queue;
2886 	*queue_ctx = fq;
2887 	feth_unlock();
2888 	return 0;
2889 }
2890 
2891 static void
feth_nx_queue_fini(kern_nexus_provider_t nxprov,kern_nexus_t nexus,void * queue_ctx)2892 feth_nx_queue_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
2893     void *queue_ctx)
2894 {
2895 #pragma unused(nxprov, nexus)
2896 	fake_queue *__single fq = queue_ctx;
2897 
2898 	feth_lock();
2899 	ASSERT(fq->fq_queue != NULL);
2900 	fq->fq_queue = NULL;
2901 	feth_unlock();
2902 }
2903 
/*
 * Deliver a chain of tx packets (sph, linked via kern_packet next
 * pointers) from fakeif to its peer.  Each packet is bpf-tapped,
 * possibly dropped by the configured tx expiry/completion error
 * policies, and the survivors are submitted to the peer's rx queue in
 * batches of at most IFF_MAX_BATCH_SIZE, followed by tx completion.
 * nifs accounts drops and transmits; llink_idx/qset_idx select the
 * peer qset that receives the batch.
 */
static void
feth_nx_tx_queue_deliver_pkt_chain(if_fake_ref fakeif, kern_packet_t sph,
    struct netif_stats *nifs, if_fake_ref peer_fakeif,
    uint32_t llink_idx, uint32_t qset_idx)
{
	kern_packet_t pkts[IFF_MAX_BATCH_SIZE];
	uint32_t n_pkts = 0;

	FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
	    "%s -> %s", fakeif->iff_name, peer_fakeif->iff_name);

	while (sph != 0) {
		uint16_t off;
		kern_packet_t next;

		/* detach this packet from the chain before processing */
		next = kern_packet_get_next(sph);
		kern_packet_set_next(sph, 0);

		/* bpf tap output */
		off = kern_packet_get_headroom(sph);
		VERIFY(off >= fakeif->iff_tx_headroom);
		kern_packet_set_link_header_length(sph, ETHER_HDR_LEN);
		feth_packet_set_trace_tag(sph, IFF_TT_OUTPUT);
		bpf_tap_packet_out(fakeif->iff_ifp, DLT_EN10MB, sph, NULL, 0);

		/* drop packets, if requested */
		fakeif->iff_tx_pkts_count++;
		if (feth_tx_expired_error(fakeif, sph) ||
		    feth_tx_complete_error(fakeif, sph)) {
			/* dropped: packet is freed here, counter restarts */
			fakeif->iff_tx_pkts_count = 0;
			kern_pbufpool_free(fakeif->iff_tx_pp, sph);
			STATS_INC(nifs, NETIF_STATS_DROP);
			goto next_pkt;
		}
		ASSERT(sph != 0);
		STATS_INC(nifs, NETIF_STATS_TX_COPY_DIRECT);
		STATS_INC(nifs, NETIF_STATS_TX_PACKETS);

		/* prepare batch for receiver */
		pkts[n_pkts++] = sph;
		if (n_pkts == IFF_MAX_BATCH_SIZE) {
			/* full batch: hand to peer rx, then complete tx */
			feth_rx_queue_submit(fakeif, peer_fakeif, llink_idx,
			    qset_idx, pkts, n_pkts);
			feth_tx_complete(fakeif, pkts, n_pkts);
			n_pkts = 0;
		}
next_pkt:
		sph = next;
	}
	/* catch last batch for receiver */
	if (n_pkts != 0) {
		feth_rx_queue_submit(fakeif, peer_fakeif, llink_idx, qset_idx,
		    pkts, n_pkts);
		feth_tx_complete(fakeif, pkts, n_pkts);
		n_pkts = 0;
	}
}
2961 
/*
 * Nexus callback: tx doorbell for a qset.  Drains every tx queue of
 * the qset and delivers the dequeued chains to the peer interface's
 * matching llink/qset, provided both endpoints are attached and
 * channel-connected.  Always returns 0; a missing or detaching peer
 * merely ends delivery for this doorbell.
 */
static errno_t
feth_nx_tx_qset_notify(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    void *qset_ctx, uint32_t flags)
{
#pragma unused(nxprov)
	if_fake_ref             fakeif;
	ifnet_t                 peer_ifp;
	if_fake_ref             peer_fakeif = NULL;
	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
	fake_qset               * __single qset = qset_ctx;
	boolean_t               detaching, connected;
	uint32_t                i;
	errno_t                 err;

	STATS_INC(nifs, NETIF_STATS_TX_SYNC);
	fakeif = feth_nexus_context(nexus);
	FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
	    "%s qset %p, idx %d, flags 0x%x", fakeif->iff_name, qset,
	    qset->fqs_idx, flags);

	feth_lock();
	/* bail out if this endpoint is going away or has no channel */
	detaching = feth_is_detaching(fakeif);
	connected = fakeif->iff_channel_connected;
	if (detaching || !connected) {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
		    "%s: detaching %s, channel connected %s",
		    fakeif->iff_name,
		    (detaching ? "true" : "false"),
		    (connected ? "true" : "false"));
		feth_unlock();
		return 0;
	}
	/* the peer must likewise exist, be attached and connected */
	peer_ifp = fakeif->iff_peer;
	if (peer_ifp != NULL) {
		peer_fakeif = ifnet_get_if_fake(peer_ifp);
		if (peer_fakeif != NULL) {
			detaching = feth_is_detaching(peer_fakeif);
			connected = peer_fakeif->iff_channel_connected;
			if (detaching || !connected) {
				FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
				    "peer %s: detaching %s, "
				    "channel connected %s",
				    peer_fakeif->iff_name,
				    (detaching ? "true" : "false"),
				    (connected ? "true" : "false"));
				goto done;
			}
		} else {
			FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
			    "peer_fakeif is NULL");
			goto done;
		}
	} else {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT, "peer_ifp is NULL");
		goto done;
	}

	/* drain each tx queue of this qset and hand it to the peer */
	for (i = 0; i < qset->fqs_tx_queue_cnt; i++) {
		kern_packet_t sph = 0;
		kern_netif_queue_t queue = qset->fqs_tx_queue[i].fq_queue;
		boolean_t more = FALSE;

		err = kern_netif_queue_tx_dequeue(queue, UINT32_MAX, UINT32_MAX,
		    &more, &sph);
		/* EAGAIN is expected and not logged; other errors are
		 * logged but otherwise ignored */
		if (err != 0 && err != EAGAIN) {
			FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
			    "%s queue %p dequeue failed: err "
			    "%d", fakeif->iff_name, queue, err);
		}
		feth_nx_tx_queue_deliver_pkt_chain(fakeif, sph, nifs,
		    peer_fakeif, qset->fqs_llink_idx, qset->fqs_idx);
	}

done:
	feth_unlock();
	return 0;
}
3039 
3040 
/*
 * Nexus callback: direct tx push of a packet chain for a single queue.
 * Counts packets/bytes of the chain into *packetCount/*byteCount,
 * delivers it to the peer's default llink/qset (indices 0, 0) and
 * consumes the chain (*ph is cleared).  Always returns 0.
 *
 * NOTE(review): when either endpoint is detaching/disconnected or the
 * peer is missing, this returns 0 without touching *ph, *packetCount
 * or *byteCount — presumably chain ownership then stays with the
 * caller; confirm against the kern_nexus queue_tx_push contract.
 */
static errno_t
feth_nx_queue_tx_push(kern_nexus_provider_t nxprov,
    kern_nexus_t nexus, void *queue_ctx, kern_packet_t *ph,
    uint32_t *packetCount, uint32_t *byteCount)
{
#pragma unused(nxprov)
	if_fake_ref             fakeif;
	ifnet_t                 peer_ifp;
	if_fake_ref             peer_fakeif = NULL;
	struct netif_stats      *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
	fake_queue              *__single fq = queue_ctx;
	boolean_t               detaching, connected;

	STATS_INC(nifs, NETIF_STATS_TX_SYNC);
	fakeif = feth_nexus_context(nexus);
	FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT, "%s queue %p", fakeif->iff_name, fq);

	feth_lock();

	/* bail out if this endpoint is going away or has no channel */
	detaching = feth_is_detaching(fakeif);
	connected = fakeif->iff_channel_connected;
	if (detaching || !connected) {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
		    "%s: detaching %s, channel connected %s",
		    fakeif->iff_name,
		    (detaching ? "true" : "false"),
		    (connected ? "true" : "false"));
		goto done;
	}
	/* the peer must likewise exist, be attached and connected */
	peer_ifp = fakeif->iff_peer;
	if (peer_ifp != NULL) {
		peer_fakeif = ifnet_get_if_fake(peer_ifp);
		if (peer_fakeif != NULL) {
			detaching = feth_is_detaching(peer_fakeif);
			connected = peer_fakeif->iff_channel_connected;
			if (detaching || !connected) {
				FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
				    "peer %s: detaching %s, "
				    "channel connected %s",
				    peer_fakeif->iff_name,
				    (detaching ? "true" : "false"),
				    (connected ? "true" : "false"));
				goto done;
			}
		} else {
			FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
			    "peer_fakeif is NULL");
			goto done;
		}
	} else {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT, "peer_ifp is NULL");
		goto done;
	}

	*packetCount = *byteCount = 0;

	/* walk the chain once to report packet and byte totals */
	kern_packet_t sph = *ph;
	while (sph != 0) {
		(*packetCount)++;
		*byteCount += kern_packet_get_data_length(sph);
		sph = kern_packet_get_next(sph);
	}

	/* deliver to the peer's default llink/qset */
	feth_nx_tx_queue_deliver_pkt_chain(fakeif, *ph, nifs,
	    peer_fakeif, 0, 0);

	/* chain has been consumed */
	*ph = 0;

done:
	feth_unlock();
	return 0;
}
3113 
3114 
3115 static void
fill_qset_info_and_params(if_fake_ref fakeif,fake_llink * llink_info,uint32_t qset_idx,struct kern_nexus_netif_llink_qset_init * qset_init,bool is_def,bool is_low_latency)3116 fill_qset_info_and_params(if_fake_ref fakeif, fake_llink *llink_info,
3117     uint32_t qset_idx, struct kern_nexus_netif_llink_qset_init *qset_init,
3118     bool is_def, bool is_low_latency)
3119 {
3120 	fake_qset *qset_info = &llink_info->fl_qset[qset_idx];
3121 
3122 	qset_init->nlqi_flags =
3123 	    (is_def ? KERN_NEXUS_NET_LLINK_QSET_DEFAULT : 0) |
3124 	    (is_low_latency ? KERN_NEXUS_NET_LLINK_QSET_LOW_LATENCY : 0) |
3125 	    KERN_NEXUS_NET_LLINK_QSET_AQM;
3126 
3127 	if (feth_in_wmm_mode(fakeif)) {
3128 		qset_init->nlqi_flags |= KERN_NEXUS_NET_LLINK_QSET_WMM_MODE;
3129 		qset_init->nlqi_num_txqs = IFF_NUM_TX_QUEUES_WMM_MODE;
3130 		qset_init->nlqi_num_rxqs = IFF_NUM_RX_QUEUES_WMM_MODE;
3131 	} else {
3132 		qset_init->nlqi_num_txqs = 1;
3133 		qset_init->nlqi_num_rxqs = 1;
3134 	}
3135 	qset_info->fqs_tx_queue_cnt = qset_init->nlqi_num_txqs;
3136 	qset_info->fqs_rx_queue_cnt = qset_init->nlqi_num_rxqs;
3137 
3138 	/* These are needed for locating the peer qset */
3139 	qset_info->fqs_llink_idx = llink_info->fl_idx;
3140 	qset_info->fqs_idx = qset_idx;
3141 }
3142 
3143 static void
fill_llink_info_and_params(if_fake_ref fakeif,uint32_t llink_idx,struct kern_nexus_netif_llink_init * llink_init,uint32_t llink_id,struct kern_nexus_netif_llink_qset_init * __counted_by (qset_cnt)qset_init,uint32_t qset_cnt,uint32_t flags)3144 fill_llink_info_and_params(if_fake_ref fakeif, uint32_t llink_idx,
3145     struct kern_nexus_netif_llink_init *llink_init, uint32_t llink_id,
3146     struct kern_nexus_netif_llink_qset_init * __counted_by(qset_cnt) qset_init, uint32_t qset_cnt,
3147     uint32_t flags)
3148 {
3149 	fake_llink *llink_info = &fakeif->iff_llink[llink_idx];
3150 	uint32_t i;
3151 	bool create_ll_qset = if_fake_low_latency && (llink_idx != 0);
3152 
3153 	for (i = 0; i < qset_cnt; i++) {
3154 		fill_qset_info_and_params(fakeif, llink_info, i,
3155 		    &qset_init[i], i == 0, create_ll_qset && i == 1);
3156 	}
3157 	llink_info->fl_idx = llink_idx;
3158 
3159 	/* This doesn't have to be the same as llink_idx */
3160 	llink_info->fl_id = llink_id;
3161 	llink_info->fl_qset_cnt = qset_cnt;
3162 
3163 	llink_init->nli_link_id = llink_id;
3164 	llink_init->nli_num_qsets = qset_cnt;
3165 	llink_init->nli_qsets = qset_init;
3166 	llink_init->nli_flags = flags;
3167 	llink_init->nli_ctx = llink_info;
3168 }
3169 
/*
 * Add the non-default logical links (indices 1..if_fake_llink_cnt-1)
 * to the nexus instance already created for fakeif.  The default llink
 * (index 0) was created together with the provider instance.  On
 * failure, removes every llink counted so far and resets
 * iff_llink_cnt to zero.
 */
static errno_t
create_non_default_llinks(if_fake_ref fakeif)
{
	struct kern_nexus *nx;
	fake_nx_t fnx = &fakeif->iff_nx;
	struct kern_nexus_netif_llink_init llink_init;
	struct kern_nexus_netif_llink_qset_init qset_init[FETH_MAX_QSETS];
	errno_t err;
	uint64_t llink_id;
	uint32_t i;

	nx = nx_find(fnx->fnx_instance, FALSE);
	if (nx == NULL) {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
		    "%s: nx not found", fakeif->iff_name);
		return ENXIO;
	}
	/* Default llink starts at index 0 */
	for (i = 1; i < if_fake_llink_cnt; i++) {
		/* for if_fake, llink id is simply the array index */
		llink_id = (uint64_t)i;

		/*
		 * The llink_init and qset_init structures are reused for
		 * each llink creation.
		 */
		fill_llink_info_and_params(fakeif, i, &llink_init,
		    llink_id, qset_init, if_fake_qset_cnt, 0);
		err = kern_nexus_netif_llink_add(nx, &llink_init);
		if (err != 0) {
			FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
			    "%s: llink add failed, error %d",
			    fakeif->iff_name, err);
			goto fail;
		}
		fakeif->iff_llink_cnt++;
	}
	nx_release(nx);
	return 0;

fail:
	/*
	 * Tear down every llink counted so far.  NOTE(review): this loop
	 * starts at index 0, so it also removes the default llink created
	 * with the provider instance — unlike remove_non_default_llinks(),
	 * which skips index 0.  Confirm this asymmetry is intentional.
	 */
	for (i = 0; i < fakeif->iff_llink_cnt; i++) {
		int                     error;
		fake_llink * __single   ll = &fakeif->iff_llink[i];

		error = kern_nexus_netif_llink_remove(nx, ll->fl_id);
		if (error != 0) {
			FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
			    "%s: llink remove failed, llink_id 0x%llx, "
			    "error %d", fakeif->iff_name,
			    ll->fl_id, error);
		}
		ll->fl_id = 0;
	}
	fakeif->iff_llink_cnt = 0;
	nx_release(nx);
	return err;
}
3227 
/*
 * Logical-link variant of netif provider/instance creation: registers
 * a nexus provider for fakeif, allocates the net provider instance
 * with the default llink (index 0), then adds the remaining llinks
 * when if_fake_llink_cnt > 1, and finally enables TSO offload if
 * supported.  On success returns 0 with *provider/*instance/*ifp
 * filled in; on failure returns the error with the provider
 * deregistered (instance-allocation failure case).
 */
static errno_t
create_netif_llink_provider_and_instance(if_fake_ref fakeif,
    struct ifnet_init_eparams * init_params, ifnet_t *ifp,
    uuid_t * provider, uuid_t * instance)
{
	errno_t                 err;
	nexus_controller_t      controller = kern_nexus_shared_controller();
	struct kern_nexus_net_init net_init;
	struct kern_nexus_netif_llink_init llink_init;
	struct kern_nexus_netif_llink_qset_init qsets[FETH_MAX_QSETS];

	nexus_name_t            provider_name;
	nexus_attr_t            __single nexus_attr = NULL;
	/* llink-aware callback vector (qset/queue init/fini, tx push) */
	struct kern_nexus_netif_provider_init prov_init = {
		/* NOTE(review): nxnpi_version is assigned a domain-provider
		 * constant here — confirm this matches the expected
		 * kern_nexus_netif_provider_init version value */
		.nxnpi_version = KERN_NEXUS_DOMAIN_PROVIDER_NETIF,
		.nxnpi_flags = NXPIF_VIRTUAL_DEVICE,
		.nxnpi_pre_connect = feth_nx_pre_connect,
		.nxnpi_connected = feth_nx_connected,
		.nxnpi_pre_disconnect = feth_nx_pre_disconnect,
		.nxnpi_disconnected = feth_nx_disconnected,
		.nxnpi_qset_init = feth_nx_qset_init,
		.nxnpi_qset_fini = feth_nx_qset_fini,
		.nxnpi_queue_init = feth_nx_queue_init,
		.nxnpi_queue_fini = feth_nx_queue_fini,
		.nxnpi_tx_qset_notify = feth_nx_tx_qset_notify,
		.nxnpi_config_capab = feth_nx_capab_config,
		.nxnpi_queue_tx_push = feth_nx_queue_tx_push
	};

	err = kern_nexus_attr_create(&nexus_attr);
	if (err != 0) {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
		    "nexus attribute creation failed, error %d", err);
		goto failed;
	}

	err = kern_nexus_attr_set(nexus_attr, NEXUS_ATTR_ANONYMOUS, 1);
	VERIFY(err == 0);

	snprintf((char *)provider_name, sizeof(provider_name),
	    "com.apple.netif.%s", fakeif->iff_name);
	err = kern_nexus_controller_register_provider(controller,
	    feth_nx_dom_prov,
	    provider_name,
	    (struct kern_nexus_provider_init *)&prov_init,
	    sizeof(prov_init),
	    nexus_attr,
	    provider);
	if (err != 0) {
		FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
		    "register provider failed, error %d", err);
		goto failed;
	}
	bzero(&net_init, sizeof(net_init));
	net_init.nxneti_version = KERN_NEXUS_NET_CURRENT_VERSION;
	net_init.nxneti_flags = 0;
	net_init.nxneti_eparams = init_params;
	net_init.nxneti_lladdr = NULL;
	net_init.nxneti_prepare = feth_netif_prepare;
	net_init.nxneti_rx_pbufpool = fakeif->iff_rx_pp;
	net_init.nxneti_tx_pbufpool = fakeif->iff_tx_pp;

	/*
	 * Assume llink id is same as the index for if_fake.
	 * This is not required for other drivers.
	 */
	static_assert(NETIF_LLINK_ID_DEFAULT == 0);
	fill_llink_info_and_params(fakeif, 0, &llink_init,
	    NETIF_LLINK_ID_DEFAULT, qsets, if_fake_qset_cnt,
	    KERN_NEXUS_NET_LLINK_DEFAULT);

	net_init.nxneti_llink = &llink_init;

	err = kern_nexus_controller_alloc_net_provider_instance(controller,
	    *provider, fakeif, NULL, instance, &net_init, ifp);
	if (err != 0) {
		FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
		    "alloc_net_provider_instance failed, %d", err);
		kern_nexus_controller_deregister_provider(controller,
		    *provider);
		uuid_clear(*provider);
		goto failed;
	}
	/* account for the default llink created with the instance */
	fakeif->iff_llink_cnt++;

	if (if_fake_llink_cnt > 1) {
		err = create_non_default_llinks(fakeif);
		if (err != 0) {
			FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
			    "create_non_default_llinks failed, %d", err);
			feth_detach_netif_nexus(fakeif);
			goto failed;
		}
	}
	if (feth_supports_tso(fakeif)) {
		/* NOTE(review): on TSO setup failure the instance/provider
		 * created above are not torn down here — confirm the caller
		 * handles that cleanup */
		if ((err = feth_set_tso_offload(*ifp)) != 0) {
			goto failed;
		}
	}
failed:
	/* common exit: success path also lands here with err == 0 */
	if (nexus_attr != NULL) {
		kern_nexus_attr_destroy(nexus_attr);
	}
	return err;
}
3333 
3334 static errno_t
feth_attach_netif_nexus(if_fake_ref fakeif,struct ifnet_init_eparams * init_params,ifnet_t * ifp)3335 feth_attach_netif_nexus(if_fake_ref fakeif,
3336     struct ifnet_init_eparams * init_params, ifnet_t *ifp)
3337 {
3338 	errno_t                 error;
3339 	fake_nx_t               nx = &fakeif->iff_nx;
3340 
3341 	error = feth_packet_pool_make(fakeif);
3342 	if (error != 0) {
3343 		return error;
3344 	}
3345 	if (if_fake_llink_cnt == 0) {
3346 		return create_netif_provider_and_instance(fakeif, init_params,
3347 		           ifp, &nx->fnx_provider, &nx->fnx_instance);
3348 	} else {
3349 		return create_netif_llink_provider_and_instance(fakeif,
3350 		           init_params, ifp, &nx->fnx_provider,
3351 		           &nx->fnx_instance);
3352 	}
3353 }
3354 
3355 static void
remove_non_default_llinks(const char * name,fake_nx_t fnx,fake_llink_t llink __counted_by (FETH_MAX_LLINKS),uint32_t llink_cnt)3356 remove_non_default_llinks(const char * name, fake_nx_t fnx,
3357     fake_llink_t llink __counted_by(FETH_MAX_LLINKS),
3358     uint32_t llink_cnt)
3359 {
3360 	struct kern_nexus *nx;
3361 	uint32_t i;
3362 
3363 	if (llink_cnt <= 1) {
3364 		goto done;
3365 	}
3366 	nx = nx_find(fnx->fnx_instance, FALSE);
3367 	if (nx == NULL) {
3368 		FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
3369 		    "%s: nx not found", name);
3370 		goto done;
3371 	}
3372 	/* Default llink (at index 0) is freed separately */
3373 	for (i = 1; i < llink_cnt; i++) {
3374 		int err;
3375 
3376 		err = kern_nexus_netif_llink_remove(nx, llink[i].fl_id);
3377 		if (err != 0) {
3378 			FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
3379 			    "%s: llink remove failed, llink_id 0x%llx, "
3380 			    "error %d", name,
3381 			    llink[i].fl_id, err);
3382 		}
3383 	}
3384 	nx_release(nx);
3385 done:
3386 	return;
3387 }
3388 
3389 static void
detach_provider_and_instance(uuid_t provider,uuid_t instance)3390 detach_provider_and_instance(uuid_t provider, uuid_t instance)
3391 {
3392 	nexus_controller_t controller = kern_nexus_shared_controller();
3393 	errno_t err;
3394 
3395 	if (!uuid_is_null(instance)) {
3396 		err = kern_nexus_controller_free_provider_instance(controller,
3397 		    instance);
3398 		if (err != 0) {
3399 			FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
3400 			    "free_provider_instance failed %d", err);
3401 		} else {
3402 			FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
3403 			    "deregister_instance");
3404 		}
3405 	}
3406 	if (!uuid_is_null(provider)) {
3407 		err = kern_nexus_controller_deregister_provider(controller,
3408 		    provider);
3409 		if (err != 0) {
3410 			FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
3411 			    "deregister_provider %d", err);
3412 		} else {
3413 			FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
3414 			    "deregister_provider");
3415 		}
3416 	}
3417 	return;
3418 }
3419 
/*
 * Detach the netif nexus from fakeif.  Snapshots and clears the nexus
 * state, llink array and llink count under feth_lock, then performs
 * the actual teardown (non-default llink removal, provider/instance
 * release, llink array free) with the lock dropped.
 */
static void
feth_detach_netif_nexus(if_fake_ref fakeif)
{
	fake_nx         fnx;
	fake_llink_t    llink;
	uint32_t        llink_cnt;

	feth_lock();
	/* take ownership of the nexus state so teardown can run unlocked */
	fnx = fakeif->iff_nx;
	bzero(&fakeif->iff_nx, sizeof(fakeif->iff_nx));
	llink = fakeif->iff_llink;
	fakeif->iff_llink = NULL;
	llink_cnt = fakeif->iff_llink_cnt;
	fakeif->iff_llink_cnt = 0;
	feth_unlock();
	remove_non_default_llinks(__unsafe_null_terminated_from_indexable(fakeif->iff_name), &fnx, llink, llink_cnt);
	detach_provider_and_instance(fnx.fnx_provider, fnx.fnx_instance);
	if (llink != NULL) {
		kfree_type(fake_llink, FETH_MAX_LLINKS, llink);
	}
	return;
}
3442 #endif /* SKYWALK */
3443 
3444 /**
3445 ** feth interface routines
3446 **/
3447 static void
feth_ifnet_set_attrs(if_fake_ref fakeif,ifnet_t ifp)3448 feth_ifnet_set_attrs(if_fake_ref fakeif, ifnet_t ifp)
3449 {
3450 	errno_t         error;
3451 	ifnet_offload_t offload = 0;
3452 
3453 	ifnet_set_addrlen(ifp, ETHER_ADDR_LEN);
3454 	ifnet_set_baudrate(ifp, 0);
3455 	ifnet_set_mtu(ifp, ETHERMTU);
3456 	ifnet_set_flags(ifp,
3457 	    IFF_BROADCAST | IFF_MULTICAST | IFF_SIMPLEX,
3458 	    0xffff);
3459 	ifnet_set_hdrlen(ifp, sizeof(struct ether_header));
3460 	if ((fakeif->iff_flags & IFF_FLAGS_LRO) != 0) {
3461 		offload |= IFNET_LRO;
3462 	}
3463 	if ((fakeif->iff_flags & IFF_FLAGS_HWCSUM) != 0) {
3464 		offload |= IFNET_CSUM_IP | IFNET_CSUM_TCP | IFNET_CSUM_UDP |
3465 		    IFNET_CSUM_TCPIPV6 | IFNET_CSUM_UDPIPV6;
3466 	}
3467 	if (feth_supports_tso(fakeif)) {
3468 		offload |= IFNET_TSO_IPV4 | IFNET_TSO_IPV6;
3469 	}
3470 	if (feth_supports_vlan_tagging(fakeif)) {
3471 		offload |= IFNET_VLAN_TAGGING;
3472 	} else if (feth_supports_vlan_mtu(fakeif)) {
3473 		offload |= IFNET_VLAN_MTU;
3474 	}
3475 	error = ifnet_set_offload(ifp, offload);
3476 	if (error != 0) {
3477 		FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
3478 		    "ifnet_set_offload(%s, 0x%x) failed, %d",
3479 		    ifp->if_xname, offload, error);
3480 	} else {
3481 		FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
3482 		    "ifnet_set_offload(%s, 0x%x) succeeded",
3483 		    ifp->if_xname, offload);
3484 	}
3485 	if (feth_supports_rx_flow_steering(fakeif)) {
3486 		ifnet_set_rx_flow_steering(ifp, true);
3487 	}
3488 }
3489 
3490 static void
interface_link_event(ifnet_t ifp,u_int32_t event_code)3491 interface_link_event(ifnet_t ifp, u_int32_t event_code)
3492 {
3493 	struct event {
3494 		u_int32_t ifnet_family;
3495 		u_int32_t unit;
3496 		char if_name[IFNAMSIZ];
3497 	};
3498 	_Alignas(struct kern_event_msg) char message[sizeof(struct kern_event_msg) + sizeof(struct event)] = { 0 };
3499 	struct kern_event_msg *__single header = (struct kern_event_msg*)message;
3500 	struct event *data = (struct event *)(message + offsetof(struct kern_event_msg, event_data));
3501 
3502 	header->total_size   = sizeof(message);
3503 	header->vendor_code  = KEV_VENDOR_APPLE;
3504 	header->kev_class    = KEV_NETWORK_CLASS;
3505 	header->kev_subclass = KEV_DL_SUBCLASS;
3506 	header->event_code   = event_code;
3507 	data->ifnet_family   = ifnet_family(ifp);
3508 	data->unit           = (u_int32_t)ifnet_unit(ifp);
3509 	strlcpy(data->if_name, ifnet_name(ifp), IFNAMSIZ);
3510 	ifnet_event(ifp, header);
3511 }
3512 
/*
 * Return ifp's softc cast to if_fake_ref (set via feth_init.softc at
 * interface creation).
 */
static if_fake_ref
ifnet_get_if_fake(ifnet_t ifp)
{
	return (if_fake_ref)ifnet_softc(ifp);
}
3518 
3519 static int
feth_clone_create(struct if_clone * ifc,u_int32_t unit,__unused void * params)3520 feth_clone_create(struct if_clone *ifc, u_int32_t unit, __unused void *params)
3521 {
3522 	bool                            bsd_mode;
3523 	int                             error;
3524 	if_fake_ref                   fakeif;
3525 	struct ifnet_init_eparams       feth_init;
3526 	fake_llink_t                    iff_llink __counted_by_or_null(FETH_MAX_LLINKS) = NULL;
3527 	ifnet_t                         __single ifp;
3528 	char                            mac_address[ETHER_ADDR_LEN];
3529 	bool                            multi_buflet;
3530 	iff_pktpool_mode_t              pktpool_mode;
3531 	bool                            tso_support;
3532 	bool                            rx_flow_steering_support;
3533 
3534 	/* make local copy of globals needed to make consistency checks below */
3535 	bsd_mode = (if_fake_bsd_mode != 0);
3536 	multi_buflet = (if_fake_multibuflet != 0);
3537 	tso_support = (if_fake_tso_support != 0);
3538 	pktpool_mode = if_fake_pktpool_mode;
3539 	rx_flow_steering_support = (if_fake_rx_flow_steering_support != 0);
3540 
3541 	if (!bsd_mode) {
3542 		/* consistency checks */
3543 		if (if_fake_llink_cnt == 0 &&
3544 		    strbufcmp(sk_ll_prefix, FAKE_ETHER_NAME) == 0) {
3545 			FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
3546 			    "feth used as ifname prefix but logical link "
3547 			    "support in feth is disabled.");
3548 			return EINVAL;
3549 		}
3550 		if (tso_support && pktpool_mode != IFF_PP_MODE_GLOBAL) {
3551 			FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
3552 			    "TSO mode requires global packet pool mode");
3553 			return EINVAL;
3554 		}
3555 		if (multi_buflet && pktpool_mode == IFF_PP_MODE_PRIVATE_SPLIT) {
3556 			FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
3557 			    "multi-buflet not supported for split rx & tx pool");
3558 			return EINVAL;
3559 		}
3560 		iff_llink = kalloc_type(fake_llink,
3561 		    FETH_MAX_LLINKS, Z_WAITOK_ZERO);
3562 		if (iff_llink == NULL) {
3563 			return ENOBUFS;
3564 		}
3565 	}
3566 	fakeif = kalloc_type(struct if_fake, Z_WAITOK_ZERO_NOFAIL);
3567 	fakeif->iff_llink = iff_llink;
3568 	fakeif->iff_retain_count = 1;
3569 #define FAKE_ETHER_NAME_LEN     (sizeof(FAKE_ETHER_NAME) - 1)
3570 	static_assert(FAKE_ETHER_NAME_LEN == 4);
3571 	strbufcpy(mac_address, FAKE_ETHER_NAME);
3572 	mac_address[ETHER_ADDR_LEN - 2] = (unit & 0xff00) >> 8;
3573 	mac_address[ETHER_ADDR_LEN - 1] = unit & 0xff;
3574 	if (bsd_mode) {
3575 		fakeif->iff_flags |= IFF_FLAGS_BSD_MODE;
3576 	}
3577 	if (if_fake_hwcsum != 0) {
3578 		fakeif->iff_flags |= IFF_FLAGS_HWCSUM;
3579 	}
3580 	if (if_fake_lro != 0) {
3581 		fakeif->iff_flags |= IFF_FLAGS_LRO;
3582 	}
3583 	if (if_fake_vlan_tagging != 0) {
3584 		/* support VLAN tagging in hardware */
3585 		feth_set_supports_vlan_tagging(fakeif);
3586 	} else {
3587 		/* support VLAN mtu-sized packets */
3588 		feth_set_supports_vlan_mtu(fakeif);
3589 	}
3590 	if (if_fake_separate_frame_header != 0) {
3591 		fakeif->iff_flags |= IFF_FLAGS_SEPARATE_FRAME_HEADER;
3592 	}
3593 	fakeif->iff_max_mtu = get_max_mtu(bsd_mode, if_fake_max_mtu);
3594 	fakeif->iff_fcs = if_fake_fcs;
3595 	fakeif->iff_trailer_length = if_fake_trailer_length;
3596 
3597 	/* use the interface name as the unique id for ifp recycle */
3598 	if ((unsigned int)
3599 	    snprintf(fakeif->iff_name, sizeof(fakeif->iff_name), "%s%d",
3600 	    ifc->ifc_name, unit) >= sizeof(fakeif->iff_name)) {
3601 		feth_release(fakeif);
3602 		return EINVAL;
3603 	}
3604 	bzero(&feth_init, sizeof(feth_init));
3605 	feth_init.ver = IFNET_INIT_CURRENT_VERSION;
3606 	feth_init.len = sizeof(feth_init);
3607 	if (feth_in_bsd_mode(fakeif)) {
3608 		if (if_fake_txstart != 0) {
3609 			feth_init.start = feth_start;
3610 		} else {
3611 			feth_init.flags |= IFNET_INIT_LEGACY;
3612 			feth_init.output = feth_output;
3613 		}
3614 		if (tso_support) {
3615 			feth_set_supports_tso(fakeif);
3616 		}
3617 	}
3618 #if SKYWALK
3619 	else {
3620 		feth_init.flags |= IFNET_INIT_SKYWALK_NATIVE;
3621 		/*
3622 		 * Currently we support WMM mode only for Skywalk native
3623 		 * interface.
3624 		 */
3625 		if (if_fake_wmm_mode != 0) {
3626 			fakeif->iff_flags |= IFF_FLAGS_WMM_MODE;
3627 		}
3628 
3629 		if (multi_buflet) {
3630 			fakeif->iff_flags |= IFF_FLAGS_MULTIBUFLETS;
3631 		}
3632 
3633 		fakeif->iff_pp_mode = pktpool_mode;
3634 		if (tso_support) {
3635 			feth_set_supports_tso(fakeif);
3636 		}
3637 
3638 		fakeif->iff_tx_headroom = if_fake_tx_headroom;
3639 		fakeif->iff_adv_interval = if_fake_if_adv_interval;
3640 		if (fakeif->iff_adv_interval > 0) {
3641 			feth_init.flags |= IFNET_INIT_IF_ADV;
3642 		}
3643 		fakeif->iff_tx_drop_rate = if_fake_tx_drops;
3644 		fakeif->iff_tx_completion_mode = if_tx_completion_mode;
3645 		fakeif->iff_tx_exp_policy = if_fake_tx_exp_policy;
3646 
3647 		if (rx_flow_steering_support) {
3648 			feth_set_supports_rx_flow_steering(fakeif);
3649 		}
3650 	}
3651 	feth_init.tx_headroom = fakeif->iff_tx_headroom;
3652 #endif /* SKYWALK */
3653 	if (if_fake_nxattach == 0) {
3654 		feth_init.flags |= IFNET_INIT_NX_NOAUTO;
3655 	}
3656 	feth_init.uniqueid_len = (uint32_t)strbuflen(fakeif->iff_name);
3657 	feth_init.uniqueid = fakeif->iff_name;
3658 	feth_init.name = __unsafe_null_terminated_from_indexable(ifc->ifc_name);
3659 	feth_init.unit = unit;
3660 	feth_init.family = IFNET_FAMILY_ETHERNET;
3661 	feth_init.type = IFT_ETHER;
3662 	feth_init.demux = ether_demux;
3663 	feth_init.add_proto = ether_add_proto;
3664 	feth_init.del_proto = ether_del_proto;
3665 	feth_init.check_multi = ether_check_multi;
3666 	feth_init.framer_extended = ether_frameout_extended;
3667 	feth_init.softc = fakeif;
3668 	feth_init.ioctl = feth_ioctl;
3669 	feth_init.set_bpf_tap = NULL;
3670 	feth_init.detach = feth_if_free;
3671 	feth_init.broadcast_addr = etherbroadcastaddr;
3672 	feth_init.broadcast_len = ETHER_ADDR_LEN;
3673 	if (feth_in_bsd_mode(fakeif)) {
3674 		error = ifnet_allocate_extended(&feth_init, &ifp);
3675 		if (error) {
3676 			feth_release(fakeif);
3677 			return error;
3678 		}
3679 		feth_ifnet_set_attrs(fakeif, ifp);
3680 		if (feth_supports_tso(fakeif)) {
3681 			feth_set_tso_mtu(ifp, IP_MAXPACKET, IP_MAXPACKET);
3682 		}
3683 	}
3684 #if SKYWALK
3685 	else {
3686 		if (feth_in_wmm_mode(fakeif)) {
3687 			feth_init.output_sched_model =
3688 			    IFNET_SCHED_MODEL_DRIVER_MANAGED;
3689 		}
3690 		error = feth_attach_netif_nexus(fakeif, &feth_init, &ifp);
3691 		if (error != 0) {
3692 			feth_release(fakeif);
3693 			return error;
3694 		}
3695 		/* take an additional reference to ensure that it doesn't go away */
3696 		feth_retain(fakeif);
3697 		fakeif->iff_flags |= IFF_FLAGS_NX_ATTACHED;
3698 		fakeif->iff_ifp = ifp;
3699 	}
3700 #endif /* SKYWALK */
3701 	fakeif->iff_media_count = MIN(default_media_words_count, IF_FAKE_MEDIA_LIST_MAX);
3702 	bcopy(default_media_words, fakeif->iff_media_list,
3703 	    fakeif->iff_media_count * sizeof(fakeif->iff_media_list[0]));
3704 	if (feth_in_bsd_mode(fakeif)) {
3705 		error = ifnet_attach(ifp, NULL);
3706 		if (error) {
3707 			ifnet_release(ifp);
3708 			feth_release(fakeif);
3709 			return error;
3710 		}
3711 		fakeif->iff_ifp = ifp;
3712 	}
3713 
3714 	ifnet_set_lladdr(ifp, mac_address, sizeof(mac_address));
3715 
3716 	/* attach as ethernet */
3717 	bpfattach(ifp, DLT_EN10MB, sizeof(struct ether_header));
3718 	return 0;
3719 }
3720 
/*
 * feth_clone_destroy
 * - tear down a feth instance: mark it as detaching, break the peer
 *   connection, detach the netif nexus (Skywalk mode), and detach
 *   the ifnet
 */
static int
feth_clone_destroy(ifnet_t ifp)
{
	if_fake_ref     fakeif;
#if SKYWALK
	boolean_t       nx_attached = FALSE;
#endif /* SKYWALK */

	feth_lock();
	fakeif = ifnet_get_if_fake(ifp);
	if (fakeif == NULL || feth_is_detaching(fakeif)) {
		/* no state, or a destroy is already in progress */
		feth_unlock();
		return 0;
	}
	/* mark detaching under the lock so a concurrent destroy bails out */
	feth_set_detaching(fakeif);
#if SKYWALK
	/* remember attachment state before dropping the lock */
	nx_attached = (fakeif->iff_flags & IFF_FLAGS_NX_ATTACHED) != 0;
#endif /* SKYWALK */
	feth_unlock();
	/* disconnect from the peer (generates link-down events) */
	feth_config(ifp, NULL);
#if SKYWALK
	if (nx_attached) {
		feth_detach_netif_nexus(fakeif);
		/* drop the reference that was taken at nexus attach time */
		feth_release(fakeif);
	}
#endif /* SKYWALK */
	ifnet_detach(ifp);
	return 0;
}
3750 
3751 static void
feth_enqueue_input(ifnet_t ifp,struct mbuf * m)3752 feth_enqueue_input(ifnet_t ifp, struct mbuf * m)
3753 {
3754 	struct ifnet_stat_increment_param stats = {};
3755 
3756 	stats.packets_in = 1;
3757 	stats.bytes_in = (uint32_t)mbuf_pkthdr_len(m) + ETHER_HDR_LEN;
3758 	ifnet_input(ifp, m, &stats);
3759 }
3760 
3761 
3762 static int
feth_add_mbuf_trailer(struct mbuf * m,void * trailer __sized_by (trailer_len),size_t trailer_len)3763 feth_add_mbuf_trailer(struct mbuf *m, void *trailer __sized_by(trailer_len), size_t trailer_len)
3764 {
3765 	int ret;
3766 	ASSERT(trailer_len <= FETH_TRAILER_LENGTH_MAX);
3767 
3768 	ret = m_append(m, trailer_len, (caddr_t)trailer);
3769 	if (ret == 1) {
3770 		FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
3771 		    "%zuB trailer added", trailer_len);
3772 		return 0;
3773 	}
3774 	FAKE_LOG(LOG_NOTICE, FE_DBGF_OUTPUT, "m_append failed");
3775 	return ENOTSUP;
3776 }
3777 
3778 static int
feth_add_mbuf_fcs(struct mbuf * m)3779 feth_add_mbuf_fcs(struct mbuf *m)
3780 {
3781 	uint32_t pkt_len, offset = 0;
3782 	uint32_t crc = 0;
3783 	int err = 0;
3784 
3785 	ASSERT(sizeof(crc) == ETHER_CRC_LEN);
3786 
3787 	pkt_len = m->m_pkthdr.len;
3788 	struct mbuf *iter = m;
3789 	while (iter != NULL && offset < pkt_len) {
3790 		uint32_t frag_len = iter->m_len;
3791 		ASSERT(frag_len <= (pkt_len - offset));
3792 		crc = crc32(crc, mtod(iter, void *), frag_len);
3793 		offset += frag_len;
3794 		iter = iter->m_next;
3795 	}
3796 
3797 	err = feth_add_mbuf_trailer(m, &crc, ETHER_CRC_LEN);
3798 	if (err != 0) {
3799 		return err;
3800 	}
3801 
3802 	m->m_flags |= M_HASFCS;
3803 
3804 	return 0;
3805 }
3806 
/*
 * feth_output_common
 * - shared output path for both the legacy and txstart models:
 *   fake hardware-checksum results, append any configured
 *   trailer/FCS, split off the ethernet header, tap bpf on both
 *   ends, and enqueue the packet as input on the peer
 */
static void
feth_output_common(ifnet_t ifp, struct mbuf * m, ifnet_t peer,
    iff_flags_t flags, bool fcs, void *trailer __sized_by(trailer_len), size_t trailer_len)
{
	void *                  frame_header;

	if ((flags & IFF_FLAGS_HWCSUM) != 0) {
		/* pretend the "hardware" validated all checksums */
		m->m_pkthdr.csum_data = 0xffff;
		m->m_pkthdr.csum_flags =
		    CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
		    CSUM_IP_CHECKED | CSUM_IP_VALID;
	}

	(void)ifnet_stat_increment_out(ifp, 1, m->m_pkthdr.len, 0);
	if (trailer_len != 0 && trailer != NULL) {
		/* best effort: on failure the packet is sent without trailer */
		feth_add_mbuf_trailer(m, trailer, trailer_len);
	}
	if (fcs) {
		feth_add_mbuf_fcs(m);
	}
	if ((flags & IFF_FLAGS_SEPARATE_FRAME_HEADER) != 0) {
		/* pull the ethernet header into contiguous storage, then
		 * record it as the frame header and advance past it */
		m = m_copyup(m, ETHER_HDR_LEN, 0);
		if (m == NULL) {
			/* NOTE(review): assumes m_copyup frees the chain on
			 * failure — confirm against mbuf implementation */
			FAKE_LOG(LOG_NOTICE, FE_DBGF_OUTPUT, "m_copyup failed");
			goto done;
		}
		frame_header = mtod(m, void *);
		mbuf_pkthdr_setheader(m, frame_header);
		m_adj(m, ETHER_HDR_LEN);
		FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
		    "%s: frame 0x%llx data 0x%llx len %ld",
		    ifp->if_xname,
		    (uint64_t)VM_KERNEL_ADDRPERM(frame_header),
		    (uint64_t)VM_KERNEL_ADDRPERM(mtod(m, void *)),
		    mbuf_len(m));
	} else {
		/* header stays in the same buffer; just advance pkthdr/data */
		frame_header = mtod(m, void *);
		mbuf_pkthdr_setheader(m, frame_header);
		_mbuf_adjust_pkthdr_and_data(m, ETHER_HDR_LEN);
	}

	/* tap it out */
	if (ifp->if_bpf != NULL) {
		fake_bpf_tap_out(ifp, m, frame_header);
	}

	/* tap it in */
	if (peer->if_bpf != NULL) {
		fake_bpf_tap_in(peer, m, frame_header);
	}

	/* the packet now belongs to the peer's input path */
	(void)mbuf_pkthdr_setrcvif(m, peer);
	feth_enqueue_input(peer, m);
done:
	return;
}
3863 
/*
 * feth_start
 * - ifnet start callback (txstart model): drain the interface send
 *   queue and deliver each packet to the peer
 * - iff_start_busy serializes concurrent start callbacks; the feth
 *   lock is dropped while packets are transferred
 */
static void
feth_start(ifnet_t ifp)
{
	if_fake_ref     fakeif;
	iff_flags_t     flags = 0;
	bool            fcs;
	struct mbuf *   __single m;
	ifnet_t         peer = NULL;
	size_t          trailer_len;

	feth_lock();
	fakeif = ifnet_get_if_fake(ifp);
	if (fakeif == NULL) {
		feth_unlock();
		return;
	}

	/* another thread is already draining the queue */
	if (fakeif->iff_start_busy) {
		feth_unlock();
		return;
	}

	/* snapshot the configuration while the lock is held */
	peer = fakeif->iff_peer;
	flags = fakeif->iff_flags;
	fcs = fakeif->iff_fcs;
	trailer_len = fakeif->iff_trailer_length;

	fakeif->iff_start_busy = TRUE;
	feth_unlock();
	for (;;) {
		if (ifnet_dequeue(ifp, &m) != 0) {
			break;
		}
		/* without a connected peer, packets are dropped */
		if (peer == NULL) {
			m_freem(m);
			continue;
		}
		if (m != NULL) {
			feth_output_common(ifp, m, peer, flags, fcs,
			    feth_trailer, trailer_len);
		}
	}
	/* re-validate fakeif: state may have changed while unlocked */
	feth_lock();
	fakeif = ifnet_get_if_fake(ifp);
	if (fakeif != NULL) {
		fakeif->iff_start_busy = FALSE;
	}
	feth_unlock();
}
3913 
3914 static int
feth_output(ifnet_t ifp,struct mbuf * m)3915 feth_output(ifnet_t ifp, struct mbuf * m)
3916 {
3917 	if_fake_ref             fakeif;
3918 	iff_flags_t             flags;
3919 	bool                    fcs;
3920 	size_t                  trailer_len;
3921 	ifnet_t                 peer = NULL;
3922 
3923 	if (m == NULL) {
3924 		return 0;
3925 	}
3926 	feth_lock();
3927 	fakeif = ifnet_get_if_fake(ifp);
3928 	if (fakeif != NULL) {
3929 		peer = fakeif->iff_peer;
3930 		flags = fakeif->iff_flags;
3931 		fcs = fakeif->iff_fcs;
3932 		trailer_len = fakeif->iff_trailer_length;
3933 	}
3934 	feth_unlock();
3935 	if (peer == NULL) {
3936 		m_freem(m);
3937 		ifnet_stat_increment_out(ifp, 0, 0, 1);
3938 		return 0;
3939 	}
3940 	feth_output_common(ifp, m, peer, flags, fcs, feth_trailer, trailer_len);
3941 	return 0;
3942 }
3943 
/*
 * feth_config
 * - connect `ifp` to `peer` (peer != NULL), or disconnect `ifp`
 *   from its current peer (peer == NULL)
 * - on a state change, generates KEV_DL_LINK_ON/OFF events on both
 *   interfaces after dropping the lock
 * - returns 0, EINVAL (missing state), or EBUSY (already connected
 *   or detaching)
 */
static int
feth_config(ifnet_t ifp, ifnet_t peer)
{
	int             connected = FALSE;
	int             disconnected = FALSE;
	int             error = 0;
	if_fake_ref     fakeif = NULL;

	feth_lock();
	fakeif = ifnet_get_if_fake(ifp);
	if (fakeif == NULL) {
		error = EINVAL;
		goto done;
	}
	if (peer != NULL) {
		/* connect to peer */
		if_fake_ref   peer_fakeif;

		peer_fakeif = ifnet_get_if_fake(peer);
		if (peer_fakeif == NULL) {
			error = EINVAL;
			goto done;
		}
		/* both ends must be alive and not already connected */
		if (feth_is_detaching(fakeif) ||
		    feth_is_detaching(peer_fakeif) ||
		    peer_fakeif->iff_peer != NULL ||
		    fakeif->iff_peer != NULL) {
			error = EBUSY;
			goto done;
		}
#if SKYWALK
		/* both ends must use the same packet-pool mode */
		if (fakeif->iff_pp_mode !=
		    peer_fakeif->iff_pp_mode) {
			error = EINVAL;
			goto done;
		}
#endif /* SKYWALK */
		/* cross-link the two instances */
		fakeif->iff_peer = peer;
		peer_fakeif->iff_peer = ifp;
		connected = TRUE;
	} else if (fakeif->iff_peer != NULL) {
		/* disconnect from peer */
		if_fake_ref   peer_fakeif;

		peer = fakeif->iff_peer;
		peer_fakeif = ifnet_get_if_fake(peer);
		if (peer_fakeif == NULL) {
			/* should not happen */
			error = EINVAL;
			goto done;
		}
		fakeif->iff_peer = NULL;
		peer_fakeif->iff_peer = NULL;
		disconnected = TRUE;
	}

done:
	feth_unlock();

	/* generate link status event if we connect or disconnect */
	if (connected) {
		interface_link_event(ifp, KEV_DL_LINK_ON);
		interface_link_event(peer, KEV_DL_LINK_ON);
	} else if (disconnected) {
		interface_link_event(ifp, KEV_DL_LINK_OFF);
		interface_link_event(peer, KEV_DL_LINK_OFF);
	}
	return error;
}
4013 
4014 static int
feth_set_media(ifnet_t ifp,struct if_fake_request * iffr)4015 feth_set_media(ifnet_t ifp, struct if_fake_request * iffr)
4016 {
4017 	if_fake_ref     fakeif;
4018 	int             error;
4019 
4020 	if (iffr->iffr_media.iffm_count > IF_FAKE_MEDIA_LIST_MAX) {
4021 		/* list is too long */
4022 		return EINVAL;
4023 	}
4024 	feth_lock();
4025 	fakeif = ifnet_get_if_fake(ifp);
4026 	if (fakeif == NULL) {
4027 		error = EINVAL;
4028 		goto done;
4029 	}
4030 	fakeif->iff_media_count = iffr->iffr_media.iffm_count;
4031 	bcopy(iffr->iffr_media.iffm_list, fakeif->iff_media_list,
4032 	    iffr->iffr_media.iffm_count * sizeof(fakeif->iff_media_list[0]));
4033 #if 0
4034 	/* XXX: "auto-negotiate" active with peer? */
4035 	/* generate link status event? */
4036 	fakeif->iff_media_current = iffr->iffr_media.iffm_current;
4037 #endif
4038 	error = 0;
4039 done:
4040 	feth_unlock();
4041 	return error;
4042 }
4043 
4044 static int
if_fake_request_copyin(user_addr_t user_addr,struct if_fake_request * iffr,u_int32_t len)4045 if_fake_request_copyin(user_addr_t user_addr,
4046     struct if_fake_request *iffr, u_int32_t len)
4047 {
4048 	int     error;
4049 
4050 	if (user_addr == USER_ADDR_NULL || len < sizeof(*iffr)) {
4051 		error = EINVAL;
4052 		goto done;
4053 	}
4054 	error = copyin(user_addr, iffr, sizeof(*iffr));
4055 	if (error != 0) {
4056 		goto done;
4057 	}
4058 	if (iffr->iffr_reserved[0] != 0 || iffr->iffr_reserved[1] != 0 ||
4059 	    iffr->iffr_reserved[2] != 0 || iffr->iffr_reserved[3] != 0) {
4060 		error = EINVAL;
4061 		goto done;
4062 	}
4063 done:
4064 	return error;
4065 }
4066 
/*
 * feth_set_drvspec
 * - handle SIOCSDRVSPEC sub-commands: set/clear the peer interface,
 *   install a media list, or enable a dequeue stall
 */
static int
feth_set_drvspec(ifnet_t ifp, uint32_t cmd, u_int32_t len,
    user_addr_t user_addr)
{
	int                     error;
	struct if_fake_request  iffr;
	ifnet_t                 peer;

	switch (cmd) {
	case IF_FAKE_S_CMD_SET_PEER:
		error = if_fake_request_copyin(user_addr, &iffr, len);
		if (error != 0) {
			break;
		}
		/* an empty name means disconnect from the current peer */
		if (iffr.iffr_peer_name[0] == '\0') {
			error = feth_config(ifp, NULL);
			break;
		}

		/* ensure nul termination */
		iffr.iffr_peer_name[IFNAMSIZ - 1] = '\0';
		peer = ifunit(__unsafe_null_terminated_from_indexable(iffr.iffr_peer_name));
		if (peer == NULL) {
			error = ENXIO;
			break;
		}
		/* the peer must itself be a feth interface */
		if (ifnet_type(peer) != IFT_ETHER) {
			error = EINVAL;
			break;
		}
		if (strcmp(ifnet_name(peer), FAKE_ETHER_NAME) != 0) {
			error = EINVAL;
			break;
		}
		error = feth_config(ifp, peer);
		break;
	case IF_FAKE_S_CMD_SET_MEDIA:
		error = if_fake_request_copyin(user_addr, &iffr, len);
		if (error != 0) {
			break;
		}
		error = feth_set_media(ifp, &iffr);
		break;
	case IF_FAKE_S_CMD_SET_DEQUEUE_STALL:
		error = if_fake_request_copyin(user_addr, &iffr, len);
		if (error != 0) {
			break;
		}
		error = feth_enable_dequeue_stall(ifp,
		    iffr.iffr_dequeue_stall);
		break;
	default:
		error = EOPNOTSUPP;
		break;
	}
	return error;
}
4124 
/*
 * feth_get_drvspec
 * - handle SIOCGDRVSPEC sub-commands; currently only
 *   IF_FAKE_G_CMD_GET_PEER, which reports the peer interface name
 *   (empty when unconnected)
 */
static int
feth_get_drvspec(ifnet_t ifp, u_int32_t cmd, u_int32_t len,
    user_addr_t user_addr)
{
	int                     error = EOPNOTSUPP;
	if_fake_ref             fakeif;
	struct if_fake_request  iffr;
	ifnet_t                 peer;

	switch (cmd) {
	case IF_FAKE_G_CMD_GET_PEER:
		if (len < sizeof(iffr)) {
			error = EINVAL;
			break;
		}
		/* snapshot the peer pointer under the lock */
		feth_lock();
		fakeif = ifnet_get_if_fake(ifp);
		if (fakeif == NULL) {
			feth_unlock();
			error = EOPNOTSUPP;
			break;
		}
		peer = fakeif->iff_peer;
		feth_unlock();
		/* zeroed request => empty peer name when unconnected */
		bzero(&iffr, sizeof(iffr));
		if (peer != NULL) {
			strlcpy(iffr.iffr_peer_name,
			    if_name(peer),
			    sizeof(iffr.iffr_peer_name));
		}
		error = copyout(&iffr, user_addr, sizeof(iffr));
		break;
	default:
		break;
	}
	return error;
}
4162 
/*
 * union ifdrvu
 * - overlay of the 32-bit and 64-bit SIOC[SG]DRVSPEC argument
 *   structures; feth_ioctl picks the right layout based on the
 *   ioctl command
 */
union ifdrvu {
	struct ifdrv32  *ifdrvu_32;
	struct ifdrv64  *ifdrvu_64;
	void            *ifdrvu_p;
};
4168 
/*
 * feth_ioctl
 * - interface ioctl handler: media queries, MTU get/set, drvspec
 *   get/set dispatch, link-layer address, flags, and capabilities
 */
static int
feth_ioctl(ifnet_t ifp, u_long cmd, void * data)
{
	unsigned int            count;
	struct ifdevmtu *       devmtu_p;
	union ifdrvu            drv;
	uint32_t                drv_cmd;
	uint32_t                drv_len;
	boolean_t               drv_set_command = FALSE;
	int                     error = 0;
	struct ifmediareq32 *   ifmr;
	struct ifreq *          ifr;
	if_fake_ref             fakeif;
	int                     status;
	user_addr_t             user_addr;

	ifr = (struct ifreq *)data;
	switch (cmd) {
	case SIOCSIFADDR:
		/* mark the interface up when an address is assigned */
		ifnet_set_flags(ifp, IFF_UP, IFF_UP);
		break;

	case SIOCGIFMEDIA32:
	case SIOCGIFMEDIA64:
		feth_lock();
		fakeif = ifnet_get_if_fake(ifp);
		if (fakeif == NULL) {
			feth_unlock();
			return EOPNOTSUPP;
		}
		/* link is "active" only while connected to a peer */
		status = (fakeif->iff_peer != NULL)
		    ? (IFM_AVALID | IFM_ACTIVE) : IFM_AVALID;
		/* NOTE(review): the 32-bit struct is used for the common
		 * fields of both variants — assumes the leading layout
		 * matches; only the user list pointer differs */
		ifmr = (struct ifmediareq32 *)data;
		user_addr = (cmd == SIOCGIFMEDIA64) ?
		    ((struct ifmediareq64 *)data)->ifmu_ulist :
		    CAST_USER_ADDR_T(((struct ifmediareq32 *)data)->ifmu_ulist);
		count = ifmr->ifm_count;
		ifmr->ifm_active = (fakeif->iff_peer != NULL)
		    ? FAKE_DEFAULT_MEDIA : IFM_ETHER;
		ifmr->ifm_current = IFM_ETHER;
		ifmr->ifm_mask = 0;
		ifmr->ifm_status = status;
		if (user_addr == USER_ADDR_NULL) {
			/* size query: report how many media words we have */
			ifmr->ifm_count = fakeif->iff_media_count;
		} else if (count > 0) {
			/* copy out at most the stored media list */
			if (count > fakeif->iff_media_count) {
				count = fakeif->iff_media_count;
			}
			ifmr->ifm_count = count;
			error = copyout(&fakeif->iff_media_list, user_addr,
			    count * sizeof(int));
		}
		feth_unlock();
		break;

	case SIOCGIFDEVMTU:
		devmtu_p = &ifr->ifr_devmtu;
		devmtu_p->ifdm_current = ifnet_mtu(ifp);
		devmtu_p->ifdm_max = feth_max_mtu(ifp);
		devmtu_p->ifdm_min = IF_MINMTU;
		break;

	case SIOCSIFMTU:
		/* reject MTUs outside [IF_MINMTU, feth_max_mtu] */
		if ((unsigned int)ifr->ifr_mtu > feth_max_mtu(ifp) ||
		    ifr->ifr_mtu < IF_MINMTU) {
			error = EINVAL;
		} else {
			error = ifnet_set_mtu(ifp, ifr->ifr_mtu);
		}
		break;

	case SIOCSDRVSPEC32:
	case SIOCSDRVSPEC64:
		/* set commands require superuser */
		error = proc_suser(current_proc());
		if (error != 0) {
			break;
		}
		drv_set_command = TRUE;
		OS_FALLTHROUGH;
	case SIOCGDRVSPEC32:
	case SIOCGDRVSPEC64:
		/* unpack the 32/64-bit ifdrv into common fields */
		drv.ifdrvu_p = data;
		if (cmd == SIOCGDRVSPEC32 || cmd == SIOCSDRVSPEC32) {
			drv_cmd = drv.ifdrvu_32->ifd_cmd;
			drv_len = drv.ifdrvu_32->ifd_len;
			user_addr = CAST_USER_ADDR_T(drv.ifdrvu_32->ifd_data);
		} else {
			drv_cmd = drv.ifdrvu_64->ifd_cmd;
			drv_len = drv.ifdrvu_64->ifd_len;
			user_addr = drv.ifdrvu_64->ifd_data;
		}
		if (drv_set_command) {
			error = feth_set_drvspec(ifp, drv_cmd, drv_len,
			    user_addr);
		} else {
			error = feth_get_drvspec(ifp, drv_cmd, drv_len,
			    user_addr);
		}
		break;

	case SIOCSIFLLADDR:
		error = ifnet_set_lladdr(ifp, ifr->ifr_addr.sa_data,
		    ifr->ifr_addr.sa_len);
		break;

	case SIOCSIFFLAGS:
		/* keep IFF_RUNNING in sync with IFF_UP */
		if ((ifp->if_flags & IFF_UP) != 0) {
			/* marked up, set running if not already set */
			if ((ifp->if_flags & IFF_RUNNING) == 0) {
				/* set running */
				error = ifnet_set_flags(ifp, IFF_RUNNING,
				    IFF_RUNNING);
			}
		} else if ((ifp->if_flags & IFF_RUNNING) != 0) {
			/* marked down, clear running */
			error = ifnet_set_flags(ifp, 0, IFF_RUNNING);
		}
		break;

	case SIOCDIFADDR:
		/* test hook: optionally fail address removal */
		if (if_fake_fail_ioctl != 0) {
			FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
			    "%s: failing SIOCDIFADDR with EPWROFF",
			    ifp->if_xname);
			error = EPWROFF;
		}
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		/* multicast filter changes are accepted but ignored */
		error = 0;
		break;
	case SIOCSIFCAP: {
		uint32_t        cap;

		/* only the LRO capability can be toggled, and only when
		 * the instance was created with LRO support */
		feth_lock();
		fakeif = ifnet_get_if_fake(ifp);
		if (fakeif == NULL ||
		    (fakeif->iff_flags & IFF_FLAGS_LRO) == 0) {
			feth_unlock();
			return EOPNOTSUPP;
		}
		feth_unlock();
		cap = (ifr->ifr_reqcap & IFCAP_LRO) != 0 ? IFCAP_LRO : 0;
		error = ifnet_set_capabilities_enabled(ifp, cap, IFCAP_LRO);
		break;
	}
	default:
		error = EOPNOTSUPP;
		break;
	}
	return error;
}
4322 
/*
 * feth_if_free
 * - ifnet detach callback: break the ifp -> fakeif link, drop a
 *   fakeif reference, and release the ifnet
 */
static void
feth_if_free(ifnet_t ifp)
{
	if_fake_ref           fakeif;

	if (ifp == NULL) {
		return;
	}
	feth_lock();
	fakeif = ifnet_get_if_fake(ifp);
	if (fakeif == NULL) {
		/* already freed */
		feth_unlock();
		return;
	}
	/* sever the back-pointer under the lock */
	ifp->if_softc = NULL;
#if SKYWALK
	/* any pending doorbell thread call must already be gone */
	VERIFY(fakeif->iff_doorbell_tcall == NULL);
#endif /* SKYWALK */
	feth_unlock();
	feth_release(fakeif);
	ifnet_release(ifp);
	return;
}
4346 
4347 __private_extern__ void
if_fake_init(void)4348 if_fake_init(void)
4349 {
4350 	int error;
4351 
4352 #if SKYWALK
4353 	(void)feth_register_nexus_domain_provider();
4354 #endif /* SKYWALK */
4355 	error = if_clone_attach(&feth_cloner);
4356 	if (error != 0) {
4357 		return;
4358 	}
4359 	return;
4360 }
4361