xref: /xnu-10002.81.5/bsd/net/if_fake.c (revision 5e3eaea39dcf651e66cb99ba7d70e32cc4a99587)
1 /*
2  * Copyright (c) 2015-2022 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 /*
30  * if_fake.c
31  * - fake network interface used for testing
32  * - "feth" (e.g. "feth0", "feth1") is a virtual ethernet interface that allows
33  *   two instances to have their output/input paths "crossed-over" so that
34  *   output on one is input on the other
35  */
36 
37 /*
38  * Modification History:
39  *
40  * September 9, 2015	Dieter Siegmund ([email protected])
41  * - created
42  */
43 
44 #include <sys/param.h>
45 #include <sys/kernel.h>
46 #include <sys/malloc.h>
47 #include <sys/mbuf.h>
48 #include <sys/queue.h>
49 #include <sys/socket.h>
50 #include <sys/sockio.h>
51 #include <sys/sysctl.h>
52 #include <sys/systm.h>
53 #include <sys/kern_event.h>
54 #include <sys/mcache.h>
55 #include <sys/syslog.h>
56 
57 #include <net/bpf.h>
58 #include <net/ethernet.h>
59 #include <net/if.h>
60 #include <net/if_vlan_var.h>
61 #include <net/if_fake_var.h>
62 #include <net/if_arp.h>
63 #include <net/if_dl.h>
64 #include <net/if_ether.h>
65 #include <net/if_types.h>
66 #include <libkern/OSAtomic.h>
67 
68 #include <net/dlil.h>
69 
70 #include <net/kpi_interface.h>
71 #include <net/kpi_protocol.h>
72 
73 #include <kern/locks.h>
74 #include <kern/zalloc.h>
75 
76 #include <mach/mach_time.h>
77 
78 #ifdef INET
79 #include <netinet/in.h>
80 #include <netinet/if_ether.h>
81 #endif
82 
83 #include <net/if_media.h>
84 #include <net/ether_if_module.h>
85 #if SKYWALK
86 #include <skywalk/os_skywalk_private.h>
87 #include <skywalk/nexus/netif/nx_netif.h>
88 #include <skywalk/channel/channel_var.h>
89 #endif /* SKYWALK */
90 
91 static boolean_t
is_power_of_two(unsigned int val)92 is_power_of_two(unsigned int val)
93 {
94 	return (val & (val - 1)) == 0;
95 }
96 
97 #define FAKE_ETHER_NAME         "feth"
98 
99 SYSCTL_DECL(_net_link);
100 SYSCTL_NODE(_net_link, OID_AUTO, fake, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
101     "Fake interface");
102 
103 static int if_fake_txstart = 1;
104 SYSCTL_INT(_net_link_fake, OID_AUTO, txstart, CTLFLAG_RW | CTLFLAG_LOCKED,
105     &if_fake_txstart, 0, "Fake interface TXSTART mode");
106 
107 static int if_fake_hwcsum = 0;
108 SYSCTL_INT(_net_link_fake, OID_AUTO, hwcsum, CTLFLAG_RW | CTLFLAG_LOCKED,
109     &if_fake_hwcsum, 0, "Fake interface simulate hardware checksum");
110 
111 static int if_fake_nxattach = 0;
112 SYSCTL_INT(_net_link_fake, OID_AUTO, nxattach, CTLFLAG_RW | CTLFLAG_LOCKED,
113     &if_fake_nxattach, 0, "Fake interface auto-attach nexus");
114 
115 static int if_fake_bsd_mode = 1;
116 SYSCTL_INT(_net_link_fake, OID_AUTO, bsd_mode, CTLFLAG_RW | CTLFLAG_LOCKED,
117     &if_fake_bsd_mode, 0, "Fake interface attach as BSD interface");
118 
119 static int if_fake_debug = 0;
120 SYSCTL_INT(_net_link_fake, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
121     &if_fake_debug, 0, "Fake interface debug logs");
122 
/*
 * FETH_DPRINTF
 * - printf-style debug log, prefixed with the name of the calling function
 * - only prints while if_fake_debug is non-zero
 * - wrapped in do { } while (0) so that "FETH_DPRINTF(...);" is exactly one
 *   statement and can be the body of an unbraced if/else
 */
#define FETH_DPRINTF(fmt, ...)                                          \
	do {                                                            \
		if (if_fake_debug != 0) {                               \
			printf("%s " fmt, __func__, ## __VA_ARGS__);    \
		}                                                       \
	} while (0)
125 
126 static int if_fake_wmm_mode = 0;
127 SYSCTL_INT(_net_link_fake, OID_AUTO, wmm_mode, CTLFLAG_RW | CTLFLAG_LOCKED,
128     &if_fake_wmm_mode, 0, "Fake interface in 802.11 WMM mode");
129 
130 static int if_fake_multibuflet = 0;
131 SYSCTL_INT(_net_link_fake, OID_AUTO, multibuflet, CTLFLAG_RW | CTLFLAG_LOCKED,
132     &if_fake_multibuflet, 0, "Fake interface using multi-buflet packets");
133 
134 static int if_fake_low_latency = 0;
135 SYSCTL_INT(_net_link_fake, OID_AUTO, low_latency, CTLFLAG_RW | CTLFLAG_LOCKED,
136     &if_fake_low_latency, 0, "Fake interface with a low latency qset");
137 
138 static int if_fake_switch_combined_mode = 0;
139 SYSCTL_INT(_net_link_fake, OID_AUTO, switch_combined_mode,
140     CTLFLAG_RW | CTLFLAG_LOCKED, &if_fake_switch_combined_mode, 0,
141     "Switch a qset between combined and separate mode during dequeues");
142 
143 static int if_fake_switch_mode_frequency = 10;
144 SYSCTL_INT(_net_link_fake, OID_AUTO, switch_mode_frequency,
145     CTLFLAG_RW | CTLFLAG_LOCKED, &if_fake_switch_mode_frequency, 0,
146     "The number of dequeues before we switch between the combined and separated mode");
147 
148 static int if_fake_tso_support = 0;
149 SYSCTL_INT(_net_link_fake, OID_AUTO, tso_support, CTLFLAG_RW | CTLFLAG_LOCKED,
150     &if_fake_tso_support, 0, "Fake interface with support for TSO offload");
151 
152 #define DEFAULT_EXPIRATION_THRESHOLD 500 /* usec */
153 static int if_fake_expiration_threshold_us = DEFAULT_EXPIRATION_THRESHOLD;
154 SYSCTL_INT(_net_link_fake, OID_AUTO, expiration_threshold, CTLFLAG_RW | CTLFLAG_LOCKED,
155     &if_fake_expiration_threshold_us, DEFAULT_EXPIRATION_THRESHOLD,
156     "Expiration threshold (usec) for expiration testing");
157 
158 static int if_fake_lro = 0;
159 SYSCTL_INT(_net_link_fake, OID_AUTO, lro, CTLFLAG_RW | CTLFLAG_LOCKED,
160     &if_fake_lro, 0, "Fake interface report LRO capability");
161 
162 typedef enum {
163 	IFF_PP_MODE_GLOBAL = 0,         /* share a global pool */
164 	IFF_PP_MODE_PRIVATE = 1,        /* creates its own rx/tx pool */
165 	IFF_PP_MODE_PRIVATE_SPLIT = 2,  /* creates its own split rx & tx pool */
166 } iff_pktpool_mode_t;
167 static iff_pktpool_mode_t if_fake_pktpool_mode = IFF_PP_MODE_GLOBAL;
168 SYSCTL_INT(_net_link_fake, OID_AUTO, pktpool_mode, CTLFLAG_RW | CTLFLAG_LOCKED,
169     &if_fake_pktpool_mode, IFF_PP_MODE_GLOBAL,
170     "Fake interface packet pool mode (0 global, 1 private, 2 private split");
171 
172 #define FETH_LINK_LAYER_AGGRETATION_FACTOR_MAX 512
173 #define FETH_LINK_LAYER_AGGRETATION_FACTOR_DEF 96
174 static int if_fake_link_layer_aggregation_factor =
175     FETH_LINK_LAYER_AGGRETATION_FACTOR_DEF;
176 static int
177 feth_link_layer_aggregation_factor_sysctl SYSCTL_HANDLER_ARGS
178 {
179 #pragma unused(oidp, arg1, arg2)
180 	unsigned int new_value;
181 	int changed;
182 	int error;
183 
184 	error = sysctl_io_number(req, if_fake_link_layer_aggregation_factor,
185 	    sizeof(if_fake_link_layer_aggregation_factor), &new_value,
186 	    &changed);
187 	if (error == 0 && changed != 0) {
188 		if (new_value <= 0 ||
189 		    new_value > FETH_LINK_LAYER_AGGRETATION_FACTOR_MAX) {
190 			return EINVAL;
191 		}
192 		if_fake_link_layer_aggregation_factor = new_value;
193 	}
194 	return error;
195 }
196 
197 SYSCTL_PROC(_net_link_fake, OID_AUTO, link_layer_aggregation_factor,
198     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
199     0, 0, feth_link_layer_aggregation_factor_sysctl, "IU",
200     "Fake interface link layer aggregation factor");
201 
202 #define FETH_TX_HEADROOM_MAX      32
203 static unsigned int if_fake_tx_headroom = FETH_TX_HEADROOM_MAX;
204 static int
205 feth_tx_headroom_sysctl SYSCTL_HANDLER_ARGS
206 {
207 #pragma unused(oidp, arg1, arg2)
208 	unsigned int new_value;
209 	int changed;
210 	int error;
211 
212 	error = sysctl_io_number(req, if_fake_tx_headroom,
213 	    sizeof(if_fake_tx_headroom), &new_value, &changed);
214 	if (error == 0 && changed != 0) {
215 		if (new_value > FETH_TX_HEADROOM_MAX ||
216 		    (new_value % 8) != 0) {
217 			return EINVAL;
218 		}
219 		if_fake_tx_headroom = new_value;
220 	}
221 	return 0;
222 }
223 
224 SYSCTL_PROC(_net_link_fake, OID_AUTO, tx_headroom,
225     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
226     0, 0, feth_tx_headroom_sysctl, "IU", "Fake ethernet Tx headroom");
227 
228 static int if_fake_fcs = 0;
229 SYSCTL_INT(_net_link_fake, OID_AUTO, fcs, CTLFLAG_RW | CTLFLAG_LOCKED,
230     &if_fake_fcs, 0, "Fake interface using frame check sequence");
231 
232 #define FETH_TRAILER_LENGTH_MAX 28
233 char feth_trailer[FETH_TRAILER_LENGTH_MAX + 1] = "trailertrailertrailertrailer";
234 static unsigned int if_fake_trailer_length = 0;
235 static int
236 feth_trailer_length_sysctl SYSCTL_HANDLER_ARGS
237 {
238 #pragma unused(oidp, arg1, arg2)
239 	unsigned int new_value;
240 	int changed;
241 	int error;
242 
243 	error = sysctl_io_number(req, if_fake_trailer_length,
244 	    sizeof(if_fake_trailer_length), &new_value, &changed);
245 	if (error == 0 && changed != 0) {
246 		if (new_value > FETH_TRAILER_LENGTH_MAX) {
247 			return EINVAL;
248 		}
249 		if_fake_trailer_length = new_value;
250 	}
251 	return 0;
252 }
253 
254 SYSCTL_PROC(_net_link_fake, OID_AUTO, trailer_length,
255     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
256     feth_trailer_length_sysctl, "IU", "Fake interface frame trailer length");
257 
/* sysctl net.link.fake.max_mtu: default and upper bound */
#define FETH_MAX_MTU_DEFAULT    2048
#define FETH_MAX_MTU_MAX        ((16 * 1024) - ETHER_HDR_LEN)

static unsigned int if_fake_max_mtu = FETH_MAX_MTU_DEFAULT;

/*
 * sysctl net.link.fake.buflet_size and net.link.fake.tso_buf_size:
 * both are validated against FETH_BUFLET_SIZE_MIN/FETH_BUFLET_SIZE_MAX
 */
#define FETH_BUFLET_SIZE_MIN            512
#define FETH_BUFLET_SIZE_MAX            (32 * 1024)
#define FETH_TSO_BUFLET_SIZE            (16 * 1024)

static unsigned int if_fake_buflet_size = FETH_BUFLET_SIZE_MIN;
static unsigned int if_fake_tso_buffer_size = FETH_TSO_BUFLET_SIZE;
271 
272 static int
273 feth_tso_buffer_size_sysctl SYSCTL_HANDLER_ARGS
274 {
275 #pragma unused(oidp, arg1, arg2)
276 	unsigned int new_value;
277 	int changed;
278 	int error;
279 
280 	error = sysctl_io_number(req, if_fake_tso_buffer_size,
281 	    sizeof(if_fake_tso_buffer_size), &new_value, &changed);
282 	if (error == 0 && changed != 0) {
283 		/* must be a power of 2 between min and max */
284 		if (new_value > FETH_BUFLET_SIZE_MAX ||
285 		    new_value < FETH_BUFLET_SIZE_MIN ||
286 		    !is_power_of_two(new_value)) {
287 			return EINVAL;
288 		}
289 		if_fake_tso_buffer_size = new_value;
290 	}
291 	return 0;
292 }
293 
294 SYSCTL_PROC(_net_link_fake, OID_AUTO, tso_buf_size,
295     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
296     0, 0, feth_tso_buffer_size_sysctl, "IU", "Fake interface TSO buffer size");
297 
298 static int
299 feth_max_mtu_sysctl SYSCTL_HANDLER_ARGS
300 {
301 #pragma unused(oidp, arg1, arg2)
302 	unsigned int new_value;
303 	int changed;
304 	int error;
305 
306 	error = sysctl_io_number(req, if_fake_max_mtu,
307 	    sizeof(if_fake_max_mtu), &new_value, &changed);
308 	if (error == 0 && changed != 0) {
309 		if (new_value > FETH_MAX_MTU_MAX ||
310 		    new_value < ETHERMTU ||
311 		    new_value <= if_fake_buflet_size) {
312 			return EINVAL;
313 		}
314 		if_fake_max_mtu = new_value;
315 	}
316 	return 0;
317 }
318 
319 SYSCTL_PROC(_net_link_fake, OID_AUTO, max_mtu,
320     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
321     0, 0, feth_max_mtu_sysctl, "IU", "Fake interface maximum MTU");
322 
323 static int
324 feth_buflet_size_sysctl SYSCTL_HANDLER_ARGS
325 {
326 #pragma unused(oidp, arg1, arg2)
327 	unsigned int new_value;
328 	int changed;
329 	int error;
330 
331 	error = sysctl_io_number(req, if_fake_buflet_size,
332 	    sizeof(if_fake_buflet_size), &new_value, &changed);
333 	if (error == 0 && changed != 0) {
334 		/* must be a power of 2 between min and max */
335 		if (new_value > FETH_BUFLET_SIZE_MAX ||
336 		    new_value < FETH_BUFLET_SIZE_MIN ||
337 		    !is_power_of_two(new_value) ||
338 		    new_value >= if_fake_max_mtu) {
339 			return EINVAL;
340 		}
341 		if_fake_buflet_size = new_value;
342 	}
343 	return 0;
344 }
345 
346 SYSCTL_PROC(_net_link_fake, OID_AUTO, buflet_size,
347     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
348     0, 0, feth_buflet_size_sysctl, "IU", "Fake interface buflet size");
349 
350 static unsigned int if_fake_user_access = 0;
351 
352 static int
353 feth_user_access_sysctl SYSCTL_HANDLER_ARGS
354 {
355 #pragma unused(oidp, arg1, arg2)
356 	unsigned int new_value;
357 	int changed;
358 	int error;
359 
360 	error = sysctl_io_number(req, if_fake_user_access,
361 	    sizeof(if_fake_user_access), &new_value, &changed);
362 	if (error == 0 && changed != 0) {
363 		if (new_value != 0) {
364 			if (new_value != 1) {
365 				return EINVAL;
366 			}
367 		}
368 		if_fake_user_access = new_value;
369 	}
370 	return 0;
371 }
372 
373 SYSCTL_PROC(_net_link_fake, OID_AUTO, user_access,
374     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
375     0, 0, feth_user_access_sysctl, "IU", "Fake interface user access");
376 
377 /* sysctl net.link.fake.if_adv_intvl (unit: millisecond) */
378 #define FETH_IF_ADV_INTVL_MIN            10
379 #define FETH_IF_ADV_INTVL_MAX            INT_MAX
380 
381 static int if_fake_if_adv_interval = 0; /* no interface advisory */
382 static int
383 feth_if_adv_interval_sysctl SYSCTL_HANDLER_ARGS
384 {
385 #pragma unused(oidp, arg1, arg2)
386 	unsigned int new_value;
387 	int changed;
388 	int error;
389 
390 	error = sysctl_io_number(req, if_fake_if_adv_interval,
391 	    sizeof(if_fake_if_adv_interval), &new_value, &changed);
392 	if (error == 0 && changed != 0) {
393 		if ((new_value != 0) && (new_value > FETH_IF_ADV_INTVL_MAX ||
394 		    new_value < FETH_IF_ADV_INTVL_MIN)) {
395 			return EINVAL;
396 		}
397 		if_fake_if_adv_interval = new_value;
398 	}
399 	return 0;
400 }
401 
402 SYSCTL_PROC(_net_link_fake, OID_AUTO, if_adv_intvl,
403     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
404     feth_if_adv_interval_sysctl, "IU",
405     "Fake interface will generate interface advisories reports at the specified interval in ms");
406 
407 /* sysctl net.link.fake.tx_drops */
/*
 * Fake ethernet will drop packets on the transmit path at the specified
 * rate, i.e. drop one in every if_fake_tx_drops packets.
 */
412 #define FETH_TX_DROPS_MIN            0
413 #define FETH_TX_DROPS_MAX            INT_MAX
414 static int if_fake_tx_drops = 0; /* no packets are dropped */
415 static int
416 feth_fake_tx_drops_sysctl SYSCTL_HANDLER_ARGS
417 {
418 #pragma unused(oidp, arg1, arg2)
419 	unsigned int new_value;
420 	int changed;
421 	int error;
422 
423 	error = sysctl_io_number(req, if_fake_tx_drops,
424 	    sizeof(if_fake_tx_drops), &new_value, &changed);
425 	if (error == 0 && changed != 0) {
426 		if (new_value > FETH_TX_DROPS_MAX ||
427 		    new_value < FETH_TX_DROPS_MIN) {
428 			return EINVAL;
429 		}
430 		if_fake_tx_drops = new_value;
431 	}
432 	return 0;
433 }
434 
435 SYSCTL_PROC(_net_link_fake, OID_AUTO, tx_drops,
436     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
437     feth_fake_tx_drops_sysctl, "IU",
438     "Fake interface will intermittently drop packets on Tx path");
439 
/* sysctl net.link.fake.tx_exp_policy */
441 
442 typedef enum {
443 	IFF_TX_EXP_POLICY_DISABLED = 0,          /* Expiry notification disabled */
444 	IFF_TX_EXP_POLICY_DROP_AND_NOTIFY = 1,   /* Expiry notification enabled; drop + notify mode */
445 	IFF_TX_EXP_POLICY_NOTIFY_ONLY = 2,       /* Expiry notification enabled; notify only mode */
446 	IFF_TX_EXP_POLICY_METADATA = 3,          /* Expiry notification enabled; use packet metadata */
447 } iff_tx_exp_policy_t;
448 static iff_tx_exp_policy_t if_fake_tx_exp_policy = IFF_TX_EXP_POLICY_DISABLED;
449 
450 static int
451 feth_fake_tx_exp_policy_sysctl SYSCTL_HANDLER_ARGS
452 {
453 #pragma unused(oidp, arg1, arg2)
454 	unsigned int new_value;
455 	int changed;
456 	int error;
457 
458 	error = sysctl_io_number(req, if_fake_tx_exp_policy,
459 	    sizeof(if_fake_tx_exp_policy), &new_value, &changed);
460 	FETH_DPRINTF("if_fake_tx_exp_policy: %u -> %u (%d)",
461 	    if_fake_tx_exp_policy, new_value, changed);
462 	if (error == 0 && changed != 0) {
463 		if (new_value > IFF_TX_EXP_POLICY_METADATA ||
464 		    new_value < IFF_TX_EXP_POLICY_DISABLED) {
465 			return EINVAL;
466 		}
467 		if_fake_tx_exp_policy = new_value;
468 	}
469 	return 0;
470 }
471 SYSCTL_PROC(_net_link_fake, OID_AUTO, tx_exp_policy,
472     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
473     feth_fake_tx_exp_policy_sysctl, "IU",
474     "Fake interface handling policy for expired TX attempts "
475     "(0 disabled, 1 drop and notify, 2 notify only, 3 packet metadata)");
476 
477 /* sysctl net.link.fake.tx_completion_mode */
478 typedef enum {
479 	IFF_TX_COMPL_MODE_SYNC = 0,
480 	IFF_TX_COMPL_MODE_ASYNC = 1,
481 } iff_tx_completion_mode_t;
482 static iff_tx_completion_mode_t if_tx_completion_mode = IFF_TX_COMPL_MODE_SYNC;
483 static int
484 feth_fake_tx_completion_mode_sysctl SYSCTL_HANDLER_ARGS
485 {
486 #pragma unused(oidp, arg1, arg2)
487 	unsigned int new_value;
488 	int changed;
489 	int error;
490 
491 	error = sysctl_io_number(req, if_tx_completion_mode,
492 	    sizeof(if_tx_completion_mode), &new_value, &changed);
493 	if (error == 0 && changed != 0) {
494 		if (new_value > IFF_TX_COMPL_MODE_ASYNC ||
495 		    new_value < IFF_TX_COMPL_MODE_SYNC) {
496 			return EINVAL;
497 		}
498 		if_tx_completion_mode = new_value;
499 	}
500 	return 0;
501 }
502 SYSCTL_PROC(_net_link_fake, OID_AUTO, tx_completion_mode,
503     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
504     feth_fake_tx_completion_mode_sysctl, "IU",
505     "Fake interface tx completion mode (0 synchronous, 1 asynchronous)");
506 
507 /* sysctl net.link.fake.llink_cnt */
508 
509 /* The maximum number of logical links (including default link) */
510 #define FETH_MAX_LLINKS 16
511 /*
512  * The default number of logical links (including default link).
513  * Zero means logical link mode is disabled.
514  */
515 #define FETH_DEF_LLINKS 0
516 
517 static uint32_t if_fake_llink_cnt = FETH_DEF_LLINKS;
518 static int
519 feth_fake_llink_cnt_sysctl SYSCTL_HANDLER_ARGS
520 {
521 #pragma unused(oidp, arg1, arg2)
522 	unsigned int new_value;
523 	int changed;
524 	int error;
525 
526 	error = sysctl_io_number(req, if_fake_llink_cnt,
527 	    sizeof(if_fake_llink_cnt), &new_value, &changed);
528 	if (error == 0 && changed != 0) {
529 		if (new_value > FETH_MAX_LLINKS) {
530 			return EINVAL;
531 		}
532 		if_fake_llink_cnt = new_value;
533 	}
534 	return 0;
535 }
536 
537 SYSCTL_PROC(_net_link_fake, OID_AUTO, llink_cnt,
538     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
539     feth_fake_llink_cnt_sysctl, "IU",
540     "Fake interface logical link count");
541 
542 /* sysctl net.link.fake.qset_cnt */
543 
544 /* The maximum number of qsets for each logical link */
545 #define FETH_MAX_QSETS  16
546 /* The default number of qsets for each logical link */
547 #define FETH_DEF_QSETS  4
548 
549 static uint32_t if_fake_qset_cnt = FETH_DEF_QSETS;
550 static int
551 feth_fake_qset_cnt_sysctl SYSCTL_HANDLER_ARGS
552 {
553 #pragma unused(oidp, arg1, arg2)
554 	unsigned int new_value;
555 	int changed;
556 	int error;
557 
558 	error = sysctl_io_number(req, if_fake_qset_cnt,
559 	    sizeof(if_fake_qset_cnt), &new_value, &changed);
560 	if (error == 0 && changed != 0) {
561 		if (new_value == 0 ||
562 		    new_value > FETH_MAX_QSETS) {
563 			return EINVAL;
564 		}
565 		if_fake_qset_cnt = new_value;
566 	}
567 	return 0;
568 }
569 
570 SYSCTL_PROC(_net_link_fake, OID_AUTO, qset_cnt,
571     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
572     feth_fake_qset_cnt_sysctl, "IU",
573     "Fake interface queue set count");
574 
575 /**
576 ** virtual ethernet structures, types
577 **/
578 
/* ring counts; the WMM-mode counts double as the maxima */
#define IFF_NUM_TX_RINGS_WMM_MODE       4
#define IFF_NUM_RX_RINGS_WMM_MODE       1
#define IFF_MAX_TX_RINGS        IFF_NUM_TX_RINGS_WMM_MODE
#define IFF_MAX_RX_RINGS        IFF_NUM_RX_RINGS_WMM_MODE

/* queue counts; the WMM-mode counts double as the maxima */
#define IFF_NUM_TX_QUEUES_WMM_MODE      4
#define IFF_NUM_RX_QUEUES_WMM_MODE      1
#define IFF_MAX_TX_QUEUES       IFF_NUM_TX_QUEUES_WMM_MODE
#define IFF_MAX_RX_QUEUES       IFF_NUM_RX_QUEUES_WMM_MODE

#define IFF_MAX_BATCH_SIZE 32

/* bit flags kept in if_fake.iff_flags */
typedef uint16_t        iff_flags_t;
#define IFF_FLAGS_HWCSUM                0x0001
#define IFF_FLAGS_BSD_MODE              0x0002
#define IFF_FLAGS_DETACHING             0x0004
#define IFF_FLAGS_WMM_MODE              0x0008
#define IFF_FLAGS_MULTIBUFLETS          0x0010
#define IFF_FLAGS_TSO_SUPPORT           0x0020
#define IFF_FLAGS_LRO                   0x0040
598 
599 #if SKYWALK
600 
601 typedef struct {
602 	uuid_t                  fnx_provider;
603 	uuid_t                  fnx_instance;
604 } fake_nx, *fake_nx_t;
605 
606 typedef struct {
607 	kern_netif_queue_t      fq_queue;
608 } fake_queue;
609 
610 typedef struct {
611 	kern_netif_qset_t       fqs_qset; /* provided by xnu */
612 	fake_queue              fqs_rx_queue[IFF_MAX_RX_QUEUES];
613 	fake_queue              fqs_tx_queue[IFF_MAX_TX_QUEUES];
614 	uint32_t                fqs_rx_queue_cnt;
615 	uint32_t                fqs_tx_queue_cnt;
616 	uint32_t                fqs_llink_idx;
617 	uint32_t                fqs_idx;
618 	uint32_t                fqs_dequeue_cnt;
619 	uint64_t                fqs_id;
620 	boolean_t               fqs_combined_mode;
621 } fake_qset;
622 
623 typedef struct {
624 	uint64_t                fl_id;
625 	uint32_t                fl_idx;
626 	uint32_t                fl_qset_cnt;
627 	fake_qset               fl_qset[FETH_MAX_QSETS];
628 } fake_llink;
629 
630 static kern_pbufpool_t         S_pp;
631 
632 #define IFF_TT_OUTPUT   0x01 /* generate trace_tag on output */
633 #define IFF_TT_INPUT    0x02 /* generate trace_tag on input */
634 static int if_fake_trace_tag_flags = 0;
635 SYSCTL_INT(_net_link_fake, OID_AUTO, trace_tag, CTLFLAG_RW | CTLFLAG_LOCKED,
636     &if_fake_trace_tag_flags, 0, "Fake interface generate trace_tag");
637 static packet_trace_tag_t if_fake_trace_tag_current = 1;
638 
639 #endif /* SKYWALK */
640 
641 struct if_fake {
642 	char                    iff_name[IFNAMSIZ]; /* our unique id */
643 	ifnet_t                 iff_ifp;
644 	iff_flags_t             iff_flags;
645 	uint32_t                iff_retain_count;
646 	ifnet_t                 iff_peer;       /* the other end */
647 	int                     iff_media_current;
648 	int                     iff_media_active;
649 	uint32_t                iff_media_count;
650 	int                     iff_media_list[IF_FAKE_MEDIA_LIST_MAX];
651 	struct mbuf *           iff_pending_tx_packet;
652 	boolean_t               iff_start_busy;
653 	unsigned int            iff_max_mtu;
654 	uint32_t                iff_fcs;
655 	uint32_t                iff_trailer_length;
656 #if SKYWALK
657 	fake_nx                 iff_nx;
658 	struct netif_stats      *iff_nifs;
659 	uint32_t                iff_nifs_ref;
660 	uint32_t                iff_llink_cnt;
661 	kern_channel_ring_t     iff_rx_ring[IFF_MAX_RX_RINGS];
662 	kern_channel_ring_t     iff_tx_ring[IFF_MAX_TX_RINGS];
663 	fake_llink             *iff_llink __counted_by(FETH_MAX_LLINKS);
664 	thread_call_t           iff_doorbell_tcall;
665 	thread_call_t           iff_if_adv_tcall;
666 	boolean_t               iff_doorbell_tcall_active;
667 	boolean_t               iff_waiting_for_tcall;
668 	boolean_t               iff_channel_connected;
669 	iff_pktpool_mode_t      iff_pp_mode;
670 	kern_pbufpool_t         iff_rx_pp;
671 	kern_pbufpool_t         iff_tx_pp;
672 	uint32_t                iff_tx_headroom;
673 	unsigned int            iff_adv_interval;
674 	uint32_t                iff_tx_drop_rate;
675 	uint32_t                iff_tx_pkts_count;
676 	iff_tx_completion_mode_t iff_tx_completion_mode;
677 	bool                    iff_intf_adv_enabled;
678 	void                    *iff_intf_adv_kern_ctx;
679 	kern_nexus_capab_interface_advisory_notify_fn_t iff_intf_adv_notify;
680 	iff_tx_exp_policy_t     iff_tx_exp_policy;
681 #endif /* SKYWALK */
682 };
683 
684 typedef struct if_fake * if_fake_ref;
685 
686 static if_fake_ref
687 ifnet_get_if_fake(ifnet_t ifp);
688 
689 static inline boolean_t
feth_in_bsd_mode(if_fake_ref fakeif)690 feth_in_bsd_mode(if_fake_ref fakeif)
691 {
692 	return (fakeif->iff_flags & IFF_FLAGS_BSD_MODE) != 0;
693 }
694 
695 static inline void
feth_set_detaching(if_fake_ref fakeif)696 feth_set_detaching(if_fake_ref fakeif)
697 {
698 	fakeif->iff_flags |= IFF_FLAGS_DETACHING;
699 }
700 
701 static inline boolean_t
feth_is_detaching(if_fake_ref fakeif)702 feth_is_detaching(if_fake_ref fakeif)
703 {
704 	return (fakeif->iff_flags & IFF_FLAGS_DETACHING) != 0;
705 }
706 
707 static int
feth_enable_dequeue_stall(ifnet_t ifp,uint32_t enable)708 feth_enable_dequeue_stall(ifnet_t ifp, uint32_t enable)
709 {
710 	int error;
711 
712 	if (enable != 0) {
713 		error = ifnet_disable_output(ifp);
714 	} else {
715 		error = ifnet_enable_output(ifp);
716 	}
717 
718 	return error;
719 }
720 
721 #if SKYWALK
722 static inline boolean_t
feth_in_wmm_mode(if_fake_ref fakeif)723 feth_in_wmm_mode(if_fake_ref fakeif)
724 {
725 	return (fakeif->iff_flags & IFF_FLAGS_WMM_MODE) != 0;
726 }
727 
728 static inline boolean_t
feth_using_multibuflets(if_fake_ref fakeif)729 feth_using_multibuflets(if_fake_ref fakeif)
730 {
731 	return (fakeif->iff_flags & IFF_FLAGS_MULTIBUFLETS) != 0;
732 }
733 static void feth_detach_netif_nexus(if_fake_ref fakeif);
734 
735 static inline boolean_t
feth_has_intf_advisory_configured(if_fake_ref fakeif)736 feth_has_intf_advisory_configured(if_fake_ref fakeif)
737 {
738 	return fakeif->iff_adv_interval > 0;
739 }
740 
741 static inline bool
feth_supports_tso(if_fake_ref fakeif)742 feth_supports_tso(if_fake_ref fakeif)
743 {
744 	return (fakeif->iff_flags & IFF_FLAGS_TSO_SUPPORT) != 0;
745 }
746 #endif /* SKYWALK */
747 
748 #define FETH_MAXUNIT    IF_MAXUNIT
749 #define FETH_ZONE_MAX_ELEM      MIN(IFNETS_MAX, FETH_MAXUNIT)
750 
751 static  int feth_clone_create(struct if_clone *, u_int32_t, void *);
752 static  int feth_clone_destroy(ifnet_t);
753 static  int feth_output(ifnet_t ifp, struct mbuf *m);
754 static  void feth_start(ifnet_t ifp);
755 static  int feth_ioctl(ifnet_t ifp, u_long cmd, void * addr);
756 static  int feth_config(ifnet_t ifp, ifnet_t peer);
757 static  void feth_if_free(ifnet_t ifp);
758 static  void feth_ifnet_set_attrs(if_fake_ref fakeif, ifnet_t ifp);
759 static  void feth_free(if_fake_ref fakeif);
760 
761 static struct if_clone
762     feth_cloner = IF_CLONE_INITIALIZER(FAKE_ETHER_NAME,
763     feth_clone_create,
764     feth_clone_destroy,
765     0,
766     FETH_MAXUNIT);
767 static  void interface_link_event(ifnet_t ifp, u_int32_t event_code);
768 
769 /* some media words to pretend to be ethernet */
770 static int default_media_words[] = {
771 	IFM_MAKEWORD(IFM_ETHER, 0, 0, 0),
772 	IFM_MAKEWORD(IFM_ETHER, IFM_10G_T, IFM_FDX, 0),
773 	IFM_MAKEWORD(IFM_ETHER, IFM_2500_T, IFM_FDX, 0),
774 	IFM_MAKEWORD(IFM_ETHER, IFM_5000_T, IFM_FDX, 0),
775 
776 	IFM_MAKEWORD(IFM_ETHER, IFM_10G_KX4, IFM_FDX, 0),
777 	IFM_MAKEWORD(IFM_ETHER, IFM_20G_KR2, IFM_FDX, 0),
778 	IFM_MAKEWORD(IFM_ETHER, IFM_2500_SX, IFM_FDX, 0),
779 	IFM_MAKEWORD(IFM_ETHER, IFM_25G_KR, IFM_FDX, 0),
780 	IFM_MAKEWORD(IFM_ETHER, IFM_40G_SR4, IFM_FDX, 0),
781 	IFM_MAKEWORD(IFM_ETHER, IFM_50G_CR2, IFM_FDX, 0),
782 	IFM_MAKEWORD(IFM_ETHER, IFM_56G_R4, IFM_FDX, 0),
783 	IFM_MAKEWORD(IFM_ETHER, IFM_100G_CR4, IFM_FDX, 0),
784 	IFM_MAKEWORD(IFM_ETHER, IFM_400G_AUI8, IFM_FDX, 0),
785 };
786 #define default_media_words_count (sizeof(default_media_words)          \
787 	                           / sizeof (default_media_words[0]))
788 
789 /**
790 ** veth locks
791 **/
792 
793 static LCK_GRP_DECLARE(feth_lck_grp, "fake");
794 static LCK_MTX_DECLARE(feth_lck_mtx, &feth_lck_grp);
795 
796 static inline void
feth_lock(void)797 feth_lock(void)
798 {
799 	lck_mtx_lock(&feth_lck_mtx);
800 }
801 
802 static inline void
feth_unlock(void)803 feth_unlock(void)
804 {
805 	lck_mtx_unlock(&feth_lck_mtx);
806 }
807 
808 static inline int
get_max_mtu(int bsd_mode,unsigned int max_mtu)809 get_max_mtu(int bsd_mode, unsigned int max_mtu)
810 {
811 	unsigned int    mtu;
812 
813 	if (bsd_mode != 0) {
814 		mtu = (njcl > 0) ? (M16KCLBYTES - ETHER_HDR_LEN)
815 		    : MBIGCLBYTES - ETHER_HDR_LEN;
816 		if (mtu > max_mtu) {
817 			mtu = max_mtu;
818 		}
819 	} else {
820 		mtu = max_mtu;
821 	}
822 	return mtu;
823 }
824 
825 static inline unsigned int
feth_max_mtu(ifnet_t ifp)826 feth_max_mtu(ifnet_t ifp)
827 {
828 	if_fake_ref     fakeif;
829 	unsigned int    max_mtu = ETHERMTU;
830 
831 	feth_lock();
832 	fakeif = ifnet_get_if_fake(ifp);
833 	if (fakeif != NULL) {
834 		max_mtu = fakeif->iff_max_mtu;
835 	}
836 	feth_unlock();
837 	return max_mtu;
838 }
839 
840 static void
feth_free(if_fake_ref fakeif)841 feth_free(if_fake_ref fakeif)
842 {
843 	VERIFY(fakeif->iff_retain_count == 0);
844 	if (feth_in_bsd_mode(fakeif)) {
845 		if (fakeif->iff_pending_tx_packet) {
846 			m_freem(fakeif->iff_pending_tx_packet);
847 		}
848 	}
849 #if SKYWALK
850 	else {
851 		if (fakeif->iff_pp_mode == IFF_PP_MODE_GLOBAL) {
852 			VERIFY(fakeif->iff_rx_pp == S_pp);
853 			VERIFY(fakeif->iff_tx_pp == S_pp);
854 			pp_release(fakeif->iff_rx_pp);
855 			fakeif->iff_rx_pp = NULL;
856 			pp_release(fakeif->iff_tx_pp);
857 			fakeif->iff_tx_pp = NULL;
858 			feth_lock();
859 			if (S_pp->pp_refcnt == 1) {
860 				pp_release(S_pp);
861 				S_pp = NULL;
862 			}
863 			feth_unlock();
864 		} else {
865 			if (fakeif->iff_rx_pp != NULL) {
866 				pp_release(fakeif->iff_rx_pp);
867 				fakeif->iff_rx_pp = NULL;
868 			}
869 			if (fakeif->iff_tx_pp != NULL) {
870 				pp_release(fakeif->iff_tx_pp);
871 				fakeif->iff_tx_pp = NULL;
872 			}
873 		}
874 	}
875 #endif /* SKYWALK */
876 
877 	FETH_DPRINTF("%s\n", fakeif->iff_name);
878 	kfree_type(fake_llink, FETH_MAX_LLINKS, fakeif->iff_llink);
879 	kfree_type(struct if_fake, fakeif);
880 }
881 
882 static void
feth_release(if_fake_ref fakeif)883 feth_release(if_fake_ref fakeif)
884 {
885 	u_int32_t               old_retain_count;
886 
887 	old_retain_count = OSDecrementAtomic(&fakeif->iff_retain_count);
888 	switch (old_retain_count) {
889 	case 0:
890 		VERIFY(old_retain_count != 0);
891 		break;
892 	case 1:
893 		feth_free(fakeif);
894 		break;
895 	default:
896 		break;
897 	}
898 	return;
899 }
900 
901 #if SKYWALK
902 
903 static void
feth_retain(if_fake_ref fakeif)904 feth_retain(if_fake_ref fakeif)
905 {
906 	OSIncrementAtomic(&fakeif->iff_retain_count);
907 }
908 
909 static void
feth_packet_pool_init_prepare(if_fake_ref fakeif,struct kern_pbufpool_init * pp_init)910 feth_packet_pool_init_prepare(if_fake_ref fakeif,
911     struct kern_pbufpool_init *pp_init)
912 {
913 	uint32_t max_mtu = fakeif->iff_max_mtu;
914 	uint32_t buflet_size = if_fake_buflet_size;
915 
916 	bzero(pp_init, sizeof(*pp_init));
917 	pp_init->kbi_version = KERN_PBUFPOOL_CURRENT_VERSION;
918 	pp_init->kbi_flags |= KBIF_VIRTUAL_DEVICE;
919 	pp_init->kbi_packets = 1024; /* TBD configurable */
920 	if (feth_supports_tso(fakeif)) {
921 		buflet_size = if_fake_tso_buffer_size;
922 	}
923 	if (feth_using_multibuflets(fakeif)) {
924 		pp_init->kbi_bufsize = buflet_size;
925 		pp_init->kbi_max_frags = howmany(max_mtu, buflet_size);
926 		pp_init->kbi_buflets = pp_init->kbi_packets *
927 		    pp_init->kbi_max_frags;
928 		pp_init->kbi_flags |= KBIF_BUFFER_ON_DEMAND;
929 	} else {
930 		pp_init->kbi_bufsize = max(max_mtu, buflet_size);
931 		pp_init->kbi_max_frags = 1;
932 		pp_init->kbi_buflets = pp_init->kbi_packets;
933 	}
934 	pp_init->kbi_buf_seg_size = skmem_usr_buf_seg_size;
935 	if (if_fake_user_access != 0) {
936 		pp_init->kbi_flags |= KBIF_USER_ACCESS;
937 	}
938 	pp_init->kbi_ctx = NULL;
939 	pp_init->kbi_ctx_retain = NULL;
940 	pp_init->kbi_ctx_release = NULL;
941 }
942 
943 static errno_t
feth_packet_pool_make(if_fake_ref fakeif)944 feth_packet_pool_make(if_fake_ref fakeif)
945 {
946 	struct kern_pbufpool_init pp_init;
947 	errno_t err;
948 
949 	feth_packet_pool_init_prepare(fakeif, &pp_init);
950 
951 	switch (fakeif->iff_pp_mode) {
952 	case IFF_PP_MODE_GLOBAL:
953 		feth_lock();
954 		if (S_pp == NULL) {
955 			(void)snprintf((char *)pp_init.kbi_name,
956 			    sizeof(pp_init.kbi_name), "%s", "feth shared pp");
957 			err = kern_pbufpool_create(&pp_init, &S_pp, NULL);
958 		}
959 		pp_retain(S_pp);
960 		feth_unlock();
961 		fakeif->iff_rx_pp = S_pp;
962 		pp_retain(S_pp);
963 		fakeif->iff_tx_pp = S_pp;
964 		break;
965 	case IFF_PP_MODE_PRIVATE:
966 		(void)snprintf((char *)pp_init.kbi_name,
967 		    sizeof(pp_init.kbi_name), "%s pp", fakeif->iff_name);
968 		err = kern_pbufpool_create(&pp_init, &fakeif->iff_rx_pp, NULL);
969 		pp_retain(fakeif->iff_rx_pp);
970 		fakeif->iff_tx_pp = fakeif->iff_rx_pp;
971 		break;
972 	case IFF_PP_MODE_PRIVATE_SPLIT:
973 		(void)snprintf((char *)pp_init.kbi_name,
974 		    sizeof(pp_init.kbi_name), "%s rx pp", fakeif->iff_name);
975 		pp_init.kbi_flags &= ~(KBIF_IODIR_IN | KBIF_IODIR_OUT |
976 		    KBIF_BUFFER_ON_DEMAND | KBIF_KERNEL_READONLY);
977 		pp_init.kbi_flags |= (KBIF_IODIR_IN | KBIF_BUFFER_ON_DEMAND);
978 		pp_init.kbi_packets = 1024;
979 		pp_init.kbi_bufsize = if_fake_link_layer_aggregation_factor * 1024;
980 		err = kern_pbufpool_create(&pp_init, &fakeif->iff_rx_pp, NULL);
981 		if (err != 0) {
982 			printf("%s: rx pp create failed %d\n", __func__, err);
983 			return err;
984 		}
985 		pp_init.kbi_flags &= ~(KBIF_IODIR_IN | KBIF_IODIR_OUT |
986 		    KBIF_BUFFER_ON_DEMAND | KBIF_KERNEL_READONLY);
987 		pp_init.kbi_flags |= KBIF_IODIR_OUT;
988 		pp_init.kbi_packets = 1024;            /* TBD configurable */
989 		pp_init.kbi_bufsize = fakeif->iff_max_mtu;
990 		(void)snprintf((char *)pp_init.kbi_name,
991 		    sizeof(pp_init.kbi_name), "%s tx pp", fakeif->iff_name);
992 		err = kern_pbufpool_create(&pp_init, &fakeif->iff_tx_pp, NULL);
993 		if (err != 0) {
994 			printf("%s: tx pp create failed %d\n", __func__, err);
995 			pp_release(fakeif->iff_rx_pp);
996 			return err;
997 		}
998 		break;
999 	default:
1000 		VERIFY(0);
1001 		__builtin_unreachable();
1002 	}
1003 
1004 	return 0;
1005 }
1006 
1007 static void
feth_packet_set_trace_tag(kern_packet_t ph,int flag)1008 feth_packet_set_trace_tag(kern_packet_t ph, int flag)
1009 {
1010 	if (if_fake_trace_tag_flags & flag) {
1011 		if (++if_fake_trace_tag_current == 0) {
1012 			if_fake_trace_tag_current = 1;
1013 		}
1014 		kern_packet_set_trace_tag(ph, if_fake_trace_tag_current);
1015 	}
1016 }
1017 
1018 static errno_t
feth_clone_packet(if_fake_ref dif,kern_packet_t sph,kern_packet_t * pdph)1019 feth_clone_packet(if_fake_ref dif, kern_packet_t sph, kern_packet_t *pdph)
1020 {
1021 	errno_t err = 0;
1022 	kern_pbufpool_t pp = dif->iff_rx_pp;
1023 	kern_packet_t dph = 0, dph0 = 0;
1024 	kern_buflet_t sbuf, dbuf0 = NULL, dbuf;
1025 	void *saddr, *daddr;
1026 	uint32_t soff, doff;
1027 	uint32_t slen, dlen;
1028 	uint32_t dlim0, dlim;
1029 
1030 	sbuf = kern_packet_get_next_buflet(sph, NULL);
1031 	saddr = kern_buflet_get_data_address(sbuf);
1032 	doff = soff = kern_buflet_get_data_offset(sbuf);
1033 	dlen = slen = kern_buflet_get_data_length(sbuf);
1034 
1035 	/* packet clone is only supported for single-buflet */
1036 	ASSERT(kern_packet_get_buflet_count(sph) == 1);
1037 	ASSERT(soff == kern_packet_get_headroom(sph));
1038 	ASSERT(slen == kern_packet_get_data_length(sph));
1039 
1040 	dph0 = *pdph;
1041 	if (dph0 == 0) {
1042 		dlim0 = 0;
1043 	} else {
1044 		dbuf0 = kern_packet_get_next_buflet(dph0, NULL);
1045 		ASSERT(kern_buflet_get_object_limit(dbuf0) ==
1046 		    PP_BUF_OBJ_SIZE_DEF(pp));
1047 		ASSERT(kern_buflet_get_data_limit(dbuf0) % 16 == 0);
1048 		dlim0 = ((uintptr_t)kern_buflet_get_object_address(dbuf0) +
1049 		    kern_buflet_get_object_limit(dbuf0)) -
1050 		    ((uintptr_t)kern_buflet_get_data_address(dbuf0) +
1051 		    kern_buflet_get_data_limit(dbuf0));
1052 	}
1053 
1054 	if (doff + dlen > dlim0) {
1055 		err = kern_pbufpool_alloc_nosleep(pp, 1, &dph);
1056 		if (err != 0) {
1057 			STATS_INC(dif->iff_nifs, NETIF_STATS_DROP);
1058 			STATS_INC(dif->iff_nifs, NETIF_STATS_DROP_NOMEM_PKT);
1059 			return err;
1060 		}
1061 		dbuf = kern_packet_get_next_buflet(dph, NULL);
1062 		ASSERT(kern_buflet_get_data_address(dbuf) ==
1063 		    kern_buflet_get_object_address(dbuf));
1064 		daddr = kern_buflet_get_data_address(dbuf);
1065 		dlim = kern_buflet_get_object_limit(dbuf);
1066 		ASSERT(dlim == PP_BUF_OBJ_SIZE_DEF(pp));
1067 	} else {
1068 		err = kern_packet_clone_nosleep(dph0, &dph, KPKT_COPY_LIGHT);
1069 		if (err != 0) {
1070 			printf("%s: packet clone err %d\n", __func__, err);
1071 			return err;
1072 		}
1073 		dbuf = kern_packet_get_next_buflet(dph, NULL);
1074 		ASSERT(kern_buflet_get_object_address(dbuf) ==
1075 		    kern_buflet_get_object_address(dbuf0));
1076 		daddr = (void *)((uintptr_t)kern_buflet_get_data_address(dbuf0) +
1077 		    kern_buflet_get_data_limit(dbuf0));
1078 		dlim = dlim0;
1079 	}
1080 
1081 	ASSERT(doff + dlen <= dlim);
1082 
1083 	ASSERT((uintptr_t)daddr % 16 == 0);
1084 
1085 	bcopy((const void *)((uintptr_t)saddr + soff),
1086 	    (void *)((uintptr_t)daddr + doff), slen);
1087 
1088 	dlim = MIN(dlim, P2ROUNDUP(doff + dlen, 16));
1089 	err = kern_buflet_set_data_address(dbuf, daddr);
1090 	VERIFY(err == 0);
1091 	err = kern_buflet_set_data_limit(dbuf, dlim);
1092 	VERIFY(err == 0);
1093 	err = kern_buflet_set_data_length(dbuf, dlen);
1094 	VERIFY(err == 0);
1095 	err = kern_buflet_set_data_offset(dbuf, doff);
1096 	VERIFY(err == 0);
1097 	err = kern_packet_set_headroom(dph, doff);
1098 	VERIFY(err == 0);
1099 	err = kern_packet_set_link_header_length(dph,
1100 	    kern_packet_get_link_header_length(sph));
1101 	VERIFY(err == 0);
1102 	err = kern_packet_set_service_class(dph,
1103 	    kern_packet_get_service_class(sph));
1104 	VERIFY(err == 0);
1105 	err = kern_packet_finalize(dph);
1106 	VERIFY(err == 0);
1107 	*pdph = dph;
1108 
1109 	return err;
1110 }
1111 
1112 static inline void
feth_copy_buflet(kern_buflet_t sbuf,kern_buflet_t dbuf)1113 feth_copy_buflet(kern_buflet_t sbuf, kern_buflet_t dbuf)
1114 {
1115 	errno_t err;
1116 	uint32_t off, len;
1117 	uint8_t *saddr, *daddr;
1118 
1119 	saddr = kern_buflet_get_data_address(sbuf);
1120 	off = kern_buflet_get_data_offset(sbuf);
1121 	len = kern_buflet_get_data_length(sbuf);
1122 	daddr = kern_buflet_get_data_address(dbuf);
1123 	bcopy((saddr + off), (daddr + off), len);
1124 	err = kern_buflet_set_data_offset(dbuf, off);
1125 	VERIFY(err == 0);
1126 	err = kern_buflet_set_data_length(dbuf, len);
1127 	VERIFY(err == 0);
1128 }
1129 
1130 static int
feth_add_packet_trailer(kern_packet_t ph,void * trailer,size_t trailer_len)1131 feth_add_packet_trailer(kern_packet_t ph, void *trailer, size_t trailer_len)
1132 {
1133 	errno_t err = 0;
1134 
1135 	ASSERT(trailer_len <= FETH_TRAILER_LENGTH_MAX);
1136 
1137 	kern_buflet_t buf = NULL, iter = NULL;
1138 	while ((iter = kern_packet_get_next_buflet(ph, iter)) != NULL) {
1139 		buf = iter;
1140 	}
1141 	ASSERT(buf != NULL);
1142 
1143 	uint32_t dlim = kern_buflet_get_data_limit(buf);
1144 	uint32_t doff = kern_buflet_get_data_offset(buf);
1145 	uint32_t dlen = kern_buflet_get_data_length(buf);
1146 
1147 	size_t trailer_room = dlim - doff - dlen;
1148 
1149 	if (trailer_room < trailer_len) {
1150 		printf("not enough room");
1151 		return ERANGE;
1152 	}
1153 
1154 	void *data = (void *)((uintptr_t)kern_buflet_get_data_address(buf) + doff + dlen);
1155 	memcpy(data, trailer, trailer_len);
1156 
1157 	err = kern_buflet_set_data_length(buf, dlen + trailer_len);
1158 	VERIFY(err == 0);
1159 
1160 	err = kern_packet_finalize(ph);
1161 	VERIFY(err == 0);
1162 
1163 	FETH_DPRINTF("%s %zuB trailer added\n", __func__, trailer_len);
1164 
1165 	return 0;
1166 }
1167 
1168 static int
feth_add_packet_fcs(kern_packet_t ph)1169 feth_add_packet_fcs(kern_packet_t ph)
1170 {
1171 	uint32_t crc = 0;
1172 	int err;
1173 
1174 	ASSERT(sizeof(crc) == ETHER_CRC_LEN);
1175 
1176 	kern_buflet_t buf = NULL;
1177 	while ((buf = kern_packet_get_next_buflet(ph, buf)) != NULL) {
1178 		uint32_t doff = kern_buflet_get_data_offset(buf);
1179 		uint32_t dlen = kern_buflet_get_data_length(buf);
1180 		void *data = (void *)((uintptr_t)kern_buflet_get_data_address(buf) + doff);
1181 		crc = crc32(crc, data, dlen);
1182 	}
1183 
1184 	err = feth_add_packet_trailer(ph, &crc, ETHER_CRC_LEN);
1185 	if (!err) {
1186 		return err;
1187 	}
1188 
1189 	err = kern_packet_set_link_ethfcs(ph);
1190 	VERIFY(err == 0);
1191 
1192 	return 0;
1193 }
1194 
1195 static errno_t
feth_copy_packet(if_fake_ref dif,kern_packet_t sph,kern_packet_t * pdph)1196 feth_copy_packet(if_fake_ref dif, kern_packet_t sph, kern_packet_t *pdph)
1197 {
1198 	errno_t err = 0;
1199 	uint16_t i, bufcnt;
1200 	mach_vm_address_t baddr;
1201 	kern_buflet_t sbuf = NULL, dbuf = NULL;
1202 	kern_pbufpool_t pp = dif->iff_rx_pp;
1203 	kern_packet_t dph;
1204 	boolean_t multi_buflet = feth_using_multibuflets(dif);
1205 
1206 	bufcnt = kern_packet_get_buflet_count(sph);
1207 	ASSERT((bufcnt == 1) || multi_buflet);
1208 	*pdph = 0;
1209 
1210 	err = kern_pbufpool_alloc_nosleep(pp, 1, &dph);
1211 	if (err != 0) {
1212 		STATS_INC(dif->iff_nifs, NETIF_STATS_DROP);
1213 		STATS_INC(dif->iff_nifs, NETIF_STATS_DROP_NOMEM_PKT);
1214 		return err;
1215 	}
1216 
1217 	/* pre-constructed single buflet packet copy */
1218 	sbuf = kern_packet_get_next_buflet(sph, NULL);
1219 	dbuf = kern_packet_get_next_buflet(dph, NULL);
1220 	feth_copy_buflet(sbuf, dbuf);
1221 
1222 	if (!multi_buflet) {
1223 		goto done;
1224 	}
1225 
1226 	/* un-constructed multi-buflet packet copy */
1227 	for (i = 1; i < bufcnt; i++) {
1228 		kern_buflet_t dbuf_next = NULL;
1229 
1230 		sbuf = kern_packet_get_next_buflet(sph, sbuf);
1231 		VERIFY(sbuf != NULL);
1232 		err = kern_pbufpool_alloc_buflet_nosleep(pp, &dbuf_next);
1233 		if (err != 0) {
1234 			STATS_INC(dif->iff_nifs, NETIF_STATS_DROP);
1235 			STATS_INC(dif->iff_nifs, NETIF_STATS_DROP_NOMEM_BUF);
1236 			break;
1237 		}
1238 		ASSERT(dbuf_next != NULL);
1239 		feth_copy_buflet(sbuf, dbuf_next);
1240 		err = kern_packet_add_buflet(dph, dbuf, dbuf_next);
1241 		VERIFY(err == 0);
1242 		dbuf = dbuf_next;
1243 	}
1244 	if (__improbable(err != 0)) {
1245 		dbuf = NULL;
1246 		while (i-- != 0) {
1247 			dbuf = kern_packet_get_next_buflet(dph, dbuf);
1248 			VERIFY(dbuf != NULL);
1249 			baddr = (mach_vm_address_t)
1250 			    kern_buflet_get_data_address(dbuf);
1251 			VERIFY(baddr != 0);
1252 		}
1253 		kern_pbufpool_free(pp, dph);
1254 		dph = 0;
1255 	}
1256 
1257 done:
1258 	if (__probable(err == 0)) {
1259 		err = kern_packet_set_headroom(dph,
1260 		    kern_packet_get_headroom(sph));
1261 		VERIFY(err == 0);
1262 		err = kern_packet_set_link_header_length(dph,
1263 		    kern_packet_get_link_header_length(sph));
1264 		VERIFY(err == 0);
1265 		err = kern_packet_set_service_class(dph,
1266 		    kern_packet_get_service_class(sph));
1267 		VERIFY(err == 0);
1268 		err = kern_packet_finalize(dph);
1269 		VERIFY(err == 0);
1270 		VERIFY(bufcnt == kern_packet_get_buflet_count(dph));
1271 		*pdph = dph;
1272 	}
1273 	return err;
1274 }
1275 
1276 static inline void
feth_update_pkt_tso_metadata_for_rx(kern_packet_t ph)1277 feth_update_pkt_tso_metadata_for_rx(kern_packet_t ph)
1278 {
1279 	/*
1280 	 * Nothing to do if not a TSO offloaded packet.
1281 	 */
1282 	uint16_t seg_sz = 0;
1283 	seg_sz = kern_packet_get_protocol_segment_size(ph);
1284 	if (seg_sz == 0) {
1285 		return;
1286 	}
1287 	/*
1288 	 * For RX, make the packet appear as a fully validated LRO packet.
1289 	 */
1290 	packet_csum_flags_t csum_flags = PACKET_CSUM_IP_CHECKED |
1291 	    PACKET_CSUM_IP_VALID | PACKET_CSUM_DATA_VALID |
1292 	    PACKET_CSUM_PSEUDO_HDR;
1293 	(void) kern_packet_set_inet_checksum(ph, csum_flags, 0, 0xFFFF, FALSE);
1294 	return;
1295 }
1296 
1297 static void
feth_rx_submit(if_fake_ref sif,if_fake_ref dif,kern_packet_t sphs[],uint32_t n_pkts)1298 feth_rx_submit(if_fake_ref sif, if_fake_ref dif, kern_packet_t sphs[],
1299     uint32_t n_pkts)
1300 {
1301 	errno_t err = 0;
1302 	struct kern_channel_ring_stat_increment stats;
1303 	kern_channel_ring_t rx_ring = NULL;
1304 	kern_channel_slot_t rx_slot = NULL, last_rx_slot = NULL;
1305 	kern_packet_t sph = 0, dph = 0;
1306 
1307 	memset(&stats, 0, sizeof(stats));
1308 
1309 	rx_ring = dif->iff_rx_ring[0];
1310 	if (rx_ring == NULL) {
1311 		return;
1312 	}
1313 
1314 	kr_enter(rx_ring, TRUE);
1315 	kern_channel_reclaim(rx_ring);
1316 	rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);
1317 
1318 	for (uint32_t i = 0; i < n_pkts && rx_slot != NULL; i++) {
1319 		sph = sphs[i];
1320 
1321 		switch (dif->iff_pp_mode) {
1322 		case IFF_PP_MODE_GLOBAL:
1323 			sphs[i] = 0;
1324 			dph = sph;
1325 			feth_update_pkt_tso_metadata_for_rx(dph);
1326 			err = kern_packet_finalize(dph);
1327 			VERIFY(err == 0);
1328 			break;
1329 		case IFF_PP_MODE_PRIVATE:
1330 			err = feth_copy_packet(dif, sph, &dph);
1331 			break;
1332 		case IFF_PP_MODE_PRIVATE_SPLIT:
1333 			err = feth_clone_packet(dif, sph, &dph);
1334 			break;
1335 		default:
1336 			VERIFY(0);
1337 			__builtin_unreachable();
1338 		}
1339 		if (__improbable(err != 0)) {
1340 			continue;
1341 		}
1342 
1343 		if (sif->iff_trailer_length != 0) {
1344 			feth_add_packet_trailer(dph, feth_trailer,
1345 			    sif->iff_trailer_length);
1346 		}
1347 		if (sif->iff_fcs != 0) {
1348 			feth_add_packet_fcs(dph);
1349 		}
1350 		feth_packet_set_trace_tag(dph, IFF_TT_INPUT);
1351 		bpf_tap_packet_in(dif->iff_ifp, DLT_EN10MB, dph, NULL, 0);
1352 		stats.kcrsi_slots_transferred++;
1353 		stats.kcrsi_bytes_transferred
1354 		        += kern_packet_get_data_length(dph);
1355 
1356 		/* attach the packet to the RX ring */
1357 		err = kern_channel_slot_attach_packet(rx_ring, rx_slot, dph);
1358 		VERIFY(err == 0);
1359 		last_rx_slot = rx_slot;
1360 		rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
1361 	}
1362 
1363 	if (last_rx_slot != NULL) {
1364 		kern_channel_advance_slot(rx_ring, last_rx_slot);
1365 		kern_channel_increment_ring_net_stats(rx_ring, dif->iff_ifp,
1366 		    &stats);
1367 	}
1368 
1369 	if (rx_ring != NULL) {
1370 		kr_exit(rx_ring);
1371 		kern_channel_notify(rx_ring, 0);
1372 	}
1373 }
1374 
1375 static void
feth_rx_queue_submit(if_fake_ref sif,if_fake_ref dif,uint32_t llink_idx,uint32_t qset_idx,kern_packet_t sphs[],uint32_t n_pkts)1376 feth_rx_queue_submit(if_fake_ref sif, if_fake_ref dif, uint32_t llink_idx,
1377     uint32_t qset_idx, kern_packet_t sphs[], uint32_t n_pkts)
1378 {
1379 	errno_t err = 0;
1380 	kern_netif_queue_t queue;
1381 	kern_packet_t sph = 0, dph = 0;
1382 	fake_llink *llink;
1383 	fake_qset *qset;
1384 
1385 	if (llink_idx >= dif->iff_llink_cnt) {
1386 		printf("%s: invalid llink_idx idx %d (max %d) on peer %s\n",
1387 		    __func__, llink_idx, dif->iff_llink_cnt, dif->iff_name);
1388 		return;
1389 	}
1390 	llink = &dif->iff_llink[llink_idx];
1391 	if (qset_idx >= llink->fl_qset_cnt) {
1392 		printf("%s: invalid qset_idx %d (max %d) on peer %s\n",
1393 		    __func__, qset_idx, llink->fl_qset_cnt, dif->iff_name);
1394 		return;
1395 	}
1396 	qset = &dif->iff_llink[llink_idx].fl_qset[qset_idx];
1397 	queue = qset->fqs_rx_queue[0].fq_queue;
1398 	if (queue == NULL) {
1399 		printf("%s: NULL default queue (llink_idx %d, qset_idx %d) "
1400 		    "on peer %s\n", __func__, llink_idx, qset_idx,
1401 		    dif->iff_name);
1402 		return;
1403 	}
1404 	for (uint32_t i = 0; i < n_pkts; i++) {
1405 		uint32_t flags;
1406 
1407 		sph = sphs[i];
1408 
1409 		switch (dif->iff_pp_mode) {
1410 		case IFF_PP_MODE_GLOBAL:
1411 			sphs[i] = 0;
1412 			dph = sph;
1413 			feth_update_pkt_tso_metadata_for_rx(dph);
1414 			break;
1415 		case IFF_PP_MODE_PRIVATE:
1416 			err = feth_copy_packet(dif, sph, &dph);
1417 			break;
1418 		case IFF_PP_MODE_PRIVATE_SPLIT:
1419 			err = feth_clone_packet(dif, sph, &dph);
1420 			break;
1421 		default:
1422 			VERIFY(0);
1423 			__builtin_unreachable();
1424 		}
1425 		if (__improbable(err != 0)) {
1426 			continue;
1427 		}
1428 
1429 		if (sif->iff_trailer_length != 0) {
1430 			feth_add_packet_trailer(dph, feth_trailer,
1431 			    sif->iff_trailer_length);
1432 		}
1433 		if (sif->iff_fcs != 0) {
1434 			feth_add_packet_fcs(dph);
1435 		}
1436 		feth_packet_set_trace_tag(dph, IFF_TT_INPUT);
1437 		bpf_tap_packet_in(dif->iff_ifp, DLT_EN10MB, dph, NULL, 0);
1438 
1439 		flags = (i == n_pkts - 1) ?
1440 		    KERN_NETIF_QUEUE_RX_ENQUEUE_FLAG_FLUSH : 0;
1441 		kern_netif_queue_rx_enqueue(queue, dph, 1, flags);
1442 	}
1443 }
1444 
1445 static void
feth_tx_complete(if_fake_ref fakeif,kern_packet_t phs[],uint32_t nphs)1446 feth_tx_complete(if_fake_ref fakeif, kern_packet_t phs[], uint32_t nphs)
1447 {
1448 	for (uint32_t i = 0; i < nphs; i++) {
1449 		kern_packet_t ph = phs[i];
1450 		if (ph == 0) {
1451 			continue;
1452 		}
1453 		int err = kern_packet_set_tx_completion_status(ph, 0);
1454 		VERIFY(err == 0);
1455 		kern_packet_tx_completion(ph, fakeif->iff_ifp);
1456 		kern_pbufpool_free(fakeif->iff_tx_pp, phs[i]);
1457 		phs[i] = 0;
1458 	}
1459 }
1460 
1461 #define NSEC_PER_USEC 1000ull
1462 /*
1463  * Calculate the time delta that passed from `since' to `until'.
1464  * If `until' happens before `since', returns negative value.
1465  */
1466 static bool
feth_packet_has_expired(if_fake_ref __unused fakeif,kern_packet_t ph,uint64_t * out_deadline)1467 feth_packet_has_expired(if_fake_ref __unused fakeif, kern_packet_t ph,
1468     uint64_t *out_deadline)
1469 {
1470 	uint64_t now;
1471 	uint64_t packet_expire_time_mach;
1472 	int64_t time_until_expiration;
1473 	errno_t err;
1474 	bool expired = false;
1475 
1476 	static mach_timebase_info_data_t clock_timebase = {0, 0};
1477 
1478 	if (clock_timebase.denom == 0) {
1479 		clock_timebase_info(&clock_timebase);
1480 		VERIFY(clock_timebase.denom != 0);
1481 	}
1482 
1483 	err = kern_packet_get_expire_time(ph, &packet_expire_time_mach);
1484 	if (err) {
1485 		goto out;
1486 	}
1487 
1488 	now = mach_absolute_time();
1489 	time_until_expiration = packet_expire_time_mach - now;
1490 	if (time_until_expiration < 0) {
1491 		/* The packet had expired */
1492 		expired = true;
1493 		goto out;
1494 	}
1495 
1496 	/* Convert the time_delta from mach ticks to nanoseconds */
1497 	time_until_expiration *= clock_timebase.numer;
1498 	time_until_expiration /= clock_timebase.denom;
1499 	/* convert from nanoseconds to microseconds */
1500 	time_until_expiration /= 1000ull;
1501 
1502 	if (if_fake_expiration_threshold_us < time_until_expiration) {
1503 		/* packet has some life ahead of it */
1504 		FETH_DPRINTF("Packet has %llu usec until expiration", time_until_expiration);
1505 		goto out;
1506 	}
1507 
1508 out:
1509 	if (expired && out_deadline) {
1510 		*out_deadline = packet_expire_time_mach;
1511 	}
1512 
1513 	return expired;
1514 }
1515 
1516 static errno_t
feth_get_packet_notification_details(if_fake_ref fakeif,kern_packet_t ph,packet_id_t * pkt_id,uint32_t * nx_port_id)1517 feth_get_packet_notification_details(if_fake_ref fakeif, kern_packet_t ph,
1518     packet_id_t *pkt_id, uint32_t *nx_port_id)
1519 {
1520 	errno_t err = 0;
1521 
1522 	err = kern_packet_get_packetid(ph, pkt_id);
1523 	if (err != 0) {
1524 		FETH_DPRINTF("%s err=%d getting packetid", fakeif->iff_name, err);
1525 		return err;
1526 	}
1527 
1528 	err = kern_packet_get_tx_nexus_port_id(ph, nx_port_id);
1529 	if (err != 0) {
1530 		FETH_DPRINTF("%s err=%d getting nx_port_id", fakeif->iff_name, err);
1531 		return err;
1532 	}
1533 
1534 	return 0;
1535 }
1536 
1537 static packet_expiry_action_t
feth_get_effective_expn_action(if_fake_ref fakeif,kern_packet_t ph)1538 feth_get_effective_expn_action(if_fake_ref fakeif, kern_packet_t ph)
1539 {
1540 	errno_t err;
1541 	packet_expiry_action_t expiry_action;
1542 
1543 	switch (fakeif->iff_tx_exp_policy) {
1544 	case IFF_TX_EXP_POLICY_DISABLED:
1545 		expiry_action = PACKET_EXPIRY_ACTION_NONE;
1546 		break;
1547 	case IFF_TX_EXP_POLICY_NOTIFY_ONLY:
1548 		expiry_action = PACKET_EXPIRY_ACTION_NOTIFY;
1549 		break;
1550 	case IFF_TX_EXP_POLICY_DROP_AND_NOTIFY:
1551 		expiry_action = PACKET_EXPIRY_ACTION_DROP;
1552 		break;
1553 	case IFF_TX_EXP_POLICY_METADATA:
1554 		err = kern_packet_get_expiry_action(ph, &expiry_action);
1555 		if (err != 0) {
1556 			if (err != ENOENT) {
1557 				FETH_DPRINTF("Error %d when getting expiry action", err);
1558 			}
1559 			expiry_action = PACKET_EXPIRY_ACTION_NONE;
1560 		}
1561 		break;
1562 	default:
1563 		FETH_DPRINTF("Unrecognized value %d for \"net.link.fake.tx_exp_policy\"",
1564 		    fakeif->iff_tx_exp_policy);
1565 		expiry_action = PACKET_EXPIRY_ACTION_NONE;
1566 	}
1567 
1568 	return expiry_action;
1569 }
1570 
1571 /* returns true if the packet is selected for epxiration and should be dropped */
1572 static bool
feth_tx_expired_error(if_fake_ref fakeif,kern_packet_t ph)1573 feth_tx_expired_error(if_fake_ref fakeif, kern_packet_t ph)
1574 {
1575 	int err = 0;
1576 	uint32_t nx_port_id = 0;
1577 	os_channel_event_packet_transmit_expired_t expn = {0};
1578 	packet_expiry_action_t expiry_action = PACKET_EXPIRY_ACTION_NONE;
1579 
1580 	FETH_DPRINTF("%s\n", fakeif->iff_name);
1581 
1582 	if (feth_packet_has_expired(fakeif, ph, &expn.packet_tx_expiration_deadline)) {
1583 		expiry_action = feth_get_effective_expn_action(fakeif, ph);
1584 	}
1585 
1586 	bool drop_packet = (expiry_action == PACKET_EXPIRY_ACTION_DROP);
1587 	if (expiry_action != PACKET_EXPIRY_ACTION_NONE) {
1588 		/* set the expiration status code */
1589 		expn.packet_tx_expiration_status = drop_packet ?
1590 		    CHANNEL_EVENT_PKT_TRANSMIT_EXPIRED_ERR_EXPIRED_DROPPED :
1591 		    CHANNEL_EVENT_PKT_TRANSMIT_EXPIRED_ERR_EXPIRED_NOT_DROPPED;
1592 
1593 		/* Mark the expiration timestamp */
1594 		expn.packet_tx_expiration_timestamp = mach_absolute_time();
1595 
1596 		err = feth_get_packet_notification_details(fakeif, ph,
1597 		    &expn.packet_id, &nx_port_id);
1598 
1599 		if (err == 0) {
1600 			err = kern_channel_event_transmit_expired(
1601 				fakeif->iff_ifp, &expn, nx_port_id);
1602 			FETH_DPRINTF("%s sent epxiry notification on nexus port %u notif code %u\n",
1603 			    fakeif->iff_name, nx_port_id, expn.packet_tx_expiration_status);
1604 		}
1605 		if (err != 0) {
1606 			FETH_DPRINTF("%s err=%d, nx_port_id: 0x%x\n",
1607 			    fakeif->iff_name, err, nx_port_id);
1608 		}
1609 	}
1610 
1611 	return drop_packet;
1612 }
1613 
1614 /* returns true if the packet is selected for TX error & dropped */
1615 static bool
feth_tx_complete_error(if_fake_ref fakeif,kern_packet_t ph)1616 feth_tx_complete_error(if_fake_ref fakeif, kern_packet_t ph)
1617 {
1618 	int err;
1619 
1620 	if (fakeif->iff_tx_drop_rate == 0 ||
1621 	    fakeif->iff_tx_pkts_count != fakeif->iff_tx_drop_rate) {
1622 		return false;
1623 	}
1624 	/* simulate TX completion error on the packet */
1625 	if (fakeif->iff_tx_completion_mode == IFF_TX_COMPL_MODE_SYNC) {
1626 		err = kern_packet_set_tx_completion_status(ph,
1627 		    CHANNEL_EVENT_PKT_TRANSMIT_STATUS_ERR_RETRY_FAILED);
1628 		VERIFY(err == 0);
1629 		kern_packet_tx_completion(ph, fakeif->iff_ifp);
1630 	} else {
1631 		uint32_t nx_port_id = 0;
1632 		os_channel_event_packet_transmit_status_t pkt_tx_status = {0};
1633 
1634 		pkt_tx_status.packet_status =
1635 		    CHANNEL_EVENT_PKT_TRANSMIT_STATUS_ERR_RETRY_FAILED;
1636 		err = feth_get_packet_notification_details(fakeif, ph,
1637 		    &pkt_tx_status.packet_id, &nx_port_id);
1638 		if (err == 0) {
1639 			err = kern_channel_event_transmit_status(
1640 				fakeif->iff_ifp, &pkt_tx_status, nx_port_id);
1641 		}
1642 		if (err != 0) {
1643 			FETH_DPRINTF("%s err=%d, nx_port_id: 0x%x\n",
1644 			    fakeif->iff_name, err, nx_port_id);
1645 		}
1646 	}
1647 
1648 	return true;
1649 }
1650 
1651 static void
feth_if_adv(thread_call_param_t arg0,thread_call_param_t arg1)1652 feth_if_adv(thread_call_param_t arg0, thread_call_param_t arg1)
1653 {
1654 #pragma unused(arg1)
1655 	errno_t                            error;
1656 	if_fake_ref                        fakeif = (if_fake_ref)arg0;
1657 	struct ifnet_interface_advisory    if_adv;
1658 	struct ifnet_stats_param           if_stat;
1659 
1660 	feth_lock();
1661 	if (feth_is_detaching(fakeif) || !fakeif->iff_channel_connected) {
1662 		feth_unlock();
1663 		return;
1664 	}
1665 	feth_unlock();
1666 
1667 	if (!fakeif->iff_intf_adv_enabled) {
1668 		goto done;
1669 	}
1670 
1671 	error = ifnet_stat(fakeif->iff_ifp, &if_stat);
1672 	if (error != 0) {
1673 		FETH_DPRINTF("%s: ifnet_stat() failed %d\n",
1674 		    fakeif->iff_name, error);
1675 		goto done;
1676 	}
1677 	if_adv.header.version = IF_INTERFACE_ADVISORY_VERSION_CURRENT;
1678 	if_adv.header.direction = IF_INTERFACE_ADVISORY_DIRECTION_TX;
1679 	if_adv.header.interface_type =
1680 	    IF_INTERFACE_ADVISORY_INTERFACE_TYPE_WIFI;
1681 	if_adv.capacity.timestamp = mach_absolute_time();
1682 	if_adv.capacity.rate_trend_suggestion =
1683 	    IF_INTERFACE_ADVISORY_RATE_SUGGESTION_RAMP_NEUTRAL;
1684 	if_adv.capacity.max_bandwidth = 1000 * 1000 * 1000; /* 1Gbps */
1685 	if_adv.capacity.total_byte_count = if_stat.packets_out;
1686 	if_adv.capacity.average_throughput = 1000 * 1000 * 1000; /* 1Gbps */
1687 	if_adv.capacity.flushable_queue_size = UINT32_MAX;
1688 	if_adv.capacity.non_flushable_queue_size = UINT32_MAX;
1689 	if_adv.capacity.average_delay = 1; /* ms */
1690 
1691 	error = fakeif->iff_intf_adv_notify(fakeif->iff_intf_adv_kern_ctx,
1692 	    &if_adv);
1693 	if (error != 0) {
1694 		FETH_DPRINTF("%s: interface advisory report failed %d\n",
1695 		    fakeif->iff_name, error);
1696 	}
1697 
1698 done:
1699 	feth_lock();
1700 	if (!feth_is_detaching(fakeif) && fakeif->iff_channel_connected) {
1701 		uint64_t deadline;
1702 		clock_interval_to_deadline(fakeif->iff_adv_interval,
1703 		    NSEC_PER_MSEC, &deadline);
1704 		thread_call_enter_delayed(fakeif->iff_if_adv_tcall, deadline);
1705 	}
1706 	feth_unlock();
1707 }
1708 
1709 static int
feth_if_adv_tcall_create(if_fake_ref fakeif)1710 feth_if_adv_tcall_create(if_fake_ref fakeif)
1711 {
1712 	uint64_t deadline;
1713 
1714 	feth_lock();
1715 	ASSERT(fakeif->iff_if_adv_tcall == NULL);
1716 	ASSERT(fakeif->iff_adv_interval > 0);
1717 	ASSERT(fakeif->iff_channel_connected);
1718 	fakeif->iff_if_adv_tcall =
1719 	    thread_call_allocate_with_options(feth_if_adv,
1720 	    (thread_call_param_t)fakeif, THREAD_CALL_PRIORITY_KERNEL,
1721 	    THREAD_CALL_OPTIONS_ONCE);
1722 	if (fakeif->iff_if_adv_tcall == NULL) {
1723 		printf("%s: %s if_adv tcall alloc failed\n", __func__,
1724 		    fakeif->iff_name);
1725 		return ENXIO;
1726 	}
1727 	/* retain for the interface advisory thread call */
1728 	feth_retain(fakeif);
1729 	clock_interval_to_deadline(fakeif->iff_adv_interval,
1730 	    NSEC_PER_MSEC, &deadline);
1731 	thread_call_enter_delayed(fakeif->iff_if_adv_tcall, deadline);
1732 	feth_unlock();
1733 	return 0;
1734 }
1735 
1736 static void
feth_if_adv_tcall_destroy(if_fake_ref fakeif)1737 feth_if_adv_tcall_destroy(if_fake_ref fakeif)
1738 {
1739 	thread_call_t tcall;
1740 
1741 	feth_lock();
1742 	ASSERT(fakeif->iff_if_adv_tcall != NULL);
1743 	tcall = fakeif->iff_if_adv_tcall;
1744 	feth_unlock();
1745 	(void) thread_call_cancel_wait(tcall);
1746 	if (!thread_call_free(tcall)) {
1747 		boolean_t freed;
1748 		(void) thread_call_cancel_wait(tcall);
1749 		freed = thread_call_free(tcall);
1750 		VERIFY(freed);
1751 	}
1752 	feth_lock();
1753 	fakeif->iff_if_adv_tcall = NULL;
1754 	feth_unlock();
1755 	/* release for the interface advisory thread call */
1756 	feth_release(fakeif);
1757 }
1758 
1759 
1760 /**
1761 ** nexus netif domain provider
1762 **/
1763 static errno_t
feth_nxdp_init(kern_nexus_domain_provider_t domprov)1764 feth_nxdp_init(kern_nexus_domain_provider_t domprov)
1765 {
1766 #pragma unused(domprov)
1767 	return 0;
1768 }
1769 
1770 static void
feth_nxdp_fini(kern_nexus_domain_provider_t domprov)1771 feth_nxdp_fini(kern_nexus_domain_provider_t domprov)
1772 {
1773 #pragma unused(domprov)
1774 }
1775 
1776 static uuid_t                   feth_nx_dom_prov;
1777 
1778 static errno_t
feth_register_nexus_domain_provider(void)1779 feth_register_nexus_domain_provider(void)
1780 {
1781 	const struct kern_nexus_domain_provider_init dp_init = {
1782 		.nxdpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
1783 		.nxdpi_flags = 0,
1784 		.nxdpi_init = feth_nxdp_init,
1785 		.nxdpi_fini = feth_nxdp_fini
1786 	};
1787 	errno_t                         err = 0;
1788 
1789 	/* feth_nxdp_init() is called before this function returns */
1790 	err = kern_nexus_register_domain_provider(NEXUS_TYPE_NET_IF,
1791 	    (const uint8_t *)
1792 	    "com.apple.feth",
1793 	    &dp_init, sizeof(dp_init),
1794 	    &feth_nx_dom_prov);
1795 	if (err != 0) {
1796 		printf("%s: failed to register domain provider\n", __func__);
1797 		return err;
1798 	}
1799 	return 0;
1800 }
1801 
1802 /**
1803 ** netif nexus routines
1804 **/
1805 static if_fake_ref
feth_nexus_context(kern_nexus_t nexus)1806 feth_nexus_context(kern_nexus_t nexus)
1807 {
1808 	if_fake_ref fakeif;
1809 
1810 	fakeif = (if_fake_ref)kern_nexus_get_context(nexus);
1811 	assert(fakeif != NULL);
1812 	return fakeif;
1813 }
1814 
1815 static uint8_t
feth_find_tx_ring_by_svc(kern_packet_svc_class_t svc_class)1816 feth_find_tx_ring_by_svc(kern_packet_svc_class_t svc_class)
1817 {
1818 	switch (svc_class) {
1819 	case KPKT_SC_VO:
1820 		return 0;
1821 	case KPKT_SC_VI:
1822 		return 1;
1823 	case KPKT_SC_BE:
1824 		return 2;
1825 	case KPKT_SC_BK:
1826 		return 3;
1827 	default:
1828 		VERIFY(0);
1829 		return 0;
1830 	}
1831 }
1832 
1833 static errno_t
feth_nx_ring_init(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel,kern_channel_ring_t ring,boolean_t is_tx_ring,void ** ring_ctx)1834 feth_nx_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
1835     kern_channel_t channel, kern_channel_ring_t ring, boolean_t is_tx_ring,
1836     void **ring_ctx)
1837 {
1838 	if_fake_ref     fakeif;
1839 	int             err;
1840 #pragma unused(nxprov, channel, ring_ctx)
1841 	feth_lock();
1842 	fakeif = feth_nexus_context(nexus);
1843 	if (feth_is_detaching(fakeif)) {
1844 		feth_unlock();
1845 		return 0;
1846 	}
1847 	if (is_tx_ring) {
1848 		if (feth_in_wmm_mode(fakeif)) {
1849 			kern_packet_svc_class_t svc_class;
1850 			uint8_t ring_idx;
1851 
1852 			err = kern_channel_get_service_class(ring, &svc_class);
1853 			VERIFY(err == 0);
1854 			ring_idx = feth_find_tx_ring_by_svc(svc_class);
1855 			VERIFY(ring_idx < IFF_NUM_TX_RINGS_WMM_MODE);
1856 			VERIFY(fakeif->iff_tx_ring[ring_idx] == NULL);
1857 			fakeif->iff_tx_ring[ring_idx] = ring;
1858 		} else {
1859 			VERIFY(fakeif->iff_tx_ring[0] == NULL);
1860 			fakeif->iff_tx_ring[0] = ring;
1861 		}
1862 	} else {
1863 		VERIFY(fakeif->iff_rx_ring[0] == NULL);
1864 		fakeif->iff_rx_ring[0] = ring;
1865 	}
1866 	fakeif->iff_nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
1867 	feth_unlock();
1868 	FETH_DPRINTF("%s: %s ring init\n",
1869 	    fakeif->iff_name, is_tx_ring ? "TX" : "RX");
1870 	return 0;
1871 }
1872 
/*
 * feth_nx_ring_fini
 * - nexus provider ring teardown callback (installed as nxpi_ring_fini)
 * - forgets `ring` in the interface's RX/TX ring table
 * - when the (last) TX ring goes away, takes ownership of the async
 *   doorbell thread call, waits for it to go idle, frees it, and drops
 *   the interface reference that was taken when it was allocated
 */
static void
feth_nx_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t ring)
{
#pragma unused(nxprov, ring)
	if_fake_ref     fakeif;
	thread_call_t   tcall = NULL;

	feth_lock();
	fakeif = feth_nexus_context(nexus);
	if (fakeif->iff_rx_ring[0] == ring) {
		fakeif->iff_rx_ring[0] = NULL;
		FETH_DPRINTF("%s: RX ring fini\n", fakeif->iff_name);
	} else if (feth_in_wmm_mode(fakeif)) {
		int i;
		/* drop this ring from the TX table, if it is there */
		for (i = 0; i < IFF_MAX_TX_RINGS; i++) {
			if (fakeif->iff_tx_ring[i] == ring) {
				fakeif->iff_tx_ring[i] = NULL;
				break;
			}
		}
		/* look for any TX ring that is still registered */
		for (i = 0; i < IFF_MAX_TX_RINGS; i++) {
			if (fakeif->iff_tx_ring[i] != NULL) {
				break;
			}
		}
		/* no TX ring left: the doorbell thread call can be torn down */
		if (i == IFF_MAX_TX_RINGS) {
			tcall = fakeif->iff_doorbell_tcall;
			fakeif->iff_doorbell_tcall = NULL;
		}
		FETH_DPRINTF("%s: TX ring fini\n", fakeif->iff_name);
	} else if (fakeif->iff_tx_ring[0] == ring) {
		/* non-WMM mode has a single TX ring in slot 0 */
		tcall = fakeif->iff_doorbell_tcall;
		fakeif->iff_doorbell_tcall = NULL;
		fakeif->iff_tx_ring[0] = NULL;
	}
	/* the cached stats pointer is cleared on every ring teardown */
	fakeif->iff_nifs = NULL;
	feth_unlock();
	/* the thread call is cancelled and freed without the lock held */
	if (tcall != NULL) {
		boolean_t       success;

		success = thread_call_cancel_wait(tcall);
		FETH_DPRINTF("%s: thread_call_cancel %s\n",
		    fakeif->iff_name,
		    success ? "SUCCESS" : "FAILURE");
		if (!success) {
			/*
			 * Cancel failed: if the doorbell handler is flagged
			 * active, sleep on fakeif until feth_async_doorbell()
			 * clears the flag and issues the wakeup.
			 * NOTE(review): feth_lck_mtx is presumably the mutex
			 * behind feth_lock() -- confirm.
			 */
			feth_lock();
			if (fakeif->iff_doorbell_tcall_active) {
				fakeif->iff_waiting_for_tcall = TRUE;
				FETH_DPRINTF("%s: *waiting for threadcall\n",
				    fakeif->iff_name);
				do {
					msleep(fakeif, &feth_lck_mtx,
					    PZERO, "feth threadcall", 0);
				} while (fakeif->iff_doorbell_tcall_active);
				FETH_DPRINTF("%s: ^threadcall done\n",
				    fakeif->iff_name);
				fakeif->iff_waiting_for_tcall = FALSE;
			}
			feth_unlock();
		}
		success = thread_call_free(tcall);
		FETH_DPRINTF("%s: thread_call_free %s\n",
		    fakeif->iff_name,
		    success ? "SUCCESS" : "FAILURE");
		/* pairs with the feth_retain() in feth_schedule_async_doorbell() */
		feth_release(fakeif);
		VERIFY(success == TRUE);
	}
}
1942 
1943 static errno_t
feth_nx_pre_connect(kern_nexus_provider_t nxprov,proc_t proc,kern_nexus_t nexus,nexus_port_t port,kern_channel_t channel,void ** channel_context)1944 feth_nx_pre_connect(kern_nexus_provider_t nxprov,
1945     proc_t proc, kern_nexus_t nexus, nexus_port_t port, kern_channel_t channel,
1946     void **channel_context)
1947 {
1948 #pragma unused(nxprov, proc, nexus, port, channel, channel_context)
1949 	return 0;
1950 }
1951 
1952 static errno_t
feth_nx_connected(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel)1953 feth_nx_connected(kern_nexus_provider_t nxprov,
1954     kern_nexus_t nexus, kern_channel_t channel)
1955 {
1956 #pragma unused(nxprov, channel)
1957 	int err;
1958 	if_fake_ref fakeif;
1959 
1960 	fakeif = feth_nexus_context(nexus);
1961 	feth_lock();
1962 	if (feth_is_detaching(fakeif)) {
1963 		feth_unlock();
1964 		return EBUSY;
1965 	}
1966 	feth_retain(fakeif);
1967 	fakeif->iff_channel_connected = TRUE;
1968 	feth_unlock();
1969 	if (feth_has_intf_advisory_configured(fakeif)) {
1970 		err = feth_if_adv_tcall_create(fakeif);
1971 		if (err != 0) {
1972 			return err;
1973 		}
1974 	}
1975 	FETH_DPRINTF("%s: connected channel %p\n",
1976 	    fakeif->iff_name, channel);
1977 	return 0;
1978 }
1979 
1980 static void
feth_nx_pre_disconnect(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel)1981 feth_nx_pre_disconnect(kern_nexus_provider_t nxprov,
1982     kern_nexus_t nexus, kern_channel_t channel)
1983 {
1984 #pragma unused(nxprov, channel)
1985 	if_fake_ref fakeif;
1986 
1987 	fakeif = feth_nexus_context(nexus);
1988 	FETH_DPRINTF("%s: pre-disconnect channel %p\n",
1989 	    fakeif->iff_name, channel);
1990 	/* Quiesce the interface and flush any pending outbound packets. */
1991 	if_down(fakeif->iff_ifp);
1992 	feth_lock();
1993 	fakeif->iff_channel_connected = FALSE;
1994 	feth_unlock();
1995 	if (fakeif->iff_if_adv_tcall != NULL) {
1996 		feth_if_adv_tcall_destroy(fakeif);
1997 	}
1998 }
1999 
2000 static void
feth_nx_disconnected(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel)2001 feth_nx_disconnected(kern_nexus_provider_t nxprov,
2002     kern_nexus_t nexus, kern_channel_t channel)
2003 {
2004 #pragma unused(nxprov, channel)
2005 	if_fake_ref fakeif;
2006 
2007 	fakeif = feth_nexus_context(nexus);
2008 	FETH_DPRINTF("%s: disconnected channel %p\n",
2009 	    fakeif->iff_name, channel);
2010 	feth_release(fakeif);
2011 }
2012 
2013 static errno_t
feth_nx_slot_init(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring,kern_channel_slot_t slot,uint32_t slot_index,struct kern_slot_prop ** slot_prop_addr,void ** slot_context)2014 feth_nx_slot_init(kern_nexus_provider_t nxprov,
2015     kern_nexus_t nexus, kern_channel_ring_t ring, kern_channel_slot_t slot,
2016     uint32_t slot_index, struct kern_slot_prop **slot_prop_addr,
2017     void **slot_context)
2018 {
2019 #pragma unused(nxprov, nexus, ring, slot, slot_index, slot_prop_addr, slot_context)
2020 	return 0;
2021 }
2022 
2023 static void
feth_nx_slot_fini(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring,kern_channel_slot_t slot,uint32_t slot_index)2024 feth_nx_slot_fini(kern_nexus_provider_t nxprov,
2025     kern_nexus_t nexus, kern_channel_ring_t ring, kern_channel_slot_t slot,
2026     uint32_t slot_index)
2027 {
2028 #pragma unused(nxprov, nexus, ring, slot, slot_index)
2029 }
2030 
/*
 * feth_nx_sync_tx
 * - nexus provider TX sync callback (installed as nxpi_sync_tx)
 * - walks the slots of `tx_ring`, detaches each packet, taps it to bpf,
 *   and hands it to the peer interface in batches of at most
 *   IFF_MAX_BATCH_SIZE packets
 * - packets are dropped (freed back to the TX pool) when an expiration or
 *   completion error is requested, or when the peer's channel is not
 *   connected
 * - does nothing when this interface is detaching or not connected, or
 *   when there is no usable peer
 * - always returns 0
 */
static errno_t
feth_nx_sync_tx(kern_nexus_provider_t nxprov,
    kern_nexus_t nexus, kern_channel_ring_t tx_ring, uint32_t flags)
{
#pragma unused(nxprov)
	if_fake_ref             fakeif;
	ifnet_t                 ifp;
	kern_channel_slot_t     last_tx_slot = NULL;
	ifnet_t                 peer_ifp;
	if_fake_ref             peer_fakeif = NULL;
	struct kern_channel_ring_stat_increment stats;
	kern_channel_slot_t     tx_slot;
	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
	kern_packet_t           pkts[IFF_MAX_BATCH_SIZE];
	uint32_t                n_pkts = 0;

	memset(&stats, 0, sizeof(stats));

	STATS_INC(nifs, NETIF_STATS_TX_SYNC);
	fakeif = feth_nexus_context(nexus);
	FETH_DPRINTF("%s ring %d flags 0x%x\n", fakeif->iff_name,
	    tx_ring->ckr_ring_id, flags);
	(void)flags;
	/* the lock is held for the whole ring walk below */
	feth_lock();
	if (feth_is_detaching(fakeif) || !fakeif->iff_channel_connected) {
		feth_unlock();
		return 0;
	}
	ifp = fakeif->iff_ifp;
	peer_ifp = fakeif->iff_peer;
	if (peer_ifp != NULL) {
		peer_fakeif = ifnet_get_if_fake(peer_ifp);
		if (peer_fakeif != NULL) {
			if (feth_is_detaching(peer_fakeif)) {
				FETH_DPRINTF("%s peer fakeif %s is detaching\n",
				    fakeif->iff_name, peer_fakeif->iff_name);
				goto done;
			}
			/*
			 * A disconnected peer ends the sync only when the TX
			 * expiration policy is disabled; otherwise the ring
			 * is still walked and the packets are dropped below.
			 */
			if (!peer_fakeif->iff_channel_connected) {
				if (fakeif->iff_tx_exp_policy ==
				    IFF_TX_EXP_POLICY_DISABLED) {
					FETH_DPRINTF("%s peer fakeif %s channel not connected, expn: %d\n",
					    fakeif->iff_name, peer_fakeif->iff_name,
					    fakeif->iff_tx_exp_policy);
					goto done;
				}
			}
		} else {
			FETH_DPRINTF("%s no peer fakeif (peer %p)\n", fakeif->iff_name, peer_ifp);
			goto done;
		}
	} else {
		FETH_DPRINTF("%s no peer\n", fakeif->iff_name);
		goto done;
	}
	tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
	while (tx_slot != NULL) {
		uint16_t off;
		kern_packet_t sph;

		/* detach the packet from the TX ring */
		sph = kern_channel_slot_get_packet(tx_ring, tx_slot);
		VERIFY(sph != 0);
		kern_channel_slot_detach_packet(tx_ring, tx_slot, sph);

		/* bpf tap output */
		off = kern_packet_get_headroom(sph);
		VERIFY(off >= fakeif->iff_tx_headroom);
		kern_packet_set_link_header_length(sph, ETHER_HDR_LEN);
		feth_packet_set_trace_tag(sph, IFF_TT_OUTPUT);
		bpf_tap_packet_out(ifp, DLT_EN10MB, sph, NULL, 0);

		/* drop packets, if requested */
		fakeif->iff_tx_pkts_count++;
		if (feth_tx_expired_error(fakeif, sph) ||
		    feth_tx_complete_error(fakeif, sph) ||
		    !peer_fakeif->iff_channel_connected) {
			fakeif->iff_tx_pkts_count = 0;
			kern_pbufpool_free(fakeif->iff_tx_pp, sph);
			STATS_INC(nifs, NETIF_STATS_DROP);
			goto next_tx_slot;
		}

		ASSERT(sph != 0);
		STATS_INC(nifs, NETIF_STATS_TX_COPY_DIRECT);
		STATS_INC(nifs, NETIF_STATS_TX_PACKETS);

		/* ring statistics only count packets that are not dropped */
		stats.kcrsi_slots_transferred++;
		stats.kcrsi_bytes_transferred
		        += kern_packet_get_data_length(sph);

		/* prepare batch for receiver */
		pkts[n_pkts++] = sph;
		if (n_pkts == IFF_MAX_BATCH_SIZE) {
			feth_rx_submit(fakeif, peer_fakeif, pkts, n_pkts);
			feth_tx_complete(fakeif, pkts, n_pkts);
			n_pkts = 0;
		}

next_tx_slot:
		/* remember the last slot visited so the ring can be advanced */
		last_tx_slot = tx_slot;
		tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
	}

	/* catch last batch for receiver */
	if (n_pkts != 0) {
		feth_rx_submit(fakeif, peer_fakeif, pkts, n_pkts);
		feth_tx_complete(fakeif, pkts, n_pkts);
		n_pkts = 0;
	}

	/* advance past every visited slot, including those that were dropped */
	if (last_tx_slot != NULL) {
		kern_channel_advance_slot(tx_ring, last_tx_slot);
		kern_channel_increment_ring_net_stats(tx_ring, ifp, &stats);
	}
done:
	feth_unlock();
	return 0;
}
2150 
2151 static errno_t
feth_nx_sync_rx(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring,uint32_t flags)2152 feth_nx_sync_rx(kern_nexus_provider_t nxprov,
2153     kern_nexus_t nexus, kern_channel_ring_t ring, uint32_t flags)
2154 {
2155 #pragma unused(nxprov, ring, flags)
2156 	if_fake_ref             fakeif;
2157 	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
2158 
2159 	STATS_INC(nifs, NETIF_STATS_RX_SYNC);
2160 	fakeif = feth_nexus_context(nexus);
2161 	FETH_DPRINTF("%s:\n", fakeif->iff_name);
2162 	return 0;
2163 }
2164 
2165 static errno_t
feth_nx_tx_dequeue_driver_managed(if_fake_ref fakeif,boolean_t doorbell_ctxt)2166 feth_nx_tx_dequeue_driver_managed(if_fake_ref fakeif, boolean_t doorbell_ctxt)
2167 {
2168 	int i;
2169 	errno_t error = 0;
2170 	boolean_t more;
2171 
2172 	for (i = 0; i < IFF_NUM_TX_RINGS_WMM_MODE; i++) {
2173 		kern_channel_ring_t ring = fakeif->iff_tx_ring[i];
2174 		if (ring != NULL) {
2175 			error = kern_channel_tx_refill(ring, UINT32_MAX,
2176 			    UINT32_MAX, doorbell_ctxt, &more);
2177 		}
2178 		if (error != 0) {
2179 			FETH_DPRINTF("%s: TX refill ring %d (%s) %d\n",
2180 			    fakeif->iff_name, ring->ckr_ring_id,
2181 			    doorbell_ctxt ? "sync" : "async", error);
2182 			if (!((error == EAGAIN) || (error == EBUSY))) {
2183 				break;
2184 			}
2185 		} else {
2186 			FETH_DPRINTF("%s: TX refilled ring %d (%s)\n",
2187 			    fakeif->iff_name, ring->ckr_ring_id,
2188 			    doorbell_ctxt ? "sync" : "async");
2189 		}
2190 	}
2191 	return error;
2192 }
2193 
2194 static void
feth_async_doorbell(thread_call_param_t arg0,thread_call_param_t arg1)2195 feth_async_doorbell(thread_call_param_t arg0, thread_call_param_t arg1)
2196 {
2197 #pragma unused(arg1)
2198 	errno_t                 error;
2199 	if_fake_ref             fakeif = (if_fake_ref)arg0;
2200 	kern_channel_ring_t     ring;
2201 	boolean_t               more;
2202 
2203 	feth_lock();
2204 	ring = fakeif->iff_tx_ring[0];
2205 	if (feth_is_detaching(fakeif) ||
2206 	    !fakeif->iff_channel_connected ||
2207 	    ring == NULL) {
2208 		goto done;
2209 	}
2210 	fakeif->iff_doorbell_tcall_active = TRUE;
2211 	feth_unlock();
2212 	if (feth_in_wmm_mode(fakeif)) {
2213 		error = feth_nx_tx_dequeue_driver_managed(fakeif, FALSE);
2214 	} else {
2215 		error = kern_channel_tx_refill(ring, UINT32_MAX,
2216 		    UINT32_MAX, FALSE, &more);
2217 	}
2218 	if (error != 0) {
2219 		FETH_DPRINTF("%s: TX refill failed %d\n",
2220 		    fakeif->iff_name, error);
2221 	} else {
2222 		FETH_DPRINTF("%s: TX refilled\n", fakeif->iff_name);
2223 	}
2224 
2225 	feth_lock();
2226 done:
2227 	fakeif->iff_doorbell_tcall_active = FALSE;
2228 	if (fakeif->iff_waiting_for_tcall) {
2229 		FETH_DPRINTF("%s: threadcall waking up waiter\n",
2230 		    fakeif->iff_name);
2231 		wakeup((caddr_t)fakeif);
2232 	}
2233 	feth_unlock();
2234 }
2235 
2236 static void
feth_schedule_async_doorbell(if_fake_ref fakeif)2237 feth_schedule_async_doorbell(if_fake_ref fakeif)
2238 {
2239 	thread_call_t   tcall;
2240 
2241 	feth_lock();
2242 	if (feth_is_detaching(fakeif) || !fakeif->iff_channel_connected) {
2243 		feth_unlock();
2244 		return;
2245 	}
2246 	tcall = fakeif->iff_doorbell_tcall;
2247 	if (tcall != NULL) {
2248 		thread_call_enter(tcall);
2249 	} else {
2250 		tcall = thread_call_allocate_with_options(feth_async_doorbell,
2251 		    (thread_call_param_t)fakeif,
2252 		    THREAD_CALL_PRIORITY_KERNEL,
2253 		    THREAD_CALL_OPTIONS_ONCE);
2254 		if (tcall == NULL) {
2255 			printf("%s: %s tcall alloc failed\n",
2256 			    __func__, fakeif->iff_name);
2257 		} else {
2258 			fakeif->iff_doorbell_tcall = tcall;
2259 			feth_retain(fakeif);
2260 			thread_call_enter(tcall);
2261 		}
2262 	}
2263 	feth_unlock();
2264 }
2265 
2266 static errno_t
feth_nx_tx_doorbell(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring,uint32_t flags)2267 feth_nx_tx_doorbell(kern_nexus_provider_t nxprov,
2268     kern_nexus_t nexus, kern_channel_ring_t ring, uint32_t flags)
2269 {
2270 #pragma unused(nxprov, ring, flags)
2271 	errno_t         error;
2272 	if_fake_ref     fakeif;
2273 
2274 	fakeif = feth_nexus_context(nexus);
2275 	FETH_DPRINTF("%s\n", fakeif->iff_name);
2276 
2277 	if ((flags & KERN_NEXUS_TXDOORBELLF_ASYNC_REFILL) == 0) {
2278 		boolean_t       more;
2279 		/* synchronous tx refill */
2280 		if (feth_in_wmm_mode(fakeif)) {
2281 			error = feth_nx_tx_dequeue_driver_managed(fakeif, TRUE);
2282 		} else {
2283 			error = kern_channel_tx_refill(ring, UINT32_MAX,
2284 			    UINT32_MAX, TRUE, &more);
2285 		}
2286 		if (error != 0) {
2287 			FETH_DPRINTF("%s: TX refill (sync) %d\n",
2288 			    fakeif->iff_name, error);
2289 		} else {
2290 			FETH_DPRINTF("%s: TX refilled (sync)\n",
2291 			    fakeif->iff_name);
2292 		}
2293 	} else {
2294 		FETH_DPRINTF("%s: schedule async refill\n", fakeif->iff_name);
2295 		feth_schedule_async_doorbell(fakeif);
2296 	}
2297 	return 0;
2298 }
2299 
2300 static errno_t
feth_netif_prepare(kern_nexus_t nexus,ifnet_t ifp)2301 feth_netif_prepare(kern_nexus_t nexus, ifnet_t ifp)
2302 {
2303 	if_fake_ref fakeif;
2304 
2305 	fakeif = (if_fake_ref)kern_nexus_get_context(nexus);
2306 	feth_ifnet_set_attrs(fakeif, ifp);
2307 	return 0;
2308 }
2309 
2310 static errno_t
feth_nx_intf_adv_config(void * prov_ctx,bool enable)2311 feth_nx_intf_adv_config(void *prov_ctx, bool enable)
2312 {
2313 	if_fake_ref fakeif = prov_ctx;
2314 
2315 	feth_lock();
2316 	fakeif->iff_intf_adv_enabled = enable;
2317 	feth_unlock();
2318 	FETH_DPRINTF("%s enable %d\n", fakeif->iff_name, enable);
2319 	return 0;
2320 }
2321 
2322 static errno_t
fill_capab_interface_advisory(if_fake_ref fakeif,void * contents,uint32_t * len)2323 fill_capab_interface_advisory(if_fake_ref fakeif, void *contents, uint32_t *len)
2324 {
2325 	struct kern_nexus_capab_interface_advisory *capab = contents;
2326 
2327 	if (*len != sizeof(*capab)) {
2328 		return EINVAL;
2329 	}
2330 	if (capab->kncia_version !=
2331 	    KERN_NEXUS_CAPAB_INTERFACE_ADVISORY_VERSION_1) {
2332 		return EINVAL;
2333 	}
2334 	if (!feth_has_intf_advisory_configured(fakeif)) {
2335 		return ENOTSUP;
2336 	}
2337 	VERIFY(capab->kncia_notify != NULL);
2338 	fakeif->iff_intf_adv_kern_ctx = capab->kncia_kern_context;
2339 	fakeif->iff_intf_adv_notify = capab->kncia_notify;
2340 	capab->kncia_provider_context = fakeif;
2341 	capab->kncia_config = feth_nx_intf_adv_config;
2342 	return 0;
2343 }
2344 
2345 static errno_t
feth_notify_steering_info(void * prov_ctx,void * qset_ctx,struct ifnet_traffic_descriptor_common * td,bool add)2346 feth_notify_steering_info(void *prov_ctx, void *qset_ctx,
2347     struct ifnet_traffic_descriptor_common *td, bool add)
2348 {
2349 #pragma unused(td)
2350 	if_fake_ref fakeif = prov_ctx;
2351 	fake_qset *qset = qset_ctx;
2352 
2353 	FETH_DPRINTF("%s: notify_steering_info: qset_id 0x%llx, %s\n",
2354 	    fakeif->iff_name, qset->fqs_id, add ? "add" : "remove");
2355 	return 0;
2356 }
2357 
2358 static errno_t
fill_capab_qset_extensions(if_fake_ref fakeif,void * contents,uint32_t * len)2359 fill_capab_qset_extensions(if_fake_ref fakeif, void *contents, uint32_t *len)
2360 {
2361 	struct kern_nexus_capab_qset_extensions *capab = contents;
2362 
2363 	if (*len != sizeof(*capab)) {
2364 		return EINVAL;
2365 	}
2366 	if (capab->cqe_version !=
2367 	    KERN_NEXUS_CAPAB_QSET_EXTENSIONS_VERSION_1) {
2368 		return EINVAL;
2369 	}
2370 	capab->cqe_prov_ctx = fakeif;
2371 	capab->cqe_notify_steering_info = feth_notify_steering_info;
2372 	return 0;
2373 }
2374 
2375 static errno_t
feth_nx_capab_config(kern_nexus_provider_t nxprov,kern_nexus_t nx,kern_nexus_capab_t capab,void * contents,uint32_t * len)2376 feth_nx_capab_config(kern_nexus_provider_t nxprov, kern_nexus_t nx,
2377     kern_nexus_capab_t capab, void *contents, uint32_t *len)
2378 {
2379 #pragma unused(nxprov)
2380 	errno_t error;
2381 	if_fake_ref fakeif;
2382 
2383 	fakeif = feth_nexus_context(nx);
2384 	FETH_DPRINTF("%s\n", fakeif->iff_name);
2385 
2386 	switch (capab) {
2387 	case KERN_NEXUS_CAPAB_INTERFACE_ADVISORY:
2388 		error = fill_capab_interface_advisory(fakeif, contents, len);
2389 		break;
2390 	case KERN_NEXUS_CAPAB_QSET_EXTENSIONS:
2391 		error = fill_capab_qset_extensions(fakeif, contents, len);
2392 		break;
2393 	default:
2394 		error = ENOTSUP;
2395 		break;
2396 	}
2397 	return error;
2398 }
2399 
2400 static int
feth_set_tso(ifnet_t ifp)2401 feth_set_tso(ifnet_t ifp)
2402 {
2403 	ifnet_offload_t offload;
2404 	uint32_t tso_v4_mtu, tso_v6_mtu;
2405 	int error;
2406 
2407 	offload = IFNET_TSO_IPV4 | IFNET_TSO_IPV6;
2408 	tso_v4_mtu = if_fake_tso_buffer_size;
2409 	tso_v6_mtu = if_fake_tso_buffer_size;
2410 	error = ifnet_set_offload(ifp, offload);
2411 	if (error != 0) {
2412 		printf("%s: set TSO offload failed on %s, err %d\n", __func__,
2413 		    if_name(ifp), error);
2414 		return error;
2415 	}
2416 	error = ifnet_set_tso_mtu(ifp, AF_INET, tso_v4_mtu);
2417 	if (error != 0) {
2418 		printf("%s: set TSO MTU IPv4 failed on %s, err %d\n", __func__,
2419 		    if_name(ifp), error);
2420 		return error;
2421 	}
2422 	error = ifnet_set_tso_mtu(ifp, AF_INET6, tso_v6_mtu);
2423 	if (error != 0) {
2424 		printf("%s: set TSO MTU IPv6 failed on %s, err %d\n", __func__,
2425 		    if_name(ifp), error);
2426 		return error;
2427 	}
2428 	return 0;
2429 }
2430 
/*
 * create_netif_provider_and_instance
 * - registers a netif nexus provider named "com.apple.netif.<ifname>"
 *   with the shared nexus controller and allocates one net provider
 *   instance for it, with `fakeif` as the instance context
 * - on return `*provider` and `*instance` hold the UUIDs filled in by the
 *   controller and `*ifp` the interface created for the instance
 * - when the interface supports TSO, TSO is configured on `*ifp`
 * - returns 0 on success or the error of the step that failed
 */
static errno_t
create_netif_provider_and_instance(if_fake_ref fakeif,
    struct ifnet_init_eparams * init_params, ifnet_t *ifp,
    uuid_t * provider, uuid_t * instance)
{
	errno_t                 err;
	nexus_controller_t      controller = kern_nexus_shared_controller();
	struct kern_nexus_net_init net_init;
	nexus_name_t            provider_name;
	nexus_attr_t            nexus_attr = NULL;
	/* callback table handed to the nexus at provider registration */
	struct kern_nexus_provider_init prov_init = {
		.nxpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
		.nxpi_flags = NXPIF_VIRTUAL_DEVICE,
		.nxpi_pre_connect = feth_nx_pre_connect,
		.nxpi_connected = feth_nx_connected,
		.nxpi_pre_disconnect = feth_nx_pre_disconnect,
		.nxpi_disconnected = feth_nx_disconnected,
		.nxpi_ring_init = feth_nx_ring_init,
		.nxpi_ring_fini = feth_nx_ring_fini,
		.nxpi_slot_init = feth_nx_slot_init,
		.nxpi_slot_fini = feth_nx_slot_fini,
		.nxpi_sync_tx = feth_nx_sync_tx,
		.nxpi_sync_rx = feth_nx_sync_rx,
		.nxpi_tx_doorbell = feth_nx_tx_doorbell,
		.nxpi_config_capab = feth_nx_capab_config,
	};

	/* the ring callbacks only ever use RX ring slot 0 */
	_CASSERT(IFF_MAX_RX_RINGS == 1);
	err = kern_nexus_attr_create(&nexus_attr);
	if (err != 0) {
		printf("%s nexus attribute creation failed, error %d\n",
		    __func__, err);
		goto failed;
	}
	/* WMM mode overrides the ring counts and selects the WMM queue map */
	if (feth_in_wmm_mode(fakeif)) {
		err = kern_nexus_attr_set(nexus_attr, NEXUS_ATTR_TX_RINGS,
		    IFF_NUM_TX_RINGS_WMM_MODE);
		VERIFY(err == 0);
		err = kern_nexus_attr_set(nexus_attr, NEXUS_ATTR_RX_RINGS,
		    IFF_NUM_RX_RINGS_WMM_MODE);
		VERIFY(err == 0);
		err = kern_nexus_attr_set(nexus_attr, NEXUS_ATTR_QMAP,
		    NEXUS_QMAP_TYPE_WMM);
		VERIFY(err == 0);
	}

	err = kern_nexus_attr_set(nexus_attr, NEXUS_ATTR_ANONYMOUS, 1);
	VERIFY(err == 0);
	snprintf((char *)provider_name, sizeof(provider_name),
	    "com.apple.netif.%s", fakeif->iff_name);
	err = kern_nexus_controller_register_provider(controller,
	    feth_nx_dom_prov,
	    provider_name,
	    &prov_init,
	    sizeof(prov_init),
	    nexus_attr,
	    provider);
	if (err != 0) {
		printf("%s register provider failed, error %d\n",
		    __func__, err);
		goto failed;
	}
	bzero(&net_init, sizeof(net_init));
	net_init.nxneti_version = KERN_NEXUS_NET_CURRENT_VERSION;
	net_init.nxneti_flags = 0;
	net_init.nxneti_eparams = init_params;
	net_init.nxneti_lladdr = NULL;
	net_init.nxneti_prepare = feth_netif_prepare;
	net_init.nxneti_rx_pbufpool = fakeif->iff_rx_pp;
	net_init.nxneti_tx_pbufpool = fakeif->iff_tx_pp;
	err = kern_nexus_controller_alloc_net_provider_instance(controller,
	    *provider,
	    fakeif,
	    NULL,
	    instance,
	    &net_init,
	    ifp);
	if (err != 0) {
		printf("%s alloc_net_provider_instance failed, %d\n",
		    __func__, err);
		/* undo the provider registration done above */
		kern_nexus_controller_deregister_provider(controller,
		    *provider);
		uuid_clear(*provider);
		goto failed;
	}
	/*
	 * NOTE(review): a TSO setup failure returns the error with the
	 * provider and instance still allocated -- confirm that the caller
	 * tears them down.
	 */
	if (feth_supports_tso(fakeif)) {
		if ((err = feth_set_tso(*ifp)) != 0) {
			goto failed;
		}
	}

	/* shared exit: also reached on success, with err == 0 */
failed:
	if (nexus_attr != NULL) {
		kern_nexus_attr_destroy(nexus_attr);
	}
	return err;
}
2528 
2529 /*
2530  * The nif_stats need to be referenced because we don't want it set
2531  * to NULL until the last llink is removed.
2532  */
2533 static void
get_nexus_stats(if_fake_ref fakeif,kern_nexus_t nexus)2534 get_nexus_stats(if_fake_ref fakeif, kern_nexus_t nexus)
2535 {
2536 	if (++fakeif->iff_nifs_ref == 1) {
2537 		ASSERT(fakeif->iff_nifs == NULL);
2538 		fakeif->iff_nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
2539 	}
2540 }
2541 
2542 static void
clear_nexus_stats(if_fake_ref fakeif)2543 clear_nexus_stats(if_fake_ref fakeif)
2544 {
2545 	if (--fakeif->iff_nifs_ref == 0) {
2546 		ASSERT(fakeif->iff_nifs != NULL);
2547 		fakeif->iff_nifs = NULL;
2548 	}
2549 }
2550 
2551 static errno_t
feth_nx_qset_init(kern_nexus_provider_t nxprov,kern_nexus_t nexus,void * llink_ctx,uint8_t qset_idx,uint64_t qset_id,kern_netif_qset_t qset,void ** qset_ctx)2552 feth_nx_qset_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
2553     void *llink_ctx, uint8_t qset_idx, uint64_t qset_id, kern_netif_qset_t qset,
2554     void **qset_ctx)
2555 {
2556 #pragma unused(nxprov)
2557 	if_fake_ref fakeif;
2558 	fake_llink *fl = llink_ctx;
2559 	fake_qset *fqs;
2560 
2561 	feth_lock();
2562 	fakeif = feth_nexus_context(nexus);
2563 	if (feth_is_detaching(fakeif)) {
2564 		feth_unlock();
2565 		printf("%s: %s: detaching\n", __func__, fakeif->iff_name);
2566 		return ENXIO;
2567 	}
2568 	if (qset_idx >= fl->fl_qset_cnt) {
2569 		feth_unlock();
2570 		printf("%s: %s: invalid qset_idx %d\n", __func__,
2571 		    fakeif->iff_name, qset_idx);
2572 		return EINVAL;
2573 	}
2574 	fqs = &fl->fl_qset[qset_idx];
2575 	ASSERT(fqs->fqs_qset == NULL);
2576 	fqs->fqs_qset = qset;
2577 	fqs->fqs_id = qset_id;
2578 	*qset_ctx = fqs;
2579 
2580 	/* XXX This should really be done during registration */
2581 	get_nexus_stats(fakeif, nexus);
2582 	feth_unlock();
2583 	return 0;
2584 }
2585 
2586 static void
feth_nx_qset_fini(kern_nexus_provider_t nxprov,kern_nexus_t nexus,void * qset_ctx)2587 feth_nx_qset_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
2588     void *qset_ctx)
2589 {
2590 #pragma unused(nxprov)
2591 	if_fake_ref fakeif;
2592 	fake_qset *fqs = qset_ctx;
2593 
2594 	feth_lock();
2595 	fakeif = feth_nexus_context(nexus);
2596 	clear_nexus_stats(fakeif);
2597 	ASSERT(fqs->fqs_qset != NULL);
2598 	fqs->fqs_qset = NULL;
2599 	fqs->fqs_id = 0;
2600 	feth_unlock();
2601 }
2602 
2603 static errno_t
feth_nx_queue_init(kern_nexus_provider_t nxprov,kern_nexus_t nexus,void * qset_ctx,uint8_t qidx,bool tx,kern_netif_queue_t queue,void ** queue_ctx)2604 feth_nx_queue_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
2605     void *qset_ctx, uint8_t qidx, bool tx, kern_netif_queue_t queue,
2606     void **queue_ctx)
2607 {
2608 #pragma unused(nxprov)
2609 	if_fake_ref fakeif;
2610 	fake_qset *fqs = qset_ctx;
2611 	fake_queue *fq;
2612 
2613 	feth_lock();
2614 	fakeif = feth_nexus_context(nexus);
2615 	if (feth_is_detaching(fakeif)) {
2616 		printf("%s: %s: detaching\n", __func__, fakeif->iff_name);
2617 		feth_unlock();
2618 		return ENXIO;
2619 	}
2620 	if (tx) {
2621 		if (qidx >= fqs->fqs_tx_queue_cnt) {
2622 			printf("%s: %s: invalid tx qidx %d\n", __func__,
2623 			    fakeif->iff_name, qidx);
2624 			feth_unlock();
2625 			return EINVAL;
2626 		}
2627 		fq = &fqs->fqs_tx_queue[qidx];
2628 	} else {
2629 		if (qidx >= fqs->fqs_rx_queue_cnt) {
2630 			printf("%s: %s: invalid rx qidx %d\n", __func__,
2631 			    fakeif->iff_name, qidx);
2632 			feth_unlock();
2633 			return EINVAL;
2634 		}
2635 		fq = &fqs->fqs_rx_queue[qidx];
2636 	}
2637 	ASSERT(fq->fq_queue == NULL);
2638 	fq->fq_queue = queue;
2639 	*queue_ctx = fq;
2640 	feth_unlock();
2641 	return 0;
2642 }
2643 
2644 static void
feth_nx_queue_fini(kern_nexus_provider_t nxprov,kern_nexus_t nexus,void * queue_ctx)2645 feth_nx_queue_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
2646     void *queue_ctx)
2647 {
2648 #pragma unused(nxprov, nexus)
2649 	fake_queue *fq = queue_ctx;
2650 
2651 	feth_lock();
2652 	ASSERT(fq->fq_queue != NULL);
2653 	fq->fq_queue = NULL;
2654 	feth_unlock();
2655 }
2656 
2657 static void
feth_nx_tx_queue_deliver_pkt_chain(if_fake_ref fakeif,kern_packet_t sph,struct netif_stats * nifs,if_fake_ref peer_fakeif,uint32_t llink_idx,uint32_t qset_idx)2658 feth_nx_tx_queue_deliver_pkt_chain(if_fake_ref fakeif, kern_packet_t sph,
2659     struct netif_stats *nifs, if_fake_ref peer_fakeif,
2660     uint32_t llink_idx, uint32_t qset_idx)
2661 {
2662 	kern_packet_t pkts[IFF_MAX_BATCH_SIZE];
2663 	uint32_t n_pkts = 0;
2664 
2665 	FETH_DPRINTF("%s -> %s\n", fakeif->iff_name, peer_fakeif->iff_name);
2666 
2667 	while (sph != 0) {
2668 		uint16_t off;
2669 		kern_packet_t next;
2670 
2671 		next = kern_packet_get_next(sph);
2672 		kern_packet_set_next(sph, 0);
2673 
2674 		/* bpf tap output */
2675 		off = kern_packet_get_headroom(sph);
2676 		VERIFY(off >= fakeif->iff_tx_headroom);
2677 		kern_packet_set_link_header_length(sph, ETHER_HDR_LEN);
2678 		feth_packet_set_trace_tag(sph, IFF_TT_OUTPUT);
2679 		bpf_tap_packet_out(fakeif->iff_ifp, DLT_EN10MB, sph, NULL, 0);
2680 
2681 		/* drop packets, if requested */
2682 		fakeif->iff_tx_pkts_count++;
2683 		if (feth_tx_expired_error(fakeif, sph) ||
2684 		    feth_tx_complete_error(fakeif, sph)) {
2685 			fakeif->iff_tx_pkts_count = 0;
2686 			kern_pbufpool_free(fakeif->iff_tx_pp, sph);
2687 			STATS_INC(nifs, NETIF_STATS_DROP);
2688 			goto next_pkt;
2689 		}
2690 		ASSERT(sph != 0);
2691 		STATS_INC(nifs, NETIF_STATS_TX_COPY_DIRECT);
2692 		STATS_INC(nifs, NETIF_STATS_TX_PACKETS);
2693 
2694 		/* prepare batch for receiver */
2695 		pkts[n_pkts++] = sph;
2696 		if (n_pkts == IFF_MAX_BATCH_SIZE) {
2697 			feth_rx_queue_submit(fakeif, peer_fakeif, llink_idx,
2698 			    qset_idx, pkts, n_pkts);
2699 			feth_tx_complete(fakeif, pkts, n_pkts);
2700 			n_pkts = 0;
2701 		}
2702 next_pkt:
2703 		sph = next;
2704 	}
2705 	/* catch last batch for receiver */
2706 	if (n_pkts != 0) {
2707 		feth_rx_queue_submit(fakeif, peer_fakeif, llink_idx, qset_idx,
2708 		    pkts, n_pkts);
2709 		feth_tx_complete(fakeif, pkts, n_pkts);
2710 		n_pkts = 0;
2711 	}
2712 }
2713 
2714 static errno_t
feth_nx_tx_qset_notify(kern_nexus_provider_t nxprov,kern_nexus_t nexus,void * qset_ctx,uint32_t flags)2715 feth_nx_tx_qset_notify(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
2716     void *qset_ctx, uint32_t flags)
2717 {
2718 #pragma unused(nxprov)
2719 	if_fake_ref             fakeif;
2720 	ifnet_t                 ifp;
2721 	ifnet_t                 peer_ifp;
2722 	if_fake_ref             peer_fakeif = NULL;
2723 	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
2724 	fake_qset               *qset = qset_ctx;
2725 	boolean_t               detaching, connected;
2726 	uint32_t                i;
2727 	errno_t                 err;
2728 
2729 	STATS_INC(nifs, NETIF_STATS_TX_SYNC);
2730 	fakeif = feth_nexus_context(nexus);
2731 	FETH_DPRINTF("%s qset %p, idx %d, flags 0x%x\n", fakeif->iff_name, qset,
2732 	    qset->fqs_idx, flags);
2733 
2734 	feth_lock();
2735 	detaching = feth_is_detaching(fakeif);
2736 	connected = fakeif->iff_channel_connected;
2737 	if (detaching || !connected) {
2738 		FETH_DPRINTF("%s: %s: detaching %s, channel connected %s\n",
2739 		    __func__, fakeif->iff_name,
2740 		    (detaching ? "true" : "false"),
2741 		    (connected ? "true" : "false"));
2742 		feth_unlock();
2743 		return 0;
2744 	}
2745 	ifp = fakeif->iff_ifp;
2746 	peer_ifp = fakeif->iff_peer;
2747 	if (peer_ifp != NULL) {
2748 		peer_fakeif = ifnet_get_if_fake(peer_ifp);
2749 		if (peer_fakeif != NULL) {
2750 			detaching = feth_is_detaching(peer_fakeif);
2751 			connected = peer_fakeif->iff_channel_connected;
2752 			if (detaching || !connected) {
2753 				FETH_DPRINTF("%s: peer %s: detaching %s, "
2754 				    "channel connected %s\n",
2755 				    __func__, peer_fakeif->iff_name,
2756 				    (detaching ? "true" : "false"),
2757 				    (connected ? "true" : "false"));
2758 				goto done;
2759 			}
2760 		} else {
2761 			FETH_DPRINTF("%s: peer_fakeif is NULL\n", __func__);
2762 			goto done;
2763 		}
2764 	} else {
2765 		printf("%s: peer_ifp is NULL\n", __func__);
2766 		goto done;
2767 	}
2768 
2769 	if (if_fake_switch_combined_mode &&
2770 	    qset->fqs_dequeue_cnt >= if_fake_switch_mode_frequency) {
2771 		if (qset->fqs_combined_mode) {
2772 			kern_netif_set_qset_separate(qset->fqs_qset);
2773 		} else {
2774 			kern_netif_set_qset_combined(qset->fqs_qset);
2775 		}
2776 		qset->fqs_combined_mode = !qset->fqs_combined_mode;
2777 		qset->fqs_dequeue_cnt = 0;
2778 	}
2779 
2780 	for (i = 0; i < qset->fqs_tx_queue_cnt; i++) {
2781 		kern_packet_t sph = 0;
2782 		kern_netif_queue_t queue = qset->fqs_tx_queue[i].fq_queue;
2783 		boolean_t more = FALSE;
2784 
2785 		err = kern_netif_queue_tx_dequeue(queue, UINT32_MAX, UINT32_MAX,
2786 		    &more, &sph);
2787 		if (err != 0 && err != EAGAIN) {
2788 			FETH_DPRINTF("%s queue %p dequeue failed: err "
2789 			    "%d\n", fakeif->iff_name, queue, err);
2790 		}
2791 		feth_nx_tx_queue_deliver_pkt_chain(fakeif, sph, nifs,
2792 		    peer_fakeif, qset->fqs_llink_idx, qset->fqs_idx);
2793 	}
2794 
2795 done:
2796 	feth_unlock();
2797 	return 0;
2798 }
2799 
2800 static void
fill_qset_info_and_params(if_fake_ref fakeif,fake_llink * llink_info,uint32_t qset_idx,struct kern_nexus_netif_llink_qset_init * qset_init,bool is_def,bool is_low_latency)2801 fill_qset_info_and_params(if_fake_ref fakeif, fake_llink *llink_info,
2802     uint32_t qset_idx, struct kern_nexus_netif_llink_qset_init *qset_init,
2803     bool is_def, bool is_low_latency)
2804 {
2805 	fake_qset *qset_info = &llink_info->fl_qset[qset_idx];
2806 
2807 	qset_init->nlqi_flags =
2808 	    (is_def ? KERN_NEXUS_NET_LLINK_QSET_DEFAULT : 0) |
2809 	    (is_low_latency ? KERN_NEXUS_NET_LLINK_QSET_LOW_LATENCY : 0) |
2810 	    KERN_NEXUS_NET_LLINK_QSET_AQM;
2811 
2812 	if (feth_in_wmm_mode(fakeif)) {
2813 		qset_init->nlqi_flags |= KERN_NEXUS_NET_LLINK_QSET_WMM_MODE;
2814 		qset_init->nlqi_num_txqs = IFF_NUM_TX_QUEUES_WMM_MODE;
2815 		qset_init->nlqi_num_rxqs = IFF_NUM_RX_QUEUES_WMM_MODE;
2816 	} else {
2817 		qset_init->nlqi_num_txqs = 1;
2818 		qset_init->nlqi_num_rxqs = 1;
2819 	}
2820 	qset_info->fqs_tx_queue_cnt = qset_init->nlqi_num_txqs;
2821 	qset_info->fqs_rx_queue_cnt = qset_init->nlqi_num_rxqs;
2822 
2823 	/* These are needed for locating the peer qset */
2824 	qset_info->fqs_llink_idx = llink_info->fl_idx;
2825 	qset_info->fqs_idx = qset_idx;
2826 }
2827 
2828 static void
fill_llink_info_and_params(if_fake_ref fakeif,uint32_t llink_idx,struct kern_nexus_netif_llink_init * llink_init,uint32_t llink_id,struct kern_nexus_netif_llink_qset_init * qset_init,uint32_t qset_cnt,uint32_t flags)2829 fill_llink_info_and_params(if_fake_ref fakeif, uint32_t llink_idx,
2830     struct kern_nexus_netif_llink_init *llink_init, uint32_t llink_id,
2831     struct kern_nexus_netif_llink_qset_init *qset_init, uint32_t qset_cnt,
2832     uint32_t flags)
2833 {
2834 	fake_llink *llink_info = &fakeif->iff_llink[llink_idx];
2835 	uint32_t i;
2836 	bool create_ll_qset = if_fake_low_latency && (llink_idx != 0);
2837 
2838 	for (i = 0; i < qset_cnt; i++) {
2839 		fill_qset_info_and_params(fakeif, llink_info, i,
2840 		    &qset_init[i], i == 0, create_ll_qset && i == 1);
2841 	}
2842 	llink_info->fl_idx = llink_idx;
2843 
2844 	/* This doesn't have to be the same as llink_idx */
2845 	llink_info->fl_id = llink_id;
2846 	llink_info->fl_qset_cnt = qset_cnt;
2847 
2848 	llink_init->nli_link_id = llink_id;
2849 	llink_init->nli_num_qsets = qset_cnt;
2850 	llink_init->nli_qsets = qset_init;
2851 	llink_init->nli_flags = flags;
2852 	llink_init->nli_ctx = llink_info;
2853 }
2854 
2855 static errno_t
create_non_default_llinks(if_fake_ref fakeif)2856 create_non_default_llinks(if_fake_ref fakeif)
2857 {
2858 	struct kern_nexus *nx;
2859 	fake_nx_t fnx = &fakeif->iff_nx;
2860 	struct kern_nexus_netif_llink_init llink_init;
2861 	struct kern_nexus_netif_llink_qset_init qset_init[FETH_MAX_QSETS];
2862 	errno_t err;
2863 	uint64_t llink_id;
2864 	uint32_t i;
2865 
2866 	nx = nx_find(fnx->fnx_instance, FALSE);
2867 	if (nx == NULL) {
2868 		printf("%s: %s: nx not found\n", __func__, fakeif->iff_name);
2869 		return ENXIO;
2870 	}
2871 	/* Default llink starts at index 0 */
2872 	for (i = 1; i < if_fake_llink_cnt; i++) {
2873 		llink_id = (uint64_t)i;
2874 
2875 		/*
2876 		 * The llink_init and qset_init structures are reused for
2877 		 * each llink creation.
2878 		 */
2879 		fill_llink_info_and_params(fakeif, i, &llink_init,
2880 		    llink_id, qset_init, if_fake_qset_cnt, 0);
2881 		err = kern_nexus_netif_llink_add(nx, &llink_init);
2882 		if (err != 0) {
2883 			printf("%s: %s: llink add failed, error %d\n",
2884 			    __func__, fakeif->iff_name, err);
2885 			goto fail;
2886 		}
2887 		fakeif->iff_llink_cnt++;
2888 	}
2889 	nx_release(nx);
2890 	return 0;
2891 
2892 fail:
2893 	for (i = 0; i < fakeif->iff_llink_cnt; i++) {
2894 		int e;
2895 
2896 		e = kern_nexus_netif_llink_remove(nx, fakeif->
2897 		    iff_llink[i].fl_id);
2898 		if (e != 0) {
2899 			printf("%s: %s: llink remove failed, llink_id 0x%llx, "
2900 			    "error %d\n", __func__, fakeif->iff_name,
2901 			    fakeif->iff_llink[i].fl_id, e);
2902 		}
2903 		fakeif->iff_llink[i].fl_id = 0;
2904 	}
2905 	fakeif->iff_llink_cnt = 0;
2906 	nx_release(nx);
2907 	return err;
2908 }
2909 
2910 static errno_t
create_netif_llink_provider_and_instance(if_fake_ref fakeif,struct ifnet_init_eparams * init_params,ifnet_t * ifp,uuid_t * provider,uuid_t * instance)2911 create_netif_llink_provider_and_instance(if_fake_ref fakeif,
2912     struct ifnet_init_eparams * init_params, ifnet_t *ifp,
2913     uuid_t * provider, uuid_t * instance)
2914 {
2915 	errno_t                 err;
2916 	nexus_controller_t      controller = kern_nexus_shared_controller();
2917 	struct kern_nexus_net_init net_init;
2918 	struct kern_nexus_netif_llink_init llink_init;
2919 	struct kern_nexus_netif_llink_qset_init qsets[FETH_MAX_QSETS];
2920 
2921 	nexus_name_t            provider_name;
2922 	nexus_attr_t            nexus_attr = NULL;
2923 	struct kern_nexus_netif_provider_init prov_init = {
2924 		.nxnpi_version = KERN_NEXUS_DOMAIN_PROVIDER_NETIF,
2925 		.nxnpi_flags = NXPIF_VIRTUAL_DEVICE,
2926 		.nxnpi_pre_connect = feth_nx_pre_connect,
2927 		.nxnpi_connected = feth_nx_connected,
2928 		.nxnpi_pre_disconnect = feth_nx_pre_disconnect,
2929 		.nxnpi_disconnected = feth_nx_disconnected,
2930 		.nxnpi_qset_init = feth_nx_qset_init,
2931 		.nxnpi_qset_fini = feth_nx_qset_fini,
2932 		.nxnpi_queue_init = feth_nx_queue_init,
2933 		.nxnpi_queue_fini = feth_nx_queue_fini,
2934 		.nxnpi_tx_qset_notify = feth_nx_tx_qset_notify,
2935 		.nxnpi_config_capab = feth_nx_capab_config,
2936 	};
2937 
2938 	err = kern_nexus_attr_create(&nexus_attr);
2939 	if (err != 0) {
2940 		printf("%s nexus attribute creation failed, error %d\n",
2941 		    __func__, err);
2942 		goto failed;
2943 	}
2944 
2945 	err = kern_nexus_attr_set(nexus_attr, NEXUS_ATTR_ANONYMOUS, 1);
2946 	VERIFY(err == 0);
2947 
2948 	snprintf((char *)provider_name, sizeof(provider_name),
2949 	    "com.apple.netif.%s", fakeif->iff_name);
2950 	err = kern_nexus_controller_register_provider(controller,
2951 	    feth_nx_dom_prov,
2952 	    provider_name,
2953 	    (struct kern_nexus_provider_init *)&prov_init,
2954 	    sizeof(prov_init),
2955 	    nexus_attr,
2956 	    provider);
2957 	if (err != 0) {
2958 		printf("%s register provider failed, error %d\n",
2959 		    __func__, err);
2960 		goto failed;
2961 	}
2962 	bzero(&net_init, sizeof(net_init));
2963 	net_init.nxneti_version = KERN_NEXUS_NET_CURRENT_VERSION;
2964 	net_init.nxneti_flags = 0;
2965 	net_init.nxneti_eparams = init_params;
2966 	net_init.nxneti_lladdr = NULL;
2967 	net_init.nxneti_prepare = feth_netif_prepare;
2968 	net_init.nxneti_rx_pbufpool = fakeif->iff_rx_pp;
2969 	net_init.nxneti_tx_pbufpool = fakeif->iff_tx_pp;
2970 
2971 	/*
2972 	 * Assume llink id is same as the index for if_fake.
2973 	 * This is not required for other drivers.
2974 	 */
2975 	_CASSERT(NETIF_LLINK_ID_DEFAULT == 0);
2976 	fill_llink_info_and_params(fakeif, 0, &llink_init,
2977 	    NETIF_LLINK_ID_DEFAULT, qsets, if_fake_qset_cnt,
2978 	    KERN_NEXUS_NET_LLINK_DEFAULT);
2979 
2980 	net_init.nxneti_llink = &llink_init;
2981 
2982 	err = kern_nexus_controller_alloc_net_provider_instance(controller,
2983 	    *provider, fakeif, NULL, instance, &net_init, ifp);
2984 	if (err != 0) {
2985 		printf("%s alloc_net_provider_instance failed, %d\n",
2986 		    __func__, err);
2987 		kern_nexus_controller_deregister_provider(controller,
2988 		    *provider);
2989 		uuid_clear(*provider);
2990 		goto failed;
2991 	}
2992 	fakeif->iff_llink_cnt++;
2993 
2994 	if (if_fake_llink_cnt > 1) {
2995 		err = create_non_default_llinks(fakeif);
2996 		if (err != 0) {
2997 			printf("%s create_non_default_llinks failed, %d\n",
2998 			    __func__, err);
2999 			feth_detach_netif_nexus(fakeif);
3000 			goto failed;
3001 		}
3002 	}
3003 	if (feth_supports_tso(fakeif)) {
3004 		if ((err = feth_set_tso(*ifp)) != 0) {
3005 			goto failed;
3006 		}
3007 	}
3008 failed:
3009 	if (nexus_attr != NULL) {
3010 		kern_nexus_attr_destroy(nexus_attr);
3011 	}
3012 	return err;
3013 }
3014 
3015 static errno_t
feth_attach_netif_nexus(if_fake_ref fakeif,struct ifnet_init_eparams * init_params,ifnet_t * ifp)3016 feth_attach_netif_nexus(if_fake_ref fakeif,
3017     struct ifnet_init_eparams * init_params, ifnet_t *ifp)
3018 {
3019 	errno_t                 error;
3020 	fake_nx_t               nx = &fakeif->iff_nx;
3021 
3022 	error = feth_packet_pool_make(fakeif);
3023 	if (error != 0) {
3024 		return error;
3025 	}
3026 	if (if_fake_llink_cnt == 0) {
3027 		return create_netif_provider_and_instance(fakeif, init_params,
3028 		           ifp, &nx->fnx_provider, &nx->fnx_instance);
3029 	} else {
3030 		return create_netif_llink_provider_and_instance(fakeif,
3031 		           init_params, ifp, &nx->fnx_provider,
3032 		           &nx->fnx_instance);
3033 	}
3034 }
3035 
3036 static void
remove_non_default_llinks(if_fake_ref fakeif)3037 remove_non_default_llinks(if_fake_ref fakeif)
3038 {
3039 	struct kern_nexus *nx;
3040 	fake_nx_t fnx = &fakeif->iff_nx;
3041 	uint32_t i;
3042 
3043 	if (fakeif->iff_llink_cnt <= 1) {
3044 		return;
3045 	}
3046 	nx = nx_find(fnx->fnx_instance, FALSE);
3047 	if (nx == NULL) {
3048 		printf("%s: %s: nx not found\n", __func__,
3049 		    fakeif->iff_name);
3050 		return;
3051 	}
3052 	/* Default llink (at index 0) is freed separately */
3053 	for (i = 1; i < fakeif->iff_llink_cnt; i++) {
3054 		int err;
3055 
3056 		err = kern_nexus_netif_llink_remove(nx, fakeif->
3057 		    iff_llink[i].fl_id);
3058 		if (err != 0) {
3059 			printf("%s: %s: llink remove failed, llink_id 0x%llx, "
3060 			    "error %d\n", __func__, fakeif->iff_name,
3061 			    fakeif->iff_llink[i].fl_id, err);
3062 		}
3063 		fakeif->iff_llink[i].fl_id = 0;
3064 	}
3065 	fakeif->iff_llink_cnt = 0;
3066 	nx_release(nx);
3067 }
3068 
3069 static void
detach_provider_and_instance(uuid_t provider,uuid_t instance)3070 detach_provider_and_instance(uuid_t provider, uuid_t instance)
3071 {
3072 	nexus_controller_t controller = kern_nexus_shared_controller();
3073 	errno_t err;
3074 
3075 	if (!uuid_is_null(instance)) {
3076 		err = kern_nexus_controller_free_provider_instance(controller,
3077 		    instance);
3078 		if (err != 0) {
3079 			printf("%s free_provider_instance failed %d\n",
3080 			    __func__, err);
3081 		}
3082 		uuid_clear(instance);
3083 	}
3084 	if (!uuid_is_null(provider)) {
3085 		err = kern_nexus_controller_deregister_provider(controller,
3086 		    provider);
3087 		if (err != 0) {
3088 			printf("%s deregister_provider %d\n", __func__, err);
3089 		}
3090 		uuid_clear(provider);
3091 	}
3092 	return;
3093 }
3094 
3095 static void
feth_detach_netif_nexus(if_fake_ref fakeif)3096 feth_detach_netif_nexus(if_fake_ref fakeif)
3097 {
3098 	fake_nx_t fnx = &fakeif->iff_nx;
3099 
3100 	remove_non_default_llinks(fakeif);
3101 	detach_provider_and_instance(fnx->fnx_provider, fnx->fnx_instance);
3102 }
3103 
3104 #endif /* SKYWALK */
3105 
3106 /**
3107 ** feth interface routines
3108 **/
3109 static void
feth_ifnet_set_attrs(if_fake_ref fakeif,ifnet_t ifp)3110 feth_ifnet_set_attrs(if_fake_ref fakeif, ifnet_t ifp)
3111 {
3112 	uint32_t        cap;
3113 
3114 	cap = ((fakeif->iff_flags & IFF_FLAGS_LRO) != 0) ? IFCAP_LRO : 0;
3115 	if (cap != 0) {
3116 		errno_t         error;
3117 
3118 		error = ifnet_set_capabilities_supported(ifp, cap, IFCAP_VALID);
3119 		if (error != 0) {
3120 			printf("%s: failed to enable LRO, %d\n",
3121 			    ifp->if_xname, error);
3122 		}
3123 	}
3124 	(void)ifnet_set_capabilities_enabled(ifp, cap, IFCAP_VALID);
3125 	ifnet_set_addrlen(ifp, ETHER_ADDR_LEN);
3126 	ifnet_set_baudrate(ifp, 0);
3127 	ifnet_set_mtu(ifp, ETHERMTU);
3128 	ifnet_set_flags(ifp,
3129 	    IFF_BROADCAST | IFF_MULTICAST | IFF_SIMPLEX,
3130 	    0xffff);
3131 	ifnet_set_hdrlen(ifp, sizeof(struct ether_header));
3132 	if ((fakeif->iff_flags & IFF_FLAGS_HWCSUM) != 0) {
3133 		ifnet_set_offload(ifp,
3134 		    IFNET_CSUM_IP | IFNET_CSUM_TCP | IFNET_CSUM_UDP |
3135 		    IFNET_CSUM_TCPIPV6 | IFNET_CSUM_UDPIPV6);
3136 	} else {
3137 		ifnet_set_offload(ifp, 0);
3138 	}
3139 }
3140 
3141 static void
interface_link_event(ifnet_t ifp,u_int32_t event_code)3142 interface_link_event(ifnet_t ifp, u_int32_t event_code)
3143 {
3144 	struct event {
3145 		u_int32_t ifnet_family;
3146 		u_int32_t unit;
3147 		char if_name[IFNAMSIZ];
3148 	};
3149 	_Alignas(struct kern_event_msg) char message[sizeof(struct kern_event_msg) + sizeof(struct event)] = { 0 };
3150 	struct kern_event_msg *header = (struct kern_event_msg*)message;
3151 	struct event *data = (struct event *)(header + 1);
3152 
3153 	header->total_size   = sizeof(message);
3154 	header->vendor_code  = KEV_VENDOR_APPLE;
3155 	header->kev_class    = KEV_NETWORK_CLASS;
3156 	header->kev_subclass = KEV_DL_SUBCLASS;
3157 	header->event_code   = event_code;
3158 	data->ifnet_family   = ifnet_family(ifp);
3159 	data->unit           = (u_int32_t)ifnet_unit(ifp);
3160 	strlcpy(data->if_name, ifnet_name(ifp), IFNAMSIZ);
3161 	ifnet_event(ifp, header);
3162 }
3163 
3164 static if_fake_ref
ifnet_get_if_fake(ifnet_t ifp)3165 ifnet_get_if_fake(ifnet_t ifp)
3166 {
3167 	return (if_fake_ref)ifnet_softc(ifp);
3168 }
3169 
3170 static int
feth_clone_create(struct if_clone * ifc,u_int32_t unit,__unused void * params)3171 feth_clone_create(struct if_clone *ifc, u_int32_t unit, __unused void *params)
3172 {
3173 	int                             error;
3174 	if_fake_ref                     fakeif;
3175 	struct ifnet_init_eparams       feth_init;
3176 	ifnet_t                         ifp;
3177 	uint8_t                         mac_address[ETHER_ADDR_LEN];
3178 	fake_llink                     *iff_llink;
3179 
3180 	iff_llink = kalloc_type(fake_llink, FETH_MAX_LLINKS, Z_WAITOK_ZERO);
3181 	if (iff_llink == NULL) {
3182 		return ENOBUFS;
3183 	}
3184 	fakeif = kalloc_type(struct if_fake, Z_WAITOK_ZERO_NOFAIL);
3185 	fakeif->iff_llink = iff_llink;
3186 	fakeif->iff_retain_count = 1;
3187 #define FAKE_ETHER_NAME_LEN     (sizeof(FAKE_ETHER_NAME) - 1)
3188 	_CASSERT(FAKE_ETHER_NAME_LEN == 4);
3189 	bcopy(FAKE_ETHER_NAME, mac_address, FAKE_ETHER_NAME_LEN);
3190 	mac_address[ETHER_ADDR_LEN - 2] = (unit & 0xff00) >> 8;
3191 	mac_address[ETHER_ADDR_LEN - 1] = unit & 0xff;
3192 	if (if_fake_bsd_mode != 0) {
3193 		fakeif->iff_flags |= IFF_FLAGS_BSD_MODE;
3194 	}
3195 	if (if_fake_hwcsum != 0) {
3196 		fakeif->iff_flags |= IFF_FLAGS_HWCSUM;
3197 	}
3198 	if (if_fake_lro != 0) {
3199 		fakeif->iff_flags |= IFF_FLAGS_LRO;
3200 	}
3201 	fakeif->iff_max_mtu = get_max_mtu(if_fake_bsd_mode, if_fake_max_mtu);
3202 	fakeif->iff_fcs = if_fake_fcs;
3203 	fakeif->iff_trailer_length = if_fake_trailer_length;
3204 
3205 	/* use the interface name as the unique id for ifp recycle */
3206 	if ((unsigned int)
3207 	    snprintf(fakeif->iff_name, sizeof(fakeif->iff_name), "%s%d",
3208 	    ifc->ifc_name, unit) >= sizeof(fakeif->iff_name)) {
3209 		feth_release(fakeif);
3210 		return EINVAL;
3211 	}
3212 	bzero(&feth_init, sizeof(feth_init));
3213 	feth_init.ver = IFNET_INIT_CURRENT_VERSION;
3214 	feth_init.len = sizeof(feth_init);
3215 	if (feth_in_bsd_mode(fakeif)) {
3216 		if (if_fake_txstart != 0) {
3217 			feth_init.start = feth_start;
3218 		} else {
3219 			feth_init.flags |= IFNET_INIT_LEGACY;
3220 			feth_init.output = feth_output;
3221 		}
3222 	}
3223 #if SKYWALK
3224 	else {
3225 		feth_init.flags |= IFNET_INIT_SKYWALK_NATIVE;
3226 		/*
3227 		 * Currently we support WMM mode only for Skywalk native
3228 		 * interface.
3229 		 */
3230 		if (if_fake_wmm_mode != 0) {
3231 			fakeif->iff_flags |= IFF_FLAGS_WMM_MODE;
3232 		}
3233 
3234 		if (if_fake_multibuflet != 0) {
3235 			fakeif->iff_flags |= IFF_FLAGS_MULTIBUFLETS;
3236 		}
3237 
3238 		if (if_fake_multibuflet != 0 &&
3239 		    if_fake_pktpool_mode == IFF_PP_MODE_PRIVATE_SPLIT) {
3240 			printf("%s: multi-buflet not supported for split rx &"
3241 			    " tx pool", __func__);
3242 			feth_release(fakeif);
3243 			return EINVAL;
3244 		}
3245 
3246 		fakeif->iff_pp_mode = if_fake_pktpool_mode;
3247 		if (if_fake_tso_support != 0) {
3248 			if (fakeif->iff_pp_mode != IFF_PP_MODE_GLOBAL) {
3249 				printf("%s: TSO mode requires global packet"
3250 				    " pool mode\n", __func__);
3251 				return EINVAL;
3252 			}
3253 			fakeif->iff_flags |= IFF_FLAGS_TSO_SUPPORT;
3254 		}
3255 
3256 		fakeif->iff_tx_headroom = if_fake_tx_headroom;
3257 		fakeif->iff_adv_interval = if_fake_if_adv_interval;
3258 		if (fakeif->iff_adv_interval > 0) {
3259 			feth_init.flags |= IFNET_INIT_IF_ADV;
3260 		}
3261 		fakeif->iff_tx_drop_rate = if_fake_tx_drops;
3262 		fakeif->iff_tx_completion_mode = if_tx_completion_mode;
3263 		fakeif->iff_tx_exp_policy = if_fake_tx_exp_policy;
3264 	}
3265 	feth_init.tx_headroom = fakeif->iff_tx_headroom;
3266 #endif /* SKYWALK */
3267 	if (if_fake_nxattach == 0) {
3268 		feth_init.flags |= IFNET_INIT_NX_NOAUTO;
3269 	}
3270 	feth_init.uniqueid = fakeif->iff_name;
3271 	feth_init.uniqueid_len = strlen(fakeif->iff_name);
3272 	feth_init.name = ifc->ifc_name;
3273 	feth_init.unit = unit;
3274 	feth_init.family = IFNET_FAMILY_ETHERNET;
3275 	feth_init.type = IFT_ETHER;
3276 	feth_init.demux = ether_demux;
3277 	feth_init.add_proto = ether_add_proto;
3278 	feth_init.del_proto = ether_del_proto;
3279 	feth_init.check_multi = ether_check_multi;
3280 	feth_init.framer_extended = ether_frameout_extended;
3281 	feth_init.softc = fakeif;
3282 	feth_init.ioctl = feth_ioctl;
3283 	feth_init.set_bpf_tap = NULL;
3284 	feth_init.detach = feth_if_free;
3285 	feth_init.broadcast_addr = etherbroadcastaddr;
3286 	feth_init.broadcast_len = ETHER_ADDR_LEN;
3287 	if (feth_in_bsd_mode(fakeif)) {
3288 		error = ifnet_allocate_extended(&feth_init, &ifp);
3289 		if (error) {
3290 			feth_release(fakeif);
3291 			return error;
3292 		}
3293 		feth_ifnet_set_attrs(fakeif, ifp);
3294 	}
3295 #if SKYWALK
3296 	else {
3297 		if (feth_in_wmm_mode(fakeif)) {
3298 			feth_init.output_sched_model =
3299 			    IFNET_SCHED_MODEL_DRIVER_MANAGED;
3300 		}
3301 		error = feth_attach_netif_nexus(fakeif, &feth_init, &ifp);
3302 		if (error != 0) {
3303 			feth_release(fakeif);
3304 			return error;
3305 		}
3306 		/* take an additional reference to ensure that it doesn't go away */
3307 		feth_retain(fakeif);
3308 		fakeif->iff_ifp = ifp;
3309 	}
3310 #endif /* SKYWALK */
3311 	fakeif->iff_media_count = MIN(default_media_words_count, IF_FAKE_MEDIA_LIST_MAX);
3312 	bcopy(default_media_words, fakeif->iff_media_list,
3313 	    fakeif->iff_media_count * sizeof(fakeif->iff_media_list[0]));
3314 	if (feth_in_bsd_mode(fakeif)) {
3315 		error = ifnet_attach(ifp, NULL);
3316 		if (error) {
3317 			ifnet_release(ifp);
3318 			feth_release(fakeif);
3319 			return error;
3320 		}
3321 		fakeif->iff_ifp = ifp;
3322 	}
3323 
3324 	ifnet_set_lladdr(ifp, mac_address, sizeof(mac_address));
3325 
3326 	/* attach as ethernet */
3327 	bpfattach(ifp, DLT_EN10MB, sizeof(struct ether_header));
3328 	return 0;
3329 }
3330 
3331 static int
feth_clone_destroy(ifnet_t ifp)3332 feth_clone_destroy(ifnet_t ifp)
3333 {
3334 	if_fake_ref     fakeif;
3335 #if SKYWALK
3336 	boolean_t       nx_attached = FALSE;
3337 #endif /* SKYWALK */
3338 
3339 	feth_lock();
3340 	fakeif = ifnet_get_if_fake(ifp);
3341 	if (fakeif == NULL || feth_is_detaching(fakeif)) {
3342 		feth_unlock();
3343 		return 0;
3344 	}
3345 	feth_set_detaching(fakeif);
3346 #if SKYWALK
3347 	nx_attached = !feth_in_bsd_mode(fakeif);
3348 #endif /* SKYWALK */
3349 	feth_unlock();
3350 
3351 #if SKYWALK
3352 	if (nx_attached) {
3353 		feth_detach_netif_nexus(fakeif);
3354 		feth_release(fakeif);
3355 	}
3356 #endif /* SKYWALK */
3357 	feth_config(ifp, NULL);
3358 	ifnet_detach(ifp);
3359 	return 0;
3360 }
3361 
3362 static void
feth_enqueue_input(ifnet_t ifp,struct mbuf * m)3363 feth_enqueue_input(ifnet_t ifp, struct mbuf * m)
3364 {
3365 	struct ifnet_stat_increment_param stats = {};
3366 
3367 	stats.packets_in = 1;
3368 	stats.bytes_in = (uint32_t)mbuf_pkthdr_len(m) + ETHER_HDR_LEN;
3369 	ifnet_input(ifp, m, &stats);
3370 }
3371 
3372 static struct mbuf *
copy_mbuf(struct mbuf * m)3373 copy_mbuf(struct mbuf *m)
3374 {
3375 	struct mbuf *   copy_m;
3376 	uint32_t        pkt_len;
3377 	uint32_t        offset;
3378 
3379 	if ((m->m_flags & M_PKTHDR) == 0) {
3380 		return NULL;
3381 	}
3382 	pkt_len = m->m_pkthdr.len;
3383 	MGETHDR(copy_m, M_DONTWAIT, MT_DATA);
3384 	if (copy_m == NULL) {
3385 		goto failed;
3386 	}
3387 	if (pkt_len > MHLEN) {
3388 		if (pkt_len <= MCLBYTES) {
3389 			MCLGET(copy_m, M_DONTWAIT);
3390 		} else if (pkt_len <= MBIGCLBYTES) {
3391 			copy_m = m_mbigget(copy_m, M_DONTWAIT);
3392 		} else if (pkt_len <= M16KCLBYTES && njcl > 0) {
3393 			copy_m = m_m16kget(copy_m, M_DONTWAIT);
3394 		} else {
3395 			printf("if_fake: copy_mbuf(): packet too large %d\n",
3396 			    pkt_len);
3397 			goto failed;
3398 		}
3399 		if (copy_m == NULL || (copy_m->m_flags & M_EXT) == 0) {
3400 			goto failed;
3401 		}
3402 	}
3403 	mbuf_setlen(copy_m, pkt_len);
3404 	copy_m->m_pkthdr.len = pkt_len;
3405 	copy_m->m_pkthdr.pkt_svc = m->m_pkthdr.pkt_svc;
3406 	offset = 0;
3407 	while (m != NULL && offset < pkt_len) {
3408 		uint32_t        frag_len;
3409 
3410 		frag_len = m->m_len;
3411 		if (frag_len > (pkt_len - offset)) {
3412 			printf("if_fake_: Large mbuf fragment %d > %d\n",
3413 			    frag_len, (pkt_len - offset));
3414 			goto failed;
3415 		}
3416 		m_copydata(m, 0, frag_len, mtodo(copy_m, offset));
3417 		offset += frag_len;
3418 		m = m->m_next;
3419 	}
3420 	return copy_m;
3421 
3422 failed:
3423 	if (copy_m != NULL) {
3424 		m_freem(copy_m);
3425 	}
3426 	return NULL;
3427 }
3428 
3429 static int
feth_add_mbuf_trailer(struct mbuf * m,void * trailer,size_t trailer_len)3430 feth_add_mbuf_trailer(struct mbuf *m, void *trailer, size_t trailer_len)
3431 {
3432 	int ret;
3433 	ASSERT(trailer_len <= FETH_TRAILER_LENGTH_MAX);
3434 
3435 	ret = m_append(m, trailer_len, (caddr_t)trailer);
3436 	if (ret == 1) {
3437 		FETH_DPRINTF("%s %zuB trailer added\n", __func__, trailer_len);
3438 		return 0;
3439 	}
3440 	printf("%s m_append failed\n", __func__);
3441 	return ENOTSUP;
3442 }
3443 
3444 static int
feth_add_mbuf_fcs(struct mbuf * m)3445 feth_add_mbuf_fcs(struct mbuf *m)
3446 {
3447 	uint32_t pkt_len, offset = 0;
3448 	uint32_t crc = 0;
3449 	int err = 0;
3450 
3451 	ASSERT(sizeof(crc) == ETHER_CRC_LEN);
3452 
3453 	pkt_len = m->m_pkthdr.len;
3454 	struct mbuf *iter = m;
3455 	while (iter != NULL && offset < pkt_len) {
3456 		uint32_t frag_len = iter->m_len;
3457 		ASSERT(frag_len <= (pkt_len - offset));
3458 		crc = crc32(crc, mtod(iter, void *), frag_len);
3459 		offset += frag_len;
3460 		iter = m->m_next;
3461 	}
3462 
3463 	err = feth_add_mbuf_trailer(m, &crc, ETHER_CRC_LEN);
3464 	if (err != 0) {
3465 		return err;
3466 	}
3467 
3468 	m->m_flags |= M_HASFCS;
3469 
3470 	return 0;
3471 }
3472 
3473 static void
feth_output_common(ifnet_t ifp,struct mbuf * m,ifnet_t peer,iff_flags_t flags,bool fcs,void * trailer,size_t trailer_len)3474 feth_output_common(ifnet_t ifp, struct mbuf * m, ifnet_t peer,
3475     iff_flags_t flags, bool fcs, void *trailer, size_t trailer_len)
3476 {
3477 	void *          frame_header;
3478 
3479 	frame_header = mbuf_data(m);
3480 	if ((flags & IFF_FLAGS_HWCSUM) != 0) {
3481 		m->m_pkthdr.csum_data = 0xffff;
3482 		m->m_pkthdr.csum_flags =
3483 		    CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
3484 		    CSUM_IP_CHECKED | CSUM_IP_VALID;
3485 	}
3486 
3487 	(void)ifnet_stat_increment_out(ifp, 1, m->m_pkthdr.len, 0);
3488 	bpf_tap_out(ifp, DLT_EN10MB, m, NULL, 0);
3489 
3490 	if (trailer != 0) {
3491 		feth_add_mbuf_trailer(m, trailer, trailer_len);
3492 	}
3493 	if (fcs) {
3494 		feth_add_mbuf_fcs(m);
3495 	}
3496 
3497 	(void)mbuf_pkthdr_setrcvif(m, peer);
3498 	mbuf_pkthdr_setheader(m, frame_header);
3499 	mbuf_pkthdr_adjustlen(m, -ETHER_HDR_LEN);
3500 	(void)mbuf_setdata(m, (char *)mbuf_data(m) + ETHER_HDR_LEN,
3501 	    mbuf_len(m) - ETHER_HDR_LEN);
3502 	bpf_tap_in(peer, DLT_EN10MB, m, frame_header,
3503 	    sizeof(struct ether_header));
3504 	feth_enqueue_input(peer, m);
3505 }
3506 
3507 static void
feth_start(ifnet_t ifp)3508 feth_start(ifnet_t ifp)
3509 {
3510 	struct mbuf *   copy_m = NULL;
3511 	if_fake_ref     fakeif;
3512 	iff_flags_t     flags = 0;
3513 	bool            fcs;
3514 	size_t          trailer_len;
3515 	ifnet_t         peer = NULL;
3516 	struct mbuf *   m;
3517 	struct mbuf *   save_m;
3518 
3519 	feth_lock();
3520 	fakeif = ifnet_get_if_fake(ifp);
3521 	if (fakeif == NULL) {
3522 		feth_unlock();
3523 		return;
3524 	}
3525 
3526 	if (fakeif->iff_start_busy) {
3527 		feth_unlock();
3528 		printf("if_fake: start is busy\n");
3529 		return;
3530 	}
3531 
3532 	peer = fakeif->iff_peer;
3533 	flags = fakeif->iff_flags;
3534 	fcs = fakeif->iff_fcs;
3535 	trailer_len = fakeif->iff_trailer_length;
3536 
3537 	/* check for pending TX */
3538 	m = fakeif->iff_pending_tx_packet;
3539 	if (m != NULL) {
3540 		if (peer != NULL) {
3541 			copy_m = copy_mbuf(m);
3542 			if (copy_m == NULL) {
3543 				feth_unlock();
3544 				return;
3545 			}
3546 		}
3547 		fakeif->iff_pending_tx_packet = NULL;
3548 		m_freem(m);
3549 		m = NULL;
3550 	}
3551 	fakeif->iff_start_busy = TRUE;
3552 	feth_unlock();
3553 	save_m = NULL;
3554 	for (;;) {
3555 		if (copy_m != NULL) {
3556 			VERIFY(peer != NULL);
3557 			feth_output_common(ifp, copy_m, peer, flags, fcs,
3558 			    feth_trailer, trailer_len);
3559 			copy_m = NULL;
3560 		}
3561 		if (ifnet_dequeue(ifp, &m) != 0) {
3562 			break;
3563 		}
3564 		if (peer == NULL) {
3565 			m_freem(m);
3566 		} else {
3567 			copy_m = copy_mbuf(m);
3568 			if (copy_m == NULL) {
3569 				save_m = m;
3570 				break;
3571 			}
3572 			m_freem(m);
3573 		}
3574 	}
3575 	peer = NULL;
3576 	feth_lock();
3577 	fakeif = ifnet_get_if_fake(ifp);
3578 	if (fakeif != NULL) {
3579 		fakeif->iff_start_busy = FALSE;
3580 		if (save_m != NULL && fakeif->iff_peer != NULL) {
3581 			/* save it for next time */
3582 			fakeif->iff_pending_tx_packet = save_m;
3583 			save_m = NULL;
3584 		}
3585 	}
3586 	feth_unlock();
3587 	if (save_m != NULL) {
3588 		/* didn't save packet, so free it */
3589 		m_freem(save_m);
3590 	}
3591 }
3592 
3593 static int
feth_output(ifnet_t ifp,struct mbuf * m)3594 feth_output(ifnet_t ifp, struct mbuf * m)
3595 {
3596 	struct mbuf *           copy_m;
3597 	if_fake_ref             fakeif;
3598 	iff_flags_t             flags;
3599 	bool                    fcs;
3600 	size_t                  trailer_len;
3601 	ifnet_t                 peer = NULL;
3602 
3603 	if (m == NULL) {
3604 		return 0;
3605 	}
3606 	copy_m = copy_mbuf(m);
3607 	m_freem(m);
3608 	m = NULL;
3609 	if (copy_m == NULL) {
3610 		/* count this as an output error */
3611 		ifnet_stat_increment_out(ifp, 0, 0, 1);
3612 		return 0;
3613 	}
3614 	feth_lock();
3615 	fakeif = ifnet_get_if_fake(ifp);
3616 	if (fakeif != NULL) {
3617 		peer = fakeif->iff_peer;
3618 		flags = fakeif->iff_flags;
3619 		fcs = fakeif->iff_fcs;
3620 		trailer_len = fakeif->iff_trailer_length;
3621 	}
3622 	feth_unlock();
3623 	if (peer == NULL) {
3624 		m_freem(copy_m);
3625 		ifnet_stat_increment_out(ifp, 0, 0, 1);
3626 		return 0;
3627 	}
3628 	feth_output_common(ifp, copy_m, peer, flags, fcs, feth_trailer,
3629 	    trailer_len);
3630 	return 0;
3631 }
3632 
3633 static int
feth_config(ifnet_t ifp,ifnet_t peer)3634 feth_config(ifnet_t ifp, ifnet_t peer)
3635 {
3636 	int             connected = FALSE;
3637 	int             disconnected = FALSE;
3638 	int             error = 0;
3639 	if_fake_ref     fakeif = NULL;
3640 
3641 	feth_lock();
3642 	fakeif = ifnet_get_if_fake(ifp);
3643 	if (fakeif == NULL) {
3644 		error = EINVAL;
3645 		goto done;
3646 	}
3647 	if (peer != NULL) {
3648 		/* connect to peer */
3649 		if_fake_ref     peer_fakeif;
3650 
3651 		peer_fakeif = ifnet_get_if_fake(peer);
3652 		if (peer_fakeif == NULL) {
3653 			error = EINVAL;
3654 			goto done;
3655 		}
3656 		if (feth_is_detaching(fakeif) ||
3657 		    feth_is_detaching(peer_fakeif) ||
3658 		    peer_fakeif->iff_peer != NULL ||
3659 		    fakeif->iff_peer != NULL) {
3660 			error = EBUSY;
3661 			goto done;
3662 		}
3663 #if SKYWALK
3664 		if (fakeif->iff_pp_mode !=
3665 		    peer_fakeif->iff_pp_mode) {
3666 			error = EINVAL;
3667 			goto done;
3668 		}
3669 #endif /* SKYWALK */
3670 		fakeif->iff_peer = peer;
3671 		peer_fakeif->iff_peer = ifp;
3672 		connected = TRUE;
3673 	} else if (fakeif->iff_peer != NULL) {
3674 		/* disconnect from peer */
3675 		if_fake_ref     peer_fakeif;
3676 
3677 		peer = fakeif->iff_peer;
3678 		peer_fakeif = ifnet_get_if_fake(peer);
3679 		if (peer_fakeif == NULL) {
3680 			/* should not happen */
3681 			error = EINVAL;
3682 			goto done;
3683 		}
3684 		fakeif->iff_peer = NULL;
3685 		peer_fakeif->iff_peer = NULL;
3686 		disconnected = TRUE;
3687 	}
3688 
3689 done:
3690 	feth_unlock();
3691 
3692 	/* generate link status event if we connect or disconnect */
3693 	if (connected) {
3694 		interface_link_event(ifp, KEV_DL_LINK_ON);
3695 		interface_link_event(peer, KEV_DL_LINK_ON);
3696 	} else if (disconnected) {
3697 		interface_link_event(ifp, KEV_DL_LINK_OFF);
3698 		interface_link_event(peer, KEV_DL_LINK_OFF);
3699 	}
3700 	return error;
3701 }
3702 
3703 static int
feth_set_media(ifnet_t ifp,struct if_fake_request * iffr)3704 feth_set_media(ifnet_t ifp, struct if_fake_request * iffr)
3705 {
3706 	if_fake_ref     fakeif;
3707 	int             error;
3708 
3709 	if (iffr->iffr_media.iffm_count > IF_FAKE_MEDIA_LIST_MAX) {
3710 		/* list is too long */
3711 		return EINVAL;
3712 	}
3713 	feth_lock();
3714 	fakeif = ifnet_get_if_fake(ifp);
3715 	if (fakeif == NULL) {
3716 		error = EINVAL;
3717 		goto done;
3718 	}
3719 	fakeif->iff_media_count = iffr->iffr_media.iffm_count;
3720 	bcopy(iffr->iffr_media.iffm_list, fakeif->iff_media_list,
3721 	    iffr->iffr_media.iffm_count * sizeof(fakeif->iff_media_list[0]));
3722 #if 0
3723 	/* XXX: "auto-negotiate" active with peer? */
3724 	/* generate link status event? */
3725 	fakeif->iff_media_current = iffr->iffr_media.iffm_current;
3726 #endif
3727 	error = 0;
3728 done:
3729 	feth_unlock();
3730 	return error;
3731 }
3732 
3733 static int
if_fake_request_copyin(user_addr_t user_addr,struct if_fake_request * iffr,u_int32_t len)3734 if_fake_request_copyin(user_addr_t user_addr,
3735     struct if_fake_request *iffr, u_int32_t len)
3736 {
3737 	int     error;
3738 
3739 	if (user_addr == USER_ADDR_NULL || len < sizeof(*iffr)) {
3740 		error = EINVAL;
3741 		goto done;
3742 	}
3743 	error = copyin(user_addr, iffr, sizeof(*iffr));
3744 	if (error != 0) {
3745 		goto done;
3746 	}
3747 	if (iffr->iffr_reserved[0] != 0 || iffr->iffr_reserved[1] != 0 ||
3748 	    iffr->iffr_reserved[2] != 0 || iffr->iffr_reserved[3] != 0) {
3749 		error = EINVAL;
3750 		goto done;
3751 	}
3752 done:
3753 	return error;
3754 }
3755 
3756 static int
feth_set_drvspec(ifnet_t ifp,uint32_t cmd,u_int32_t len,user_addr_t user_addr)3757 feth_set_drvspec(ifnet_t ifp, uint32_t cmd, u_int32_t len,
3758     user_addr_t user_addr)
3759 {
3760 	int                     error;
3761 	struct if_fake_request  iffr;
3762 	ifnet_t                 peer;
3763 
3764 	switch (cmd) {
3765 	case IF_FAKE_S_CMD_SET_PEER:
3766 		error = if_fake_request_copyin(user_addr, &iffr, len);
3767 		if (error != 0) {
3768 			break;
3769 		}
3770 		if (iffr.iffr_peer_name[0] == '\0') {
3771 			error = feth_config(ifp, NULL);
3772 			break;
3773 		}
3774 
3775 		/* ensure nul termination */
3776 		iffr.iffr_peer_name[IFNAMSIZ - 1] = '\0';
3777 		peer = ifunit(iffr.iffr_peer_name);
3778 		if (peer == NULL) {
3779 			error = ENXIO;
3780 			break;
3781 		}
3782 		if (ifnet_type(peer) != IFT_ETHER) {
3783 			error = EINVAL;
3784 			break;
3785 		}
3786 		if (strcmp(ifnet_name(peer), FAKE_ETHER_NAME) != 0) {
3787 			error = EINVAL;
3788 			break;
3789 		}
3790 		error = feth_config(ifp, peer);
3791 		break;
3792 	case IF_FAKE_S_CMD_SET_MEDIA:
3793 		error = if_fake_request_copyin(user_addr, &iffr, len);
3794 		if (error != 0) {
3795 			break;
3796 		}
3797 		error = feth_set_media(ifp, &iffr);
3798 		break;
3799 	case IF_FAKE_S_CMD_SET_DEQUEUE_STALL:
3800 		error = if_fake_request_copyin(user_addr, &iffr, len);
3801 		if (error != 0) {
3802 			break;
3803 		}
3804 		error = feth_enable_dequeue_stall(ifp,
3805 		    iffr.iffr_dequeue_stall);
3806 		break;
3807 	default:
3808 		error = EOPNOTSUPP;
3809 		break;
3810 	}
3811 	return error;
3812 }
3813 
3814 static int
feth_get_drvspec(ifnet_t ifp,u_int32_t cmd,u_int32_t len,user_addr_t user_addr)3815 feth_get_drvspec(ifnet_t ifp, u_int32_t cmd, u_int32_t len,
3816     user_addr_t user_addr)
3817 {
3818 	int                     error = EOPNOTSUPP;
3819 	if_fake_ref             fakeif;
3820 	struct if_fake_request  iffr;
3821 	ifnet_t                 peer;
3822 
3823 	switch (cmd) {
3824 	case IF_FAKE_G_CMD_GET_PEER:
3825 		if (len < sizeof(iffr)) {
3826 			error = EINVAL;
3827 			break;
3828 		}
3829 		feth_lock();
3830 		fakeif = ifnet_get_if_fake(ifp);
3831 		if (fakeif == NULL) {
3832 			feth_unlock();
3833 			error = EOPNOTSUPP;
3834 			break;
3835 		}
3836 		peer = fakeif->iff_peer;
3837 		feth_unlock();
3838 		bzero(&iffr, sizeof(iffr));
3839 		if (peer != NULL) {
3840 			strlcpy(iffr.iffr_peer_name,
3841 			    if_name(peer),
3842 			    sizeof(iffr.iffr_peer_name));
3843 		}
3844 		error = copyout(&iffr, user_addr, sizeof(iffr));
3845 		break;
3846 	default:
3847 		break;
3848 	}
3849 	return error;
3850 }
3851 
/*
 * union ifdrvu
 * - lets a single ioctl data pointer be viewed as either the 32-bit or the
 *   64-bit 'struct ifdrv' layout; feth_ioctl() stores the raw pointer via
 *   ifdrvu_p and then reads ifd_cmd/ifd_len/ifd_data through the view that
 *   matches the ioctl command
 */
union ifdrvu {
	struct ifdrv32  *ifdrvu_32;     /* view used for the *DRVSPEC32 ioctls */
	struct ifdrv64  *ifdrvu_64;     /* view used for the *DRVSPEC64 ioctls */
	void            *ifdrvu_p;      /* untyped pointer as passed to the ioctl */
};
3857 
3858 static int
feth_ioctl(ifnet_t ifp,u_long cmd,void * data)3859 feth_ioctl(ifnet_t ifp, u_long cmd, void * data)
3860 {
3861 	unsigned int            count;
3862 	struct ifdevmtu *       devmtu_p;
3863 	union ifdrvu            drv;
3864 	uint32_t                drv_cmd;
3865 	uint32_t                drv_len;
3866 	boolean_t               drv_set_command = FALSE;
3867 	int                     error = 0;
3868 	struct ifmediareq *     ifmr;
3869 	struct ifreq *          ifr;
3870 	if_fake_ref             fakeif;
3871 	int                     status;
3872 	user_addr_t             user_addr;
3873 
3874 	ifr = (struct ifreq *)data;
3875 	switch (cmd) {
3876 	case SIOCSIFADDR:
3877 		ifnet_set_flags(ifp, IFF_UP, IFF_UP);
3878 		break;
3879 
3880 	case SIOCGIFMEDIA32:
3881 	case SIOCGIFMEDIA64:
3882 		feth_lock();
3883 		fakeif = ifnet_get_if_fake(ifp);
3884 		if (fakeif == NULL) {
3885 			feth_unlock();
3886 			return EOPNOTSUPP;
3887 		}
3888 		status = (fakeif->iff_peer != NULL)
3889 		    ? (IFM_AVALID | IFM_ACTIVE) : IFM_AVALID;
3890 		ifmr = (struct ifmediareq *)data;
3891 		user_addr = (cmd == SIOCGIFMEDIA64) ?
3892 		    ((struct ifmediareq64 *)ifmr)->ifmu_ulist :
3893 		    CAST_USER_ADDR_T(((struct ifmediareq32 *)ifmr)->ifmu_ulist);
3894 		count = ifmr->ifm_count;
3895 		ifmr->ifm_active = IFM_ETHER;
3896 		ifmr->ifm_current = IFM_ETHER;
3897 		ifmr->ifm_mask = 0;
3898 		ifmr->ifm_status = status;
3899 		if (user_addr == USER_ADDR_NULL) {
3900 			ifmr->ifm_count = fakeif->iff_media_count;
3901 		} else if (count > 0) {
3902 			if (count > fakeif->iff_media_count) {
3903 				count = fakeif->iff_media_count;
3904 			}
3905 			ifmr->ifm_count = count;
3906 			error = copyout(&fakeif->iff_media_list, user_addr,
3907 			    count * sizeof(int));
3908 		}
3909 		feth_unlock();
3910 		break;
3911 
3912 	case SIOCGIFDEVMTU:
3913 		devmtu_p = &ifr->ifr_devmtu;
3914 		devmtu_p->ifdm_current = ifnet_mtu(ifp);
3915 		devmtu_p->ifdm_max = feth_max_mtu(ifp);
3916 		devmtu_p->ifdm_min = IF_MINMTU;
3917 		break;
3918 
3919 	case SIOCSIFMTU:
3920 		if ((unsigned int)ifr->ifr_mtu > feth_max_mtu(ifp) ||
3921 		    ifr->ifr_mtu < IF_MINMTU) {
3922 			error = EINVAL;
3923 		} else {
3924 			error = ifnet_set_mtu(ifp, ifr->ifr_mtu);
3925 		}
3926 		break;
3927 
3928 	case SIOCSDRVSPEC32:
3929 	case SIOCSDRVSPEC64:
3930 		error = proc_suser(current_proc());
3931 		if (error != 0) {
3932 			break;
3933 		}
3934 		drv_set_command = TRUE;
3935 		OS_FALLTHROUGH;
3936 	case SIOCGDRVSPEC32:
3937 	case SIOCGDRVSPEC64:
3938 		drv.ifdrvu_p = data;
3939 		if (cmd == SIOCGDRVSPEC32 || cmd == SIOCSDRVSPEC32) {
3940 			drv_cmd = drv.ifdrvu_32->ifd_cmd;
3941 			drv_len = drv.ifdrvu_32->ifd_len;
3942 			user_addr = CAST_USER_ADDR_T(drv.ifdrvu_32->ifd_data);
3943 		} else {
3944 			drv_cmd = drv.ifdrvu_64->ifd_cmd;
3945 			drv_len = drv.ifdrvu_64->ifd_len;
3946 			user_addr = drv.ifdrvu_64->ifd_data;
3947 		}
3948 		if (drv_set_command) {
3949 			error = feth_set_drvspec(ifp, drv_cmd, drv_len,
3950 			    user_addr);
3951 		} else {
3952 			error = feth_get_drvspec(ifp, drv_cmd, drv_len,
3953 			    user_addr);
3954 		}
3955 		break;
3956 
3957 	case SIOCSIFLLADDR:
3958 		error = ifnet_set_lladdr(ifp, ifr->ifr_addr.sa_data,
3959 		    ifr->ifr_addr.sa_len);
3960 		break;
3961 
3962 	case SIOCSIFFLAGS:
3963 		if ((ifp->if_flags & IFF_UP) != 0) {
3964 			/* marked up, set running if not already set */
3965 			if ((ifp->if_flags & IFF_RUNNING) == 0) {
3966 				/* set running */
3967 				error = ifnet_set_flags(ifp, IFF_RUNNING,
3968 				    IFF_RUNNING);
3969 			}
3970 		} else if ((ifp->if_flags & IFF_RUNNING) != 0) {
3971 			/* marked down, clear running */
3972 			error = ifnet_set_flags(ifp, 0, IFF_RUNNING);
3973 		}
3974 		break;
3975 
3976 	case SIOCADDMULTI:
3977 	case SIOCDELMULTI:
3978 		error = 0;
3979 		break;
3980 	case SIOCSIFCAP: {
3981 		uint32_t        cap;
3982 
3983 		feth_lock();
3984 		fakeif = ifnet_get_if_fake(ifp);
3985 		if (fakeif == NULL ||
3986 		    (fakeif->iff_flags & IFF_FLAGS_LRO) == 0) {
3987 			feth_unlock();
3988 			return EOPNOTSUPP;
3989 		}
3990 		feth_unlock();
3991 		cap = (ifr->ifr_reqcap & IFCAP_LRO) != 0 ? IFCAP_LRO : 0;
3992 		error = ifnet_set_capabilities_enabled(ifp, cap, IFCAP_LRO);
3993 		break;
3994 	}
3995 	default:
3996 		error = EOPNOTSUPP;
3997 		break;
3998 	}
3999 	return error;
4000 }
4001 
4002 static void
feth_if_free(ifnet_t ifp)4003 feth_if_free(ifnet_t ifp)
4004 {
4005 	if_fake_ref             fakeif;
4006 
4007 	if (ifp == NULL) {
4008 		return;
4009 	}
4010 	feth_lock();
4011 	fakeif = ifnet_get_if_fake(ifp);
4012 	if (fakeif == NULL) {
4013 		feth_unlock();
4014 		return;
4015 	}
4016 	ifp->if_softc = NULL;
4017 #if SKYWALK
4018 	VERIFY(fakeif->iff_doorbell_tcall == NULL);
4019 #endif /* SKYWALK */
4020 	feth_unlock();
4021 	feth_release(fakeif);
4022 	ifnet_release(ifp);
4023 	return;
4024 }
4025 
4026 __private_extern__ void
if_fake_init(void)4027 if_fake_init(void)
4028 {
4029 	int error;
4030 
4031 #if SKYWALK
4032 	(void)feth_register_nexus_domain_provider();
4033 #endif /* SKYWALK */
4034 	error = if_clone_attach(&feth_cloner);
4035 	if (error != 0) {
4036 		return;
4037 	}
4038 	return;
4039 }
4040