xref: /xnu-8019.80.24/bsd/net/if_fake.c (revision a325d9c4a84054e40bbe985afedcb50ab80993ea)
1 /*
2  * Copyright (c) 2015-2021 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 /*
30  * if_fake.c
31  * - fake network interface used for testing
32  * - "feth" (e.g. "feth0", "feth1") is a virtual ethernet interface that allows
33  *   two instances to have their output/input paths "crossed-over" so that
34  *   output on one is input on the other
35  */
36 
37 /*
38  * Modification History:
39  *
40  * September 9, 2015	Dieter Siegmund ([email protected])
41  * - created
42  */
43 
44 #include <sys/param.h>
45 #include <sys/kernel.h>
46 #include <sys/malloc.h>
47 #include <sys/mbuf.h>
48 #include <sys/queue.h>
49 #include <sys/socket.h>
50 #include <sys/sockio.h>
51 #include <sys/sysctl.h>
52 #include <sys/systm.h>
53 #include <sys/kern_event.h>
54 #include <sys/mcache.h>
55 #include <sys/syslog.h>
56 
57 #include <net/bpf.h>
58 #include <net/ethernet.h>
59 #include <net/if.h>
60 #include <net/if_vlan_var.h>
61 #include <net/if_fake_var.h>
62 #include <net/if_arp.h>
63 #include <net/if_dl.h>
64 #include <net/if_ether.h>
65 #include <net/if_types.h>
66 #include <libkern/OSAtomic.h>
67 
68 #include <net/dlil.h>
69 
70 #include <net/kpi_interface.h>
71 #include <net/kpi_protocol.h>
72 
73 #include <kern/locks.h>
74 #include <kern/zalloc.h>
75 
76 #ifdef INET
77 #include <netinet/in.h>
78 #include <netinet/if_ether.h>
79 #endif
80 
81 #include <net/if_media.h>
82 #include <net/ether_if_module.h>
83 #if SKYWALK
84 #include <skywalk/os_skywalk_private.h>
85 #include <skywalk/nexus/netif/nx_netif.h>
86 #include <skywalk/channel/channel_var.h>
87 #endif /* SKYWALK */
88 
/*
 * is_power_of_two
 * - returns non-zero when exactly one bit of `val` is set
 * - zero is rejected explicitly: (val & (val - 1)) evaluates to 0 for
 *   val == 0 even though 0 is not a power of two
 */
static boolean_t
is_power_of_two(unsigned int val)
{
	return val != 0 && (val & (val - 1)) == 0;
}
94 
/* name prefix for fake ethernet interfaces, e.g. "feth0", "feth1" */
#define FAKE_ETHER_NAME         "feth"

/* parent node for the tunables below: net.link.fake */
SYSCTL_DECL(_net_link);
SYSCTL_NODE(_net_link, OID_AUTO, fake, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
    "Fake interface");

/* net.link.fake.txstart: TXSTART mode, initially 1 */
static int if_fake_txstart = 1;
SYSCTL_INT(_net_link_fake, OID_AUTO, txstart, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_txstart, 0, "Fake interface TXSTART mode");

/* net.link.fake.hwcsum: simulate hardware checksum, initially 0 */
static int if_fake_hwcsum = 0;
SYSCTL_INT(_net_link_fake, OID_AUTO, hwcsum, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_hwcsum, 0, "Fake interface simulate hardware checksum");

/* net.link.fake.nxattach: auto-attach nexus, initially 0 */
static int if_fake_nxattach = 0;
SYSCTL_INT(_net_link_fake, OID_AUTO, nxattach, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_nxattach, 0, "Fake interface auto-attach nexus");

/* net.link.fake.bsd_mode: attach as BSD interface, initially 1 */
static int if_fake_bsd_mode = 1;
SYSCTL_INT(_net_link_fake, OID_AUTO, bsd_mode, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_bsd_mode, 0, "Fake interface attach as BSD interface");

/* net.link.fake.debug: a non-zero value turns on FETH_DPRINTF() output */
static int if_fake_debug = 0;
SYSCTL_INT(_net_link_fake, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_debug, 0, "Fake interface debug logs");

/* net.link.fake.wmm_mode: 802.11 WMM mode, initially 0 */
static int if_fake_wmm_mode = 0;
SYSCTL_INT(_net_link_fake, OID_AUTO, wmm_mode, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_wmm_mode, 0, "Fake interface in 802.11 WMM mode");

/* net.link.fake.multibuflet: use multi-buflet packets, initially 0 */
static int if_fake_multibuflet = 0;
SYSCTL_INT(_net_link_fake, OID_AUTO, multibuflet, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_multibuflet, 0, "Fake interface using multi-buflet packets");
128 
129 typedef enum {
130 	IFF_PP_MODE_GLOBAL = 0,         /* share a global pool */
131 	IFF_PP_MODE_PRIVATE = 1,        /* creates its own rx/tx pool */
132 	IFF_PP_MODE_PRIVATE_SPLIT = 2,  /* creates its own split rx & tx pool */
133 } iff_pktpool_mode_t;
134 static iff_pktpool_mode_t if_fake_pktpool_mode = 0;
135 SYSCTL_INT(_net_link_fake, OID_AUTO, pktpool_mode, CTLFLAG_RW | CTLFLAG_LOCKED,
136     &if_fake_pktpool_mode, 0,
137     "Fake interface packet pool mode (0 global, 1 private, 2 private split");
138 
139 #define FETH_LINK_LAYER_AGGRETATION_FACTOR_MAX 32
140 static int if_fake_link_layer_aggregation_factor =
141     FETH_LINK_LAYER_AGGRETATION_FACTOR_MAX;
142 static int
143 feth_link_layer_aggregation_factor_sysctl SYSCTL_HANDLER_ARGS
144 {
145 #pragma unused(oidp, arg1, arg2)
146 	unsigned int new_value;
147 	int changed;
148 	int error;
149 
150 	error = sysctl_io_number(req, if_fake_link_layer_aggregation_factor,
151 	    sizeof(if_fake_link_layer_aggregation_factor), &new_value,
152 	    &changed);
153 	if (error == 0 && changed != 0) {
154 		if (new_value <= 0 ||
155 		    new_value > FETH_LINK_LAYER_AGGRETATION_FACTOR_MAX) {
156 			return EINVAL;
157 		}
158 		if_fake_link_layer_aggregation_factor = new_value;
159 	}
160 	return error;
161 }
162 
163 SYSCTL_PROC(_net_link_fake, OID_AUTO, link_layer_aggregation_factor,
164     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
165     0, 0, feth_link_layer_aggregation_factor_sysctl, "IU",
166     "Fake interface link layer aggregation factor");
167 
168 #define FETH_TX_HEADROOM_MAX      32
169 static unsigned int if_fake_tx_headroom = FETH_TX_HEADROOM_MAX;
170 static int
171 feth_tx_headroom_sysctl SYSCTL_HANDLER_ARGS
172 {
173 #pragma unused(oidp, arg1, arg2)
174 	unsigned int new_value;
175 	int changed;
176 	int error;
177 
178 	error = sysctl_io_number(req, if_fake_tx_headroom,
179 	    sizeof(if_fake_tx_headroom), &new_value, &changed);
180 	if (error == 0 && changed != 0) {
181 		if (new_value > FETH_TX_HEADROOM_MAX ||
182 		    (new_value % 8) != 0) {
183 			return EINVAL;
184 		}
185 		if_fake_tx_headroom = new_value;
186 	}
187 	return 0;
188 }
189 
190 SYSCTL_PROC(_net_link_fake, OID_AUTO, tx_headroom,
191     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
192     0, 0, feth_tx_headroom_sysctl, "IU", "Fake ethernet Tx headroom");
193 
194 static int if_fake_fcs = 0;
195 SYSCTL_INT(_net_link_fake, OID_AUTO, fcs, CTLFLAG_RW | CTLFLAG_LOCKED,
196     &if_fake_fcs, 0, "Fake interface using frame check sequence");
197 
198 #define FETH_TRAILER_LENGTH_MAX 28
199 char feth_trailer[FETH_TRAILER_LENGTH_MAX + 1] = "trailertrailertrailertrailer";
200 static unsigned int if_fake_trailer_length = 0;
201 static int
202 feth_trailer_length_sysctl SYSCTL_HANDLER_ARGS
203 {
204 #pragma unused(oidp, arg1, arg2)
205 	unsigned int new_value;
206 	int changed;
207 	int error;
208 
209 	error = sysctl_io_number(req, if_fake_trailer_length,
210 	    sizeof(if_fake_trailer_length), &new_value, &changed);
211 	if (error == 0 && changed != 0) {
212 		if (new_value > FETH_TRAILER_LENGTH_MAX) {
213 			return EINVAL;
214 		}
215 		if_fake_trailer_length = new_value;
216 	}
217 	return 0;
218 }
219 
220 SYSCTL_PROC(_net_link_fake, OID_AUTO, trailer_length,
221     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
222     feth_trailer_length_sysctl, "IU", "Fake interface frame trailer length");
223 
224 /* sysctl net.link.fake.max_mtu */
225 #define FETH_MAX_MTU_DEFAULT    2048
226 #define FETH_MAX_MTU_MAX        ((16 * 1024) - ETHER_HDR_LEN)
227 
228 static unsigned int if_fake_max_mtu = FETH_MAX_MTU_DEFAULT;
229 
230 /* sysctl net.link.fake.buflet_size */
231 #define FETH_BUFLET_SIZE_MIN            512
232 #define FETH_BUFLET_SIZE_MAX            2048
233 
234 static unsigned int if_fake_buflet_size = FETH_BUFLET_SIZE_MIN;
235 
236 static int
237 feth_max_mtu_sysctl SYSCTL_HANDLER_ARGS
238 {
239 #pragma unused(oidp, arg1, arg2)
240 	unsigned int new_value;
241 	int changed;
242 	int error;
243 
244 	error = sysctl_io_number(req, if_fake_max_mtu,
245 	    sizeof(if_fake_max_mtu), &new_value, &changed);
246 	if (error == 0 && changed != 0) {
247 		if (new_value > FETH_MAX_MTU_MAX ||
248 		    new_value < ETHERMTU ||
249 		    new_value <= if_fake_buflet_size) {
250 			return EINVAL;
251 		}
252 		if_fake_max_mtu = new_value;
253 	}
254 	return 0;
255 }
256 
257 SYSCTL_PROC(_net_link_fake, OID_AUTO, max_mtu,
258     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
259     0, 0, feth_max_mtu_sysctl, "IU", "Fake interface maximum MTU");
260 
261 static int
262 feth_buflet_size_sysctl SYSCTL_HANDLER_ARGS
263 {
264 #pragma unused(oidp, arg1, arg2)
265 	unsigned int new_value;
266 	int changed;
267 	int error;
268 
269 	error = sysctl_io_number(req, if_fake_buflet_size,
270 	    sizeof(if_fake_buflet_size), &new_value, &changed);
271 	if (error == 0 && changed != 0) {
272 		/* must be a power of 2 between min and max */
273 		if (new_value > FETH_BUFLET_SIZE_MAX ||
274 		    new_value < FETH_BUFLET_SIZE_MIN ||
275 		    !is_power_of_two(new_value) ||
276 		    new_value >= if_fake_max_mtu) {
277 			return EINVAL;
278 		}
279 		if_fake_buflet_size = new_value;
280 	}
281 	return 0;
282 }
283 
284 SYSCTL_PROC(_net_link_fake, OID_AUTO, buflet_size,
285     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
286     0, 0, feth_buflet_size_sysctl, "IU", "Fake interface buflet size");
287 
288 static unsigned int if_fake_user_access = 0;
289 
290 static int
291 feth_user_access_sysctl SYSCTL_HANDLER_ARGS
292 {
293 #pragma unused(oidp, arg1, arg2)
294 	unsigned int new_value;
295 	int changed;
296 	int error;
297 
298 	error = sysctl_io_number(req, if_fake_user_access,
299 	    sizeof(if_fake_user_access), &new_value, &changed);
300 	if (error == 0 && changed != 0) {
301 		if (new_value != 0) {
302 			if (new_value != 1) {
303 				return EINVAL;
304 			}
305 		}
306 		if_fake_user_access = new_value;
307 	}
308 	return 0;
309 }
310 
311 SYSCTL_PROC(_net_link_fake, OID_AUTO, user_access,
312     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
313     0, 0, feth_user_access_sysctl, "IU", "Fake interface user access");
314 
315 /* sysctl net.link.fake.if_adv_intvl (unit: millisecond) */
316 #define FETH_IF_ADV_INTVL_MIN            10
317 #define FETH_IF_ADV_INTVL_MAX            INT_MAX
318 
319 static int if_fake_if_adv_interval = 0; /* no interface advisory */
320 static int
321 feth_if_adv_interval_sysctl SYSCTL_HANDLER_ARGS
322 {
323 #pragma unused(oidp, arg1, arg2)
324 	unsigned int new_value;
325 	int changed;
326 	int error;
327 
328 	error = sysctl_io_number(req, if_fake_if_adv_interval,
329 	    sizeof(if_fake_if_adv_interval), &new_value, &changed);
330 	if (error == 0 && changed != 0) {
331 		if ((new_value != 0) && (new_value > FETH_IF_ADV_INTVL_MAX ||
332 		    new_value < FETH_IF_ADV_INTVL_MIN)) {
333 			return EINVAL;
334 		}
335 		if_fake_if_adv_interval = new_value;
336 	}
337 	return 0;
338 }
339 
340 SYSCTL_PROC(_net_link_fake, OID_AUTO, if_adv_intvl,
341     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
342     feth_if_adv_interval_sysctl, "IU",
343     "Fake interface will generate interface advisories reports at the specified interval in ms");
344 
345 /* sysctl net.link.fake.tx_drops */
346 /*
347  * Fake ethernet will drop packet on the transmit path at the specified
348  * rate, i.e drop one in every if_fake_tx_drops number of packets.
349  */
350 #define FETH_TX_DROPS_MIN            0
351 #define FETH_TX_DROPS_MAX            INT_MAX
352 static int if_fake_tx_drops = 0; /* no packets are dropped */
353 static int
354 feth_fake_tx_drops_sysctl SYSCTL_HANDLER_ARGS
355 {
356 #pragma unused(oidp, arg1, arg2)
357 	unsigned int new_value;
358 	int changed;
359 	int error;
360 
361 	error = sysctl_io_number(req, if_fake_tx_drops,
362 	    sizeof(if_fake_tx_drops), &new_value, &changed);
363 	if (error == 0 && changed != 0) {
364 		if (new_value > FETH_TX_DROPS_MAX ||
365 		    new_value < FETH_TX_DROPS_MIN) {
366 			return EINVAL;
367 		}
368 		if_fake_tx_drops = new_value;
369 	}
370 	return 0;
371 }
372 
373 SYSCTL_PROC(_net_link_fake, OID_AUTO, tx_drops,
374     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
375     feth_fake_tx_drops_sysctl, "IU",
376     "Fake interface will intermittently drop packets on Tx path");
377 
378 /* sysctl net.link.fake.llink_cnt */
379 
380 /* The maximum number of logical links (including default link) */
381 #define FETH_MAX_LLINKS 16
382 /*
383  * The default number of logical links (including default link).
384  * Zero means logical link mode is disabled.
385  */
386 #define FETH_DEF_LLINKS 0
387 
388 static uint32_t if_fake_llink_cnt = FETH_DEF_LLINKS;
389 static int
390 feth_fake_llink_cnt_sysctl SYSCTL_HANDLER_ARGS
391 {
392 #pragma unused(oidp, arg1, arg2)
393 	unsigned int new_value;
394 	int changed;
395 	int error;
396 
397 	error = sysctl_io_number(req, if_fake_llink_cnt,
398 	    sizeof(if_fake_llink_cnt), &new_value, &changed);
399 	if (error == 0 && changed != 0) {
400 		if (new_value > FETH_MAX_LLINKS) {
401 			return EINVAL;
402 		}
403 		if_fake_llink_cnt = new_value;
404 	}
405 	return 0;
406 }
407 
408 SYSCTL_PROC(_net_link_fake, OID_AUTO, llink_cnt,
409     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
410     feth_fake_llink_cnt_sysctl, "IU",
411     "Fake interface logical link count");
412 
413 /* sysctl net.link.fake.qset_cnt */
414 
415 /* The maximum number of qsets for each logical link */
416 #define FETH_MAX_QSETS  16
417 /* The default number of qsets for each logical link */
418 #define FETH_DEF_QSETS  4
419 
420 static uint32_t if_fake_qset_cnt = FETH_DEF_QSETS;
421 static int
422 feth_fake_qset_cnt_sysctl SYSCTL_HANDLER_ARGS
423 {
424 #pragma unused(oidp, arg1, arg2)
425 	unsigned int new_value;
426 	int changed;
427 	int error;
428 
429 	error = sysctl_io_number(req, if_fake_qset_cnt,
430 	    sizeof(if_fake_qset_cnt), &new_value, &changed);
431 	if (error == 0 && changed != 0) {
432 		if (new_value == 0 ||
433 		    new_value > FETH_MAX_QSETS) {
434 			return EINVAL;
435 		}
436 		if_fake_qset_cnt = new_value;
437 	}
438 	return 0;
439 }
440 
441 SYSCTL_PROC(_net_link_fake, OID_AUTO, qset_cnt,
442     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
443     feth_fake_qset_cnt_sysctl, "IU",
444     "Fake interface queue set count");
445 
/**
** virtual ethernet structures, types
**/

/*
 * Ring and queue counts for WMM mode.  The IFF_MAX_* values are used as
 * array dimensions (iff_rx_ring/iff_tx_ring, fqs_rx_queue/fqs_tx_queue).
 */
#define IFF_NUM_TX_RINGS_WMM_MODE       4
#define IFF_NUM_RX_RINGS_WMM_MODE       1
#define IFF_MAX_TX_RINGS        IFF_NUM_TX_RINGS_WMM_MODE
#define IFF_MAX_RX_RINGS        IFF_NUM_RX_RINGS_WMM_MODE
#define IFF_NUM_TX_QUEUES_WMM_MODE      4
#define IFF_NUM_RX_QUEUES_WMM_MODE      1
#define IFF_MAX_TX_QUEUES       IFF_NUM_TX_QUEUES_WMM_MODE
#define IFF_MAX_RX_QUEUES       IFF_NUM_RX_QUEUES_WMM_MODE

#define IFF_MAX_BATCH_SIZE 32

/* bit flags kept in if_fake.iff_flags */
typedef uint16_t        iff_flags_t;
#define IFF_FLAGS_HWCSUM                0x0001
#define IFF_FLAGS_BSD_MODE              0x0002
#define IFF_FLAGS_DETACHING             0x0004
#define IFF_FLAGS_WMM_MODE              0x0008
#define IFF_FLAGS_MULTIBUFLETS          0x0010
467 
#if SKYWALK

/* pair of UUIDs identifying a nexus provider and a nexus instance */
typedef struct {
	uuid_t                  fnx_provider;
	uuid_t                  fnx_instance;
} fake_nx, *fake_nx_t;

/* wrapper around a single netif queue handle */
typedef struct {
	kern_netif_queue_t      fq_queue;
} fake_queue;

/* a queue set: its rx/tx queues plus the indices/id that locate it */
typedef struct {
	kern_netif_qset_t       fqs_qset; /* provided by xnu */
	fake_queue              fqs_rx_queue[IFF_MAX_RX_QUEUES];
	fake_queue              fqs_tx_queue[IFF_MAX_TX_QUEUES];
	uint32_t                fqs_rx_queue_cnt;
	uint32_t                fqs_tx_queue_cnt;
	uint32_t                fqs_llink_idx;
	uint32_t                fqs_idx;
	uint64_t                fqs_id;
} fake_qset;

/* a logical link: up to FETH_MAX_QSETS queue sets, fl_qset_cnt in use */
typedef struct {
	uint64_t                fl_id;
	uint32_t                fl_idx;
	fake_qset               fl_qset[FETH_MAX_QSETS];
	uint32_t                fl_qset_cnt;
} fake_llink;

/* packet pool shared by interfaces in IFF_PP_MODE_GLOBAL; NULL until created */
static kern_pbufpool_t         S_pp;
#endif /* SKYWALK */
499 
/*
 * struct if_fake
 * - per-interface state for one "feth" instance
 * - reference counted through iff_retain_count (see feth_retain() and
 *   feth_release()); freed by feth_free() when the count drops to zero
 */
struct if_fake {
	char                    iff_name[IFNAMSIZ]; /* our unique id */
	ifnet_t                 iff_ifp;
	iff_flags_t             iff_flags;      /* IFF_FLAGS_* bits */
	uint32_t                iff_retain_count;
	ifnet_t                 iff_peer;       /* the other end */
	int                     iff_media_current;
	int                     iff_media_active;
	uint32_t                iff_media_count;
	int                     iff_media_list[IF_FAKE_MEDIA_LIST_MAX];
	struct mbuf *           iff_pending_tx_packet; /* freed in feth_free() (BSD mode) */
	boolean_t               iff_start_busy;
	unsigned int            iff_max_mtu;    /* reported by feth_max_mtu() */
	uint32_t                iff_fcs;
	uint32_t                iff_trailer_length;
#if SKYWALK
	fake_nx                 iff_nx;
	struct netif_stats      *iff_nifs;
	uint32_t                iff_nifs_ref;
	kern_channel_ring_t     iff_rx_ring[IFF_MAX_RX_RINGS];
	kern_channel_ring_t     iff_tx_ring[IFF_MAX_TX_RINGS];
	fake_llink              iff_llink[FETH_MAX_LLINKS];
	uint32_t                iff_llink_cnt;
	thread_call_t           iff_doorbell_tcall;
	thread_call_t           iff_if_adv_tcall;
	boolean_t               iff_doorbell_tcall_active;
	boolean_t               iff_waiting_for_tcall;
	boolean_t               iff_channel_connected;
	iff_pktpool_mode_t      iff_pp_mode;    /* selects the pool layout below */
	kern_pbufpool_t         iff_rx_pp;      /* rx packet pool */
	kern_pbufpool_t         iff_tx_pp;      /* tx packet pool (may equal iff_rx_pp) */
	uint32_t                iff_tx_headroom;
	unsigned int            iff_adv_interval; /* > 0: interface advisory configured */
	uint32_t                iff_tx_drop_rate;
	uint32_t                iff_tx_pkts_count;
	bool                    iff_intf_adv_enabled;
	void                    *iff_intf_adv_kern_ctx;
	kern_nexus_capab_interface_advisory_notify_fn_t iff_intf_adv_notify;
#endif /* SKYWALK */
};

typedef struct if_fake * if_fake_ref;
542 
/* forward declaration: look up the if_fake state for an ifnet_t */
static if_fake_ref
ifnet_get_if_fake(ifnet_t ifp);

/*
 * FETH_DPRINTF
 * - printf() prefixed with the calling function's name, emitted only when
 *   if_fake_debug is non-zero
 * NOTE(review): the expansion is a bare brace block rather than
 * do { } while (0), so "if (x) FETH_DPRINTF(...); else ..." would not
 * compile; check all call sites before changing the form.
 */
#define FETH_DPRINTF(fmt, ...)                                  \
	{ if (if_fake_debug != 0) printf("%s " fmt, __func__, ## __VA_ARGS__); }
548 
549 static inline boolean_t
feth_in_bsd_mode(if_fake_ref fakeif)550 feth_in_bsd_mode(if_fake_ref fakeif)
551 {
552 	return (fakeif->iff_flags & IFF_FLAGS_BSD_MODE) != 0;
553 }
554 
555 static inline void
feth_set_detaching(if_fake_ref fakeif)556 feth_set_detaching(if_fake_ref fakeif)
557 {
558 	fakeif->iff_flags |= IFF_FLAGS_DETACHING;
559 }
560 
561 static inline boolean_t
feth_is_detaching(if_fake_ref fakeif)562 feth_is_detaching(if_fake_ref fakeif)
563 {
564 	return (fakeif->iff_flags & IFF_FLAGS_DETACHING) != 0;
565 }
566 
567 static int
feth_enable_dequeue_stall(ifnet_t ifp,uint32_t enable)568 feth_enable_dequeue_stall(ifnet_t ifp, uint32_t enable)
569 {
570 	int error;
571 
572 	if (enable != 0) {
573 		error = ifnet_disable_output(ifp);
574 	} else {
575 		error = ifnet_enable_output(ifp);
576 	}
577 
578 	return error;
579 }
580 
581 #if SKYWALK
582 static inline boolean_t
feth_in_wmm_mode(if_fake_ref fakeif)583 feth_in_wmm_mode(if_fake_ref fakeif)
584 {
585 	return (fakeif->iff_flags & IFF_FLAGS_WMM_MODE) != 0;
586 }
587 
588 static inline boolean_t
feth_using_multibuflets(if_fake_ref fakeif)589 feth_using_multibuflets(if_fake_ref fakeif)
590 {
591 	return (fakeif->iff_flags & IFF_FLAGS_MULTIBUFLETS) != 0;
592 }
593 static void feth_detach_netif_nexus(if_fake_ref fakeif);
594 
595 static inline boolean_t
feth_has_intf_advisory_configured(if_fake_ref fakeif)596 feth_has_intf_advisory_configured(if_fake_ref fakeif)
597 {
598 	return fakeif->iff_adv_interval > 0;
599 }
600 #endif /* SKYWALK */
601 
/* unit-number limit and softc element cap handed to the cloner below */
#define FETH_MAXUNIT    IF_MAXUNIT
#define FETH_ZONE_MAX_ELEM      MIN(IFNETS_MAX, FETH_MAXUNIT)
#define M_FAKE          M_DEVBUF

/* forward declarations */
static  int feth_clone_create(struct if_clone *, u_int32_t, void *);
static  int feth_clone_destroy(ifnet_t);
static  int feth_output(ifnet_t ifp, struct mbuf *m);
static  void feth_start(ifnet_t ifp);
static  int feth_ioctl(ifnet_t ifp, u_long cmd, void * addr);
static  int feth_config(ifnet_t ifp, ifnet_t peer);
static  void feth_if_free(ifnet_t ifp);
static  void feth_ifnet_set_attrs(if_fake_ref fakeif, ifnet_t ifp);
static  void feth_free(if_fake_ref fakeif);

/*
 * cloner for "feth" interfaces: names the create/destroy callbacks and
 * sizes each softc as a struct if_fake
 */
static struct if_clone
    feth_cloner = IF_CLONE_INITIALIZER(FAKE_ETHER_NAME,
    feth_clone_create,
    feth_clone_destroy,
    0,
    FETH_MAXUNIT,
    FETH_ZONE_MAX_ELEM,
    sizeof(struct if_fake));
static  void interface_link_event(ifnet_t ifp, u_int32_t event_code);
625 
/* some media words to pretend to be ethernet */
static int default_media_words[] = {
	IFM_MAKEWORD(IFM_ETHER, 0, 0, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_10G_T, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_2500_T, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_5000_T, IFM_FDX, 0),

	IFM_MAKEWORD(IFM_ETHER, IFM_10G_KX4, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_20G_KR2, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_2500_SX, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_25G_KR, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_40G_SR4, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_50G_CR2, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_56G_R4, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_100G_CR4, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_400G_AUI8, IFM_FDX, 0),
};
/* number of entries in default_media_words[] */
#define default_media_words_count (sizeof(default_media_words)          \
	                           / sizeof (default_media_words[0]))
645 
646 /**
** feth locks
648 **/
649 
/* lock group and the single mutex used by feth_lock()/feth_unlock() */
static LCK_GRP_DECLARE(feth_lck_grp, "fake");
static LCK_MTX_DECLARE(feth_lck_mtx, &feth_lck_grp);

/*
 * feth_lock
 * - acquire feth_lck_mtx
 */
static inline void
feth_lock(void)
{
	lck_mtx_lock(&feth_lck_mtx);
}
658 
/*
 * feth_unlock
 * - release feth_lck_mtx
 */
static inline void
feth_unlock(void)
{
	lck_mtx_unlock(&feth_lck_mtx);
}
664 
665 static inline int
get_max_mtu(int bsd_mode,unsigned int max_mtu)666 get_max_mtu(int bsd_mode, unsigned int max_mtu)
667 {
668 	unsigned int    mtu;
669 
670 	if (bsd_mode != 0) {
671 		mtu = (njcl > 0) ? (M16KCLBYTES - ETHER_HDR_LEN)
672 		    : MBIGCLBYTES - ETHER_HDR_LEN;
673 		if (mtu > max_mtu) {
674 			mtu = max_mtu;
675 		}
676 	} else {
677 		mtu = max_mtu;
678 	}
679 	return mtu;
680 }
681 
682 static inline unsigned int
feth_max_mtu(ifnet_t ifp)683 feth_max_mtu(ifnet_t ifp)
684 {
685 	if_fake_ref     fakeif;
686 	unsigned int    max_mtu = ETHERMTU;
687 
688 	feth_lock();
689 	fakeif = ifnet_get_if_fake(ifp);
690 	if (fakeif != NULL) {
691 		max_mtu = fakeif->iff_max_mtu;
692 	}
693 	feth_unlock();
694 	return max_mtu;
695 }
696 
697 static void
feth_free(if_fake_ref fakeif)698 feth_free(if_fake_ref fakeif)
699 {
700 	VERIFY(fakeif->iff_retain_count == 0);
701 	if (feth_in_bsd_mode(fakeif)) {
702 		if (fakeif->iff_pending_tx_packet) {
703 			m_freem(fakeif->iff_pending_tx_packet);
704 		}
705 	}
706 #if SKYWALK
707 	else {
708 		if (fakeif->iff_pp_mode == IFF_PP_MODE_GLOBAL) {
709 			VERIFY(fakeif->iff_rx_pp == S_pp);
710 			VERIFY(fakeif->iff_tx_pp == S_pp);
711 			pp_release(fakeif->iff_rx_pp);
712 			fakeif->iff_rx_pp = NULL;
713 			pp_release(fakeif->iff_tx_pp);
714 			fakeif->iff_tx_pp = NULL;
715 			feth_lock();
716 			if (S_pp->pp_refcnt == 1) {
717 				pp_release(S_pp);
718 				S_pp = NULL;
719 			}
720 			feth_unlock();
721 		} else {
722 			if (fakeif->iff_rx_pp != NULL) {
723 				pp_release(fakeif->iff_rx_pp);
724 				fakeif->iff_rx_pp = NULL;
725 			}
726 			if (fakeif->iff_tx_pp != NULL) {
727 				pp_release(fakeif->iff_tx_pp);
728 				fakeif->iff_tx_pp = NULL;
729 			}
730 		}
731 	}
732 #endif /* SKYWALK */
733 
734 	FETH_DPRINTF("%s\n", fakeif->iff_name);
735 	if_clone_softc_deallocate(&feth_cloner, fakeif);
736 }
737 
738 static void
feth_release(if_fake_ref fakeif)739 feth_release(if_fake_ref fakeif)
740 {
741 	u_int32_t               old_retain_count;
742 
743 	old_retain_count = OSDecrementAtomic(&fakeif->iff_retain_count);
744 	switch (old_retain_count) {
745 	case 0:
746 		VERIFY(old_retain_count != 0);
747 		break;
748 	case 1:
749 		feth_free(fakeif);
750 		break;
751 	default:
752 		break;
753 	}
754 	return;
755 }
756 
757 #if SKYWALK
758 
/*
 * feth_retain
 * - atomically add one reference to `fakeif`; balanced by feth_release()
 */
static void
feth_retain(if_fake_ref fakeif)
{
	OSIncrementAtomic(&fakeif->iff_retain_count);
}
764 
765 static void
feth_packet_pool_init_prepare(if_fake_ref fakeif,struct kern_pbufpool_init * pp_init)766 feth_packet_pool_init_prepare(if_fake_ref fakeif,
767     struct kern_pbufpool_init *pp_init)
768 {
769 	uint32_t max_mtu = fakeif->iff_max_mtu;
770 
771 	bzero(pp_init, sizeof(*pp_init));
772 	pp_init->kbi_version = KERN_PBUFPOOL_CURRENT_VERSION;
773 	pp_init->kbi_flags |= KBIF_VIRTUAL_DEVICE;
774 	pp_init->kbi_packets = 1024; /* TBD configurable */
775 	if (feth_using_multibuflets(fakeif)) {
776 		pp_init->kbi_bufsize = if_fake_buflet_size;
777 		pp_init->kbi_max_frags = howmany(max_mtu, if_fake_buflet_size);
778 		pp_init->kbi_buflets = pp_init->kbi_packets *
779 		    pp_init->kbi_max_frags;
780 		pp_init->kbi_flags |= KBIF_BUFFER_ON_DEMAND;
781 	} else {
782 		pp_init->kbi_bufsize = max_mtu;
783 		pp_init->kbi_max_frags = 1;
784 		pp_init->kbi_buflets = pp_init->kbi_packets;
785 	}
786 	pp_init->kbi_buf_seg_size = skmem_usr_buf_seg_size;
787 	if (if_fake_user_access != 0) {
788 		pp_init->kbi_flags |= KBIF_USER_ACCESS;
789 	}
790 	pp_init->kbi_ctx = NULL;
791 	pp_init->kbi_ctx_retain = NULL;
792 	pp_init->kbi_ctx_release = NULL;
793 }
794 
795 static errno_t
feth_packet_pool_make(if_fake_ref fakeif)796 feth_packet_pool_make(if_fake_ref fakeif)
797 {
798 	struct kern_pbufpool_init pp_init;
799 	errno_t err;
800 
801 	feth_packet_pool_init_prepare(fakeif, &pp_init);
802 
803 	switch (fakeif->iff_pp_mode) {
804 	case IFF_PP_MODE_GLOBAL:
805 		feth_lock();
806 		if (S_pp == NULL) {
807 			(void)snprintf((char *)pp_init.kbi_name,
808 			    sizeof(pp_init.kbi_name), "%s", "feth shared pp");
809 			err = kern_pbufpool_create(&pp_init, &S_pp, NULL);
810 		}
811 		pp_retain(S_pp);
812 		feth_unlock();
813 		fakeif->iff_rx_pp = S_pp;
814 		pp_retain(S_pp);
815 		fakeif->iff_tx_pp = S_pp;
816 		break;
817 	case IFF_PP_MODE_PRIVATE:
818 		(void)snprintf((char *)pp_init.kbi_name,
819 		    sizeof(pp_init.kbi_name), "%s pp", fakeif->iff_name);
820 		err = kern_pbufpool_create(&pp_init, &fakeif->iff_rx_pp, NULL);
821 		pp_retain(fakeif->iff_rx_pp);
822 		fakeif->iff_tx_pp = fakeif->iff_rx_pp;
823 		break;
824 	case IFF_PP_MODE_PRIVATE_SPLIT:
825 		(void)snprintf((char *)pp_init.kbi_name,
826 		    sizeof(pp_init.kbi_name), "%s rx pp", fakeif->iff_name);
827 		pp_init.kbi_flags &= ~(KBIF_IODIR_IN | KBIF_IODIR_OUT |
828 		    KBIF_BUFFER_ON_DEMAND | KBIF_KERNEL_READONLY);
829 		pp_init.kbi_flags |= (KBIF_IODIR_IN | KBIF_BUFFER_ON_DEMAND);
830 		pp_init.kbi_packets = 1024;
831 		pp_init.kbi_bufsize = if_fake_link_layer_aggregation_factor * 1024;
832 		err = kern_pbufpool_create(&pp_init, &fakeif->iff_rx_pp, NULL);
833 		if (err != 0) {
834 			printf("%s: rx pp create failed %d\n", __func__, err);
835 			return err;
836 		}
837 		pp_init.kbi_flags &= ~(KBIF_IODIR_IN | KBIF_IODIR_OUT |
838 		    KBIF_BUFFER_ON_DEMAND | KBIF_KERNEL_READONLY);
839 		pp_init.kbi_flags |= KBIF_IODIR_OUT;
840 		pp_init.kbi_packets = 1024;            /* TBD configurable */
841 		pp_init.kbi_bufsize = fakeif->iff_max_mtu;
842 		(void)snprintf((char *)pp_init.kbi_name,
843 		    sizeof(pp_init.kbi_name), "%s tx pp", fakeif->iff_name);
844 		err = kern_pbufpool_create(&pp_init, &fakeif->iff_tx_pp, NULL);
845 		if (err != 0) {
846 			printf("%s: tx pp create failed %d\n", __func__, err);
847 			pp_release(fakeif->iff_rx_pp);
848 			return err;
849 		}
850 		break;
851 	default:
852 		VERIFY(0);
853 		__builtin_unreachable();
854 	}
855 
856 	return 0;
857 }
858 
/*
 * feth_clone_packet
 * - copy the (single-buflet) source packet `sph` into a destination packet
 *   taken from dif->iff_rx_pp
 * - on entry *pdph is either 0 or a previously produced destination packet;
 *   if the unused tail of that packet's buffer object can hold the new data
 *   (offset + length), the new packet is a KPKT_COPY_LIGHT clone of it and
 *   the data is placed right after the previous packet's data limit;
 *   otherwise a fresh packet is allocated from the pool
 * - on success *pdph is updated to the new packet and 0 is returned; on
 *   allocation or clone failure the error is returned and *pdph is untouched
 * NOTE(review): when *pdph is 0 and doff + dlen is also 0, the else branch
 * below would run with dph0 == 0 and dbuf0 unset — presumably callers never
 * pass an empty packet; confirm.
 */
static errno_t
feth_clone_packet(if_fake_ref dif, kern_packet_t sph, kern_packet_t *pdph)
{
	errno_t err = 0;
	kern_pbufpool_t pp = dif->iff_rx_pp;
	kern_packet_t dph = 0, dph0 = 0;
	kern_buflet_t sbuf, dbuf0, dbuf;
	void *saddr, *daddr;
	uint32_t soff, doff;
	uint32_t slen, dlen;
	uint32_t dlim0, dlim;

	/* source geometry; the destination keeps the same offset and length */
	sbuf = kern_packet_get_next_buflet(sph, NULL);
	saddr = kern_buflet_get_data_address(sbuf);
	doff = soff = kern_buflet_get_data_offset(sbuf);
	dlen = slen = kern_buflet_get_data_length(sbuf);

	/* packet clone is only supported for single-buflet */
	ASSERT(kern_packet_get_buflet_count(sph) == 1);
	ASSERT(soff == kern_packet_get_headroom(sph));
	ASSERT(slen == kern_packet_get_data_length(sph));

	dph0 = *pdph;
	if (dph0 == 0) {
		/* no previous packet: nothing to share */
		dlim0 = 0;
	} else {
		dbuf0 = kern_packet_get_next_buflet(dph0, NULL);
		ASSERT(kern_buflet_get_object_limit(dbuf0) ==
		    pp->pp_buflet_size);
		ASSERT(kern_buflet_get_data_limit(dbuf0) % 16 == 0);
		/* bytes left between the end of dbuf0's data and the end of its object */
		dlim0 = ((uintptr_t)kern_buflet_get_object_address(dbuf0) +
		    kern_buflet_get_object_limit(dbuf0)) -
		    ((uintptr_t)kern_buflet_get_data_address(dbuf0) +
		    kern_buflet_get_data_limit(dbuf0));
	}

	if (doff + dlen > dlim0) {
		/* does not fit in the leftover space: allocate a new packet */
		err = kern_pbufpool_alloc_nosleep(pp, 1, &dph);
		if (err != 0) {
			STATS_INC(dif->iff_nifs, NETIF_STATS_DROP);
			STATS_INC(dif->iff_nifs, NETIF_STATS_DROP_NOMEM_PKT);
			return err;
		}
		dbuf = kern_packet_get_next_buflet(dph, NULL);
		ASSERT(kern_buflet_get_data_address(dbuf) ==
		    kern_buflet_get_object_address(dbuf));
		daddr = kern_buflet_get_data_address(dbuf);
		dlim = kern_buflet_get_object_limit(dbuf);
		ASSERT(dlim == pp->pp_buflet_size);
	} else {
		/* fits: light-clone the previous packet and use the leftover space */
		err = kern_packet_clone_nosleep(dph0, &dph, KPKT_COPY_LIGHT);
		if (err != 0) {
			printf("%s: packet clone err %d\n", __func__, err);
			return err;
		}
		dbuf = kern_packet_get_next_buflet(dph, NULL);
		ASSERT(kern_buflet_get_object_address(dbuf) ==
		    kern_buflet_get_object_address(dbuf0));
		daddr = (void *)((uintptr_t)kern_buflet_get_data_address(dbuf0) +
		    kern_buflet_get_data_limit(dbuf0));
		dlim = dlim0;
	}

	ASSERT(doff + dlen <= dlim);

	ASSERT((uintptr_t)daddr % 16 == 0);

	bcopy((const void *)((uintptr_t)saddr + soff),
	    (void *)((uintptr_t)daddr + doff), slen);

	/* claim only what this packet needs, rounded up to 16 bytes */
	dlim = MIN(dlim, P2ROUNDUP(doff + dlen, 16));
	err = kern_buflet_set_data_address(dbuf, daddr);
	VERIFY(err == 0);
	err = kern_buflet_set_data_limit(dbuf, dlim);
	VERIFY(err == 0);
	err = kern_buflet_set_data_length(dbuf, dlen);
	VERIFY(err == 0);
	err = kern_buflet_set_data_offset(dbuf, doff);
	VERIFY(err == 0);
	err = kern_packet_set_headroom(dph, doff);
	VERIFY(err == 0);
	/* carry over link header length and service class from the source */
	err = kern_packet_set_link_header_length(dph,
	    kern_packet_get_link_header_length(sph));
	VERIFY(err == 0);
	err = kern_packet_set_service_class(dph,
	    kern_packet_get_service_class(sph));
	VERIFY(err == 0);
	err = kern_packet_finalize(dph);
	VERIFY(err == 0);
	*pdph = dph;

	return err;
}
952 
953 static inline void
feth_copy_buflet(kern_buflet_t sbuf,kern_buflet_t dbuf)954 feth_copy_buflet(kern_buflet_t sbuf, kern_buflet_t dbuf)
955 {
956 	errno_t err;
957 	uint16_t off, len;
958 	uint8_t *saddr, *daddr;
959 
960 	saddr = kern_buflet_get_data_address(sbuf);
961 	off = kern_buflet_get_data_offset(sbuf);
962 	len = kern_buflet_get_data_length(sbuf);
963 	daddr = kern_buflet_get_data_address(dbuf);
964 	bcopy((saddr + off), (daddr + off), len);
965 	err = kern_buflet_set_data_offset(dbuf, off);
966 	VERIFY(err == 0);
967 	err = kern_buflet_set_data_length(dbuf, len);
968 	VERIFY(err == 0);
969 }
970 
971 static int
feth_add_packet_trailer(kern_packet_t ph,void * trailer,size_t trailer_len)972 feth_add_packet_trailer(kern_packet_t ph, void *trailer, size_t trailer_len)
973 {
974 	errno_t err = 0;
975 
976 	ASSERT(trailer_len <= FETH_TRAILER_LENGTH_MAX);
977 
978 	kern_buflet_t buf = NULL, iter = NULL;
979 	while ((iter = kern_packet_get_next_buflet(ph, iter)) != NULL) {
980 		buf = iter;
981 	}
982 	ASSERT(buf != NULL);
983 
984 	uint16_t dlim = kern_buflet_get_data_limit(buf);
985 	uint16_t doff = kern_buflet_get_data_offset(buf);
986 	uint16_t dlen = kern_buflet_get_data_length(buf);
987 
988 	size_t trailer_room = dlim - doff - dlen;
989 
990 	if (trailer_room < trailer_len) {
991 		printf("not enough room");
992 		return ERANGE;
993 	}
994 
995 	void *data = (void *)((uintptr_t)kern_buflet_get_data_address(buf) + doff + dlen);
996 	memcpy(data, trailer, trailer_len);
997 
998 	err = kern_buflet_set_data_length(buf, dlen + trailer_len);
999 	VERIFY(err == 0);
1000 
1001 	err = kern_packet_finalize(ph);
1002 	VERIFY(err == 0);
1003 
1004 	FETH_DPRINTF("%s %zuB trailer added\n", __func__, trailer_len);
1005 
1006 	return 0;
1007 }
1008 
1009 static int
feth_add_packet_fcs(kern_packet_t ph)1010 feth_add_packet_fcs(kern_packet_t ph)
1011 {
1012 	uint32_t crc = 0;
1013 	int err;
1014 
1015 	ASSERT(sizeof(crc) == ETHER_CRC_LEN);
1016 
1017 	kern_buflet_t buf = NULL;
1018 	while ((buf = kern_packet_get_next_buflet(ph, buf)) != NULL) {
1019 		uint16_t doff = kern_buflet_get_data_offset(buf);
1020 		uint16_t dlen = kern_buflet_get_data_length(buf);
1021 		void *data = (void *)((uintptr_t)kern_buflet_get_data_address(buf) + doff);
1022 		crc = crc32(crc, data, dlen);
1023 	}
1024 
1025 	err = feth_add_packet_trailer(ph, &crc, ETHER_CRC_LEN);
1026 	if (!err) {
1027 		return err;
1028 	}
1029 
1030 	err = kern_packet_set_link_ethfcs(ph);
1031 	VERIFY(err == 0);
1032 
1033 	return 0;
1034 }
1035 
1036 static errno_t
feth_copy_packet(if_fake_ref dif,kern_packet_t sph,kern_packet_t * pdph)1037 feth_copy_packet(if_fake_ref dif, kern_packet_t sph, kern_packet_t *pdph)
1038 {
1039 	errno_t err = 0;
1040 	uint16_t i, bufcnt;
1041 	mach_vm_address_t baddr;
1042 	kern_buflet_t sbuf = NULL, dbuf = NULL;
1043 	kern_pbufpool_t pp = dif->iff_rx_pp;
1044 	kern_packet_t dph;
1045 	boolean_t multi_buflet = feth_using_multibuflets(dif);
1046 
1047 	bufcnt = kern_packet_get_buflet_count(sph);
1048 	ASSERT((bufcnt == 1) || multi_buflet);
1049 	*pdph = 0;
1050 
1051 	err = kern_pbufpool_alloc_nosleep(pp, 1, &dph);
1052 	if (err != 0) {
1053 		STATS_INC(dif->iff_nifs, NETIF_STATS_DROP);
1054 		STATS_INC(dif->iff_nifs, NETIF_STATS_DROP_NOMEM_PKT);
1055 		return err;
1056 	}
1057 
1058 	/* pre-constructed single buflet packet copy */
1059 	sbuf = kern_packet_get_next_buflet(sph, NULL);
1060 	dbuf = kern_packet_get_next_buflet(dph, NULL);
1061 	feth_copy_buflet(sbuf, dbuf);
1062 
1063 	if (!multi_buflet) {
1064 		goto done;
1065 	}
1066 
1067 	/* un-constructed multi-buflet packet copy */
1068 	for (i = 1; i < bufcnt; i++) {
1069 		kern_buflet_t dbuf_next = NULL;
1070 
1071 		sbuf = kern_packet_get_next_buflet(sph, sbuf);
1072 		VERIFY(sbuf != NULL);
1073 		err = kern_pbufpool_alloc_buflet_nosleep(pp, &dbuf_next);
1074 		if (err != 0) {
1075 			STATS_INC(dif->iff_nifs, NETIF_STATS_DROP);
1076 			STATS_INC(dif->iff_nifs, NETIF_STATS_DROP_NOMEM_BUF);
1077 			break;
1078 		}
1079 		ASSERT(dbuf_next != NULL);
1080 		feth_copy_buflet(sbuf, dbuf_next);
1081 		err = kern_packet_add_buflet(dph, dbuf, dbuf_next);
1082 		VERIFY(err == 0);
1083 		dbuf = dbuf_next;
1084 	}
1085 	if (__improbable(err != 0)) {
1086 		dbuf = NULL;
1087 		while (i-- != 0) {
1088 			dbuf = kern_packet_get_next_buflet(dph, dbuf);
1089 			VERIFY(dbuf != NULL);
1090 			baddr = (mach_vm_address_t)
1091 			    kern_buflet_get_data_address(dbuf);
1092 			VERIFY(baddr != 0);
1093 		}
1094 		kern_pbufpool_free(pp, dph);
1095 		dph = 0;
1096 	}
1097 
1098 done:
1099 	if (__probable(err == 0)) {
1100 		err = kern_packet_set_headroom(dph,
1101 		    kern_packet_get_headroom(sph));
1102 		VERIFY(err == 0);
1103 		err = kern_packet_set_link_header_length(dph,
1104 		    kern_packet_get_link_header_length(sph));
1105 		VERIFY(err == 0);
1106 		err = kern_packet_set_service_class(dph,
1107 		    kern_packet_get_service_class(sph));
1108 		VERIFY(err == 0);
1109 		err = kern_packet_finalize(dph);
1110 		VERIFY(err == 0);
1111 		VERIFY(bufcnt == kern_packet_get_buflet_count(dph));
1112 		*pdph = dph;
1113 	}
1114 	return err;
1115 }
1116 
1117 static void
feth_rx_submit(if_fake_ref sif,if_fake_ref dif,kern_packet_t sphs[],uint32_t n_pkts)1118 feth_rx_submit(if_fake_ref sif, if_fake_ref dif, kern_packet_t sphs[],
1119     uint32_t n_pkts)
1120 {
1121 	errno_t err = 0;
1122 	struct kern_channel_ring_stat_increment stats;
1123 	kern_channel_ring_t rx_ring = NULL;
1124 	kern_channel_slot_t rx_slot = NULL, last_rx_slot = NULL;
1125 	kern_packet_t sph = 0, dph = 0;
1126 
1127 	memset(&stats, 0, sizeof(stats));
1128 
1129 	rx_ring = dif->iff_rx_ring[0];
1130 	if (rx_ring == NULL) {
1131 		return;
1132 	}
1133 
1134 	kr_enter(rx_ring, TRUE);
1135 	kern_channel_reclaim(rx_ring);
1136 	rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);
1137 
1138 	for (uint32_t i = 0; i < n_pkts && rx_slot != NULL; i++) {
1139 		sph = sphs[i];
1140 
1141 		switch (dif->iff_pp_mode) {
1142 		case IFF_PP_MODE_GLOBAL:
1143 			sphs[i] = 0;
1144 			dph = sph;
1145 			err = kern_packet_finalize(dph);
1146 			VERIFY(err == 0);
1147 			break;
1148 		case IFF_PP_MODE_PRIVATE:
1149 			err = feth_copy_packet(dif, sph, &dph);
1150 			break;
1151 		case IFF_PP_MODE_PRIVATE_SPLIT:
1152 			err = feth_clone_packet(dif, sph, &dph);
1153 			break;
1154 		default:
1155 			VERIFY(0);
1156 			__builtin_unreachable();
1157 		}
1158 		if (__improbable(err != 0)) {
1159 			continue;
1160 		}
1161 
1162 		if (sif->iff_trailer_length != 0) {
1163 			feth_add_packet_trailer(dph, feth_trailer,
1164 			    sif->iff_trailer_length);
1165 		}
1166 		if (sif->iff_fcs != 0) {
1167 			feth_add_packet_fcs(dph);
1168 		}
1169 
1170 		bpf_tap_packet_in(dif->iff_ifp, DLT_EN10MB, dph, NULL, 0);
1171 		stats.kcrsi_slots_transferred++;
1172 		stats.kcrsi_bytes_transferred
1173 		        += kern_packet_get_data_length(dph);
1174 
1175 		/* attach the packet to the RX ring */
1176 		err = kern_channel_slot_attach_packet(rx_ring, rx_slot, dph);
1177 		VERIFY(err == 0);
1178 		last_rx_slot = rx_slot;
1179 		rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
1180 	}
1181 
1182 	if (last_rx_slot != NULL) {
1183 		kern_channel_advance_slot(rx_ring, last_rx_slot);
1184 		kern_channel_increment_ring_net_stats(rx_ring, dif->iff_ifp,
1185 		    &stats);
1186 	}
1187 
1188 	if (rx_ring != NULL) {
1189 		kr_exit(rx_ring);
1190 		kern_channel_notify(rx_ring, 0);
1191 	}
1192 }
1193 
1194 static void
feth_rx_queue_submit(if_fake_ref sif,if_fake_ref dif,uint32_t llink_idx,uint32_t qset_idx,kern_packet_t sphs[],uint32_t n_pkts)1195 feth_rx_queue_submit(if_fake_ref sif, if_fake_ref dif, uint32_t llink_idx,
1196     uint32_t qset_idx, kern_packet_t sphs[], uint32_t n_pkts)
1197 {
1198 	errno_t err = 0;
1199 	kern_netif_queue_t queue;
1200 	kern_packet_t sph = 0, dph = 0;
1201 	fake_llink *llink;
1202 	fake_qset *qset;
1203 
1204 	if (llink_idx >= dif->iff_llink_cnt) {
1205 		printf("%s: invalid llink_idx idx %d (max %d) on peer %s\n",
1206 		    __func__, llink_idx, dif->iff_llink_cnt, dif->iff_name);
1207 		return;
1208 	}
1209 	llink = &dif->iff_llink[llink_idx];
1210 	if (qset_idx >= llink->fl_qset_cnt) {
1211 		printf("%s: invalid qset_idx %d (max %d) on peer %s\n",
1212 		    __func__, qset_idx, llink->fl_qset_cnt, dif->iff_name);
1213 		return;
1214 	}
1215 	qset = &dif->iff_llink[llink_idx].fl_qset[qset_idx];
1216 	queue = qset->fqs_rx_queue[0].fq_queue;
1217 	if (queue == NULL) {
1218 		printf("%s: NULL default queue (llink_idx %d, qset_idx %d) "
1219 		    "on peer %s\n", __func__, llink_idx, qset_idx,
1220 		    dif->iff_name);
1221 		return;
1222 	}
1223 	for (uint32_t i = 0; i < n_pkts; i++) {
1224 		uint32_t flags;
1225 
1226 		sph = sphs[i];
1227 
1228 		switch (dif->iff_pp_mode) {
1229 		case IFF_PP_MODE_GLOBAL:
1230 			sphs[i] = 0;
1231 			dph = sph;
1232 			break;
1233 		case IFF_PP_MODE_PRIVATE:
1234 			err = feth_copy_packet(dif, sph, &dph);
1235 			break;
1236 		case IFF_PP_MODE_PRIVATE_SPLIT:
1237 			err = feth_clone_packet(dif, sph, &dph);
1238 			break;
1239 		default:
1240 			VERIFY(0);
1241 			__builtin_unreachable();
1242 		}
1243 		if (__improbable(err != 0)) {
1244 			continue;
1245 		}
1246 
1247 		if (sif->iff_trailer_length != 0) {
1248 			feth_add_packet_trailer(dph, feth_trailer,
1249 			    sif->iff_trailer_length);
1250 		}
1251 		if (sif->iff_fcs != 0) {
1252 			feth_add_packet_fcs(dph);
1253 		}
1254 
1255 		bpf_tap_packet_in(dif->iff_ifp, DLT_EN10MB, dph, NULL, 0);
1256 
1257 		flags = (i == n_pkts - 1) ?
1258 		    KERN_NETIF_QUEUE_RX_ENQUEUE_FLAG_FLUSH : 0;
1259 		kern_netif_queue_rx_enqueue(queue, dph, 1, flags);
1260 	}
1261 }
1262 
1263 static void
feth_tx_complete(if_fake_ref fakeif,kern_packet_t phs[],uint32_t nphs)1264 feth_tx_complete(if_fake_ref fakeif, kern_packet_t phs[], uint32_t nphs)
1265 {
1266 	for (uint32_t i = 0; i < nphs; i++) {
1267 		kern_packet_t ph = phs[i];
1268 		if (ph == 0) {
1269 			continue;
1270 		}
1271 		int err = kern_packet_set_tx_completion_status(ph, 0);
1272 		VERIFY(err == 0);
1273 		kern_packet_tx_completion(ph, fakeif->iff_ifp);
1274 		kern_pbufpool_free(fakeif->iff_tx_pp, phs[i]);
1275 		phs[i] = 0;
1276 	}
1277 }
1278 
1279 static void
feth_if_adv(thread_call_param_t arg0,thread_call_param_t arg1)1280 feth_if_adv(thread_call_param_t arg0, thread_call_param_t arg1)
1281 {
1282 #pragma unused(arg1)
1283 	errno_t                            error;
1284 	if_fake_ref                        fakeif = (if_fake_ref)arg0;
1285 	struct ifnet_interface_advisory    if_adv;
1286 	struct ifnet_stats_param           if_stat;
1287 
1288 	feth_lock();
1289 	if (feth_is_detaching(fakeif) || !fakeif->iff_channel_connected) {
1290 		feth_unlock();
1291 		return;
1292 	}
1293 	feth_unlock();
1294 
1295 	if (!fakeif->iff_intf_adv_enabled) {
1296 		goto done;
1297 	}
1298 
1299 	error = ifnet_stat(fakeif->iff_ifp, &if_stat);
1300 	if (error != 0) {
1301 		FETH_DPRINTF("%s: ifnet_stat() failed %d\n",
1302 		    fakeif->iff_name, error);
1303 		goto done;
1304 	}
1305 	if_adv.version = IF_INTERFACE_ADVISORY_VERSION_CURRENT;
1306 	if_adv.direction = IF_INTERFACE_ADVISORY_DIRECTION_TX;
1307 	if_adv.timestamp = mach_absolute_time();
1308 	if_adv.rate_trend_suggestion =
1309 	    IF_INTERFACE_ADVISORY_RATE_SUGGESTION_RAMP_NEUTRAL;
1310 	if_adv.max_bandwidth = 1000 * 1000 * 1000; /* 1Gbps */
1311 	if_adv.total_byte_count = if_stat.packets_out;
1312 	if_adv.average_throughput = 1000 * 1000 * 1000; /* 1Gbps */
1313 	if_adv.flushable_queue_size = UINT32_MAX;
1314 	if_adv.non_flushable_queue_size = UINT32_MAX;
1315 	if_adv.average_delay = 1; /* ms */
1316 
1317 	error = fakeif->iff_intf_adv_notify(fakeif->iff_intf_adv_kern_ctx, &if_adv);
1318 	if (error != 0) {
1319 		FETH_DPRINTF("%s: ifnet_interface_advisory_report() failed %d\n",
1320 		    fakeif->iff_name, error);
1321 	}
1322 
1323 done:
1324 	feth_lock();
1325 	if (!feth_is_detaching(fakeif) && fakeif->iff_channel_connected) {
1326 		uint64_t deadline;
1327 		clock_interval_to_deadline(fakeif->iff_adv_interval,
1328 		    NSEC_PER_MSEC, &deadline);
1329 		thread_call_enter_delayed(fakeif->iff_if_adv_tcall, deadline);
1330 	}
1331 	feth_unlock();
1332 }
1333 
1334 static int
feth_if_adv_tcall_create(if_fake_ref fakeif)1335 feth_if_adv_tcall_create(if_fake_ref fakeif)
1336 {
1337 	uint64_t deadline;
1338 
1339 	feth_lock();
1340 	ASSERT(fakeif->iff_if_adv_tcall == NULL);
1341 	ASSERT(fakeif->iff_adv_interval > 0);
1342 	ASSERT(fakeif->iff_channel_connected);
1343 	fakeif->iff_if_adv_tcall =
1344 	    thread_call_allocate_with_options(feth_if_adv,
1345 	    (thread_call_param_t)fakeif, THREAD_CALL_PRIORITY_KERNEL,
1346 	    THREAD_CALL_OPTIONS_ONCE);
1347 	if (fakeif->iff_if_adv_tcall == NULL) {
1348 		printf("%s: %s if_adv tcall alloc failed\n", __func__,
1349 		    fakeif->iff_name);
1350 		return ENXIO;
1351 	}
1352 	/* retain for the interface advisory thread call */
1353 	feth_retain(fakeif);
1354 	clock_interval_to_deadline(fakeif->iff_adv_interval,
1355 	    NSEC_PER_MSEC, &deadline);
1356 	thread_call_enter_delayed(fakeif->iff_if_adv_tcall, deadline);
1357 	feth_unlock();
1358 	return 0;
1359 }
1360 
1361 static void
feth_if_adv_tcall_destroy(if_fake_ref fakeif)1362 feth_if_adv_tcall_destroy(if_fake_ref fakeif)
1363 {
1364 	thread_call_t tcall;
1365 
1366 	feth_lock();
1367 	ASSERT(fakeif->iff_if_adv_tcall != NULL);
1368 	tcall = fakeif->iff_if_adv_tcall;
1369 	feth_unlock();
1370 	(void) thread_call_cancel_wait(tcall);
1371 	if (!thread_call_free(tcall)) {
1372 		boolean_t freed;
1373 		(void) thread_call_cancel_wait(tcall);
1374 		freed = thread_call_free(tcall);
1375 		VERIFY(freed);
1376 	}
1377 	feth_lock();
1378 	fakeif->iff_if_adv_tcall = NULL;
1379 	feth_unlock();
1380 	/* release for the interface advisory thread call */
1381 	feth_release(fakeif);
1382 }
1383 
1384 
1385 /**
1386 ** nexus netif domain provider
1387 **/
1388 static errno_t
feth_nxdp_init(kern_nexus_domain_provider_t domprov)1389 feth_nxdp_init(kern_nexus_domain_provider_t domprov)
1390 {
1391 #pragma unused(domprov)
1392 	return 0;
1393 }
1394 
1395 static void
feth_nxdp_fini(kern_nexus_domain_provider_t domprov)1396 feth_nxdp_fini(kern_nexus_domain_provider_t domprov)
1397 {
1398 #pragma unused(domprov)
1399 }
1400 
1401 static uuid_t                   feth_nx_dom_prov;
1402 
1403 static errno_t
feth_register_nexus_domain_provider(void)1404 feth_register_nexus_domain_provider(void)
1405 {
1406 	const struct kern_nexus_domain_provider_init dp_init = {
1407 		.nxdpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
1408 		.nxdpi_flags = 0,
1409 		.nxdpi_init = feth_nxdp_init,
1410 		.nxdpi_fini = feth_nxdp_fini
1411 	};
1412 	errno_t                         err = 0;
1413 
1414 	/* feth_nxdp_init() is called before this function returns */
1415 	err = kern_nexus_register_domain_provider(NEXUS_TYPE_NET_IF,
1416 	    (const uint8_t *)
1417 	    "com.apple.feth",
1418 	    &dp_init, sizeof(dp_init),
1419 	    &feth_nx_dom_prov);
1420 	if (err != 0) {
1421 		printf("%s: failed to register domain provider\n", __func__);
1422 		return err;
1423 	}
1424 	return 0;
1425 }
1426 
1427 /**
1428 ** netif nexus routines
1429 **/
1430 static if_fake_ref
feth_nexus_context(kern_nexus_t nexus)1431 feth_nexus_context(kern_nexus_t nexus)
1432 {
1433 	if_fake_ref fakeif;
1434 
1435 	fakeif = (if_fake_ref)kern_nexus_get_context(nexus);
1436 	assert(fakeif != NULL);
1437 	return fakeif;
1438 }
1439 
1440 static uint8_t
feth_find_tx_ring_by_svc(kern_packet_svc_class_t svc_class)1441 feth_find_tx_ring_by_svc(kern_packet_svc_class_t svc_class)
1442 {
1443 	switch (svc_class) {
1444 	case KPKT_SC_VO:
1445 		return 0;
1446 	case KPKT_SC_VI:
1447 		return 1;
1448 	case KPKT_SC_BE:
1449 		return 2;
1450 	case KPKT_SC_BK:
1451 		return 3;
1452 	default:
1453 		VERIFY(0);
1454 		return 0;
1455 	}
1456 }
1457 
1458 static errno_t
feth_nx_ring_init(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel,kern_channel_ring_t ring,boolean_t is_tx_ring,void ** ring_ctx)1459 feth_nx_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
1460     kern_channel_t channel, kern_channel_ring_t ring, boolean_t is_tx_ring,
1461     void **ring_ctx)
1462 {
1463 	if_fake_ref     fakeif;
1464 	int             err;
1465 #pragma unused(nxprov, channel, ring_ctx)
1466 	feth_lock();
1467 	fakeif = feth_nexus_context(nexus);
1468 	if (feth_is_detaching(fakeif)) {
1469 		feth_unlock();
1470 		return 0;
1471 	}
1472 	if (is_tx_ring) {
1473 		if (feth_in_wmm_mode(fakeif)) {
1474 			kern_packet_svc_class_t svc_class;
1475 			uint8_t ring_idx;
1476 
1477 			err = kern_channel_get_service_class(ring, &svc_class);
1478 			VERIFY(err == 0);
1479 			ring_idx = feth_find_tx_ring_by_svc(svc_class);
1480 			VERIFY(ring_idx < IFF_NUM_TX_RINGS_WMM_MODE);
1481 			VERIFY(fakeif->iff_tx_ring[ring_idx] == NULL);
1482 			fakeif->iff_tx_ring[ring_idx] = ring;
1483 		} else {
1484 			VERIFY(fakeif->iff_tx_ring[0] == NULL);
1485 			fakeif->iff_tx_ring[0] = ring;
1486 		}
1487 	} else {
1488 		VERIFY(fakeif->iff_rx_ring[0] == NULL);
1489 		fakeif->iff_rx_ring[0] = ring;
1490 	}
1491 	fakeif->iff_nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
1492 	feth_unlock();
1493 	FETH_DPRINTF("%s: %s ring init\n",
1494 	    fakeif->iff_name, is_tx_ring ? "TX" : "RX");
1495 	return 0;
1496 }
1497 
1498 static void
feth_nx_ring_fini(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring)1499 feth_nx_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
1500     kern_channel_ring_t ring)
1501 {
1502 #pragma unused(nxprov, ring)
1503 	if_fake_ref     fakeif;
1504 	thread_call_t   tcall = NULL;
1505 
1506 	feth_lock();
1507 	fakeif = feth_nexus_context(nexus);
1508 	if (fakeif->iff_rx_ring[0] == ring) {
1509 		fakeif->iff_rx_ring[0] = NULL;
1510 		FETH_DPRINTF("%s: RX ring fini\n", fakeif->iff_name);
1511 	} else if (feth_in_wmm_mode(fakeif)) {
1512 		int i;
1513 		for (i = 0; i < IFF_MAX_TX_RINGS; i++) {
1514 			if (fakeif->iff_tx_ring[i] == ring) {
1515 				fakeif->iff_tx_ring[i] = NULL;
1516 				break;
1517 			}
1518 		}
1519 		for (i = 0; i < IFF_MAX_TX_RINGS; i++) {
1520 			if (fakeif->iff_tx_ring[i] != NULL) {
1521 				break;
1522 			}
1523 		}
1524 		if (i == IFF_MAX_TX_RINGS) {
1525 			tcall = fakeif->iff_doorbell_tcall;
1526 			fakeif->iff_doorbell_tcall = NULL;
1527 		}
1528 		FETH_DPRINTF("%s: TX ring fini\n", fakeif->iff_name);
1529 	} else if (fakeif->iff_tx_ring[0] == ring) {
1530 		tcall = fakeif->iff_doorbell_tcall;
1531 		fakeif->iff_doorbell_tcall = NULL;
1532 		fakeif->iff_tx_ring[0] = NULL;
1533 	}
1534 	fakeif->iff_nifs = NULL;
1535 	feth_unlock();
1536 	if (tcall != NULL) {
1537 		boolean_t       success;
1538 
1539 		success = thread_call_cancel_wait(tcall);
1540 		FETH_DPRINTF("%s: thread_call_cancel %s\n",
1541 		    fakeif->iff_name,
1542 		    success ? "SUCCESS" : "FAILURE");
1543 		if (!success) {
1544 			feth_lock();
1545 			if (fakeif->iff_doorbell_tcall_active) {
1546 				fakeif->iff_waiting_for_tcall = TRUE;
1547 				FETH_DPRINTF("%s: *waiting for threadcall\n",
1548 				    fakeif->iff_name);
1549 				do {
1550 					msleep(fakeif, &feth_lck_mtx,
1551 					    PZERO, "feth threadcall", 0);
1552 				} while (fakeif->iff_doorbell_tcall_active);
1553 				FETH_DPRINTF("%s: ^threadcall done\n",
1554 				    fakeif->iff_name);
1555 				fakeif->iff_waiting_for_tcall = FALSE;
1556 			}
1557 			feth_unlock();
1558 		}
1559 		success = thread_call_free(tcall);
1560 		FETH_DPRINTF("%s: thread_call_free %s\n",
1561 		    fakeif->iff_name,
1562 		    success ? "SUCCESS" : "FAILURE");
1563 		feth_release(fakeif);
1564 		VERIFY(success == TRUE);
1565 	}
1566 }
1567 
1568 static errno_t
feth_nx_pre_connect(kern_nexus_provider_t nxprov,proc_t proc,kern_nexus_t nexus,nexus_port_t port,kern_channel_t channel,void ** channel_context)1569 feth_nx_pre_connect(kern_nexus_provider_t nxprov,
1570     proc_t proc, kern_nexus_t nexus, nexus_port_t port, kern_channel_t channel,
1571     void **channel_context)
1572 {
1573 #pragma unused(nxprov, proc, nexus, port, channel, channel_context)
1574 	return 0;
1575 }
1576 
1577 static errno_t
feth_nx_connected(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel)1578 feth_nx_connected(kern_nexus_provider_t nxprov,
1579     kern_nexus_t nexus, kern_channel_t channel)
1580 {
1581 #pragma unused(nxprov, channel)
1582 	int err;
1583 	if_fake_ref fakeif;
1584 
1585 	fakeif = feth_nexus_context(nexus);
1586 	feth_lock();
1587 	if (feth_is_detaching(fakeif)) {
1588 		feth_unlock();
1589 		return EBUSY;
1590 	}
1591 	feth_retain(fakeif);
1592 	fakeif->iff_channel_connected = TRUE;
1593 	feth_unlock();
1594 	if (feth_has_intf_advisory_configured(fakeif)) {
1595 		err = feth_if_adv_tcall_create(fakeif);
1596 		if (err != 0) {
1597 			return err;
1598 		}
1599 	}
1600 	FETH_DPRINTF("%s: connected channel %p\n",
1601 	    fakeif->iff_name, channel);
1602 	return 0;
1603 }
1604 
1605 static void
feth_nx_pre_disconnect(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel)1606 feth_nx_pre_disconnect(kern_nexus_provider_t nxprov,
1607     kern_nexus_t nexus, kern_channel_t channel)
1608 {
1609 #pragma unused(nxprov, channel)
1610 	if_fake_ref fakeif;
1611 
1612 	fakeif = feth_nexus_context(nexus);
1613 	FETH_DPRINTF("%s: pre-disconnect channel %p\n",
1614 	    fakeif->iff_name, channel);
1615 	/* Quiesce the interface and flush any pending outbound packets. */
1616 	if_down(fakeif->iff_ifp);
1617 	feth_lock();
1618 	fakeif->iff_channel_connected = FALSE;
1619 	feth_unlock();
1620 	if (fakeif->iff_if_adv_tcall != NULL) {
1621 		feth_if_adv_tcall_destroy(fakeif);
1622 	}
1623 }
1624 
1625 static void
feth_nx_disconnected(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel)1626 feth_nx_disconnected(kern_nexus_provider_t nxprov,
1627     kern_nexus_t nexus, kern_channel_t channel)
1628 {
1629 #pragma unused(nxprov, channel)
1630 	if_fake_ref fakeif;
1631 
1632 	fakeif = feth_nexus_context(nexus);
1633 	FETH_DPRINTF("%s: disconnected channel %p\n",
1634 	    fakeif->iff_name, channel);
1635 	feth_release(fakeif);
1636 }
1637 
1638 static errno_t
feth_nx_slot_init(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring,kern_channel_slot_t slot,uint32_t slot_index,struct kern_slot_prop ** slot_prop_addr,void ** slot_context)1639 feth_nx_slot_init(kern_nexus_provider_t nxprov,
1640     kern_nexus_t nexus, kern_channel_ring_t ring, kern_channel_slot_t slot,
1641     uint32_t slot_index, struct kern_slot_prop **slot_prop_addr,
1642     void **slot_context)
1643 {
1644 #pragma unused(nxprov, nexus, ring, slot, slot_index, slot_prop_addr, slot_context)
1645 	return 0;
1646 }
1647 
1648 static void
feth_nx_slot_fini(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring,kern_channel_slot_t slot,uint32_t slot_index)1649 feth_nx_slot_fini(kern_nexus_provider_t nxprov,
1650     kern_nexus_t nexus, kern_channel_ring_t ring, kern_channel_slot_t slot,
1651     uint32_t slot_index)
1652 {
1653 #pragma unused(nxprov, nexus, ring, slot, slot_index)
1654 }
1655 
1656 static errno_t
feth_nx_sync_tx(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t tx_ring,uint32_t flags)1657 feth_nx_sync_tx(kern_nexus_provider_t nxprov,
1658     kern_nexus_t nexus, kern_channel_ring_t tx_ring, uint32_t flags)
1659 {
1660 #pragma unused(nxprov)
1661 	if_fake_ref             fakeif;
1662 	ifnet_t                 ifp;
1663 	kern_channel_slot_t     last_tx_slot = NULL;
1664 	ifnet_t                 peer_ifp;
1665 	if_fake_ref             peer_fakeif = NULL;
1666 	struct kern_channel_ring_stat_increment stats;
1667 	kern_channel_slot_t     tx_slot;
1668 	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
1669 	kern_packet_t           pkts[IFF_MAX_BATCH_SIZE];
1670 	uint32_t                n_pkts = 0;
1671 
1672 	memset(&stats, 0, sizeof(stats));
1673 
1674 	STATS_INC(nifs, NETIF_STATS_TX_SYNC);
1675 	fakeif = feth_nexus_context(nexus);
1676 	FETH_DPRINTF("%s ring %d flags 0x%x\n", fakeif->iff_name,
1677 	    tx_ring->ckr_ring_id, flags);
1678 
1679 	feth_lock();
1680 	if (feth_is_detaching(fakeif) || !fakeif->iff_channel_connected) {
1681 		feth_unlock();
1682 		return 0;
1683 	}
1684 	ifp = fakeif->iff_ifp;
1685 	peer_ifp = fakeif->iff_peer;
1686 	if (peer_ifp != NULL) {
1687 		peer_fakeif = ifnet_get_if_fake(peer_ifp);
1688 		if (peer_fakeif != NULL) {
1689 			if (feth_is_detaching(peer_fakeif) ||
1690 			    !peer_fakeif->iff_channel_connected) {
1691 				goto done;
1692 			}
1693 		} else {
1694 			goto done;
1695 		}
1696 	} else {
1697 		goto done;
1698 	}
1699 	tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
1700 	while (tx_slot != NULL) {
1701 		errno_t err;
1702 		uint16_t off;
1703 		kern_packet_t sph;
1704 
1705 		/* detach the packet from the TX ring */
1706 		sph = kern_channel_slot_get_packet(tx_ring, tx_slot);
1707 		VERIFY(sph != 0);
1708 		kern_channel_slot_detach_packet(tx_ring, tx_slot, sph);
1709 
1710 		/* bpf tap output */
1711 		off = kern_packet_get_headroom(sph);
1712 		VERIFY(off >= fakeif->iff_tx_headroom);
1713 		kern_packet_set_link_header_length(sph, ETHER_HDR_LEN);
1714 		bpf_tap_packet_out(ifp, DLT_EN10MB, sph, NULL, 0);
1715 
1716 		/* drop packets, if requested */
1717 		fakeif->iff_tx_pkts_count++;
1718 		if (fakeif->iff_tx_drop_rate != 0 &&
1719 		    fakeif->iff_tx_pkts_count == fakeif->iff_tx_drop_rate) {
1720 			fakeif->iff_tx_pkts_count = 0;
1721 			err = kern_packet_set_tx_completion_status(sph,
1722 			    CHANNEL_EVENT_PKT_TRANSMIT_STATUS_ERR_RETRY_FAILED);
1723 			VERIFY(err == 0);
1724 			kern_packet_tx_completion(sph, fakeif->iff_ifp);
1725 			kern_pbufpool_free(fakeif->iff_tx_pp, sph);
1726 			sph = 0;
1727 			STATS_INC(nifs, NETIF_STATS_DROP);
1728 			goto next_tx_slot;
1729 		}
1730 
1731 		STATS_INC(nifs, NETIF_STATS_TX_COPY_DIRECT);
1732 		STATS_INC(nifs, NETIF_STATS_TX_PACKETS);
1733 
1734 		stats.kcrsi_slots_transferred++;
1735 		stats.kcrsi_bytes_transferred
1736 		        += kern_packet_get_data_length(sph);
1737 
1738 		/* prepare batch for receiver */
1739 		pkts[n_pkts++] = sph;
1740 		if (n_pkts == IFF_MAX_BATCH_SIZE) {
1741 			feth_rx_submit(fakeif, peer_fakeif, pkts, n_pkts);
1742 			feth_tx_complete(fakeif, pkts, n_pkts);
1743 			n_pkts = 0;
1744 		}
1745 
1746 next_tx_slot:
1747 		last_tx_slot = tx_slot;
1748 		tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
1749 	}
1750 
1751 	/* catch last batch for receiver */
1752 	if (n_pkts != 0) {
1753 		feth_rx_submit(fakeif, peer_fakeif, pkts, n_pkts);
1754 		feth_tx_complete(fakeif, pkts, n_pkts);
1755 		n_pkts = 0;
1756 	}
1757 
1758 	if (last_tx_slot != NULL) {
1759 		kern_channel_advance_slot(tx_ring, last_tx_slot);
1760 		kern_channel_increment_ring_net_stats(tx_ring, ifp, &stats);
1761 	}
1762 done:
1763 	feth_unlock();
1764 	return 0;
1765 }
1766 
1767 static errno_t
feth_nx_sync_rx(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring,uint32_t flags)1768 feth_nx_sync_rx(kern_nexus_provider_t nxprov,
1769     kern_nexus_t nexus, kern_channel_ring_t ring, uint32_t flags)
1770 {
1771 #pragma unused(nxprov, ring, flags)
1772 	if_fake_ref             fakeif;
1773 	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
1774 
1775 	STATS_INC(nifs, NETIF_STATS_RX_SYNC);
1776 	fakeif = feth_nexus_context(nexus);
1777 	FETH_DPRINTF("%s:\n", fakeif->iff_name);
1778 	return 0;
1779 }
1780 
1781 static errno_t
feth_nx_tx_dequeue_driver_managed(if_fake_ref fakeif,boolean_t doorbell_ctxt)1782 feth_nx_tx_dequeue_driver_managed(if_fake_ref fakeif, boolean_t doorbell_ctxt)
1783 {
1784 	int i;
1785 	errno_t error = 0;
1786 	boolean_t more;
1787 
1788 	for (i = 0; i < IFF_NUM_TX_RINGS_WMM_MODE; i++) {
1789 		kern_channel_ring_t ring = fakeif->iff_tx_ring[i];
1790 		if (ring != NULL) {
1791 			error = kern_channel_tx_refill(ring, UINT32_MAX,
1792 			    UINT32_MAX, doorbell_ctxt, &more);
1793 		}
1794 		if (error != 0) {
1795 			FETH_DPRINTF("%s: TX refill ring %d (%s) %d\n",
1796 			    fakeif->iff_name, ring->ckr_ring_id,
1797 			    doorbell_ctxt ? "sync" : "async", error);
1798 			if (!((error == EAGAIN) || (error == EBUSY))) {
1799 				break;
1800 			}
1801 		} else {
1802 			FETH_DPRINTF("%s: TX refilled ring %d (%s)\n",
1803 			    fakeif->iff_name, ring->ckr_ring_id,
1804 			    doorbell_ctxt ? "sync" : "async");
1805 		}
1806 	}
1807 	return error;
1808 }
1809 
1810 static void
feth_async_doorbell(thread_call_param_t arg0,thread_call_param_t arg1)1811 feth_async_doorbell(thread_call_param_t arg0, thread_call_param_t arg1)
1812 {
1813 #pragma unused(arg1)
1814 	errno_t                 error;
1815 	if_fake_ref             fakeif = (if_fake_ref)arg0;
1816 	kern_channel_ring_t     ring;
1817 	boolean_t               more;
1818 
1819 	feth_lock();
1820 	ring = fakeif->iff_tx_ring[0];
1821 	if (feth_is_detaching(fakeif) ||
1822 	    !fakeif->iff_channel_connected ||
1823 	    ring == NULL) {
1824 		goto done;
1825 	}
1826 	fakeif->iff_doorbell_tcall_active = TRUE;
1827 	feth_unlock();
1828 	if (feth_in_wmm_mode(fakeif)) {
1829 		error = feth_nx_tx_dequeue_driver_managed(fakeif, FALSE);
1830 	} else {
1831 		error = kern_channel_tx_refill(ring, UINT32_MAX,
1832 		    UINT32_MAX, FALSE, &more);
1833 	}
1834 	if (error != 0) {
1835 		FETH_DPRINTF("%s: TX refill failed %d\n",
1836 		    fakeif->iff_name, error);
1837 	} else {
1838 		FETH_DPRINTF("%s: TX refilled\n", fakeif->iff_name);
1839 	}
1840 
1841 	feth_lock();
1842 done:
1843 	fakeif->iff_doorbell_tcall_active = FALSE;
1844 	if (fakeif->iff_waiting_for_tcall) {
1845 		FETH_DPRINTF("%s: threadcall waking up waiter\n",
1846 		    fakeif->iff_name);
1847 		wakeup((caddr_t)fakeif);
1848 	}
1849 	feth_unlock();
1850 }
1851 
1852 static void
feth_schedule_async_doorbell(if_fake_ref fakeif)1853 feth_schedule_async_doorbell(if_fake_ref fakeif)
1854 {
1855 	thread_call_t   tcall;
1856 
1857 	feth_lock();
1858 	if (feth_is_detaching(fakeif) || !fakeif->iff_channel_connected) {
1859 		feth_unlock();
1860 		return;
1861 	}
1862 	tcall = fakeif->iff_doorbell_tcall;
1863 	if (tcall != NULL) {
1864 		thread_call_enter(tcall);
1865 	} else {
1866 		tcall = thread_call_allocate_with_options(feth_async_doorbell,
1867 		    (thread_call_param_t)fakeif,
1868 		    THREAD_CALL_PRIORITY_KERNEL,
1869 		    THREAD_CALL_OPTIONS_ONCE);
1870 		if (tcall == NULL) {
1871 			printf("%s: %s tcall alloc failed\n",
1872 			    __func__, fakeif->iff_name);
1873 		} else {
1874 			fakeif->iff_doorbell_tcall = tcall;
1875 			feth_retain(fakeif);
1876 			thread_call_enter(tcall);
1877 		}
1878 	}
1879 	feth_unlock();
1880 }
1881 
1882 static errno_t
feth_nx_tx_doorbell(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring,uint32_t flags)1883 feth_nx_tx_doorbell(kern_nexus_provider_t nxprov,
1884     kern_nexus_t nexus, kern_channel_ring_t ring, uint32_t flags)
1885 {
1886 #pragma unused(nxprov, ring, flags)
1887 	errno_t         error;
1888 	if_fake_ref     fakeif;
1889 
1890 	fakeif = feth_nexus_context(nexus);
1891 	FETH_DPRINTF("%s\n", fakeif->iff_name);
1892 
1893 	if ((flags & KERN_NEXUS_TXDOORBELLF_ASYNC_REFILL) == 0) {
1894 		boolean_t       more;
1895 		/* synchronous tx refill */
1896 		if (feth_in_wmm_mode(fakeif)) {
1897 			error = feth_nx_tx_dequeue_driver_managed(fakeif, TRUE);
1898 		} else {
1899 			error = kern_channel_tx_refill(ring, UINT32_MAX,
1900 			    UINT32_MAX, TRUE, &more);
1901 		}
1902 		if (error != 0) {
1903 			FETH_DPRINTF("%s: TX refill (sync) %d\n",
1904 			    fakeif->iff_name, error);
1905 		} else {
1906 			FETH_DPRINTF("%s: TX refilled (sync)\n",
1907 			    fakeif->iff_name);
1908 		}
1909 	} else {
1910 		FETH_DPRINTF("%s: schedule async refill\n", fakeif->iff_name);
1911 		feth_schedule_async_doorbell(fakeif);
1912 	}
1913 	return 0;
1914 }
1915 
1916 static errno_t
feth_netif_prepare(kern_nexus_t nexus,ifnet_t ifp)1917 feth_netif_prepare(kern_nexus_t nexus, ifnet_t ifp)
1918 {
1919 	if_fake_ref fakeif;
1920 
1921 	fakeif = (if_fake_ref)kern_nexus_get_context(nexus);
1922 	feth_ifnet_set_attrs(fakeif, ifp);
1923 	return 0;
1924 }
1925 
1926 static errno_t
feth_nx_intf_adv_config(void * prov_ctx,bool enable)1927 feth_nx_intf_adv_config(void *prov_ctx, bool enable)
1928 {
1929 	if_fake_ref fakeif = prov_ctx;
1930 
1931 	feth_lock();
1932 	fakeif->iff_intf_adv_enabled = enable;
1933 	feth_unlock();
1934 	FETH_DPRINTF("%s enable %d\n", fakeif->iff_name, enable);
1935 	return 0;
1936 }
1937 
1938 static errno_t
feth_nx_capab_config(kern_nexus_provider_t nxprov,kern_nexus_t nx,kern_nexus_capab_t capab,void * contents,uint32_t * len)1939 feth_nx_capab_config(kern_nexus_provider_t nxprov, kern_nexus_t nx,
1940     kern_nexus_capab_t capab, void *contents, uint32_t *len)
1941 {
1942 #pragma unused(nxprov)
1943 	errno_t error = 0;
1944 	if_fake_ref fakeif;
1945 	struct kern_nexus_capab_interface_advisory *adv_capab;
1946 
1947 	fakeif = feth_nexus_context(nx);
1948 	FETH_DPRINTF("%s\n", fakeif->iff_name);
1949 
1950 	switch (capab) {
1951 	case KERN_NEXUS_CAPAB_INTERFACE_ADVISORY:
1952 		adv_capab = contents;
1953 		VERIFY(*len = sizeof(*adv_capab));
1954 		if (adv_capab->kncia_version !=
1955 		    KERN_NEXUS_CAPAB_INTERFACE_ADVISORY_VERSION_1) {
1956 			error = EINVAL;
1957 			break;
1958 		}
1959 		if (!feth_has_intf_advisory_configured(fakeif)) {
1960 			error = ENOTSUP;
1961 			break;
1962 		}
1963 		VERIFY(adv_capab->kncia_notify != NULL);
1964 		fakeif->iff_intf_adv_kern_ctx = adv_capab->kncia_kern_context;
1965 		fakeif->iff_intf_adv_notify = adv_capab->kncia_notify;
1966 		adv_capab->kncia_provider_context = fakeif;
1967 		adv_capab->kncia_config = feth_nx_intf_adv_config;
1968 		break;
1969 
1970 	default:
1971 		error = ENOTSUP;
1972 		break;
1973 	}
1974 	return error;
1975 }
1976 
1977 static errno_t
create_netif_provider_and_instance(if_fake_ref fakeif,struct ifnet_init_eparams * init_params,ifnet_t * ifp,uuid_t * provider,uuid_t * instance)1978 create_netif_provider_and_instance(if_fake_ref fakeif,
1979     struct ifnet_init_eparams * init_params, ifnet_t *ifp,
1980     uuid_t * provider, uuid_t * instance)
1981 {
1982 	errno_t                 err;
1983 	nexus_controller_t      controller = kern_nexus_shared_controller();
1984 	struct kern_nexus_net_init net_init;
1985 	nexus_name_t            provider_name;
1986 	nexus_attr_t            nexus_attr = NULL;
1987 	struct kern_nexus_provider_init prov_init = {
1988 		.nxpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
1989 		.nxpi_flags = NXPIF_VIRTUAL_DEVICE,
1990 		.nxpi_pre_connect = feth_nx_pre_connect,
1991 		.nxpi_connected = feth_nx_connected,
1992 		.nxpi_pre_disconnect = feth_nx_pre_disconnect,
1993 		.nxpi_disconnected = feth_nx_disconnected,
1994 		.nxpi_ring_init = feth_nx_ring_init,
1995 		.nxpi_ring_fini = feth_nx_ring_fini,
1996 		.nxpi_slot_init = feth_nx_slot_init,
1997 		.nxpi_slot_fini = feth_nx_slot_fini,
1998 		.nxpi_sync_tx = feth_nx_sync_tx,
1999 		.nxpi_sync_rx = feth_nx_sync_rx,
2000 		.nxpi_tx_doorbell = feth_nx_tx_doorbell,
2001 		.nxpi_config_capab = feth_nx_capab_config,
2002 	};
2003 
2004 	_CASSERT(IFF_MAX_RX_RINGS == 1);
2005 	err = kern_nexus_attr_create(&nexus_attr);
2006 	if (err != 0) {
2007 		printf("%s nexus attribute creation failed, error %d\n",
2008 		    __func__, err);
2009 		goto failed;
2010 	}
2011 	if (feth_in_wmm_mode(fakeif)) {
2012 		err = kern_nexus_attr_set(nexus_attr, NEXUS_ATTR_TX_RINGS,
2013 		    IFF_NUM_TX_RINGS_WMM_MODE);
2014 		VERIFY(err == 0);
2015 		err = kern_nexus_attr_set(nexus_attr, NEXUS_ATTR_RX_RINGS,
2016 		    IFF_NUM_RX_RINGS_WMM_MODE);
2017 		VERIFY(err == 0);
2018 		err = kern_nexus_attr_set(nexus_attr, NEXUS_ATTR_QMAP,
2019 		    NEXUS_QMAP_TYPE_WMM);
2020 		VERIFY(err == 0);
2021 	}
2022 
2023 	err = kern_nexus_attr_set(nexus_attr, NEXUS_ATTR_ANONYMOUS, 1);
2024 	VERIFY(err == 0);
2025 	snprintf((char *)provider_name, sizeof(provider_name),
2026 	    "com.apple.netif.%s", fakeif->iff_name);
2027 	err = kern_nexus_controller_register_provider(controller,
2028 	    feth_nx_dom_prov,
2029 	    provider_name,
2030 	    &prov_init,
2031 	    sizeof(prov_init),
2032 	    nexus_attr,
2033 	    provider);
2034 	if (err != 0) {
2035 		printf("%s register provider failed, error %d\n",
2036 		    __func__, err);
2037 		goto failed;
2038 	}
2039 	bzero(&net_init, sizeof(net_init));
2040 	net_init.nxneti_version = KERN_NEXUS_NET_CURRENT_VERSION;
2041 	net_init.nxneti_flags = 0;
2042 	net_init.nxneti_eparams = init_params;
2043 	net_init.nxneti_lladdr = NULL;
2044 	net_init.nxneti_prepare = feth_netif_prepare;
2045 	net_init.nxneti_rx_pbufpool = fakeif->iff_rx_pp;
2046 	net_init.nxneti_tx_pbufpool = fakeif->iff_tx_pp;
2047 	err = kern_nexus_controller_alloc_net_provider_instance(controller,
2048 	    *provider,
2049 	    fakeif,
2050 	    NULL,
2051 	    instance,
2052 	    &net_init,
2053 	    ifp);
2054 	if (err != 0) {
2055 		printf("%s alloc_net_provider_instance failed, %d\n",
2056 		    __func__, err);
2057 		kern_nexus_controller_deregister_provider(controller,
2058 		    *provider);
2059 		uuid_clear(*provider);
2060 		goto failed;
2061 	}
2062 
2063 failed:
2064 	if (nexus_attr != NULL) {
2065 		kern_nexus_attr_destroy(nexus_attr);
2066 	}
2067 	return err;
2068 }
2069 
2070 /*
2071  * The nif_stats need to be referenced because we don't want it set
2072  * to NULL until the last llink is removed.
2073  */
2074 static void
get_nexus_stats(if_fake_ref fakeif,kern_nexus_t nexus)2075 get_nexus_stats(if_fake_ref fakeif, kern_nexus_t nexus)
2076 {
2077 	if (++fakeif->iff_nifs_ref == 1) {
2078 		ASSERT(fakeif->iff_nifs == NULL);
2079 		fakeif->iff_nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
2080 	}
2081 }
2082 
2083 static void
clear_nexus_stats(if_fake_ref fakeif)2084 clear_nexus_stats(if_fake_ref fakeif)
2085 {
2086 	if (--fakeif->iff_nifs_ref == 0) {
2087 		ASSERT(fakeif->iff_nifs != NULL);
2088 		fakeif->iff_nifs = NULL;
2089 	}
2090 }
2091 
2092 static errno_t
feth_nx_qset_init(kern_nexus_provider_t nxprov,kern_nexus_t nexus,void * llink_ctx,uint8_t qset_idx,uint64_t qset_id,kern_netif_qset_t qset,void ** qset_ctx)2093 feth_nx_qset_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
2094     void *llink_ctx, uint8_t qset_idx, uint64_t qset_id, kern_netif_qset_t qset,
2095     void **qset_ctx)
2096 {
2097 #pragma unused(nxprov)
2098 	if_fake_ref fakeif;
2099 	fake_llink *fl = llink_ctx;
2100 	fake_qset *fqs;
2101 
2102 	feth_lock();
2103 	fakeif = feth_nexus_context(nexus);
2104 	if (feth_is_detaching(fakeif)) {
2105 		feth_unlock();
2106 		printf("%s: %s: detaching\n", __func__, fakeif->iff_name);
2107 		return ENXIO;
2108 	}
2109 	if (qset_idx >= fl->fl_qset_cnt) {
2110 		feth_unlock();
2111 		printf("%s: %s: invalid qset_idx %d\n", __func__,
2112 		    fakeif->iff_name, qset_idx);
2113 		return EINVAL;
2114 	}
2115 	fqs = &fl->fl_qset[qset_idx];
2116 	ASSERT(fqs->fqs_qset == NULL);
2117 	fqs->fqs_qset = qset;
2118 	fqs->fqs_id = qset_id;
2119 	*qset_ctx = fqs;
2120 
2121 	/* XXX This should really be done during registration */
2122 	get_nexus_stats(fakeif, nexus);
2123 	feth_unlock();
2124 	return 0;
2125 }
2126 
2127 static void
feth_nx_qset_fini(kern_nexus_provider_t nxprov,kern_nexus_t nexus,void * qset_ctx)2128 feth_nx_qset_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
2129     void *qset_ctx)
2130 {
2131 #pragma unused(nxprov)
2132 	if_fake_ref fakeif;
2133 	fake_qset *fqs = qset_ctx;
2134 
2135 	feth_lock();
2136 	fakeif = feth_nexus_context(nexus);
2137 	clear_nexus_stats(fakeif);
2138 	ASSERT(fqs->fqs_qset != NULL);
2139 	fqs->fqs_qset = NULL;
2140 	fqs->fqs_id = 0;
2141 	feth_unlock();
2142 }
2143 
2144 static errno_t
feth_nx_queue_init(kern_nexus_provider_t nxprov,kern_nexus_t nexus,void * qset_ctx,uint8_t qidx,bool tx,kern_netif_queue_t queue,void ** queue_ctx)2145 feth_nx_queue_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
2146     void *qset_ctx, uint8_t qidx, bool tx, kern_netif_queue_t queue,
2147     void **queue_ctx)
2148 {
2149 #pragma unused(nxprov)
2150 	if_fake_ref fakeif;
2151 	fake_qset *fqs = qset_ctx;
2152 	fake_queue *fq;
2153 
2154 	feth_lock();
2155 	fakeif = feth_nexus_context(nexus);
2156 	if (feth_is_detaching(fakeif)) {
2157 		printf("%s: %s: detaching\n", __func__, fakeif->iff_name);
2158 		feth_unlock();
2159 		return ENXIO;
2160 	}
2161 	if (tx) {
2162 		if (qidx >= fqs->fqs_tx_queue_cnt) {
2163 			printf("%s: %s: invalid tx qidx %d\n", __func__,
2164 			    fakeif->iff_name, qidx);
2165 			feth_unlock();
2166 			return EINVAL;
2167 		}
2168 		fq = &fqs->fqs_tx_queue[qidx];
2169 	} else {
2170 		if (qidx >= fqs->fqs_rx_queue_cnt) {
2171 			printf("%s: %s: invalid rx qidx %d\n", __func__,
2172 			    fakeif->iff_name, qidx);
2173 			feth_unlock();
2174 			return EINVAL;
2175 		}
2176 		fq = &fqs->fqs_rx_queue[qidx];
2177 	}
2178 	ASSERT(fq->fq_queue == NULL);
2179 	fq->fq_queue = queue;
2180 	*queue_ctx = fq;
2181 	feth_unlock();
2182 	return 0;
2183 }
2184 
2185 static void
feth_nx_queue_fini(kern_nexus_provider_t nxprov,kern_nexus_t nexus,void * queue_ctx)2186 feth_nx_queue_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
2187     void *queue_ctx)
2188 {
2189 #pragma unused(nxprov, nexus)
2190 	fake_queue *fq = queue_ctx;
2191 
2192 	feth_lock();
2193 	ASSERT(fq->fq_queue != NULL);
2194 	fq->fq_queue = NULL;
2195 	feth_unlock();
2196 }
2197 
2198 static void
feth_nx_tx_queue_deliver_pkt_chain(if_fake_ref fakeif,kern_packet_t sph,struct netif_stats * nifs,if_fake_ref peer_fakeif,uint32_t llink_idx,uint32_t qset_idx)2199 feth_nx_tx_queue_deliver_pkt_chain(if_fake_ref fakeif, kern_packet_t sph,
2200     struct netif_stats *nifs, if_fake_ref peer_fakeif,
2201     uint32_t llink_idx, uint32_t qset_idx)
2202 {
2203 	kern_packet_t pkts[IFF_MAX_BATCH_SIZE];
2204 	uint32_t n_pkts = 0;
2205 
2206 	while (sph != 0) {
2207 		uint16_t off;
2208 		kern_packet_t next;
2209 
2210 		next = kern_packet_get_next(sph);
2211 		kern_packet_set_next(sph, 0);
2212 
2213 		/* bpf tap output */
2214 		off = kern_packet_get_headroom(sph);
2215 		VERIFY(off >= fakeif->iff_tx_headroom);
2216 		kern_packet_set_link_header_length(sph, ETHER_HDR_LEN);
2217 		bpf_tap_packet_out(fakeif->iff_ifp, DLT_EN10MB, sph, NULL, 0);
2218 
2219 		/* drop packets, if requested */
2220 		fakeif->iff_tx_pkts_count++;
2221 		if (fakeif->iff_tx_drop_rate != 0 &&
2222 		    fakeif->iff_tx_pkts_count == fakeif->iff_tx_drop_rate) {
2223 			fakeif->iff_tx_pkts_count = 0;
2224 			int err = kern_packet_set_tx_completion_status(sph,
2225 			    CHANNEL_EVENT_PKT_TRANSMIT_STATUS_ERR_RETRY_FAILED);
2226 			VERIFY(err == 0);
2227 			kern_packet_tx_completion(sph, fakeif->iff_ifp);
2228 			kern_pbufpool_free(fakeif->iff_tx_pp, sph);
2229 			sph = 0;
2230 			STATS_INC(nifs, NETIF_STATS_DROP);
2231 			goto next_pkt;
2232 		}
2233 
2234 		STATS_INC(nifs, NETIF_STATS_TX_COPY_DIRECT);
2235 		STATS_INC(nifs, NETIF_STATS_TX_PACKETS);
2236 
2237 		/* prepare batch for receiver */
2238 		pkts[n_pkts++] = sph;
2239 		if (n_pkts == IFF_MAX_BATCH_SIZE) {
2240 			feth_rx_queue_submit(fakeif, peer_fakeif, llink_idx,
2241 			    qset_idx, pkts, n_pkts);
2242 			feth_tx_complete(fakeif, pkts, n_pkts);
2243 			n_pkts = 0;
2244 		}
2245 next_pkt:
2246 		sph = next;
2247 	}
2248 	/* catch last batch for receiver */
2249 	if (n_pkts != 0) {
2250 		feth_rx_queue_submit(fakeif, peer_fakeif, llink_idx, qset_idx,
2251 		    pkts, n_pkts);
2252 		feth_tx_complete(fakeif, pkts, n_pkts);
2253 		n_pkts = 0;
2254 	}
2255 }
2256 
2257 static errno_t
feth_nx_tx_qset_notify(kern_nexus_provider_t nxprov,kern_nexus_t nexus,void * qset_ctx,uint32_t flags)2258 feth_nx_tx_qset_notify(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
2259     void *qset_ctx, uint32_t flags)
2260 {
2261 #pragma unused(nxprov)
2262 	if_fake_ref             fakeif;
2263 	ifnet_t                 ifp;
2264 	ifnet_t                 peer_ifp;
2265 	if_fake_ref             peer_fakeif = NULL;
2266 	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
2267 	fake_qset               *qset = qset_ctx;
2268 	boolean_t               detaching, connected;
2269 	uint32_t                i;
2270 	errno_t                 err;
2271 
2272 	STATS_INC(nifs, NETIF_STATS_TX_SYNC);
2273 	fakeif = feth_nexus_context(nexus);
2274 	FETH_DPRINTF("%s qset %p, idx %d, flags 0x%x\n", fakeif->iff_name, qset,
2275 	    qset->fqs_idx, flags);
2276 
2277 	feth_lock();
2278 	detaching = feth_is_detaching(fakeif);
2279 	connected = fakeif->iff_channel_connected;
2280 	if (detaching || !connected) {
2281 		FETH_DPRINTF("%s: %s: detaching %s, channel connected %s\n",
2282 		    __func__, fakeif->iff_name,
2283 		    (detaching ? "true" : "false"),
2284 		    (connected ? "true" : "false"));
2285 		feth_unlock();
2286 		return 0;
2287 	}
2288 	ifp = fakeif->iff_ifp;
2289 	peer_ifp = fakeif->iff_peer;
2290 	if (peer_ifp != NULL) {
2291 		peer_fakeif = ifnet_get_if_fake(peer_ifp);
2292 		if (peer_fakeif != NULL) {
2293 			detaching = feth_is_detaching(peer_fakeif);
2294 			connected = peer_fakeif->iff_channel_connected;
2295 			if (detaching || !connected) {
2296 				FETH_DPRINTF("%s: peer %s: detaching %s, "
2297 				    "channel connected %s\n",
2298 				    __func__, peer_fakeif->iff_name,
2299 				    (detaching ? "true" : "false"),
2300 				    (connected ? "true" : "false"));
2301 				goto done;
2302 			}
2303 		} else {
2304 			FETH_DPRINTF("%s: peer_fakeif is NULL\n", __func__);
2305 			goto done;
2306 		}
2307 	} else {
2308 		printf("%s: peer_ifp is NULL\n", __func__);
2309 		goto done;
2310 	}
2311 
2312 	for (i = 0; i < qset->fqs_tx_queue_cnt; i++) {
2313 		kern_packet_t sph = 0;
2314 		kern_netif_queue_t queue = qset->fqs_tx_queue[i].fq_queue;
2315 		boolean_t more = FALSE;
2316 
2317 		err = kern_netif_queue_tx_dequeue(queue, UINT32_MAX, UINT32_MAX,
2318 		    &more, &sph);
2319 		if (err != 0 && err != EAGAIN) {
2320 			FETH_DPRINTF("%s queue %p dequeue failed: err "
2321 			    "%d\n", fakeif->iff_name, queue, err);
2322 		}
2323 		feth_nx_tx_queue_deliver_pkt_chain(fakeif, sph, nifs,
2324 		    peer_fakeif, qset->fqs_llink_idx, qset->fqs_idx);
2325 	}
2326 
2327 done:
2328 	feth_unlock();
2329 	return 0;
2330 }
2331 
2332 static void
fill_qset_info_and_params(if_fake_ref fakeif,fake_llink * llink_info,uint32_t qset_idx,struct kern_nexus_netif_llink_qset_init * qset_init,bool is_def)2333 fill_qset_info_and_params(if_fake_ref fakeif, fake_llink *llink_info,
2334     uint32_t qset_idx, struct kern_nexus_netif_llink_qset_init *qset_init,
2335     bool is_def)
2336 {
2337 	fake_qset *qset_info = &llink_info->fl_qset[qset_idx];
2338 
2339 	qset_init->nlqi_flags =
2340 	    (is_def ? KERN_NEXUS_NET_LLINK_QSET_DEFAULT : 0) |
2341 	    KERN_NEXUS_NET_LLINK_QSET_AQM;
2342 
2343 	if (feth_in_wmm_mode(fakeif)) {
2344 		qset_init->nlqi_flags |= KERN_NEXUS_NET_LLINK_QSET_WMM_MODE;
2345 		qset_init->nlqi_num_txqs = IFF_NUM_TX_QUEUES_WMM_MODE;
2346 		qset_init->nlqi_num_rxqs = IFF_NUM_RX_QUEUES_WMM_MODE;
2347 	} else {
2348 		qset_init->nlqi_num_txqs = 1;
2349 		qset_init->nlqi_num_rxqs = 1;
2350 	}
2351 	qset_info->fqs_tx_queue_cnt = qset_init->nlqi_num_txqs;
2352 	qset_info->fqs_rx_queue_cnt = qset_init->nlqi_num_rxqs;
2353 
2354 	/* These are needed for locating the peer qset */
2355 	qset_info->fqs_llink_idx = llink_info->fl_idx;
2356 	qset_info->fqs_idx = qset_idx;
2357 }
2358 
2359 static void
fill_llink_info_and_params(if_fake_ref fakeif,uint32_t llink_idx,struct kern_nexus_netif_llink_init * llink_init,uint32_t llink_id,struct kern_nexus_netif_llink_qset_init * qset_init,uint32_t qset_cnt,uint32_t flags)2360 fill_llink_info_and_params(if_fake_ref fakeif, uint32_t llink_idx,
2361     struct kern_nexus_netif_llink_init *llink_init, uint32_t llink_id,
2362     struct kern_nexus_netif_llink_qset_init *qset_init, uint32_t qset_cnt,
2363     uint32_t flags)
2364 {
2365 	fake_llink *llink_info = &fakeif->iff_llink[llink_idx];
2366 	uint32_t i;
2367 
2368 	for (i = 0; i < qset_cnt; i++) {
2369 		fill_qset_info_and_params(fakeif, llink_info, i,
2370 		    &qset_init[i], i == 0);
2371 	}
2372 	llink_info->fl_idx = llink_idx;
2373 
2374 	/* This doesn't have to be the same as llink_idx */
2375 	llink_info->fl_id = llink_id;
2376 	llink_info->fl_qset_cnt = qset_cnt;
2377 
2378 	llink_init->nli_link_id = llink_id;
2379 	llink_init->nli_num_qsets = qset_cnt;
2380 	llink_init->nli_qsets = qset_init;
2381 	llink_init->nli_flags = flags;
2382 	llink_init->nli_ctx = llink_info;
2383 }
2384 
2385 static errno_t
create_non_default_llinks(if_fake_ref fakeif)2386 create_non_default_llinks(if_fake_ref fakeif)
2387 {
2388 	struct kern_nexus *nx;
2389 	fake_nx_t fnx = &fakeif->iff_nx;
2390 	struct kern_nexus_netif_llink_init llink_init;
2391 	struct kern_nexus_netif_llink_qset_init qset_init[FETH_MAX_QSETS];
2392 	errno_t err;
2393 	uint64_t llink_id;
2394 	uint32_t i;
2395 
2396 	nx = nx_find(fnx->fnx_instance, FALSE);
2397 	if (nx == NULL) {
2398 		printf("%s: %s: nx not found\n", __func__, fakeif->iff_name);
2399 		return ENXIO;
2400 	}
2401 	/* Default llink starts at index 0 */
2402 	for (i = 1; i < if_fake_llink_cnt; i++) {
2403 		llink_id = (uint64_t)i;
2404 
2405 		/*
2406 		 * The llink_init and qset_init structures are reused for
2407 		 * each llink creation.
2408 		 */
2409 		fill_llink_info_and_params(fakeif, i, &llink_init,
2410 		    llink_id, qset_init, if_fake_qset_cnt, 0);
2411 		err = kern_nexus_netif_llink_add(nx, &llink_init);
2412 		if (err != 0) {
2413 			printf("%s: %s: llink add failed, error %d\n",
2414 			    __func__, fakeif->iff_name, err);
2415 			goto fail;
2416 		}
2417 		fakeif->iff_llink_cnt++;
2418 	}
2419 	nx_release(nx);
2420 	return 0;
2421 
2422 fail:
2423 	for (i = 0; i < fakeif->iff_llink_cnt; i++) {
2424 		int e;
2425 
2426 		e = kern_nexus_netif_llink_remove(nx, fakeif->
2427 		    iff_llink[i].fl_id);
2428 		if (e != 0) {
2429 			printf("%s: %s: llink remove failed, llink_id 0x%llx, "
2430 			    "error %d\n", __func__, fakeif->iff_name,
2431 			    fakeif->iff_llink[i].fl_id, e);
2432 		}
2433 		fakeif->iff_llink[i].fl_id = 0;
2434 	}
2435 	fakeif->iff_llink_cnt = 0;
2436 	nx_release(nx);
2437 	return err;
2438 }
2439 
2440 static errno_t
create_netif_llink_provider_and_instance(if_fake_ref fakeif,struct ifnet_init_eparams * init_params,ifnet_t * ifp,uuid_t * provider,uuid_t * instance)2441 create_netif_llink_provider_and_instance(if_fake_ref fakeif,
2442     struct ifnet_init_eparams * init_params, ifnet_t *ifp,
2443     uuid_t * provider, uuid_t * instance)
2444 {
2445 	errno_t                 err;
2446 	nexus_controller_t      controller = kern_nexus_shared_controller();
2447 	struct kern_nexus_net_init net_init;
2448 	struct kern_nexus_netif_llink_init llink_init;
2449 	struct kern_nexus_netif_llink_qset_init qsets[FETH_MAX_QSETS];
2450 
2451 	nexus_name_t            provider_name;
2452 	nexus_attr_t            nexus_attr = NULL;
2453 	struct kern_nexus_netif_provider_init prov_init = {
2454 		.nxnpi_version = KERN_NEXUS_DOMAIN_PROVIDER_NETIF,
2455 		.nxnpi_flags = NXPIF_VIRTUAL_DEVICE,
2456 		.nxnpi_pre_connect = feth_nx_pre_connect,
2457 		.nxnpi_connected = feth_nx_connected,
2458 		.nxnpi_pre_disconnect = feth_nx_pre_disconnect,
2459 		.nxnpi_disconnected = feth_nx_disconnected,
2460 		.nxnpi_qset_init = feth_nx_qset_init,
2461 		.nxnpi_qset_fini = feth_nx_qset_fini,
2462 		.nxnpi_queue_init = feth_nx_queue_init,
2463 		.nxnpi_queue_fini = feth_nx_queue_fini,
2464 		.nxnpi_tx_qset_notify = feth_nx_tx_qset_notify,
2465 		.nxnpi_config_capab = feth_nx_capab_config,
2466 	};
2467 
2468 	err = kern_nexus_attr_create(&nexus_attr);
2469 	if (err != 0) {
2470 		printf("%s nexus attribute creation failed, error %d\n",
2471 		    __func__, err);
2472 		goto failed;
2473 	}
2474 
2475 	err = kern_nexus_attr_set(nexus_attr, NEXUS_ATTR_ANONYMOUS, 1);
2476 	VERIFY(err == 0);
2477 
2478 	snprintf((char *)provider_name, sizeof(provider_name),
2479 	    "com.apple.netif.%s", fakeif->iff_name);
2480 	err = kern_nexus_controller_register_provider(controller,
2481 	    feth_nx_dom_prov,
2482 	    provider_name,
2483 	    (struct kern_nexus_provider_init *)&prov_init,
2484 	    sizeof(prov_init),
2485 	    nexus_attr,
2486 	    provider);
2487 	if (err != 0) {
2488 		printf("%s register provider failed, error %d\n",
2489 		    __func__, err);
2490 		goto failed;
2491 	}
2492 	bzero(&net_init, sizeof(net_init));
2493 	net_init.nxneti_version = KERN_NEXUS_NET_CURRENT_VERSION;
2494 	net_init.nxneti_flags = 0;
2495 	net_init.nxneti_eparams = init_params;
2496 	net_init.nxneti_lladdr = NULL;
2497 	net_init.nxneti_prepare = feth_netif_prepare;
2498 	net_init.nxneti_rx_pbufpool = fakeif->iff_rx_pp;
2499 	net_init.nxneti_tx_pbufpool = fakeif->iff_tx_pp;
2500 
2501 	/*
2502 	 * Assume llink id is same as the index for if_fake.
2503 	 * This is not required for other drivers.
2504 	 */
2505 	_CASSERT(NETIF_LLINK_ID_DEFAULT == 0);
2506 	fill_llink_info_and_params(fakeif, 0, &llink_init,
2507 	    NETIF_LLINK_ID_DEFAULT, qsets, if_fake_qset_cnt,
2508 	    KERN_NEXUS_NET_LLINK_DEFAULT);
2509 
2510 	net_init.nxneti_llink = &llink_init;
2511 
2512 	err = kern_nexus_controller_alloc_net_provider_instance(controller,
2513 	    *provider, fakeif, NULL, instance, &net_init, ifp);
2514 	if (err != 0) {
2515 		printf("%s alloc_net_provider_instance failed, %d\n",
2516 		    __func__, err);
2517 		kern_nexus_controller_deregister_provider(controller,
2518 		    *provider);
2519 		uuid_clear(*provider);
2520 		goto failed;
2521 	}
2522 	fakeif->iff_llink_cnt++;
2523 
2524 	if (if_fake_llink_cnt > 1) {
2525 		err = create_non_default_llinks(fakeif);
2526 		if (err != 0) {
2527 			printf("%s create_non_default_llinks failed, %d\n",
2528 			    __func__, err);
2529 			feth_detach_netif_nexus(fakeif);
2530 			goto failed;
2531 		}
2532 	}
2533 failed:
2534 	if (nexus_attr != NULL) {
2535 		kern_nexus_attr_destroy(nexus_attr);
2536 	}
2537 	return err;
2538 }
2539 
2540 static errno_t
feth_attach_netif_nexus(if_fake_ref fakeif,struct ifnet_init_eparams * init_params,ifnet_t * ifp)2541 feth_attach_netif_nexus(if_fake_ref fakeif,
2542     struct ifnet_init_eparams * init_params, ifnet_t *ifp)
2543 {
2544 	errno_t                 error;
2545 	fake_nx_t               nx = &fakeif->iff_nx;
2546 
2547 	error = feth_packet_pool_make(fakeif);
2548 	if (error != 0) {
2549 		return error;
2550 	}
2551 	if (if_fake_llink_cnt == 0) {
2552 		return create_netif_provider_and_instance(fakeif, init_params,
2553 		           ifp, &nx->fnx_provider, &nx->fnx_instance);
2554 	} else {
2555 		return create_netif_llink_provider_and_instance(fakeif,
2556 		           init_params, ifp, &nx->fnx_provider,
2557 		           &nx->fnx_instance);
2558 	}
2559 }
2560 
2561 static void
remove_non_default_llinks(if_fake_ref fakeif)2562 remove_non_default_llinks(if_fake_ref fakeif)
2563 {
2564 	struct kern_nexus *nx;
2565 	fake_nx_t fnx = &fakeif->iff_nx;
2566 	uint32_t i;
2567 
2568 	if (fakeif->iff_llink_cnt <= 1) {
2569 		return;
2570 	}
2571 	nx = nx_find(fnx->fnx_instance, FALSE);
2572 	if (nx == NULL) {
2573 		printf("%s: %s: nx not found\n", __func__,
2574 		    fakeif->iff_name);
2575 		return;
2576 	}
2577 	/* Default llink (at index 0) is freed separately */
2578 	for (i = 1; i < fakeif->iff_llink_cnt; i++) {
2579 		int err;
2580 
2581 		err = kern_nexus_netif_llink_remove(nx, fakeif->
2582 		    iff_llink[i].fl_id);
2583 		if (err != 0) {
2584 			printf("%s: %s: llink remove failed, llink_id 0x%llx, "
2585 			    "error %d\n", __func__, fakeif->iff_name,
2586 			    fakeif->iff_llink[i].fl_id, err);
2587 		}
2588 		fakeif->iff_llink[i].fl_id = 0;
2589 	}
2590 	fakeif->iff_llink_cnt = 0;
2591 	nx_release(nx);
2592 }
2593 
2594 static void
detach_provider_and_instance(uuid_t provider,uuid_t instance)2595 detach_provider_and_instance(uuid_t provider, uuid_t instance)
2596 {
2597 	nexus_controller_t controller = kern_nexus_shared_controller();
2598 	errno_t err;
2599 
2600 	if (!uuid_is_null(instance)) {
2601 		err = kern_nexus_controller_free_provider_instance(controller,
2602 		    instance);
2603 		if (err != 0) {
2604 			printf("%s free_provider_instance failed %d\n",
2605 			    __func__, err);
2606 		}
2607 		uuid_clear(instance);
2608 	}
2609 	if (!uuid_is_null(provider)) {
2610 		err = kern_nexus_controller_deregister_provider(controller,
2611 		    provider);
2612 		if (err != 0) {
2613 			printf("%s deregister_provider %d\n", __func__, err);
2614 		}
2615 		uuid_clear(provider);
2616 	}
2617 	return;
2618 }
2619 
2620 static void
feth_detach_netif_nexus(if_fake_ref fakeif)2621 feth_detach_netif_nexus(if_fake_ref fakeif)
2622 {
2623 	fake_nx_t fnx = &fakeif->iff_nx;
2624 
2625 	remove_non_default_llinks(fakeif);
2626 	detach_provider_and_instance(fnx->fnx_provider, fnx->fnx_instance);
2627 }
2628 
2629 #endif /* SKYWALK */
2630 
2631 /**
2632 ** feth interface routines
2633 **/
2634 static void
feth_ifnet_set_attrs(if_fake_ref fakeif,ifnet_t ifp)2635 feth_ifnet_set_attrs(if_fake_ref fakeif, ifnet_t ifp)
2636 {
2637 	(void)ifnet_set_capabilities_enabled(ifp, 0, -1);
2638 	ifnet_set_addrlen(ifp, ETHER_ADDR_LEN);
2639 	ifnet_set_baudrate(ifp, 0);
2640 	ifnet_set_mtu(ifp, ETHERMTU);
2641 	ifnet_set_flags(ifp,
2642 	    IFF_BROADCAST | IFF_MULTICAST | IFF_SIMPLEX,
2643 	    0xffff);
2644 	ifnet_set_hdrlen(ifp, sizeof(struct ether_header));
2645 	if ((fakeif->iff_flags & IFF_FLAGS_HWCSUM) != 0) {
2646 		ifnet_set_offload(ifp,
2647 		    IFNET_CSUM_IP | IFNET_CSUM_TCP | IFNET_CSUM_UDP |
2648 		    IFNET_CSUM_TCPIPV6 | IFNET_CSUM_UDPIPV6);
2649 	} else {
2650 		ifnet_set_offload(ifp, 0);
2651 	}
2652 }
2653 
2654 static void
interface_link_event(ifnet_t ifp,u_int32_t event_code)2655 interface_link_event(ifnet_t ifp, u_int32_t event_code)
2656 {
2657 	struct event {
2658 		u_int32_t ifnet_family;
2659 		u_int32_t unit;
2660 		char if_name[IFNAMSIZ];
2661 	};
2662 	_Alignas(struct kern_event_msg) char message[sizeof(struct kern_event_msg) + sizeof(struct event)] = { 0 };
2663 	struct kern_event_msg *header = (struct kern_event_msg*)message;
2664 	struct event *data = (struct event *)(header + 1);
2665 
2666 	header->total_size   = sizeof(message);
2667 	header->vendor_code  = KEV_VENDOR_APPLE;
2668 	header->kev_class    = KEV_NETWORK_CLASS;
2669 	header->kev_subclass = KEV_DL_SUBCLASS;
2670 	header->event_code   = event_code;
2671 	data->ifnet_family   = ifnet_family(ifp);
2672 	data->unit           = (u_int32_t)ifnet_unit(ifp);
2673 	strlcpy(data->if_name, ifnet_name(ifp), IFNAMSIZ);
2674 	ifnet_event(ifp, header);
2675 }
2676 
2677 static if_fake_ref
ifnet_get_if_fake(ifnet_t ifp)2678 ifnet_get_if_fake(ifnet_t ifp)
2679 {
2680 	return (if_fake_ref)ifnet_softc(ifp);
2681 }
2682 
2683 static int
feth_clone_create(struct if_clone * ifc,u_int32_t unit,__unused void * params)2684 feth_clone_create(struct if_clone *ifc, u_int32_t unit, __unused void *params)
2685 {
2686 	int                             error;
2687 	if_fake_ref                     fakeif;
2688 	struct ifnet_init_eparams       feth_init;
2689 	ifnet_t                         ifp;
2690 	uint8_t                         mac_address[ETHER_ADDR_LEN];
2691 
2692 	fakeif = if_clone_softc_allocate(&feth_cloner);
2693 	if (fakeif == NULL) {
2694 		return ENOBUFS;
2695 	}
2696 	fakeif->iff_retain_count = 1;
2697 #define FAKE_ETHER_NAME_LEN     (sizeof(FAKE_ETHER_NAME) - 1)
2698 	_CASSERT(FAKE_ETHER_NAME_LEN == 4);
2699 	bcopy(FAKE_ETHER_NAME, mac_address, FAKE_ETHER_NAME_LEN);
2700 	mac_address[ETHER_ADDR_LEN - 2] = (unit & 0xff00) >> 8;
2701 	mac_address[ETHER_ADDR_LEN - 1] = unit & 0xff;
2702 	if (if_fake_bsd_mode != 0) {
2703 		fakeif->iff_flags |= IFF_FLAGS_BSD_MODE;
2704 	}
2705 	if (if_fake_hwcsum != 0) {
2706 		fakeif->iff_flags |= IFF_FLAGS_HWCSUM;
2707 	}
2708 	fakeif->iff_max_mtu = get_max_mtu(if_fake_bsd_mode, if_fake_max_mtu);
2709 	fakeif->iff_fcs = if_fake_fcs;
2710 	fakeif->iff_trailer_length = if_fake_trailer_length;
2711 
2712 	/* use the interface name as the unique id for ifp recycle */
2713 	if ((unsigned int)
2714 	    snprintf(fakeif->iff_name, sizeof(fakeif->iff_name), "%s%d",
2715 	    ifc->ifc_name, unit) >= sizeof(fakeif->iff_name)) {
2716 		feth_release(fakeif);
2717 		return EINVAL;
2718 	}
2719 	bzero(&feth_init, sizeof(feth_init));
2720 	feth_init.ver = IFNET_INIT_CURRENT_VERSION;
2721 	feth_init.len = sizeof(feth_init);
2722 	if (feth_in_bsd_mode(fakeif)) {
2723 		if (if_fake_txstart != 0) {
2724 			feth_init.start = feth_start;
2725 		} else {
2726 			feth_init.flags |= IFNET_INIT_LEGACY;
2727 			feth_init.output = feth_output;
2728 		}
2729 	}
2730 #if SKYWALK
2731 	else {
2732 		feth_init.flags |= IFNET_INIT_SKYWALK_NATIVE;
2733 		/*
2734 		 * Currently we support WMM mode only for Skywalk native
2735 		 * interface.
2736 		 */
2737 		if (if_fake_wmm_mode != 0) {
2738 			fakeif->iff_flags |= IFF_FLAGS_WMM_MODE;
2739 		}
2740 
2741 		if (if_fake_multibuflet != 0) {
2742 			fakeif->iff_flags |= IFF_FLAGS_MULTIBUFLETS;
2743 		}
2744 
2745 		if (if_fake_multibuflet != 0 &&
2746 		    if_fake_pktpool_mode == IFF_PP_MODE_PRIVATE_SPLIT) {
2747 			printf("%s: multi-buflet not supported for split rx &"
2748 			    " tx pool", __func__);
2749 			feth_release(fakeif);
2750 			return EINVAL;
2751 		}
2752 		fakeif->iff_pp_mode = if_fake_pktpool_mode;
2753 
2754 		fakeif->iff_tx_headroom = if_fake_tx_headroom;
2755 		fakeif->iff_adv_interval = if_fake_if_adv_interval;
2756 		if (fakeif->iff_adv_interval > 0) {
2757 			feth_init.flags |= IFNET_INIT_IF_ADV;
2758 		}
2759 		fakeif->iff_tx_drop_rate = if_fake_tx_drops;
2760 	}
2761 	feth_init.tx_headroom = fakeif->iff_tx_headroom;
2762 #endif /* SKYWALK */
2763 	if (if_fake_nxattach == 0) {
2764 		feth_init.flags |= IFNET_INIT_NX_NOAUTO;
2765 	}
2766 	feth_init.uniqueid = fakeif->iff_name;
2767 	feth_init.uniqueid_len = strlen(fakeif->iff_name);
2768 	feth_init.name = ifc->ifc_name;
2769 	feth_init.unit = unit;
2770 	feth_init.family = IFNET_FAMILY_ETHERNET;
2771 	feth_init.type = IFT_ETHER;
2772 	feth_init.demux = ether_demux;
2773 	feth_init.add_proto = ether_add_proto;
2774 	feth_init.del_proto = ether_del_proto;
2775 	feth_init.check_multi = ether_check_multi;
2776 	feth_init.framer_extended = ether_frameout_extended;
2777 	feth_init.softc = fakeif;
2778 	feth_init.ioctl = feth_ioctl;
2779 	feth_init.set_bpf_tap = NULL;
2780 	feth_init.detach = feth_if_free;
2781 	feth_init.broadcast_addr = etherbroadcastaddr;
2782 	feth_init.broadcast_len = ETHER_ADDR_LEN;
2783 	if (feth_in_bsd_mode(fakeif)) {
2784 		error = ifnet_allocate_extended(&feth_init, &ifp);
2785 		if (error) {
2786 			feth_release(fakeif);
2787 			return error;
2788 		}
2789 		feth_ifnet_set_attrs(fakeif, ifp);
2790 	}
2791 #if SKYWALK
2792 	else {
2793 		if (feth_in_wmm_mode(fakeif)) {
2794 			feth_init.output_sched_model =
2795 			    IFNET_SCHED_MODEL_DRIVER_MANAGED;
2796 		}
2797 		error = feth_attach_netif_nexus(fakeif, &feth_init, &ifp);
2798 		if (error != 0) {
2799 			feth_release(fakeif);
2800 			return error;
2801 		}
2802 		/* take an additional reference to ensure that it doesn't go away */
2803 		feth_retain(fakeif);
2804 		fakeif->iff_ifp = ifp;
2805 	}
2806 #endif /* SKYWALK */
2807 	fakeif->iff_media_count = MIN(default_media_words_count, IF_FAKE_MEDIA_LIST_MAX);
2808 	bcopy(default_media_words, fakeif->iff_media_list,
2809 	    fakeif->iff_media_count * sizeof(fakeif->iff_media_list[0]));
2810 	if (feth_in_bsd_mode(fakeif)) {
2811 		error = ifnet_attach(ifp, NULL);
2812 		if (error) {
2813 			ifnet_release(ifp);
2814 			feth_release(fakeif);
2815 			return error;
2816 		}
2817 		fakeif->iff_ifp = ifp;
2818 	}
2819 
2820 	ifnet_set_lladdr(ifp, mac_address, sizeof(mac_address));
2821 
2822 	/* attach as ethernet */
2823 	bpfattach(ifp, DLT_EN10MB, sizeof(struct ether_header));
2824 	return 0;
2825 }
2826 
2827 static int
feth_clone_destroy(ifnet_t ifp)2828 feth_clone_destroy(ifnet_t ifp)
2829 {
2830 	if_fake_ref     fakeif;
2831 #if SKYWALK
2832 	boolean_t       nx_attached = FALSE;
2833 #endif /* SKYWALK */
2834 
2835 	feth_lock();
2836 	fakeif = ifnet_get_if_fake(ifp);
2837 	if (fakeif == NULL || feth_is_detaching(fakeif)) {
2838 		feth_unlock();
2839 		return 0;
2840 	}
2841 	feth_set_detaching(fakeif);
2842 #if SKYWALK
2843 	nx_attached = !feth_in_bsd_mode(fakeif);
2844 #endif /* SKYWALK */
2845 	feth_unlock();
2846 
2847 #if SKYWALK
2848 	if (nx_attached) {
2849 		feth_detach_netif_nexus(fakeif);
2850 		feth_release(fakeif);
2851 	}
2852 #endif /* SKYWALK */
2853 	feth_config(ifp, NULL);
2854 	ifnet_detach(ifp);
2855 	return 0;
2856 }
2857 
2858 static void
feth_enqueue_input(ifnet_t ifp,struct mbuf * m)2859 feth_enqueue_input(ifnet_t ifp, struct mbuf * m)
2860 {
2861 	struct ifnet_stat_increment_param stats = {};
2862 
2863 	stats.packets_in = 1;
2864 	stats.bytes_in = (uint32_t)mbuf_pkthdr_len(m) + ETHER_HDR_LEN;
2865 	ifnet_input(ifp, m, &stats);
2866 }
2867 
2868 static struct mbuf *
copy_mbuf(struct mbuf * m)2869 copy_mbuf(struct mbuf *m)
2870 {
2871 	struct mbuf *   copy_m;
2872 	uint32_t        pkt_len;
2873 	uint32_t        offset;
2874 
2875 	if ((m->m_flags & M_PKTHDR) == 0) {
2876 		return NULL;
2877 	}
2878 	pkt_len = m->m_pkthdr.len;
2879 	MGETHDR(copy_m, M_DONTWAIT, MT_DATA);
2880 	if (copy_m == NULL) {
2881 		goto failed;
2882 	}
2883 	if (pkt_len > MHLEN) {
2884 		if (pkt_len <= MCLBYTES) {
2885 			MCLGET(copy_m, M_DONTWAIT);
2886 		} else if (pkt_len <= MBIGCLBYTES) {
2887 			copy_m = m_mbigget(copy_m, M_DONTWAIT);
2888 		} else if (pkt_len <= M16KCLBYTES && njcl > 0) {
2889 			copy_m = m_m16kget(copy_m, M_DONTWAIT);
2890 		} else {
2891 			printf("if_fake: copy_mbuf(): packet too large %d\n",
2892 			    pkt_len);
2893 			goto failed;
2894 		}
2895 		if (copy_m == NULL || (copy_m->m_flags & M_EXT) == 0) {
2896 			goto failed;
2897 		}
2898 	}
2899 	mbuf_setlen(copy_m, pkt_len);
2900 	copy_m->m_pkthdr.len = pkt_len;
2901 	copy_m->m_pkthdr.pkt_svc = m->m_pkthdr.pkt_svc;
2902 	offset = 0;
2903 	while (m != NULL && offset < pkt_len) {
2904 		uint32_t        frag_len;
2905 
2906 		frag_len = m->m_len;
2907 		if (frag_len > (pkt_len - offset)) {
2908 			printf("if_fake_: Large mbuf fragment %d > %d\n",
2909 			    frag_len, (pkt_len - offset));
2910 			goto failed;
2911 		}
2912 		m_copydata(m, 0, frag_len, mtodo(copy_m, offset));
2913 		offset += frag_len;
2914 		m = m->m_next;
2915 	}
2916 	return copy_m;
2917 
2918 failed:
2919 	if (copy_m != NULL) {
2920 		m_freem(copy_m);
2921 	}
2922 	return NULL;
2923 }
2924 
2925 static int
feth_add_mbuf_trailer(struct mbuf * m,void * trailer,size_t trailer_len)2926 feth_add_mbuf_trailer(struct mbuf *m, void *trailer, size_t trailer_len)
2927 {
2928 	int ret;
2929 	ASSERT(trailer_len <= FETH_TRAILER_LENGTH_MAX);
2930 
2931 	ret = m_append(m, trailer_len, (caddr_t)trailer);
2932 	if (ret == 1) {
2933 		FETH_DPRINTF("%s %zuB trailer added\n", __func__, trailer_len);
2934 		return 0;
2935 	}
2936 	printf("%s m_append failed\n", __func__);
2937 	return ENOTSUP;
2938 }
2939 
2940 static int
feth_add_mbuf_fcs(struct mbuf * m)2941 feth_add_mbuf_fcs(struct mbuf *m)
2942 {
2943 	uint32_t pkt_len, offset = 0;
2944 	uint32_t crc = 0;
2945 	int err = 0;
2946 
2947 	ASSERT(sizeof(crc) == ETHER_CRC_LEN);
2948 
2949 	pkt_len = m->m_pkthdr.len;
2950 	struct mbuf *iter = m;
2951 	while (iter != NULL && offset < pkt_len) {
2952 		uint32_t frag_len = iter->m_len;
2953 		ASSERT(frag_len <= (pkt_len - offset));
2954 		crc = crc32(crc, mtod(iter, void *), frag_len);
2955 		offset += frag_len;
2956 		iter = m->m_next;
2957 	}
2958 
2959 	err = feth_add_mbuf_trailer(m, &crc, ETHER_CRC_LEN);
2960 	if (err != 0) {
2961 		return err;
2962 	}
2963 
2964 	m->m_flags |= M_HASFCS;
2965 
2966 	return 0;
2967 }
2968 
2969 static void
feth_output_common(ifnet_t ifp,struct mbuf * m,ifnet_t peer,iff_flags_t flags,bool fcs,void * trailer,size_t trailer_len)2970 feth_output_common(ifnet_t ifp, struct mbuf * m, ifnet_t peer,
2971     iff_flags_t flags, bool fcs, void *trailer, size_t trailer_len)
2972 {
2973 	void *          frame_header;
2974 
2975 	frame_header = mbuf_data(m);
2976 	if ((flags & IFF_FLAGS_HWCSUM) != 0) {
2977 		m->m_pkthdr.csum_data = 0xffff;
2978 		m->m_pkthdr.csum_flags =
2979 		    CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
2980 		    CSUM_IP_CHECKED | CSUM_IP_VALID;
2981 	}
2982 
2983 	(void)ifnet_stat_increment_out(ifp, 1, m->m_pkthdr.len, 0);
2984 	bpf_tap_out(ifp, DLT_EN10MB, m, NULL, 0);
2985 
2986 	if (trailer != 0) {
2987 		feth_add_mbuf_trailer(m, trailer, trailer_len);
2988 	}
2989 	if (fcs) {
2990 		feth_add_mbuf_fcs(m);
2991 	}
2992 
2993 	(void)mbuf_pkthdr_setrcvif(m, peer);
2994 	mbuf_pkthdr_setheader(m, frame_header);
2995 	mbuf_pkthdr_adjustlen(m, -ETHER_HDR_LEN);
2996 	(void)mbuf_setdata(m, (char *)mbuf_data(m) + ETHER_HDR_LEN,
2997 	    mbuf_len(m) - ETHER_HDR_LEN);
2998 	bpf_tap_in(peer, DLT_EN10MB, m, frame_header,
2999 	    sizeof(struct ether_header));
3000 	feth_enqueue_input(peer, m);
3001 }
3002 
3003 static void
feth_start(ifnet_t ifp)3004 feth_start(ifnet_t ifp)
3005 {
3006 	struct mbuf *   copy_m = NULL;
3007 	if_fake_ref     fakeif;
3008 	iff_flags_t     flags = 0;
3009 	bool            fcs;
3010 	size_t          trailer_len;
3011 	ifnet_t         peer = NULL;
3012 	struct mbuf *   m;
3013 	struct mbuf *   save_m;
3014 
3015 	feth_lock();
3016 	fakeif = ifnet_get_if_fake(ifp);
3017 	if (fakeif == NULL) {
3018 		feth_unlock();
3019 		return;
3020 	}
3021 
3022 	if (fakeif->iff_start_busy) {
3023 		feth_unlock();
3024 		printf("if_fake: start is busy\n");
3025 		return;
3026 	}
3027 
3028 	peer = fakeif->iff_peer;
3029 	flags = fakeif->iff_flags;
3030 	fcs = fakeif->iff_fcs;
3031 	trailer_len = fakeif->iff_trailer_length;
3032 
3033 	/* check for pending TX */
3034 	m = fakeif->iff_pending_tx_packet;
3035 	if (m != NULL) {
3036 		if (peer != NULL) {
3037 			copy_m = copy_mbuf(m);
3038 			if (copy_m == NULL) {
3039 				feth_unlock();
3040 				return;
3041 			}
3042 		}
3043 		fakeif->iff_pending_tx_packet = NULL;
3044 		m_freem(m);
3045 		m = NULL;
3046 	}
3047 	fakeif->iff_start_busy = TRUE;
3048 	feth_unlock();
3049 	save_m = NULL;
3050 	for (;;) {
3051 		if (copy_m != NULL) {
3052 			VERIFY(peer != NULL);
3053 			feth_output_common(ifp, copy_m, peer, flags, fcs,
3054 			    feth_trailer, trailer_len);
3055 			copy_m = NULL;
3056 		}
3057 		if (ifnet_dequeue(ifp, &m) != 0) {
3058 			break;
3059 		}
3060 		if (peer == NULL) {
3061 			m_freem(m);
3062 		} else {
3063 			copy_m = copy_mbuf(m);
3064 			if (copy_m == NULL) {
3065 				save_m = m;
3066 				break;
3067 			}
3068 			m_freem(m);
3069 		}
3070 	}
3071 	peer = NULL;
3072 	feth_lock();
3073 	fakeif = ifnet_get_if_fake(ifp);
3074 	if (fakeif != NULL) {
3075 		fakeif->iff_start_busy = FALSE;
3076 		if (save_m != NULL && fakeif->iff_peer != NULL) {
3077 			/* save it for next time */
3078 			fakeif->iff_pending_tx_packet = save_m;
3079 			save_m = NULL;
3080 		}
3081 	}
3082 	feth_unlock();
3083 	if (save_m != NULL) {
3084 		/* didn't save packet, so free it */
3085 		m_freem(save_m);
3086 	}
3087 }
3088 
3089 static int
feth_output(ifnet_t ifp,struct mbuf * m)3090 feth_output(ifnet_t ifp, struct mbuf * m)
3091 {
3092 	struct mbuf *           copy_m;
3093 	if_fake_ref             fakeif;
3094 	iff_flags_t             flags;
3095 	bool                    fcs;
3096 	size_t                  trailer_len;
3097 	ifnet_t                 peer = NULL;
3098 
3099 	if (m == NULL) {
3100 		return 0;
3101 	}
3102 	copy_m = copy_mbuf(m);
3103 	m_freem(m);
3104 	m = NULL;
3105 	if (copy_m == NULL) {
3106 		/* count this as an output error */
3107 		ifnet_stat_increment_out(ifp, 0, 0, 1);
3108 		return 0;
3109 	}
3110 	feth_lock();
3111 	fakeif = ifnet_get_if_fake(ifp);
3112 	if (fakeif != NULL) {
3113 		peer = fakeif->iff_peer;
3114 		flags = fakeif->iff_flags;
3115 		fcs = fakeif->iff_fcs;
3116 		trailer_len = fakeif->iff_trailer_length;
3117 	}
3118 	feth_unlock();
3119 	if (peer == NULL) {
3120 		m_freem(copy_m);
3121 		ifnet_stat_increment_out(ifp, 0, 0, 1);
3122 		return 0;
3123 	}
3124 	feth_output_common(ifp, copy_m, peer, flags, fcs, feth_trailer,
3125 	    trailer_len);
3126 	return 0;
3127 }
3128 
3129 static int
feth_config(ifnet_t ifp,ifnet_t peer)3130 feth_config(ifnet_t ifp, ifnet_t peer)
3131 {
3132 	int             connected = FALSE;
3133 	int             disconnected = FALSE;
3134 	int             error = 0;
3135 	if_fake_ref     fakeif = NULL;
3136 
3137 	feth_lock();
3138 	fakeif = ifnet_get_if_fake(ifp);
3139 	if (fakeif == NULL) {
3140 		error = EINVAL;
3141 		goto done;
3142 	}
3143 	if (peer != NULL) {
3144 		/* connect to peer */
3145 		if_fake_ref     peer_fakeif;
3146 
3147 		peer_fakeif = ifnet_get_if_fake(peer);
3148 		if (peer_fakeif == NULL) {
3149 			error = EINVAL;
3150 			goto done;
3151 		}
3152 		if (feth_is_detaching(fakeif) ||
3153 		    feth_is_detaching(peer_fakeif) ||
3154 		    peer_fakeif->iff_peer != NULL ||
3155 		    fakeif->iff_peer != NULL) {
3156 			error = EBUSY;
3157 			goto done;
3158 		}
3159 #if SKYWALK
3160 		if (fakeif->iff_pp_mode !=
3161 		    peer_fakeif->iff_pp_mode) {
3162 			error = EINVAL;
3163 			goto done;
3164 		}
3165 #endif /* SKYWALK */
3166 		fakeif->iff_peer = peer;
3167 		peer_fakeif->iff_peer = ifp;
3168 		connected = TRUE;
3169 	} else if (fakeif->iff_peer != NULL) {
3170 		/* disconnect from peer */
3171 		if_fake_ref     peer_fakeif;
3172 
3173 		peer = fakeif->iff_peer;
3174 		peer_fakeif = ifnet_get_if_fake(peer);
3175 		if (peer_fakeif == NULL) {
3176 			/* should not happen */
3177 			error = EINVAL;
3178 			goto done;
3179 		}
3180 		fakeif->iff_peer = NULL;
3181 		peer_fakeif->iff_peer = NULL;
3182 		disconnected = TRUE;
3183 	}
3184 
3185 done:
3186 	feth_unlock();
3187 
3188 	/* generate link status event if we connect or disconnect */
3189 	if (connected) {
3190 		interface_link_event(ifp, KEV_DL_LINK_ON);
3191 		interface_link_event(peer, KEV_DL_LINK_ON);
3192 	} else if (disconnected) {
3193 		interface_link_event(ifp, KEV_DL_LINK_OFF);
3194 		interface_link_event(peer, KEV_DL_LINK_OFF);
3195 	}
3196 	return error;
3197 }
3198 
3199 static int
feth_set_media(ifnet_t ifp,struct if_fake_request * iffr)3200 feth_set_media(ifnet_t ifp, struct if_fake_request * iffr)
3201 {
3202 	if_fake_ref     fakeif;
3203 	int             error;
3204 
3205 	if (iffr->iffr_media.iffm_count > IF_FAKE_MEDIA_LIST_MAX) {
3206 		/* list is too long */
3207 		return EINVAL;
3208 	}
3209 	feth_lock();
3210 	fakeif = ifnet_get_if_fake(ifp);
3211 	if (fakeif == NULL) {
3212 		error = EINVAL;
3213 		goto done;
3214 	}
3215 	fakeif->iff_media_count = iffr->iffr_media.iffm_count;
3216 	bcopy(iffr->iffr_media.iffm_list, fakeif->iff_media_list,
3217 	    iffr->iffr_media.iffm_count * sizeof(fakeif->iff_media_list[0]));
3218 #if 0
3219 	/* XXX: "auto-negotiate" active with peer? */
3220 	/* generate link status event? */
3221 	fakeif->iff_media_current = iffr->iffr_media.iffm_current;
3222 #endif
3223 	error = 0;
3224 done:
3225 	feth_unlock();
3226 	return error;
3227 }
3228 
3229 static int
if_fake_request_copyin(user_addr_t user_addr,struct if_fake_request * iffr,u_int32_t len)3230 if_fake_request_copyin(user_addr_t user_addr,
3231     struct if_fake_request *iffr, u_int32_t len)
3232 {
3233 	int     error;
3234 
3235 	if (user_addr == USER_ADDR_NULL || len < sizeof(*iffr)) {
3236 		error = EINVAL;
3237 		goto done;
3238 	}
3239 	error = copyin(user_addr, iffr, sizeof(*iffr));
3240 	if (error != 0) {
3241 		goto done;
3242 	}
3243 	if (iffr->iffr_reserved[0] != 0 || iffr->iffr_reserved[1] != 0 ||
3244 	    iffr->iffr_reserved[2] != 0 || iffr->iffr_reserved[3] != 0) {
3245 		error = EINVAL;
3246 		goto done;
3247 	}
3248 done:
3249 	return error;
3250 }
3251 
3252 static int
feth_set_drvspec(ifnet_t ifp,uint32_t cmd,u_int32_t len,user_addr_t user_addr)3253 feth_set_drvspec(ifnet_t ifp, uint32_t cmd, u_int32_t len,
3254     user_addr_t user_addr)
3255 {
3256 	int                     error;
3257 	struct if_fake_request  iffr;
3258 	ifnet_t                 peer;
3259 
3260 	switch (cmd) {
3261 	case IF_FAKE_S_CMD_SET_PEER:
3262 		error = if_fake_request_copyin(user_addr, &iffr, len);
3263 		if (error != 0) {
3264 			break;
3265 		}
3266 		if (iffr.iffr_peer_name[0] == '\0') {
3267 			error = feth_config(ifp, NULL);
3268 			break;
3269 		}
3270 
3271 		/* ensure nul termination */
3272 		iffr.iffr_peer_name[IFNAMSIZ - 1] = '\0';
3273 		peer = ifunit(iffr.iffr_peer_name);
3274 		if (peer == NULL) {
3275 			error = ENXIO;
3276 			break;
3277 		}
3278 		if (ifnet_type(peer) != IFT_ETHER) {
3279 			error = EINVAL;
3280 			break;
3281 		}
3282 		if (strcmp(ifnet_name(peer), FAKE_ETHER_NAME) != 0) {
3283 			error = EINVAL;
3284 			break;
3285 		}
3286 		error = feth_config(ifp, peer);
3287 		break;
3288 	case IF_FAKE_S_CMD_SET_MEDIA:
3289 		error = if_fake_request_copyin(user_addr, &iffr, len);
3290 		if (error != 0) {
3291 			break;
3292 		}
3293 		error = feth_set_media(ifp, &iffr);
3294 		break;
3295 	case IF_FAKE_S_CMD_SET_DEQUEUE_STALL:
3296 		error = if_fake_request_copyin(user_addr, &iffr, len);
3297 		if (error != 0) {
3298 			break;
3299 		}
3300 		error = feth_enable_dequeue_stall(ifp,
3301 		    iffr.iffr_dequeue_stall);
3302 		break;
3303 	default:
3304 		error = EOPNOTSUPP;
3305 		break;
3306 	}
3307 	return error;
3308 }
3309 
3310 static int
feth_get_drvspec(ifnet_t ifp,u_int32_t cmd,u_int32_t len,user_addr_t user_addr)3311 feth_get_drvspec(ifnet_t ifp, u_int32_t cmd, u_int32_t len,
3312     user_addr_t user_addr)
3313 {
3314 	int                     error = EOPNOTSUPP;
3315 	if_fake_ref             fakeif;
3316 	struct if_fake_request  iffr;
3317 	ifnet_t                 peer;
3318 
3319 	switch (cmd) {
3320 	case IF_FAKE_G_CMD_GET_PEER:
3321 		if (len < sizeof(iffr)) {
3322 			error = EINVAL;
3323 			break;
3324 		}
3325 		feth_lock();
3326 		fakeif = ifnet_get_if_fake(ifp);
3327 		if (fakeif == NULL) {
3328 			feth_unlock();
3329 			error = EOPNOTSUPP;
3330 			break;
3331 		}
3332 		peer = fakeif->iff_peer;
3333 		feth_unlock();
3334 		bzero(&iffr, sizeof(iffr));
3335 		if (peer != NULL) {
3336 			strlcpy(iffr.iffr_peer_name,
3337 			    if_name(peer),
3338 			    sizeof(iffr.iffr_peer_name));
3339 		}
3340 		error = copyout(&iffr, user_addr, sizeof(iffr));
3341 		break;
3342 	default:
3343 		break;
3344 	}
3345 	return error;
3346 }
3347 
/*
 * union ifdrvu
 * - lets the same ioctl data pointer be viewed as either the 32-bit or the
 *   64-bit variant of the driver-specific request structure
 */
union ifdrvu {
	struct ifdrv32  *ifdrvu_32;     /* view as struct ifdrv32 */
	struct ifdrv64  *ifdrvu_64;     /* view as struct ifdrv64 */
	void            *ifdrvu_p;      /* untyped pointer */
};
3353 
3354 static int
feth_ioctl(ifnet_t ifp,u_long cmd,void * data)3355 feth_ioctl(ifnet_t ifp, u_long cmd, void * data)
3356 {
3357 	unsigned int            count;
3358 	struct ifdevmtu *       devmtu_p;
3359 	union ifdrvu            drv;
3360 	uint32_t                drv_cmd;
3361 	uint32_t                drv_len;
3362 	boolean_t               drv_set_command = FALSE;
3363 	int                     error = 0;
3364 	struct ifmediareq *     ifmr;
3365 	struct ifreq *          ifr;
3366 	if_fake_ref             fakeif;
3367 	int                     status;
3368 	user_addr_t             user_addr;
3369 
3370 	ifr = (struct ifreq *)data;
3371 	switch (cmd) {
3372 	case SIOCSIFADDR:
3373 		ifnet_set_flags(ifp, IFF_UP, IFF_UP);
3374 		break;
3375 
3376 	case SIOCGIFMEDIA32:
3377 	case SIOCGIFMEDIA64:
3378 		feth_lock();
3379 		fakeif = ifnet_get_if_fake(ifp);
3380 		if (fakeif == NULL) {
3381 			feth_unlock();
3382 			return EOPNOTSUPP;
3383 		}
3384 		status = (fakeif->iff_peer != NULL)
3385 		    ? (IFM_AVALID | IFM_ACTIVE) : IFM_AVALID;
3386 		ifmr = (struct ifmediareq *)data;
3387 		user_addr = (cmd == SIOCGIFMEDIA64) ?
3388 		    ((struct ifmediareq64 *)ifmr)->ifmu_ulist :
3389 		    CAST_USER_ADDR_T(((struct ifmediareq32 *)ifmr)->ifmu_ulist);
3390 		count = ifmr->ifm_count;
3391 		ifmr->ifm_active = IFM_ETHER;
3392 		ifmr->ifm_current = IFM_ETHER;
3393 		ifmr->ifm_mask = 0;
3394 		ifmr->ifm_status = status;
3395 		if (user_addr == USER_ADDR_NULL) {
3396 			ifmr->ifm_count = fakeif->iff_media_count;
3397 		} else if (count > 0) {
3398 			if (count > fakeif->iff_media_count) {
3399 				count = fakeif->iff_media_count;
3400 			}
3401 			ifmr->ifm_count = count;
3402 			error = copyout(&fakeif->iff_media_list, user_addr,
3403 			    count * sizeof(int));
3404 		}
3405 		feth_unlock();
3406 		break;
3407 
3408 	case SIOCGIFDEVMTU:
3409 		devmtu_p = &ifr->ifr_devmtu;
3410 		devmtu_p->ifdm_current = ifnet_mtu(ifp);
3411 		devmtu_p->ifdm_max = feth_max_mtu(ifp);
3412 		devmtu_p->ifdm_min = IF_MINMTU;
3413 		break;
3414 
3415 	case SIOCSIFMTU:
3416 		if ((unsigned int)ifr->ifr_mtu > feth_max_mtu(ifp) ||
3417 		    ifr->ifr_mtu < IF_MINMTU) {
3418 			error = EINVAL;
3419 		} else {
3420 			error = ifnet_set_mtu(ifp, ifr->ifr_mtu);
3421 		}
3422 		break;
3423 
3424 	case SIOCSDRVSPEC32:
3425 	case SIOCSDRVSPEC64:
3426 		error = proc_suser(current_proc());
3427 		if (error != 0) {
3428 			break;
3429 		}
3430 		drv_set_command = TRUE;
3431 		OS_FALLTHROUGH;
3432 	case SIOCGDRVSPEC32:
3433 	case SIOCGDRVSPEC64:
3434 		drv.ifdrvu_p = data;
3435 		if (cmd == SIOCGDRVSPEC32 || cmd == SIOCSDRVSPEC32) {
3436 			drv_cmd = drv.ifdrvu_32->ifd_cmd;
3437 			drv_len = drv.ifdrvu_32->ifd_len;
3438 			user_addr = CAST_USER_ADDR_T(drv.ifdrvu_32->ifd_data);
3439 		} else {
3440 			drv_cmd = drv.ifdrvu_64->ifd_cmd;
3441 			drv_len = drv.ifdrvu_64->ifd_len;
3442 			user_addr = drv.ifdrvu_64->ifd_data;
3443 		}
3444 		if (drv_set_command) {
3445 			error = feth_set_drvspec(ifp, drv_cmd, drv_len,
3446 			    user_addr);
3447 		} else {
3448 			error = feth_get_drvspec(ifp, drv_cmd, drv_len,
3449 			    user_addr);
3450 		}
3451 		break;
3452 
3453 	case SIOCSIFLLADDR:
3454 		error = ifnet_set_lladdr(ifp, ifr->ifr_addr.sa_data,
3455 		    ifr->ifr_addr.sa_len);
3456 		break;
3457 
3458 	case SIOCSIFFLAGS:
3459 		if ((ifp->if_flags & IFF_UP) != 0) {
3460 			/* marked up, set running if not already set */
3461 			if ((ifp->if_flags & IFF_RUNNING) == 0) {
3462 				/* set running */
3463 				error = ifnet_set_flags(ifp, IFF_RUNNING,
3464 				    IFF_RUNNING);
3465 			}
3466 		} else if ((ifp->if_flags & IFF_RUNNING) != 0) {
3467 			/* marked down, clear running */
3468 			error = ifnet_set_flags(ifp, 0, IFF_RUNNING);
3469 		}
3470 		break;
3471 
3472 	case SIOCADDMULTI:
3473 	case SIOCDELMULTI:
3474 		error = 0;
3475 		break;
3476 	default:
3477 		error = EOPNOTSUPP;
3478 		break;
3479 	}
3480 	return error;
3481 }
3482 
3483 static void
feth_if_free(ifnet_t ifp)3484 feth_if_free(ifnet_t ifp)
3485 {
3486 	if_fake_ref             fakeif;
3487 
3488 	if (ifp == NULL) {
3489 		return;
3490 	}
3491 	feth_lock();
3492 	fakeif = ifnet_get_if_fake(ifp);
3493 	if (fakeif == NULL) {
3494 		feth_unlock();
3495 		return;
3496 	}
3497 	ifp->if_softc = NULL;
3498 #if SKYWALK
3499 	VERIFY(fakeif->iff_doorbell_tcall == NULL);
3500 #endif /* SKYWALK */
3501 	feth_unlock();
3502 	feth_release(fakeif);
3503 	ifnet_release(ifp);
3504 	return;
3505 }
3506 
3507 __private_extern__ void
if_fake_init(void)3508 if_fake_init(void)
3509 {
3510 	int error;
3511 
3512 #if SKYWALK
3513 	(void)feth_register_nexus_domain_provider();
3514 #endif /* SKYWALK */
3515 	error = if_clone_attach(&feth_cloner);
3516 	if (error != 0) {
3517 		return;
3518 	}
3519 	return;
3520 }
3521