1 /*
2 * Copyright (c) 2015-2022 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 /*
30 * if_fake.c
31 * - fake network interface used for testing
32 * - "feth" (e.g. "feth0", "feth1") is a virtual ethernet interface that allows
33 * two instances to have their output/input paths "crossed-over" so that
34 * output on one is input on the other
35 */
36
37 /*
38 * Modification History:
39 *
40 * September 9, 2015 Dieter Siegmund ([email protected])
41 * - created
42 */
43
44 #include <sys/param.h>
45 #include <sys/kernel.h>
46 #include <sys/malloc.h>
47 #include <sys/mbuf.h>
48 #include <sys/queue.h>
49 #include <sys/socket.h>
50 #include <sys/sockio.h>
51 #include <sys/sysctl.h>
52 #include <sys/systm.h>
53 #include <sys/kern_event.h>
54 #include <sys/mcache.h>
55 #include <sys/syslog.h>
56
57 #include <net/bpf.h>
58 #include <net/ethernet.h>
59 #include <net/if.h>
60 #include <net/if_vlan_var.h>
61 #include <net/if_fake_var.h>
62 #include <net/if_arp.h>
63 #include <net/if_dl.h>
64 #include <net/if_ether.h>
65 #include <net/if_types.h>
66 #include <libkern/OSAtomic.h>
67
68 #include <net/dlil.h>
69
70 #include <net/kpi_interface.h>
71 #include <net/kpi_protocol.h>
72
73 #include <kern/locks.h>
74 #include <kern/zalloc.h>
75
76 #include <mach/mach_time.h>
77
78 #ifdef INET
79 #include <netinet/in.h>
80 #include <netinet/if_ether.h>
81 #endif
82
83 #include <net/if_media.h>
84 #include <net/ether_if_module.h>
85 #if SKYWALK
86 #include <skywalk/os_skywalk_private.h>
87 #include <skywalk/nexus/netif/nx_netif.h>
88 #include <skywalk/channel/channel_var.h>
89 #endif /* SKYWALK */
90
91 static boolean_t
is_power_of_two(unsigned int val)92 is_power_of_two(unsigned int val)
93 {
94 return (val & (val - 1)) == 0;
95 }
96
#define FAKE_ETHER_NAME "feth"

/* sysctl node: all feth tunables live under net.link.fake.* */
SYSCTL_DECL(_net_link);
SYSCTL_NODE(_net_link, OID_AUTO, fake, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
    "Fake interface");

/* net.link.fake.txstart: attach using the ifnet start-callback (TXSTART) model */
static int if_fake_txstart = 1;
SYSCTL_INT(_net_link_fake, OID_AUTO, txstart, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_txstart, 0, "Fake interface TXSTART mode");

/* net.link.fake.hwcsum: simulate hardware checksum offload */
static int if_fake_hwcsum = 0;
SYSCTL_INT(_net_link_fake, OID_AUTO, hwcsum, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_hwcsum, 0, "Fake interface simulate hardware checksum");

/* net.link.fake.nxattach: automatically attach a nexus */
static int if_fake_nxattach = 0;
SYSCTL_INT(_net_link_fake, OID_AUTO, nxattach, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_nxattach, 0, "Fake interface auto-attach nexus");

/* net.link.fake.bsd_mode: create as a classic (mbuf-based) BSD interface */
static int if_fake_bsd_mode = 1;
SYSCTL_INT(_net_link_fake, OID_AUTO, bsd_mode, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_bsd_mode, 0, "Fake interface attach as BSD interface");

/* net.link.fake.debug: enables FETH_DPRINTF logging below */
static int if_fake_debug = 0;
SYSCTL_INT(_net_link_fake, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_debug, 0, "Fake interface debug logs");

/* debug printf gated on net.link.fake.debug; prefixes the caller's name */
#define FETH_DPRINTF(fmt, ...) \
	{ if (if_fake_debug != 0) printf("%s " fmt, __func__, ## __VA_ARGS__); }

/* net.link.fake.wmm_mode: expose 802.11 WMM-style queue geometry */
static int if_fake_wmm_mode = 0;
SYSCTL_INT(_net_link_fake, OID_AUTO, wmm_mode, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_wmm_mode, 0, "Fake interface in 802.11 WMM mode");

/* net.link.fake.multibuflet: build packets from multiple chained buflets */
static int if_fake_multibuflet = 0;
SYSCTL_INT(_net_link_fake, OID_AUTO, multibuflet, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_multibuflet, 0, "Fake interface using multi-buflet packets");

static int if_fake_low_latency = 0;
SYSCTL_INT(_net_link_fake, OID_AUTO, low_latency, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_low_latency, 0, "Fake interface with a low latency qset");

static int if_fake_switch_combined_mode = 0;
SYSCTL_INT(_net_link_fake, OID_AUTO, switch_combined_mode,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_fake_switch_combined_mode, 0,
    "Switch a qset between combined and separate mode during dequeues");

static int if_fake_switch_mode_frequency = 10;
SYSCTL_INT(_net_link_fake, OID_AUTO, switch_mode_frequency,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_fake_switch_mode_frequency, 0,
    "The number of dequeues before we switch between the combined and separated mode");

static int if_fake_tso_support = 0;
SYSCTL_INT(_net_link_fake, OID_AUTO, tso_support, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_tso_support, 0, "Fake interface with support for TSO offload");

/* default TX expiration threshold, in microseconds */
#define DEFAULT_EXPIRATION_THRESHOLD 500 /* usec */
static int if_fake_expiration_threshold_us = DEFAULT_EXPIRATION_THRESHOLD;
SYSCTL_INT(_net_link_fake, OID_AUTO, expiration_threshold, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_expiration_threshold_us, DEFAULT_EXPIRATION_THRESHOLD,
    "Expiration threshold (usec) for expiration testing");

static int if_fake_lro = 0;
SYSCTL_INT(_net_link_fake, OID_AUTO, lro, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_lro, 0, "Fake interface report LRO capability");

/* packet-pool ownership modes for net.link.fake.pktpool_mode */
typedef enum {
	IFF_PP_MODE_GLOBAL = 0, /* share a global pool */
	IFF_PP_MODE_PRIVATE = 1, /* creates its own rx/tx pool */
	IFF_PP_MODE_PRIVATE_SPLIT = 2, /* creates its own split rx & tx pool */
} iff_pktpool_mode_t;
static iff_pktpool_mode_t if_fake_pktpool_mode = IFF_PP_MODE_GLOBAL;
SYSCTL_INT(_net_link_fake, OID_AUTO, pktpool_mode, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_pktpool_mode, IFF_PP_MODE_GLOBAL,
    "Fake interface packet pool mode (0 global, 1 private, 2 private split");

/* aggregation factor bounds (note: "AGGRETATION" spelling is the existing name) */
#define FETH_LINK_LAYER_AGGRETATION_FACTOR_MAX 512
#define FETH_LINK_LAYER_AGGRETATION_FACTOR_DEF 96
static int if_fake_link_layer_aggregation_factor =
    FETH_LINK_LAYER_AGGRETATION_FACTOR_DEF;
176 static int
177 feth_link_layer_aggregation_factor_sysctl SYSCTL_HANDLER_ARGS
178 {
179 #pragma unused(oidp, arg1, arg2)
180 unsigned int new_value;
181 int changed;
182 int error;
183
184 error = sysctl_io_number(req, if_fake_link_layer_aggregation_factor,
185 sizeof(if_fake_link_layer_aggregation_factor), &new_value,
186 &changed);
187 if (error == 0 && changed != 0) {
188 if (new_value <= 0 ||
189 new_value > FETH_LINK_LAYER_AGGRETATION_FACTOR_MAX) {
190 return EINVAL;
191 }
192 if_fake_link_layer_aggregation_factor = new_value;
193 }
194 return error;
195 }
196
/* sysctl net.link.fake.link_layer_aggregation_factor */
SYSCTL_PROC(_net_link_fake, OID_AUTO, link_layer_aggregation_factor,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    0, 0, feth_link_layer_aggregation_factor_sysctl, "IU",
    "Fake interface link layer aggregation factor");

/* TX headroom: must be a multiple of 8, at most FETH_TX_HEADROOM_MAX bytes */
#define FETH_TX_HEADROOM_MAX 32
static unsigned int if_fake_tx_headroom = FETH_TX_HEADROOM_MAX;
204 static int
205 feth_tx_headroom_sysctl SYSCTL_HANDLER_ARGS
206 {
207 #pragma unused(oidp, arg1, arg2)
208 unsigned int new_value;
209 int changed;
210 int error;
211
212 error = sysctl_io_number(req, if_fake_tx_headroom,
213 sizeof(if_fake_tx_headroom), &new_value, &changed);
214 if (error == 0 && changed != 0) {
215 if (new_value > FETH_TX_HEADROOM_MAX ||
216 (new_value % 8) != 0) {
217 return EINVAL;
218 }
219 if_fake_tx_headroom = new_value;
220 }
221 return 0;
222 }
223
/* sysctl net.link.fake.tx_headroom */
SYSCTL_PROC(_net_link_fake, OID_AUTO, tx_headroom,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    0, 0, feth_tx_headroom_sysctl, "IU", "Fake ethernet Tx headroom");

/* net.link.fake.fcs: use a frame check sequence on frames */
static int if_fake_fcs = 0;
SYSCTL_INT(_net_link_fake, OID_AUTO, fcs, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_fcs, 0, "Fake interface using frame check sequence");

/* optional frame trailer; feth_trailer holds the pattern bytes appended */
#define FETH_TRAILER_LENGTH_MAX 28
char feth_trailer[FETH_TRAILER_LENGTH_MAX + 1] = "trailertrailertrailertrailer";
static unsigned int if_fake_trailer_length = 0;
235 static int
236 feth_trailer_length_sysctl SYSCTL_HANDLER_ARGS
237 {
238 #pragma unused(oidp, arg1, arg2)
239 unsigned int new_value;
240 int changed;
241 int error;
242
243 error = sysctl_io_number(req, if_fake_trailer_length,
244 sizeof(if_fake_trailer_length), &new_value, &changed);
245 if (error == 0 && changed != 0) {
246 if (new_value > FETH_TRAILER_LENGTH_MAX) {
247 return EINVAL;
248 }
249 if_fake_trailer_length = new_value;
250 }
251 return 0;
252 }
253
/* sysctl net.link.fake.trailer_length */
SYSCTL_PROC(_net_link_fake, OID_AUTO, trailer_length,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
    feth_trailer_length_sysctl, "IU", "Fake interface frame trailer length");

/* sysctl net.link.fake.max_mtu */
#define FETH_MAX_MTU_DEFAULT 2048
#define FETH_MAX_MTU_MAX ((16 * 1024) - ETHER_HDR_LEN)

static unsigned int if_fake_max_mtu = FETH_MAX_MTU_DEFAULT;

/* sysctl net.link.fake.buflet_size */
#define FETH_BUFLET_SIZE_MIN 512
#define FETH_BUFLET_SIZE_MAX (32 * 1024)
#define FETH_TSO_BUFLET_SIZE (16 * 1024)

static unsigned int if_fake_buflet_size = FETH_BUFLET_SIZE_MIN;
static unsigned int if_fake_tso_buffer_size = FETH_TSO_BUFLET_SIZE;
271
272 static int
273 feth_tso_buffer_size_sysctl SYSCTL_HANDLER_ARGS
274 {
275 #pragma unused(oidp, arg1, arg2)
276 unsigned int new_value;
277 int changed;
278 int error;
279
280 error = sysctl_io_number(req, if_fake_tso_buffer_size,
281 sizeof(if_fake_tso_buffer_size), &new_value, &changed);
282 if (error == 0 && changed != 0) {
283 /* must be a power of 2 between min and max */
284 if (new_value > FETH_BUFLET_SIZE_MAX ||
285 new_value < FETH_BUFLET_SIZE_MIN ||
286 !is_power_of_two(new_value)) {
287 return EINVAL;
288 }
289 if_fake_tso_buffer_size = new_value;
290 }
291 return 0;
292 }
293
/* sysctl net.link.fake.tso_buf_size */
SYSCTL_PROC(_net_link_fake, OID_AUTO, tso_buf_size,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    0, 0, feth_tso_buffer_size_sysctl, "IU", "Fake interface TSO buffer size");
297
298 static int
299 feth_max_mtu_sysctl SYSCTL_HANDLER_ARGS
300 {
301 #pragma unused(oidp, arg1, arg2)
302 unsigned int new_value;
303 int changed;
304 int error;
305
306 error = sysctl_io_number(req, if_fake_max_mtu,
307 sizeof(if_fake_max_mtu), &new_value, &changed);
308 if (error == 0 && changed != 0) {
309 if (new_value > FETH_MAX_MTU_MAX ||
310 new_value < ETHERMTU ||
311 new_value <= if_fake_buflet_size) {
312 return EINVAL;
313 }
314 if_fake_max_mtu = new_value;
315 }
316 return 0;
317 }
318
/* sysctl net.link.fake.max_mtu */
SYSCTL_PROC(_net_link_fake, OID_AUTO, max_mtu,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    0, 0, feth_max_mtu_sysctl, "IU", "Fake interface maximum MTU");
322
323 static int
324 feth_buflet_size_sysctl SYSCTL_HANDLER_ARGS
325 {
326 #pragma unused(oidp, arg1, arg2)
327 unsigned int new_value;
328 int changed;
329 int error;
330
331 error = sysctl_io_number(req, if_fake_buflet_size,
332 sizeof(if_fake_buflet_size), &new_value, &changed);
333 if (error == 0 && changed != 0) {
334 /* must be a power of 2 between min and max */
335 if (new_value > FETH_BUFLET_SIZE_MAX ||
336 new_value < FETH_BUFLET_SIZE_MIN ||
337 !is_power_of_two(new_value) ||
338 new_value >= if_fake_max_mtu) {
339 return EINVAL;
340 }
341 if_fake_buflet_size = new_value;
342 }
343 return 0;
344 }
345
/* sysctl net.link.fake.buflet_size */
SYSCTL_PROC(_net_link_fake, OID_AUTO, buflet_size,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    0, 0, feth_buflet_size_sysctl, "IU", "Fake interface buflet size");

/* boolean: when set, packet pools are created with KBIF_USER_ACCESS */
static unsigned int if_fake_user_access = 0;
351
352 static int
353 feth_user_access_sysctl SYSCTL_HANDLER_ARGS
354 {
355 #pragma unused(oidp, arg1, arg2)
356 unsigned int new_value;
357 int changed;
358 int error;
359
360 error = sysctl_io_number(req, if_fake_user_access,
361 sizeof(if_fake_user_access), &new_value, &changed);
362 if (error == 0 && changed != 0) {
363 if (new_value != 0) {
364 if (new_value != 1) {
365 return EINVAL;
366 }
367 }
368 if_fake_user_access = new_value;
369 }
370 return 0;
371 }
372
/* sysctl net.link.fake.user_access */
SYSCTL_PROC(_net_link_fake, OID_AUTO, user_access,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    0, 0, feth_user_access_sysctl, "IU", "Fake interface user access");

/* sysctl net.link.fake.if_adv_intvl (unit: millisecond) */
#define FETH_IF_ADV_INTVL_MIN 10
#define FETH_IF_ADV_INTVL_MAX INT_MAX

static int if_fake_if_adv_interval = 0; /* no interface advisory */
382 static int
383 feth_if_adv_interval_sysctl SYSCTL_HANDLER_ARGS
384 {
385 #pragma unused(oidp, arg1, arg2)
386 unsigned int new_value;
387 int changed;
388 int error;
389
390 error = sysctl_io_number(req, if_fake_if_adv_interval,
391 sizeof(if_fake_if_adv_interval), &new_value, &changed);
392 if (error == 0 && changed != 0) {
393 if ((new_value != 0) && (new_value > FETH_IF_ADV_INTVL_MAX ||
394 new_value < FETH_IF_ADV_INTVL_MIN)) {
395 return EINVAL;
396 }
397 if_fake_if_adv_interval = new_value;
398 }
399 return 0;
400 }
401
/* sysctl net.link.fake.if_adv_intvl */
SYSCTL_PROC(_net_link_fake, OID_AUTO, if_adv_intvl,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
    feth_if_adv_interval_sysctl, "IU",
    "Fake interface will generate interface advisories reports at the specified interval in ms");

/* sysctl net.link.fake.tx_drops */
/*
 * Fake ethernet will drop packet on the transmit path at the specified
 * rate, i.e drop one in every if_fake_tx_drops number of packets.
 */
#define FETH_TX_DROPS_MIN 0
#define FETH_TX_DROPS_MAX INT_MAX
static int if_fake_tx_drops = 0; /* no packets are dropped */
415 static int
416 feth_fake_tx_drops_sysctl SYSCTL_HANDLER_ARGS
417 {
418 #pragma unused(oidp, arg1, arg2)
419 unsigned int new_value;
420 int changed;
421 int error;
422
423 error = sysctl_io_number(req, if_fake_tx_drops,
424 sizeof(if_fake_tx_drops), &new_value, &changed);
425 if (error == 0 && changed != 0) {
426 if (new_value > FETH_TX_DROPS_MAX ||
427 new_value < FETH_TX_DROPS_MIN) {
428 return EINVAL;
429 }
430 if_fake_tx_drops = new_value;
431 }
432 return 0;
433 }
434
/* sysctl net.link.fake.tx_drops */
SYSCTL_PROC(_net_link_fake, OID_AUTO, tx_drops,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
    feth_fake_tx_drops_sysctl, "IU",
    "Fake interface will intermittently drop packets on Tx path");

/* sysctl.net.link.fake.tx_exp_policy */

/* policy for handling expired TX attempts (see sysctl description below) */
typedef enum {
	IFF_TX_EXP_POLICY_DISABLED = 0, /* Expiry notification disabled */
	IFF_TX_EXP_POLICY_DROP_AND_NOTIFY = 1, /* Expiry notification enabled; drop + notify mode */
	IFF_TX_EXP_POLICY_NOTIFY_ONLY = 2, /* Expiry notification enabled; notify only mode */
	IFF_TX_EXP_POLICY_METADATA = 3, /* Expiry notification enabled; use packet metadata */
} iff_tx_exp_policy_t;
static iff_tx_exp_policy_t if_fake_tx_exp_policy = IFF_TX_EXP_POLICY_DISABLED;
449
450 static int
451 feth_fake_tx_exp_policy_sysctl SYSCTL_HANDLER_ARGS
452 {
453 #pragma unused(oidp, arg1, arg2)
454 unsigned int new_value;
455 int changed;
456 int error;
457
458 error = sysctl_io_number(req, if_fake_tx_exp_policy,
459 sizeof(if_fake_tx_exp_policy), &new_value, &changed);
460 FETH_DPRINTF("if_fake_tx_exp_policy: %u -> %u (%d)",
461 if_fake_tx_exp_policy, new_value, changed);
462 if (error == 0 && changed != 0) {
463 if (new_value > IFF_TX_EXP_POLICY_METADATA ||
464 new_value < IFF_TX_EXP_POLICY_DISABLED) {
465 return EINVAL;
466 }
467 if_fake_tx_exp_policy = new_value;
468 }
469 return 0;
470 }
/* sysctl net.link.fake.tx_exp_policy */
SYSCTL_PROC(_net_link_fake, OID_AUTO, tx_exp_policy,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
    feth_fake_tx_exp_policy_sysctl, "IU",
    "Fake interface handling policy for expired TX attempts "
    "(0 disabled, 1 drop and notify, 2 notify only, 3 packet metadata)");

/* sysctl net.link.fake.tx_completion_mode */
typedef enum {
	IFF_TX_COMPL_MODE_SYNC = 0, /* synchronous TX completion */
	IFF_TX_COMPL_MODE_ASYNC = 1, /* asynchronous TX completion */
} iff_tx_completion_mode_t;
static iff_tx_completion_mode_t if_tx_completion_mode = IFF_TX_COMPL_MODE_SYNC;
483 static int
484 feth_fake_tx_completion_mode_sysctl SYSCTL_HANDLER_ARGS
485 {
486 #pragma unused(oidp, arg1, arg2)
487 unsigned int new_value;
488 int changed;
489 int error;
490
491 error = sysctl_io_number(req, if_tx_completion_mode,
492 sizeof(if_tx_completion_mode), &new_value, &changed);
493 if (error == 0 && changed != 0) {
494 if (new_value > IFF_TX_COMPL_MODE_ASYNC ||
495 new_value < IFF_TX_COMPL_MODE_SYNC) {
496 return EINVAL;
497 }
498 if_tx_completion_mode = new_value;
499 }
500 return 0;
501 }
/* sysctl net.link.fake.tx_completion_mode */
SYSCTL_PROC(_net_link_fake, OID_AUTO, tx_completion_mode,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
    feth_fake_tx_completion_mode_sysctl, "IU",
    "Fake interface tx completion mode (0 synchronous, 1 asynchronous)");

/* sysctl net.link.fake.llink_cnt */

/* The maximum number of logical links (including default link) */
#define FETH_MAX_LLINKS 16
/*
 * The default number of logical links (including default link).
 * Zero means logical link mode is disabled.
 */
#define FETH_DEF_LLINKS 0

static uint32_t if_fake_llink_cnt = FETH_DEF_LLINKS;
518 static int
519 feth_fake_llink_cnt_sysctl SYSCTL_HANDLER_ARGS
520 {
521 #pragma unused(oidp, arg1, arg2)
522 unsigned int new_value;
523 int changed;
524 int error;
525
526 error = sysctl_io_number(req, if_fake_llink_cnt,
527 sizeof(if_fake_llink_cnt), &new_value, &changed);
528 if (error == 0 && changed != 0) {
529 if (new_value > FETH_MAX_LLINKS) {
530 return EINVAL;
531 }
532 if_fake_llink_cnt = new_value;
533 }
534 return 0;
535 }
536
/* sysctl net.link.fake.llink_cnt */
SYSCTL_PROC(_net_link_fake, OID_AUTO, llink_cnt,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
    feth_fake_llink_cnt_sysctl, "IU",
    "Fake interface logical link count");

/* sysctl net.link.fake.qset_cnt */

/* The maximum number of qsets for each logical link */
#define FETH_MAX_QSETS 16
/* The default number of qsets for each logical link */
#define FETH_DEF_QSETS 4

static uint32_t if_fake_qset_cnt = FETH_DEF_QSETS;
550 static int
551 feth_fake_qset_cnt_sysctl SYSCTL_HANDLER_ARGS
552 {
553 #pragma unused(oidp, arg1, arg2)
554 unsigned int new_value;
555 int changed;
556 int error;
557
558 error = sysctl_io_number(req, if_fake_qset_cnt,
559 sizeof(if_fake_qset_cnt), &new_value, &changed);
560 if (error == 0 && changed != 0) {
561 if (new_value == 0 ||
562 new_value > FETH_MAX_QSETS) {
563 return EINVAL;
564 }
565 if_fake_qset_cnt = new_value;
566 }
567 return 0;
568 }
569
/* sysctl net.link.fake.qset_cnt */
SYSCTL_PROC(_net_link_fake, OID_AUTO, qset_cnt,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
    feth_fake_qset_cnt_sysctl, "IU",
    "Fake interface queue set count");

/**
** virtual ethernet structures, types
**/

/* ring/queue geometry: WMM mode uses 4 TX / 1 RX; the maxima match WMM */
#define IFF_NUM_TX_RINGS_WMM_MODE 4
#define IFF_NUM_RX_RINGS_WMM_MODE 1
#define IFF_MAX_TX_RINGS IFF_NUM_TX_RINGS_WMM_MODE
#define IFF_MAX_RX_RINGS IFF_NUM_RX_RINGS_WMM_MODE
#define IFF_NUM_TX_QUEUES_WMM_MODE 4
#define IFF_NUM_RX_QUEUES_WMM_MODE 1
#define IFF_MAX_TX_QUEUES IFF_NUM_TX_QUEUES_WMM_MODE
#define IFF_MAX_RX_QUEUES IFF_NUM_RX_QUEUES_WMM_MODE

#define IFF_MAX_BATCH_SIZE 32

/* per-interface state flags stored in if_fake.iff_flags */
typedef uint16_t iff_flags_t;
#define IFF_FLAGS_HWCSUM 0x0001
#define IFF_FLAGS_BSD_MODE 0x0002
#define IFF_FLAGS_DETACHING 0x0004
#define IFF_FLAGS_WMM_MODE 0x0008
#define IFF_FLAGS_MULTIBUFLETS 0x0010
#define IFF_FLAGS_TSO_SUPPORT 0x0020
#define IFF_FLAGS_LRO 0x0040

#if SKYWALK

/* identifies a nexus: provider UUID plus instance UUID */
typedef struct {
	uuid_t fnx_provider;
	uuid_t fnx_instance;
} fake_nx, *fake_nx_t;

/* wrapper around a single netif queue handle */
typedef struct {
	kern_netif_queue_t fq_queue;
} fake_queue;

/* a queue set: its RX/TX queues plus its position within the logical link */
typedef struct {
	kern_netif_qset_t fqs_qset; /* provided by xnu */
	fake_queue fqs_rx_queue[IFF_MAX_RX_QUEUES];
	fake_queue fqs_tx_queue[IFF_MAX_TX_QUEUES];
	uint32_t fqs_rx_queue_cnt;
	uint32_t fqs_tx_queue_cnt;
	uint32_t fqs_llink_idx; /* index of the owning logical link */
	uint32_t fqs_idx; /* index within the logical link */
	uint32_t fqs_dequeue_cnt;
	uint64_t fqs_id;
	boolean_t fqs_combined_mode;
} fake_qset;

/* a logical link and its queue sets */
typedef struct {
	uint64_t fl_id;
	uint32_t fl_idx;
	uint32_t fl_qset_cnt;
	fake_qset fl_qset[FETH_MAX_QSETS];
} fake_llink;

/* the single global packet pool shared in IFF_PP_MODE_GLOBAL (lazily created) */
static kern_pbufpool_t S_pp;

#define IFF_TT_OUTPUT 0x01 /* generate trace_tag on output */
#define IFF_TT_INPUT 0x02 /* generate trace_tag on input */
static int if_fake_trace_tag_flags = 0;
SYSCTL_INT(_net_link_fake, OID_AUTO, trace_tag, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_trace_tag_flags, 0, "Fake interface generate trace_tag");
/* next tag to hand out; wraps past 0 so 0 is never used as a tag */
static packet_trace_tag_t if_fake_trace_tag_current = 1;

#endif /* SKYWALK */
640
/*
 * if_fake: per-interface soft state.
 * Reference-counted via feth_retain()/feth_release(); freed by feth_free()
 * when the last reference drops. Shared state is protected by feth_lck_mtx
 * (see feth_lock()/feth_unlock()).
 */
struct if_fake {
	char iff_name[IFNAMSIZ]; /* our unique id */
	ifnet_t iff_ifp;
	iff_flags_t iff_flags; /* IFF_FLAGS_* bits */
	uint32_t iff_retain_count;
	ifnet_t iff_peer; /* the other end */
	int iff_media_current;
	int iff_media_active;
	uint32_t iff_media_count;
	int iff_media_list[IF_FAKE_MEDIA_LIST_MAX];
	struct mbuf * iff_pending_tx_packet; /* BSD mode: freed in feth_free() */
	boolean_t iff_start_busy;
	unsigned int iff_max_mtu;
	uint32_t iff_fcs;
	uint32_t iff_trailer_length;
#if SKYWALK
	fake_nx iff_nx; /* netif nexus provider/instance UUIDs */
	struct netif_stats *iff_nifs;
	uint32_t iff_nifs_ref;
	uint32_t iff_llink_cnt;
	kern_channel_ring_t iff_rx_ring[IFF_MAX_RX_RINGS];
	kern_channel_ring_t iff_tx_ring[IFF_MAX_TX_RINGS];
	fake_llink *iff_llink __counted_by(FETH_MAX_LLINKS);
	thread_call_t iff_doorbell_tcall;
	thread_call_t iff_if_adv_tcall; /* interface advisory thread call */
	boolean_t iff_doorbell_tcall_active;
	boolean_t iff_waiting_for_tcall;
	boolean_t iff_channel_connected;
	iff_pktpool_mode_t iff_pp_mode; /* global / private / private-split */
	kern_pbufpool_t iff_rx_pp; /* released (and NULLed) in feth_free() */
	kern_pbufpool_t iff_tx_pp; /* equals iff_rx_pp except in split mode */
	uint32_t iff_tx_headroom;
	unsigned int iff_adv_interval;
	uint32_t iff_tx_drop_rate; /* cf. net.link.fake.tx_drops; 0 = no drops */
	uint32_t iff_tx_pkts_count;
	iff_tx_completion_mode_t iff_tx_completion_mode;
	bool iff_intf_adv_enabled;
	void *iff_intf_adv_kern_ctx;
	kern_nexus_capab_interface_advisory_notify_fn_t iff_intf_adv_notify;
	iff_tx_exp_policy_t iff_tx_exp_policy;
#endif /* SKYWALK */
};

typedef struct if_fake * if_fake_ref;

/* looks up the if_fake attached to an ifnet (defined later in this file) */
static if_fake_ref
ifnet_get_if_fake(ifnet_t ifp);
688
689 static inline boolean_t
feth_in_bsd_mode(if_fake_ref fakeif)690 feth_in_bsd_mode(if_fake_ref fakeif)
691 {
692 return (fakeif->iff_flags & IFF_FLAGS_BSD_MODE) != 0;
693 }
694
/*
 * feth_set_detaching
 * - marks the interface as being torn down; tested via feth_is_detaching()
 */
static inline void
feth_set_detaching(if_fake_ref fakeif)
{
	fakeif->iff_flags |= IFF_FLAGS_DETACHING;
}
700
701 static inline boolean_t
feth_is_detaching(if_fake_ref fakeif)702 feth_is_detaching(if_fake_ref fakeif)
703 {
704 return (fakeif->iff_flags & IFF_FLAGS_DETACHING) != 0;
705 }
706
707 static int
feth_enable_dequeue_stall(ifnet_t ifp,uint32_t enable)708 feth_enable_dequeue_stall(ifnet_t ifp, uint32_t enable)
709 {
710 int error;
711
712 if (enable != 0) {
713 error = ifnet_disable_output(ifp);
714 } else {
715 error = ifnet_enable_output(ifp);
716 }
717
718 return error;
719 }
720
#if SKYWALK
/*
 * feth_in_wmm_mode
 * - TRUE when the interface uses WMM-style queue geometry
 */
static inline boolean_t
feth_in_wmm_mode(if_fake_ref fakeif)
{
	return (fakeif->iff_flags & IFF_FLAGS_WMM_MODE) != 0;
}

/*
 * feth_using_multibuflets
 * - TRUE when packets are built from multiple chained buflets
 */
static inline boolean_t
feth_using_multibuflets(if_fake_ref fakeif)
{
	return (fakeif->iff_flags & IFF_FLAGS_MULTIBUFLETS) != 0;
}
static void feth_detach_netif_nexus(if_fake_ref fakeif);

/*
 * feth_has_intf_advisory_configured
 * - TRUE when a non-zero advisory reporting interval has been configured
 */
static inline boolean_t
feth_has_intf_advisory_configured(if_fake_ref fakeif)
{
	return fakeif->iff_adv_interval > 0;
}

/*
 * feth_supports_tso
 * - TRUE when the interface advertises TSO offload support
 */
static inline bool
feth_supports_tso(if_fake_ref fakeif)
{
	return (fakeif->iff_flags & IFF_FLAGS_TSO_SUPPORT) != 0;
}
#endif /* SKYWALK */
747
#define FETH_MAXUNIT IF_MAXUNIT
#define FETH_ZONE_MAX_ELEM MIN(IFNETS_MAX, FETH_MAXUNIT)

/* forward declarations: if_clone callbacks and ifnet entry points */
static int feth_clone_create(struct if_clone *, u_int32_t, void *);
static int feth_clone_destroy(ifnet_t);
static int feth_output(ifnet_t ifp, struct mbuf *m);
static void feth_start(ifnet_t ifp);
static int feth_ioctl(ifnet_t ifp, u_long cmd, void * addr);
static int feth_config(ifnet_t ifp, ifnet_t peer);
static void feth_if_free(ifnet_t ifp);
static void feth_ifnet_set_attrs(if_fake_ref fakeif, ifnet_t ifp);
static void feth_free(if_fake_ref fakeif);

/* interface cloner definition for FAKE_ETHER_NAME ("feth") interfaces */
static struct if_clone
    feth_cloner = IF_CLONE_INITIALIZER(FAKE_ETHER_NAME,
    feth_clone_create,
    feth_clone_destroy,
    0,
    FETH_MAXUNIT);
static void interface_link_event(ifnet_t ifp, u_int32_t event_code);
768
/* some media words to pretend to be ethernet */
static int default_media_words[] = {
	IFM_MAKEWORD(IFM_ETHER, 0, 0, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_10G_T, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_2500_T, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_5000_T, IFM_FDX, 0),

	IFM_MAKEWORD(IFM_ETHER, IFM_10G_KX4, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_20G_KR2, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_2500_SX, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_25G_KR, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_40G_SR4, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_50G_CR2, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_56G_R4, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_100G_CR4, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_400G_AUI8, IFM_FDX, 0),
};
/* element count of default_media_words (an actual array, so sizeof works) */
#define default_media_words_count (sizeof(default_media_words) \
    / sizeof (default_media_words[0]))

/**
** veth locks
**/

/* the single module-global mutex serializing feth state */
static LCK_GRP_DECLARE(feth_lck_grp, "fake");
static LCK_MTX_DECLARE(feth_lck_mtx, &feth_lck_grp);

/* acquire the module-global feth mutex */
static inline void
feth_lock(void)
{
	lck_mtx_lock(&feth_lck_mtx);
}

/* release the module-global feth mutex */
static inline void
feth_unlock(void)
{
	lck_mtx_unlock(&feth_lck_mtx);
}
807
808 static inline int
get_max_mtu(int bsd_mode,unsigned int max_mtu)809 get_max_mtu(int bsd_mode, unsigned int max_mtu)
810 {
811 unsigned int mtu;
812
813 if (bsd_mode != 0) {
814 mtu = (njcl > 0) ? (M16KCLBYTES - ETHER_HDR_LEN)
815 : MBIGCLBYTES - ETHER_HDR_LEN;
816 if (mtu > max_mtu) {
817 mtu = max_mtu;
818 }
819 } else {
820 mtu = max_mtu;
821 }
822 return mtu;
823 }
824
825 static inline unsigned int
feth_max_mtu(ifnet_t ifp)826 feth_max_mtu(ifnet_t ifp)
827 {
828 if_fake_ref fakeif;
829 unsigned int max_mtu = ETHERMTU;
830
831 feth_lock();
832 fakeif = ifnet_get_if_fake(ifp);
833 if (fakeif != NULL) {
834 max_mtu = fakeif->iff_max_mtu;
835 }
836 feth_unlock();
837 return max_mtu;
838 }
839
/*
 * feth_free
 * - final teardown, called from feth_release() when the last reference
 *   drops (retain count must already be 0)
 * - BSD mode: frees any mbuf still parked for transmit
 * - skywalk mode: drops this interface's packet-pool references; for the
 *   shared global pool, also drops the creation reference (and clears
 *   S_pp) when this interface was the last user
 */
static void
feth_free(if_fake_ref fakeif)
{
	VERIFY(fakeif->iff_retain_count == 0);
	if (feth_in_bsd_mode(fakeif)) {
		if (fakeif->iff_pending_tx_packet) {
			m_freem(fakeif->iff_pending_tx_packet);
		}
	}
#if SKYWALK
	else {
		if (fakeif->iff_pp_mode == IFF_PP_MODE_GLOBAL) {
			/* both references must point at the shared pool */
			VERIFY(fakeif->iff_rx_pp == S_pp);
			VERIFY(fakeif->iff_tx_pp == S_pp);
			pp_release(fakeif->iff_rx_pp);
			fakeif->iff_rx_pp = NULL;
			pp_release(fakeif->iff_tx_pp);
			fakeif->iff_tx_pp = NULL;
			feth_lock();
			/* last holder: drop the reference taken at creation */
			if (S_pp->pp_refcnt == 1) {
				pp_release(S_pp);
				S_pp = NULL;
			}
			feth_unlock();
		} else {
			/*
			 * Private pools: in IFF_PP_MODE_PRIVATE rx and tx
			 * are the same pool holding two references, so both
			 * releases are required.
			 */
			if (fakeif->iff_rx_pp != NULL) {
				pp_release(fakeif->iff_rx_pp);
				fakeif->iff_rx_pp = NULL;
			}
			if (fakeif->iff_tx_pp != NULL) {
				pp_release(fakeif->iff_tx_pp);
				fakeif->iff_tx_pp = NULL;
			}
		}
	}
#endif /* SKYWALK */

	FETH_DPRINTF("%s\n", fakeif->iff_name);
	kfree_type(fake_llink, FETH_MAX_LLINKS, fakeif->iff_llink);
	kfree_type(struct if_fake, fakeif);
}
881
882 static void
feth_release(if_fake_ref fakeif)883 feth_release(if_fake_ref fakeif)
884 {
885 u_int32_t old_retain_count;
886
887 old_retain_count = OSDecrementAtomic(&fakeif->iff_retain_count);
888 switch (old_retain_count) {
889 case 0:
890 VERIFY(old_retain_count != 0);
891 break;
892 case 1:
893 feth_free(fakeif);
894 break;
895 default:
896 break;
897 }
898 return;
899 }
900
901 #if SKYWALK
902
/*
 * feth_retain
 * - takes an additional reference; paired with feth_release()
 */
static void
feth_retain(if_fake_ref fakeif)
{
	OSIncrementAtomic(&fakeif->iff_retain_count);
}
908
/*
 * feth_packet_pool_init_prepare
 * - fills in a kern_pbufpool_init template for this interface's pool(s)
 * - buflet size comes from net.link.fake.buflet_size, overridden by the
 *   TSO buffer size when TSO support is enabled
 * - multi-buflet mode spreads the max MTU across several on-demand
 *   buflets; otherwise a single buflet must cover the whole MTU
 */
static void
feth_packet_pool_init_prepare(if_fake_ref fakeif,
    struct kern_pbufpool_init *pp_init)
{
	uint32_t max_mtu = fakeif->iff_max_mtu;
	uint32_t buflet_size = if_fake_buflet_size;

	bzero(pp_init, sizeof(*pp_init));
	pp_init->kbi_version = KERN_PBUFPOOL_CURRENT_VERSION;
	pp_init->kbi_flags |= KBIF_VIRTUAL_DEVICE;
	pp_init->kbi_packets = 1024; /* TBD configurable */
	if (feth_supports_tso(fakeif)) {
		buflet_size = if_fake_tso_buffer_size;
	}
	if (feth_using_multibuflets(fakeif)) {
		/* enough buflets for every packet to span the full MTU */
		pp_init->kbi_bufsize = buflet_size;
		pp_init->kbi_max_frags = howmany(max_mtu, buflet_size);
		pp_init->kbi_buflets = pp_init->kbi_packets *
		    pp_init->kbi_max_frags;
		pp_init->kbi_flags |= KBIF_BUFFER_ON_DEMAND;
	} else {
		/* one buflet per packet, sized to hold the full MTU */
		pp_init->kbi_bufsize = max(max_mtu, buflet_size);
		pp_init->kbi_max_frags = 1;
		pp_init->kbi_buflets = pp_init->kbi_packets;
	}
	pp_init->kbi_buf_seg_size = skmem_usr_buf_seg_size;
	if (if_fake_user_access != 0) {
		/* net.link.fake.user_access: allow user access to the pool */
		pp_init->kbi_flags |= KBIF_USER_ACCESS;
	}
	pp_init->kbi_ctx = NULL;
	pp_init->kbi_ctx_retain = NULL;
	pp_init->kbi_ctx_release = NULL;
}
942
943 static errno_t
feth_packet_pool_make(if_fake_ref fakeif)944 feth_packet_pool_make(if_fake_ref fakeif)
945 {
946 struct kern_pbufpool_init pp_init;
947 errno_t err;
948
949 feth_packet_pool_init_prepare(fakeif, &pp_init);
950
951 switch (fakeif->iff_pp_mode) {
952 case IFF_PP_MODE_GLOBAL:
953 feth_lock();
954 if (S_pp == NULL) {
955 (void)snprintf((char *)pp_init.kbi_name,
956 sizeof(pp_init.kbi_name), "%s", "feth shared pp");
957 err = kern_pbufpool_create(&pp_init, &S_pp, NULL);
958 }
959 pp_retain(S_pp);
960 feth_unlock();
961 fakeif->iff_rx_pp = S_pp;
962 pp_retain(S_pp);
963 fakeif->iff_tx_pp = S_pp;
964 break;
965 case IFF_PP_MODE_PRIVATE:
966 (void)snprintf((char *)pp_init.kbi_name,
967 sizeof(pp_init.kbi_name), "%s pp", fakeif->iff_name);
968 err = kern_pbufpool_create(&pp_init, &fakeif->iff_rx_pp, NULL);
969 pp_retain(fakeif->iff_rx_pp);
970 fakeif->iff_tx_pp = fakeif->iff_rx_pp;
971 break;
972 case IFF_PP_MODE_PRIVATE_SPLIT:
973 (void)snprintf((char *)pp_init.kbi_name,
974 sizeof(pp_init.kbi_name), "%s rx pp", fakeif->iff_name);
975 pp_init.kbi_flags &= ~(KBIF_IODIR_IN | KBIF_IODIR_OUT |
976 KBIF_BUFFER_ON_DEMAND | KBIF_KERNEL_READONLY);
977 pp_init.kbi_flags |= (KBIF_IODIR_IN | KBIF_BUFFER_ON_DEMAND);
978 pp_init.kbi_packets = 1024;
979 pp_init.kbi_bufsize = if_fake_link_layer_aggregation_factor * 1024;
980 err = kern_pbufpool_create(&pp_init, &fakeif->iff_rx_pp, NULL);
981 if (err != 0) {
982 printf("%s: rx pp create failed %d\n", __func__, err);
983 return err;
984 }
985 pp_init.kbi_flags &= ~(KBIF_IODIR_IN | KBIF_IODIR_OUT |
986 KBIF_BUFFER_ON_DEMAND | KBIF_KERNEL_READONLY);
987 pp_init.kbi_flags |= KBIF_IODIR_OUT;
988 pp_init.kbi_packets = 1024; /* TBD configurable */
989 pp_init.kbi_bufsize = fakeif->iff_max_mtu;
990 (void)snprintf((char *)pp_init.kbi_name,
991 sizeof(pp_init.kbi_name), "%s tx pp", fakeif->iff_name);
992 err = kern_pbufpool_create(&pp_init, &fakeif->iff_tx_pp, NULL);
993 if (err != 0) {
994 printf("%s: tx pp create failed %d\n", __func__, err);
995 pp_release(fakeif->iff_rx_pp);
996 return err;
997 }
998 break;
999 default:
1000 VERIFY(0);
1001 __builtin_unreachable();
1002 }
1003
1004 return 0;
1005 }
1006
1007 static void
feth_packet_set_trace_tag(kern_packet_t ph,int flag)1008 feth_packet_set_trace_tag(kern_packet_t ph, int flag)
1009 {
1010 if (if_fake_trace_tag_flags & flag) {
1011 if (++if_fake_trace_tag_current == 0) {
1012 if_fake_trace_tag_current = 1;
1013 }
1014 kern_packet_set_trace_tag(ph, if_fake_trace_tag_current);
1015 }
1016 }
1017
/*
 * Produce a copy of the single-buflet packet `sph' into a packet allocated
 * from `dif's RX pool, for the IFF_PP_MODE_PRIVATE_SPLIT path.
 *
 * *pdph is both input and output: on input, a previously produced packet
 * (or 0).  If the previous packet's buffer has enough unused space past its
 * data limit (dlim0), the new packet is a lightweight clone sharing that
 * buffer, with its data placed in the leftover region; otherwise a fresh
 * packet/buffer is allocated.  On success *pdph is set to the new packet.
 *
 * Returns 0 on success, or the allocation/clone error (drop stats are
 * bumped on allocation failure).
 */
static errno_t
feth_clone_packet(if_fake_ref dif, kern_packet_t sph, kern_packet_t *pdph)
{
	errno_t err = 0;
	kern_pbufpool_t pp = dif->iff_rx_pp;
	kern_packet_t dph = 0, dph0 = 0;
	kern_buflet_t sbuf, dbuf0 = NULL, dbuf;
	void *saddr, *daddr;
	uint32_t soff, doff;
	uint32_t slen, dlen;
	uint32_t dlim0, dlim;

	/* destination mirrors the source's offset and length exactly */
	sbuf = kern_packet_get_next_buflet(sph, NULL);
	saddr = kern_buflet_get_data_address(sbuf);
	doff = soff = kern_buflet_get_data_offset(sbuf);
	dlen = slen = kern_buflet_get_data_length(sbuf);

	/* packet clone is only supported for single-buflet */
	ASSERT(kern_packet_get_buflet_count(sph) == 1);
	ASSERT(soff == kern_packet_get_headroom(sph));
	ASSERT(slen == kern_packet_get_data_length(sph));

	dph0 = *pdph;
	if (dph0 == 0) {
		/* no previous packet: force the allocation path below */
		dlim0 = 0;
	} else {
		/*
		 * dlim0 = bytes remaining in the previous packet's buffer
		 * object past its current data limit (object end minus
		 * data end).
		 */
		dbuf0 = kern_packet_get_next_buflet(dph0, NULL);
		ASSERT(kern_buflet_get_object_limit(dbuf0) ==
		    PP_BUF_OBJ_SIZE_DEF(pp));
		ASSERT(kern_buflet_get_data_limit(dbuf0) % 16 == 0);
		dlim0 = ((uintptr_t)kern_buflet_get_object_address(dbuf0) +
		    kern_buflet_get_object_limit(dbuf0)) -
		    ((uintptr_t)kern_buflet_get_data_address(dbuf0) +
		    kern_buflet_get_data_limit(dbuf0));
	}

	if (doff + dlen > dlim0) {
		/* not enough leftover room: allocate a fresh packet */
		err = kern_pbufpool_alloc_nosleep(pp, 1, &dph);
		if (err != 0) {
			STATS_INC(dif->iff_nifs, NETIF_STATS_DROP);
			STATS_INC(dif->iff_nifs, NETIF_STATS_DROP_NOMEM_PKT);
			return err;
		}
		dbuf = kern_packet_get_next_buflet(dph, NULL);
		ASSERT(kern_buflet_get_data_address(dbuf) ==
		    kern_buflet_get_object_address(dbuf));
		daddr = kern_buflet_get_data_address(dbuf);
		dlim = kern_buflet_get_object_limit(dbuf);
		ASSERT(dlim == PP_BUF_OBJ_SIZE_DEF(pp));
	} else {
		/* share the previous packet's buffer via a light clone and
		 * place the new data just past the previous data limit */
		err = kern_packet_clone_nosleep(dph0, &dph, KPKT_COPY_LIGHT);
		if (err != 0) {
			printf("%s: packet clone err %d\n", __func__, err);
			return err;
		}
		dbuf = kern_packet_get_next_buflet(dph, NULL);
		ASSERT(kern_buflet_get_object_address(dbuf) ==
		    kern_buflet_get_object_address(dbuf0));
		daddr = (void *)((uintptr_t)kern_buflet_get_data_address(dbuf0) +
		    kern_buflet_get_data_limit(dbuf0));
		dlim = dlim0;
	}

	ASSERT(doff + dlen <= dlim);

	/* 16-byte alignment is maintained by the rounding of dlim below */
	ASSERT((uintptr_t)daddr % 16 == 0);

	bcopy((const void *)((uintptr_t)saddr + soff),
	    (void *)((uintptr_t)daddr + doff), slen);

	/* round the data limit up to 16 so the next clone stays aligned */
	dlim = MIN(dlim, P2ROUNDUP(doff + dlen, 16));
	err = kern_buflet_set_data_address(dbuf, daddr);
	VERIFY(err == 0);
	err = kern_buflet_set_data_limit(dbuf, dlim);
	VERIFY(err == 0);
	err = kern_buflet_set_data_length(dbuf, dlen);
	VERIFY(err == 0);
	err = kern_buflet_set_data_offset(dbuf, doff);
	VERIFY(err == 0);
	err = kern_packet_set_headroom(dph, doff);
	VERIFY(err == 0);
	err = kern_packet_set_link_header_length(dph,
	    kern_packet_get_link_header_length(sph));
	VERIFY(err == 0);
	err = kern_packet_set_service_class(dph,
	    kern_packet_get_service_class(sph));
	VERIFY(err == 0);
	err = kern_packet_finalize(dph);
	VERIFY(err == 0);
	*pdph = dph;

	return err;
}
1111
1112 static inline void
feth_copy_buflet(kern_buflet_t sbuf,kern_buflet_t dbuf)1113 feth_copy_buflet(kern_buflet_t sbuf, kern_buflet_t dbuf)
1114 {
1115 errno_t err;
1116 uint32_t off, len;
1117 uint8_t *saddr, *daddr;
1118
1119 saddr = kern_buflet_get_data_address(sbuf);
1120 off = kern_buflet_get_data_offset(sbuf);
1121 len = kern_buflet_get_data_length(sbuf);
1122 daddr = kern_buflet_get_data_address(dbuf);
1123 bcopy((saddr + off), (daddr + off), len);
1124 err = kern_buflet_set_data_offset(dbuf, off);
1125 VERIFY(err == 0);
1126 err = kern_buflet_set_data_length(dbuf, len);
1127 VERIFY(err == 0);
1128 }
1129
1130 static int
feth_add_packet_trailer(kern_packet_t ph,void * trailer,size_t trailer_len)1131 feth_add_packet_trailer(kern_packet_t ph, void *trailer, size_t trailer_len)
1132 {
1133 errno_t err = 0;
1134
1135 ASSERT(trailer_len <= FETH_TRAILER_LENGTH_MAX);
1136
1137 kern_buflet_t buf = NULL, iter = NULL;
1138 while ((iter = kern_packet_get_next_buflet(ph, iter)) != NULL) {
1139 buf = iter;
1140 }
1141 ASSERT(buf != NULL);
1142
1143 uint32_t dlim = kern_buflet_get_data_limit(buf);
1144 uint32_t doff = kern_buflet_get_data_offset(buf);
1145 uint32_t dlen = kern_buflet_get_data_length(buf);
1146
1147 size_t trailer_room = dlim - doff - dlen;
1148
1149 if (trailer_room < trailer_len) {
1150 printf("not enough room");
1151 return ERANGE;
1152 }
1153
1154 void *data = (void *)((uintptr_t)kern_buflet_get_data_address(buf) + doff + dlen);
1155 memcpy(data, trailer, trailer_len);
1156
1157 err = kern_buflet_set_data_length(buf, dlen + trailer_len);
1158 VERIFY(err == 0);
1159
1160 err = kern_packet_finalize(ph);
1161 VERIFY(err == 0);
1162
1163 FETH_DPRINTF("%s %zuB trailer added\n", __func__, trailer_len);
1164
1165 return 0;
1166 }
1167
1168 static int
feth_add_packet_fcs(kern_packet_t ph)1169 feth_add_packet_fcs(kern_packet_t ph)
1170 {
1171 uint32_t crc = 0;
1172 int err;
1173
1174 ASSERT(sizeof(crc) == ETHER_CRC_LEN);
1175
1176 kern_buflet_t buf = NULL;
1177 while ((buf = kern_packet_get_next_buflet(ph, buf)) != NULL) {
1178 uint32_t doff = kern_buflet_get_data_offset(buf);
1179 uint32_t dlen = kern_buflet_get_data_length(buf);
1180 void *data = (void *)((uintptr_t)kern_buflet_get_data_address(buf) + doff);
1181 crc = crc32(crc, data, dlen);
1182 }
1183
1184 err = feth_add_packet_trailer(ph, &crc, ETHER_CRC_LEN);
1185 if (!err) {
1186 return err;
1187 }
1188
1189 err = kern_packet_set_link_ethfcs(ph);
1190 VERIFY(err == 0);
1191
1192 return 0;
1193 }
1194
/*
 * Deep-copy packet `sph' into a new packet allocated from `dif's RX pool,
 * for the IFF_PP_MODE_PRIVATE path.  Supports multi-buflet sources only
 * when the interface is configured for multi-buflets.  On success, *pdph
 * holds the finalized copy with headroom, link-header length, and service
 * class mirrored from the source.  On allocation failure, any partially
 * built destination is freed, drop stats are bumped, and the error is
 * returned with *pdph == 0.
 */
static errno_t
feth_copy_packet(if_fake_ref dif, kern_packet_t sph, kern_packet_t *pdph)
{
	errno_t err = 0;
	uint16_t i, bufcnt;
	mach_vm_address_t baddr;
	kern_buflet_t sbuf = NULL, dbuf = NULL;
	kern_pbufpool_t pp = dif->iff_rx_pp;
	kern_packet_t dph;
	boolean_t multi_buflet = feth_using_multibuflets(dif);

	bufcnt = kern_packet_get_buflet_count(sph);
	ASSERT((bufcnt == 1) || multi_buflet);
	*pdph = 0;

	err = kern_pbufpool_alloc_nosleep(pp, 1, &dph);
	if (err != 0) {
		STATS_INC(dif->iff_nifs, NETIF_STATS_DROP);
		STATS_INC(dif->iff_nifs, NETIF_STATS_DROP_NOMEM_PKT);
		return err;
	}

	/* pre-constructed single buflet packet copy */
	sbuf = kern_packet_get_next_buflet(sph, NULL);
	dbuf = kern_packet_get_next_buflet(dph, NULL);
	feth_copy_buflet(sbuf, dbuf);

	if (!multi_buflet) {
		goto done;
	}

	/* un-constructed multi-buflet packet copy */
	for (i = 1; i < bufcnt; i++) {
		kern_buflet_t dbuf_next = NULL;

		sbuf = kern_packet_get_next_buflet(sph, sbuf);
		VERIFY(sbuf != NULL);
		err = kern_pbufpool_alloc_buflet_nosleep(pp, &dbuf_next);
		if (err != 0) {
			STATS_INC(dif->iff_nifs, NETIF_STATS_DROP);
			STATS_INC(dif->iff_nifs, NETIF_STATS_DROP_NOMEM_BUF);
			break;
		}
		ASSERT(dbuf_next != NULL);
		feth_copy_buflet(sbuf, dbuf_next);
		/* chain the new buflet after the current tail */
		err = kern_packet_add_buflet(dph, dbuf, dbuf_next);
		VERIFY(err == 0);
		dbuf = dbuf_next;
	}
	if (__improbable(err != 0)) {
		/*
		 * Unwind: walk the buflets attached so far (sanity-checking
		 * each has a backing buffer), then free the whole packet.
		 */
		dbuf = NULL;
		while (i-- != 0) {
			dbuf = kern_packet_get_next_buflet(dph, dbuf);
			VERIFY(dbuf != NULL);
			baddr = (mach_vm_address_t)
			    kern_buflet_get_data_address(dbuf);
			VERIFY(baddr != 0);
		}
		kern_pbufpool_free(pp, dph);
		dph = 0;
	}

done:
	if (__probable(err == 0)) {
		/* mirror the source packet's metadata, then finalize */
		err = kern_packet_set_headroom(dph,
		    kern_packet_get_headroom(sph));
		VERIFY(err == 0);
		err = kern_packet_set_link_header_length(dph,
		    kern_packet_get_link_header_length(sph));
		VERIFY(err == 0);
		err = kern_packet_set_service_class(dph,
		    kern_packet_get_service_class(sph));
		VERIFY(err == 0);
		err = kern_packet_finalize(dph);
		VERIFY(err == 0);
		VERIFY(bufcnt == kern_packet_get_buflet_count(dph));
		*pdph = dph;
	}
	return err;
}
1275
1276 static inline void
feth_update_pkt_tso_metadata_for_rx(kern_packet_t ph)1277 feth_update_pkt_tso_metadata_for_rx(kern_packet_t ph)
1278 {
1279 /*
1280 * Nothing to do if not a TSO offloaded packet.
1281 */
1282 uint16_t seg_sz = 0;
1283 seg_sz = kern_packet_get_protocol_segment_size(ph);
1284 if (seg_sz == 0) {
1285 return;
1286 }
1287 /*
1288 * For RX, make the packet appear as a fully validated LRO packet.
1289 */
1290 packet_csum_flags_t csum_flags = PACKET_CSUM_IP_CHECKED |
1291 PACKET_CSUM_IP_VALID | PACKET_CSUM_DATA_VALID |
1292 PACKET_CSUM_PSEUDO_HDR;
1293 (void) kern_packet_set_inet_checksum(ph, csum_flags, 0, 0xFFFF, FALSE);
1294 return;
1295 }
1296
/*
 * Deliver up to `n_pkts' packets transmitted by `sif' into `dif's RX ring
 * (the crossed-over input path).  Depending on `dif's pool mode the source
 * packet is handed over directly (GLOBAL; sphs[i] is zeroed to transfer
 * ownership), deep-copied (PRIVATE), or cloned (PRIVATE_SPLIT).  Optional
 * trailers/FCS configured on the *sender* are appended, the packet is
 * tapped for BPF, attached to the next free RX slot, and the ring is
 * advanced and notified.  Packets that fail copy/clone are skipped;
 * delivery also stops when the ring runs out of slots.
 */
static void
feth_rx_submit(if_fake_ref sif, if_fake_ref dif, kern_packet_t sphs[],
    uint32_t n_pkts)
{
	errno_t err = 0;
	struct kern_channel_ring_stat_increment stats;
	kern_channel_ring_t rx_ring = NULL;
	kern_channel_slot_t rx_slot = NULL, last_rx_slot = NULL;
	kern_packet_t sph = 0, dph = 0;

	memset(&stats, 0, sizeof(stats));

	rx_ring = dif->iff_rx_ring[0];
	if (rx_ring == NULL) {
		/* peer has no RX ring bound; nothing to deliver */
		return;
	}

	/* serialize against the channel and reclaim completed slots */
	kr_enter(rx_ring, TRUE);
	kern_channel_reclaim(rx_ring);
	rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);

	for (uint32_t i = 0; i < n_pkts && rx_slot != NULL; i++) {
		sph = sphs[i];

		switch (dif->iff_pp_mode) {
		case IFF_PP_MODE_GLOBAL:
			/* shared pool: transfer the packet as-is */
			sphs[i] = 0;
			dph = sph;
			feth_update_pkt_tso_metadata_for_rx(dph);
			err = kern_packet_finalize(dph);
			VERIFY(err == 0);
			break;
		case IFF_PP_MODE_PRIVATE:
			err = feth_copy_packet(dif, sph, &dph);
			break;
		case IFF_PP_MODE_PRIVATE_SPLIT:
			err = feth_clone_packet(dif, sph, &dph);
			break;
		default:
			VERIFY(0);
			__builtin_unreachable();
		}
		if (__improbable(err != 0)) {
			/* copy/clone failed (stats already bumped); skip */
			continue;
		}

		/* sender-configured trailer / FCS emulation */
		if (sif->iff_trailer_length != 0) {
			feth_add_packet_trailer(dph, feth_trailer,
			    sif->iff_trailer_length);
		}
		if (sif->iff_fcs != 0) {
			feth_add_packet_fcs(dph);
		}
		feth_packet_set_trace_tag(dph, IFF_TT_INPUT);
		bpf_tap_packet_in(dif->iff_ifp, DLT_EN10MB, dph, NULL, 0);
		stats.kcrsi_slots_transferred++;
		stats.kcrsi_bytes_transferred
		        += kern_packet_get_data_length(dph);

		/* attach the packet to the RX ring */
		err = kern_channel_slot_attach_packet(rx_ring, rx_slot, dph);
		VERIFY(err == 0);
		last_rx_slot = rx_slot;
		rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
	}

	if (last_rx_slot != NULL) {
		/* publish the attached packets and account ring stats */
		kern_channel_advance_slot(rx_ring, last_rx_slot);
		kern_channel_increment_ring_net_stats(rx_ring, dif->iff_ifp,
		    &stats);
	}

	if (rx_ring != NULL) {
		kr_exit(rx_ring);
		kern_channel_notify(rx_ring, 0);
	}
}
1374
/*
 * Logical-link variant of feth_rx_submit(): deliver `n_pkts' packets from
 * `sif' into the default RX queue of the (llink_idx, qset_idx) queue set on
 * peer `dif'.  Ownership/copy semantics per pool mode match feth_rx_submit.
 * Invalid llink/qset indices or a missing default queue cause the whole
 * batch to be silently dropped after logging.
 */
static void
feth_rx_queue_submit(if_fake_ref sif, if_fake_ref dif, uint32_t llink_idx,
    uint32_t qset_idx, kern_packet_t sphs[], uint32_t n_pkts)
{
	errno_t err = 0;
	kern_netif_queue_t queue;
	kern_packet_t sph = 0, dph = 0;
	fake_llink *llink;
	fake_qset *qset;

	/* validate the logical link / queue-set coordinates on the peer */
	if (llink_idx >= dif->iff_llink_cnt) {
		printf("%s: invalid llink_idx idx %d (max %d) on peer %s\n",
		    __func__, llink_idx, dif->iff_llink_cnt, dif->iff_name);
		return;
	}
	llink = &dif->iff_llink[llink_idx];
	if (qset_idx >= llink->fl_qset_cnt) {
		printf("%s: invalid qset_idx %d (max %d) on peer %s\n",
		    __func__, qset_idx, llink->fl_qset_cnt, dif->iff_name);
		return;
	}
	qset = &dif->iff_llink[llink_idx].fl_qset[qset_idx];
	queue = qset->fqs_rx_queue[0].fq_queue;
	if (queue == NULL) {
		printf("%s: NULL default queue (llink_idx %d, qset_idx %d) "
		    "on peer %s\n", __func__, llink_idx, qset_idx,
		    dif->iff_name);
		return;
	}
	for (uint32_t i = 0; i < n_pkts; i++) {
		uint32_t flags;

		sph = sphs[i];

		switch (dif->iff_pp_mode) {
		case IFF_PP_MODE_GLOBAL:
			/* shared pool: transfer ownership directly */
			sphs[i] = 0;
			dph = sph;
			feth_update_pkt_tso_metadata_for_rx(dph);
			break;
		case IFF_PP_MODE_PRIVATE:
			err = feth_copy_packet(dif, sph, &dph);
			break;
		case IFF_PP_MODE_PRIVATE_SPLIT:
			err = feth_clone_packet(dif, sph, &dph);
			break;
		default:
			VERIFY(0);
			__builtin_unreachable();
		}
		if (__improbable(err != 0)) {
			/* copy/clone failed; skip this packet */
			continue;
		}

		/* sender-configured trailer / FCS emulation */
		if (sif->iff_trailer_length != 0) {
			feth_add_packet_trailer(dph, feth_trailer,
			    sif->iff_trailer_length);
		}
		if (sif->iff_fcs != 0) {
			feth_add_packet_fcs(dph);
		}
		feth_packet_set_trace_tag(dph, IFF_TT_INPUT);
		bpf_tap_packet_in(dif->iff_ifp, DLT_EN10MB, dph, NULL, 0);

		/* flush the queue on the last packet of the batch */
		flags = (i == n_pkts - 1) ?
		    KERN_NETIF_QUEUE_RX_ENQUEUE_FLAG_FLUSH : 0;
		kern_netif_queue_rx_enqueue(queue, dph, 1, flags);
	}
}
1444
1445 static void
feth_tx_complete(if_fake_ref fakeif,kern_packet_t phs[],uint32_t nphs)1446 feth_tx_complete(if_fake_ref fakeif, kern_packet_t phs[], uint32_t nphs)
1447 {
1448 for (uint32_t i = 0; i < nphs; i++) {
1449 kern_packet_t ph = phs[i];
1450 if (ph == 0) {
1451 continue;
1452 }
1453 int err = kern_packet_set_tx_completion_status(ph, 0);
1454 VERIFY(err == 0);
1455 kern_packet_tx_completion(ph, fakeif->iff_ifp);
1456 kern_pbufpool_free(fakeif->iff_tx_pp, phs[i]);
1457 phs[i] = 0;
1458 }
1459 }
1460
1461 #define NSEC_PER_USEC 1000ull
1462 /*
1463 * Calculate the time delta that passed from `since' to `until'.
1464 * If `until' happens before `since', returns negative value.
1465 */
1466 static bool
feth_packet_has_expired(if_fake_ref __unused fakeif,kern_packet_t ph,uint64_t * out_deadline)1467 feth_packet_has_expired(if_fake_ref __unused fakeif, kern_packet_t ph,
1468 uint64_t *out_deadline)
1469 {
1470 uint64_t now;
1471 uint64_t packet_expire_time_mach;
1472 int64_t time_until_expiration;
1473 errno_t err;
1474 bool expired = false;
1475
1476 static mach_timebase_info_data_t clock_timebase = {0, 0};
1477
1478 if (clock_timebase.denom == 0) {
1479 clock_timebase_info(&clock_timebase);
1480 VERIFY(clock_timebase.denom != 0);
1481 }
1482
1483 err = kern_packet_get_expire_time(ph, &packet_expire_time_mach);
1484 if (err) {
1485 goto out;
1486 }
1487
1488 now = mach_absolute_time();
1489 time_until_expiration = packet_expire_time_mach - now;
1490 if (time_until_expiration < 0) {
1491 /* The packet had expired */
1492 expired = true;
1493 goto out;
1494 }
1495
1496 /* Convert the time_delta from mach ticks to nanoseconds */
1497 time_until_expiration *= clock_timebase.numer;
1498 time_until_expiration /= clock_timebase.denom;
1499 /* convert from nanoseconds to microseconds */
1500 time_until_expiration /= 1000ull;
1501
1502 if (if_fake_expiration_threshold_us < time_until_expiration) {
1503 /* packet has some life ahead of it */
1504 FETH_DPRINTF("Packet has %llu usec until expiration", time_until_expiration);
1505 goto out;
1506 }
1507
1508 out:
1509 if (expired && out_deadline) {
1510 *out_deadline = packet_expire_time_mach;
1511 }
1512
1513 return expired;
1514 }
1515
1516 static errno_t
feth_get_packet_notification_details(if_fake_ref fakeif,kern_packet_t ph,packet_id_t * pkt_id,uint32_t * nx_port_id)1517 feth_get_packet_notification_details(if_fake_ref fakeif, kern_packet_t ph,
1518 packet_id_t *pkt_id, uint32_t *nx_port_id)
1519 {
1520 errno_t err = 0;
1521
1522 err = kern_packet_get_packetid(ph, pkt_id);
1523 if (err != 0) {
1524 FETH_DPRINTF("%s err=%d getting packetid", fakeif->iff_name, err);
1525 return err;
1526 }
1527
1528 err = kern_packet_get_tx_nexus_port_id(ph, nx_port_id);
1529 if (err != 0) {
1530 FETH_DPRINTF("%s err=%d getting nx_port_id", fakeif->iff_name, err);
1531 return err;
1532 }
1533
1534 return 0;
1535 }
1536
1537 static packet_expiry_action_t
feth_get_effective_expn_action(if_fake_ref fakeif,kern_packet_t ph)1538 feth_get_effective_expn_action(if_fake_ref fakeif, kern_packet_t ph)
1539 {
1540 errno_t err;
1541 packet_expiry_action_t expiry_action;
1542
1543 switch (fakeif->iff_tx_exp_policy) {
1544 case IFF_TX_EXP_POLICY_DISABLED:
1545 expiry_action = PACKET_EXPIRY_ACTION_NONE;
1546 break;
1547 case IFF_TX_EXP_POLICY_NOTIFY_ONLY:
1548 expiry_action = PACKET_EXPIRY_ACTION_NOTIFY;
1549 break;
1550 case IFF_TX_EXP_POLICY_DROP_AND_NOTIFY:
1551 expiry_action = PACKET_EXPIRY_ACTION_DROP;
1552 break;
1553 case IFF_TX_EXP_POLICY_METADATA:
1554 err = kern_packet_get_expiry_action(ph, &expiry_action);
1555 if (err != 0) {
1556 if (err != ENOENT) {
1557 FETH_DPRINTF("Error %d when getting expiry action", err);
1558 }
1559 expiry_action = PACKET_EXPIRY_ACTION_NONE;
1560 }
1561 break;
1562 default:
1563 FETH_DPRINTF("Unrecognized value %d for \"net.link.fake.tx_exp_policy\"",
1564 fakeif->iff_tx_exp_policy);
1565 expiry_action = PACKET_EXPIRY_ACTION_NONE;
1566 }
1567
1568 return expiry_action;
1569 }
1570
/* returns true if the packet is selected for expiration and should be dropped */
/*
 * Simulate TX-expiration handling for `ph'.  If the packet has expired
 * (per feth_packet_has_expired), resolve the effective expiry action from
 * the interface policy, post a "transmit expired" channel event to the
 * originating nexus port, and return true when the action is DROP so the
 * caller discards the packet.  Returns false otherwise.
 */
static bool
feth_tx_expired_error(if_fake_ref fakeif, kern_packet_t ph)
{
	int err = 0;
	uint32_t nx_port_id = 0;
	os_channel_event_packet_transmit_expired_t expn = {0};
	packet_expiry_action_t expiry_action = PACKET_EXPIRY_ACTION_NONE;

	FETH_DPRINTF("%s\n", fakeif->iff_name);

	/* deadline is filled into the event payload on expiry */
	if (feth_packet_has_expired(fakeif, ph, &expn.packet_tx_expiration_deadline)) {
		expiry_action = feth_get_effective_expn_action(fakeif, ph);
	}

	bool drop_packet = (expiry_action == PACKET_EXPIRY_ACTION_DROP);
	if (expiry_action != PACKET_EXPIRY_ACTION_NONE) {
		/* set the expiration status code */
		expn.packet_tx_expiration_status = drop_packet ?
		    CHANNEL_EVENT_PKT_TRANSMIT_EXPIRED_ERR_EXPIRED_DROPPED :
		    CHANNEL_EVENT_PKT_TRANSMIT_EXPIRED_ERR_EXPIRED_NOT_DROPPED;

		/* Mark the expiration timestamp */
		expn.packet_tx_expiration_timestamp = mach_absolute_time();

		err = feth_get_packet_notification_details(fakeif, ph,
		    &expn.packet_id, &nx_port_id);

		if (err == 0) {
			err = kern_channel_event_transmit_expired(
				fakeif->iff_ifp, &expn, nx_port_id);
			FETH_DPRINTF("%s sent epxiry notification on nexus port %u notif code %u\n",
			    fakeif->iff_name, nx_port_id, expn.packet_tx_expiration_status);
		}
		if (err != 0) {
			FETH_DPRINTF("%s err=%d, nx_port_id: 0x%x\n",
			    fakeif->iff_name, err, nx_port_id);
		}
	}

	return drop_packet;
}
1613
1614 /* returns true if the packet is selected for TX error & dropped */
1615 static bool
feth_tx_complete_error(if_fake_ref fakeif,kern_packet_t ph)1616 feth_tx_complete_error(if_fake_ref fakeif, kern_packet_t ph)
1617 {
1618 int err;
1619
1620 if (fakeif->iff_tx_drop_rate == 0 ||
1621 fakeif->iff_tx_pkts_count != fakeif->iff_tx_drop_rate) {
1622 return false;
1623 }
1624 /* simulate TX completion error on the packet */
1625 if (fakeif->iff_tx_completion_mode == IFF_TX_COMPL_MODE_SYNC) {
1626 err = kern_packet_set_tx_completion_status(ph,
1627 CHANNEL_EVENT_PKT_TRANSMIT_STATUS_ERR_RETRY_FAILED);
1628 VERIFY(err == 0);
1629 kern_packet_tx_completion(ph, fakeif->iff_ifp);
1630 } else {
1631 uint32_t nx_port_id = 0;
1632 os_channel_event_packet_transmit_status_t pkt_tx_status = {0};
1633
1634 pkt_tx_status.packet_status =
1635 CHANNEL_EVENT_PKT_TRANSMIT_STATUS_ERR_RETRY_FAILED;
1636 err = feth_get_packet_notification_details(fakeif, ph,
1637 &pkt_tx_status.packet_id, &nx_port_id);
1638 if (err == 0) {
1639 err = kern_channel_event_transmit_status(
1640 fakeif->iff_ifp, &pkt_tx_status, nx_port_id);
1641 }
1642 if (err != 0) {
1643 FETH_DPRINTF("%s err=%d, nx_port_id: 0x%x\n",
1644 fakeif->iff_name, err, nx_port_id);
1645 }
1646 }
1647
1648 return true;
1649 }
1650
/*
 * Interface-advisory thread call.  While the channel remains connected and
 * the interface is not detaching, periodically build a synthetic WiFi-style
 * TX capacity advisory (fixed 1 Gbps figures, byte count from ifnet stats)
 * and deliver it via the registered advisory callback, then re-arm itself
 * after iff_adv_interval milliseconds.
 */
static void
feth_if_adv(thread_call_param_t arg0, thread_call_param_t arg1)
{
#pragma unused(arg1)
	errno_t error;
	if_fake_ref fakeif = (if_fake_ref)arg0;
	struct ifnet_interface_advisory if_adv;
	struct ifnet_stats_param if_stat;

	/* bail out early if the interface is going away or disconnected */
	feth_lock();
	if (feth_is_detaching(fakeif) || !fakeif->iff_channel_connected) {
		feth_unlock();
		return;
	}
	feth_unlock();

	/*
	 * NOTE(review): iff_intf_adv_enabled is read here without holding
	 * the feth lock — presumably toggled atomically elsewhere; confirm.
	 */
	if (!fakeif->iff_intf_adv_enabled) {
		goto done;
	}

	error = ifnet_stat(fakeif->iff_ifp, &if_stat);
	if (error != 0) {
		FETH_DPRINTF("%s: ifnet_stat() failed %d\n",
		    fakeif->iff_name, error);
		goto done;
	}
	/* synthetic advisory payload with fixed link characteristics */
	if_adv.header.version = IF_INTERFACE_ADVISORY_VERSION_CURRENT;
	if_adv.header.direction = IF_INTERFACE_ADVISORY_DIRECTION_TX;
	if_adv.header.interface_type =
	    IF_INTERFACE_ADVISORY_INTERFACE_TYPE_WIFI;
	if_adv.capacity.timestamp = mach_absolute_time();
	if_adv.capacity.rate_trend_suggestion =
	    IF_INTERFACE_ADVISORY_RATE_SUGGESTION_RAMP_NEUTRAL;
	if_adv.capacity.max_bandwidth = 1000 * 1000 * 1000; /* 1Gbps */
	if_adv.capacity.total_byte_count = if_stat.packets_out;
	if_adv.capacity.average_throughput = 1000 * 1000 * 1000; /* 1Gbps */
	if_adv.capacity.flushable_queue_size = UINT32_MAX;
	if_adv.capacity.non_flushable_queue_size = UINT32_MAX;
	if_adv.capacity.average_delay = 1; /* ms */

	error = fakeif->iff_intf_adv_notify(fakeif->iff_intf_adv_kern_ctx,
	    &if_adv);
	if (error != 0) {
		FETH_DPRINTF("%s: interface advisory report failed %d\n",
		    fakeif->iff_name, error);
	}

done:
	/* re-arm the thread call unless the interface went away meanwhile */
	feth_lock();
	if (!feth_is_detaching(fakeif) && fakeif->iff_channel_connected) {
		uint64_t deadline;
		clock_interval_to_deadline(fakeif->iff_adv_interval,
		    NSEC_PER_MSEC, &deadline);
		thread_call_enter_delayed(fakeif->iff_if_adv_tcall, deadline);
	}
	feth_unlock();
}
1708
1709 static int
feth_if_adv_tcall_create(if_fake_ref fakeif)1710 feth_if_adv_tcall_create(if_fake_ref fakeif)
1711 {
1712 uint64_t deadline;
1713
1714 feth_lock();
1715 ASSERT(fakeif->iff_if_adv_tcall == NULL);
1716 ASSERT(fakeif->iff_adv_interval > 0);
1717 ASSERT(fakeif->iff_channel_connected);
1718 fakeif->iff_if_adv_tcall =
1719 thread_call_allocate_with_options(feth_if_adv,
1720 (thread_call_param_t)fakeif, THREAD_CALL_PRIORITY_KERNEL,
1721 THREAD_CALL_OPTIONS_ONCE);
1722 if (fakeif->iff_if_adv_tcall == NULL) {
1723 printf("%s: %s if_adv tcall alloc failed\n", __func__,
1724 fakeif->iff_name);
1725 return ENXIO;
1726 }
1727 /* retain for the interface advisory thread call */
1728 feth_retain(fakeif);
1729 clock_interval_to_deadline(fakeif->iff_adv_interval,
1730 NSEC_PER_MSEC, &deadline);
1731 thread_call_enter_delayed(fakeif->iff_if_adv_tcall, deadline);
1732 feth_unlock();
1733 return 0;
1734 }
1735
/*
 * Cancel and free the interface-advisory thread call, then drop the
 * interface reference taken in feth_if_adv_tcall_create().  If the first
 * cancel/free pair races with a concurrently re-arming call, a second
 * cancel-wait + free is attempted and must succeed.
 */
static void
feth_if_adv_tcall_destroy(if_fake_ref fakeif)
{
	thread_call_t tcall;

	/* snapshot the tcall under the lock; cancel/free outside it */
	feth_lock();
	ASSERT(fakeif->iff_if_adv_tcall != NULL);
	tcall = fakeif->iff_if_adv_tcall;
	feth_unlock();
	(void) thread_call_cancel_wait(tcall);
	if (!thread_call_free(tcall)) {
		/* tcall re-armed itself in the window; cancel again */
		boolean_t freed;
		(void) thread_call_cancel_wait(tcall);
		freed = thread_call_free(tcall);
		VERIFY(freed);
	}
	feth_lock();
	fakeif->iff_if_adv_tcall = NULL;
	feth_unlock();
	/* release for the interface advisory thread call */
	feth_release(fakeif);
}
1758
1759
1760 /**
1761 ** nexus netif domain provider
1762 **/
/* Domain-provider init callback; feth needs no per-provider setup. */
static errno_t
feth_nxdp_init(kern_nexus_domain_provider_t domprov)
{
#pragma unused(domprov)
	return 0;
}
1769
/* Domain-provider fini callback; nothing to tear down. */
static void
feth_nxdp_fini(kern_nexus_domain_provider_t domprov)
{
#pragma unused(domprov)
}
1775
1776 static uuid_t feth_nx_dom_prov;
1777
1778 static errno_t
feth_register_nexus_domain_provider(void)1779 feth_register_nexus_domain_provider(void)
1780 {
1781 const struct kern_nexus_domain_provider_init dp_init = {
1782 .nxdpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
1783 .nxdpi_flags = 0,
1784 .nxdpi_init = feth_nxdp_init,
1785 .nxdpi_fini = feth_nxdp_fini
1786 };
1787 errno_t err = 0;
1788
1789 /* feth_nxdp_init() is called before this function returns */
1790 err = kern_nexus_register_domain_provider(NEXUS_TYPE_NET_IF,
1791 (const uint8_t *)
1792 "com.apple.feth",
1793 &dp_init, sizeof(dp_init),
1794 &feth_nx_dom_prov);
1795 if (err != 0) {
1796 printf("%s: failed to register domain provider\n", __func__);
1797 return err;
1798 }
1799 return 0;
1800 }
1801
1802 /**
1803 ** netif nexus routines
1804 **/
1805 static if_fake_ref
feth_nexus_context(kern_nexus_t nexus)1806 feth_nexus_context(kern_nexus_t nexus)
1807 {
1808 if_fake_ref fakeif;
1809
1810 fakeif = (if_fake_ref)kern_nexus_get_context(nexus);
1811 assert(fakeif != NULL);
1812 return fakeif;
1813 }
1814
1815 static uint8_t
feth_find_tx_ring_by_svc(kern_packet_svc_class_t svc_class)1816 feth_find_tx_ring_by_svc(kern_packet_svc_class_t svc_class)
1817 {
1818 switch (svc_class) {
1819 case KPKT_SC_VO:
1820 return 0;
1821 case KPKT_SC_VI:
1822 return 1;
1823 case KPKT_SC_BE:
1824 return 2;
1825 case KPKT_SC_BK:
1826 return 3;
1827 default:
1828 VERIFY(0);
1829 return 0;
1830 }
1831 }
1832
/*
 * Channel ring init callback: record the ring pointer in the interface's
 * ring table.  In WMM mode, each TX ring is placed at the slot matching
 * its service class; otherwise a single TX (or RX) ring occupies slot 0.
 * Also caches the netif stats block for this nexus.  A detaching interface
 * silently accepts the callback without recording anything.
 */
static errno_t
feth_nx_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_t channel, kern_channel_ring_t ring, boolean_t is_tx_ring,
    void **ring_ctx)
{
	if_fake_ref fakeif;
	int err;
#pragma unused(nxprov, channel, ring_ctx)
	feth_lock();
	fakeif = feth_nexus_context(nexus);
	if (feth_is_detaching(fakeif)) {
		feth_unlock();
		return 0;
	}
	if (is_tx_ring) {
		if (feth_in_wmm_mode(fakeif)) {
			/* index the TX ring by its WMM service class */
			kern_packet_svc_class_t svc_class;
			uint8_t ring_idx;

			err = kern_channel_get_service_class(ring, &svc_class);
			VERIFY(err == 0);
			ring_idx = feth_find_tx_ring_by_svc(svc_class);
			VERIFY(ring_idx < IFF_NUM_TX_RINGS_WMM_MODE);
			VERIFY(fakeif->iff_tx_ring[ring_idx] == NULL);
			fakeif->iff_tx_ring[ring_idx] = ring;
		} else {
			VERIFY(fakeif->iff_tx_ring[0] == NULL);
			fakeif->iff_tx_ring[0] = ring;
		}
	} else {
		VERIFY(fakeif->iff_rx_ring[0] == NULL);
		fakeif->iff_rx_ring[0] = ring;
	}
	/* cache the netif stats block for STATS_INC() use */
	fakeif->iff_nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
	feth_unlock();
	FETH_DPRINTF("%s: %s ring init\n",
	    fakeif->iff_name, is_tx_ring ? "TX" : "RX");
	return 0;
}
1872
/*
 * Channel ring fini callback: clear the ring from the interface's ring
 * table.  When the last TX ring goes away, the TX doorbell thread call is
 * detached and torn down outside the lock; if the cancel races with an
 * in-flight doorbell, this blocks (msleep) until the doorbell signals
 * completion before freeing the thread call and dropping its interface
 * reference.
 */
static void
feth_nx_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t ring)
{
#pragma unused(nxprov, ring)
	if_fake_ref fakeif;
	thread_call_t tcall = NULL;

	feth_lock();
	fakeif = feth_nexus_context(nexus);
	if (fakeif->iff_rx_ring[0] == ring) {
		fakeif->iff_rx_ring[0] = NULL;
		FETH_DPRINTF("%s: RX ring fini\n", fakeif->iff_name);
	} else if (feth_in_wmm_mode(fakeif)) {
		int i;
		/* clear this ring's slot */
		for (i = 0; i < IFF_MAX_TX_RINGS; i++) {
			if (fakeif->iff_tx_ring[i] == ring) {
				fakeif->iff_tx_ring[i] = NULL;
				break;
			}
		}
		/* detach the doorbell tcall only if no TX rings remain */
		for (i = 0; i < IFF_MAX_TX_RINGS; i++) {
			if (fakeif->iff_tx_ring[i] != NULL) {
				break;
			}
		}
		if (i == IFF_MAX_TX_RINGS) {
			tcall = fakeif->iff_doorbell_tcall;
			fakeif->iff_doorbell_tcall = NULL;
		}
		FETH_DPRINTF("%s: TX ring fini\n", fakeif->iff_name);
	} else if (fakeif->iff_tx_ring[0] == ring) {
		tcall = fakeif->iff_doorbell_tcall;
		fakeif->iff_doorbell_tcall = NULL;
		fakeif->iff_tx_ring[0] = NULL;
	}
	fakeif->iff_nifs = NULL;
	feth_unlock();
	if (tcall != NULL) {
		boolean_t success;

		success = thread_call_cancel_wait(tcall);
		FETH_DPRINTF("%s: thread_call_cancel %s\n",
		    fakeif->iff_name,
		    success ? "SUCCESS" : "FAILURE");
		if (!success) {
			/* a doorbell is running; wait for it to finish */
			feth_lock();
			if (fakeif->iff_doorbell_tcall_active) {
				fakeif->iff_waiting_for_tcall = TRUE;
				FETH_DPRINTF("%s: *waiting for threadcall\n",
				    fakeif->iff_name);
				do {
					msleep(fakeif, &feth_lck_mtx,
					    PZERO, "feth threadcall", 0);
				} while (fakeif->iff_doorbell_tcall_active);
				FETH_DPRINTF("%s: ^threadcall done\n",
				    fakeif->iff_name);
				fakeif->iff_waiting_for_tcall = FALSE;
			}
			feth_unlock();
		}
		success = thread_call_free(tcall);
		FETH_DPRINTF("%s: thread_call_free %s\n",
		    fakeif->iff_name,
		    success ? "SUCCESS" : "FAILURE");
		/* drop the reference held by the doorbell thread call */
		feth_release(fakeif);
		VERIFY(success == TRUE);
	}
}
1942
/* Pre-connect callback: feth accepts all channel connections as-is. */
static errno_t
feth_nx_pre_connect(kern_nexus_provider_t nxprov,
    proc_t proc, kern_nexus_t nexus, nexus_port_t port, kern_channel_t channel,
    void **channel_context)
{
#pragma unused(nxprov, proc, nexus, port, channel, channel_context)
	return 0;
}
1951
/*
 * Channel connected callback: take an interface reference for the channel,
 * mark the channel connected, and (when the interface has the advisory
 * option configured) start the interface-advisory thread call.  Returns
 * EBUSY if the interface is detaching.
 */
static errno_t
feth_nx_connected(kern_nexus_provider_t nxprov,
    kern_nexus_t nexus, kern_channel_t channel)
{
#pragma unused(nxprov, channel)
	int err;
	if_fake_ref fakeif;

	fakeif = feth_nexus_context(nexus);
	feth_lock();
	if (feth_is_detaching(fakeif)) {
		feth_unlock();
		return EBUSY;
	}
	/* reference held on behalf of the connected channel;
	 * released in feth_nx_disconnected() */
	feth_retain(fakeif);
	fakeif->iff_channel_connected = TRUE;
	feth_unlock();
	if (feth_has_intf_advisory_configured(fakeif)) {
		err = feth_if_adv_tcall_create(fakeif);
		if (err != 0) {
			/*
			 * NOTE(review): on this error path the channel
			 * reference and iff_channel_connected remain set —
			 * presumably cleaned up by the disconnect callbacks
			 * that follow a failed connect; confirm.
			 */
			return err;
		}
	}
	FETH_DPRINTF("%s: connected channel %p\n",
	    fakeif->iff_name, channel);
	return 0;
}
1979
/*
 * Pre-disconnect callback: bring the interface down, mark the channel
 * disconnected, and tear down the interface-advisory thread call if one
 * is running.
 */
static void
feth_nx_pre_disconnect(kern_nexus_provider_t nxprov,
    kern_nexus_t nexus, kern_channel_t channel)
{
#pragma unused(nxprov, channel)
	if_fake_ref fakeif;

	fakeif = feth_nexus_context(nexus);
	FETH_DPRINTF("%s: pre-disconnect channel %p\n",
	    fakeif->iff_name, channel);
	/* Quiesce the interface and flush any pending outbound packets. */
	if_down(fakeif->iff_ifp);
	feth_lock();
	fakeif->iff_channel_connected = FALSE;
	feth_unlock();
	/* NOTE(review): iff_if_adv_tcall is read without the lock here —
	 * presumably only mutated from this (dis)connect path; confirm. */
	if (fakeif->iff_if_adv_tcall != NULL) {
		feth_if_adv_tcall_destroy(fakeif);
	}
}
1999
2000 static void
feth_nx_disconnected(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel)2001 feth_nx_disconnected(kern_nexus_provider_t nxprov,
2002 kern_nexus_t nexus, kern_channel_t channel)
2003 {
2004 #pragma unused(nxprov, channel)
2005 if_fake_ref fakeif;
2006
2007 fakeif = feth_nexus_context(nexus);
2008 FETH_DPRINTF("%s: disconnected channel %p\n",
2009 fakeif->iff_name, channel);
2010 feth_release(fakeif);
2011 }
2012
2013 static errno_t
feth_nx_slot_init(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring,kern_channel_slot_t slot,uint32_t slot_index,struct kern_slot_prop ** slot_prop_addr,void ** slot_context)2014 feth_nx_slot_init(kern_nexus_provider_t nxprov,
2015 kern_nexus_t nexus, kern_channel_ring_t ring, kern_channel_slot_t slot,
2016 uint32_t slot_index, struct kern_slot_prop **slot_prop_addr,
2017 void **slot_context)
2018 {
2019 #pragma unused(nxprov, nexus, ring, slot, slot_index, slot_prop_addr, slot_context)
2020 return 0;
2021 }
2022
2023 static void
feth_nx_slot_fini(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring,kern_channel_slot_t slot,uint32_t slot_index)2024 feth_nx_slot_fini(kern_nexus_provider_t nxprov,
2025 kern_nexus_t nexus, kern_channel_ring_t ring, kern_channel_slot_t slot,
2026 uint32_t slot_index)
2027 {
2028 #pragma unused(nxprov, nexus, ring, slot, slot_index)
2029 }
2030
/*
 * feth_nx_sync_tx
 * - netif nexus TX sync callback: drains the TX ring and hands each
 *   packet directly to the peer fake interface's RX path
 * - applies the configured drop simulation (expired / completion errors)
 *   and drops when the peer channel is not connected
 * - delivery happens in batches of at most IFF_MAX_BATCH_SIZE packets
 * - runs entirely under feth_lock()
 */
static errno_t
feth_nx_sync_tx(kern_nexus_provider_t nxprov,
    kern_nexus_t nexus, kern_channel_ring_t tx_ring, uint32_t flags)
{
#pragma unused(nxprov)
	if_fake_ref fakeif;
	ifnet_t ifp;
	kern_channel_slot_t last_tx_slot = NULL;
	ifnet_t peer_ifp;
	if_fake_ref peer_fakeif = NULL;
	struct kern_channel_ring_stat_increment stats;
	kern_channel_slot_t tx_slot;
	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
	kern_packet_t pkts[IFF_MAX_BATCH_SIZE];
	uint32_t n_pkts = 0;

	memset(&stats, 0, sizeof(stats));

	STATS_INC(nifs, NETIF_STATS_TX_SYNC);
	fakeif = feth_nexus_context(nexus);
	FETH_DPRINTF("%s ring %d flags 0x%x\n", fakeif->iff_name,
	    tx_ring->ckr_ring_id, flags);
	/* silence "unused" when FETH_DPRINTF compiles to nothing */
	(void)flags;
	feth_lock();
	/* nothing to do if we are going away or no channel is connected */
	if (feth_is_detaching(fakeif) || !fakeif->iff_channel_connected) {
		feth_unlock();
		return 0;
	}
	ifp = fakeif->iff_ifp;
	peer_ifp = fakeif->iff_peer;
	if (peer_ifp != NULL) {
		peer_fakeif = ifnet_get_if_fake(peer_ifp);
		if (peer_fakeif != NULL) {
			if (feth_is_detaching(peer_fakeif)) {
				FETH_DPRINTF("%s peer fakeif %s is detaching\n",
				    fakeif->iff_name, peer_fakeif->iff_name);
				goto done;
			}
			if (!peer_fakeif->iff_channel_connected) {
				/*
				 * Peer not connected: bail out only when
				 * TX expiration simulation is disabled;
				 * otherwise keep going so expiration
				 * errors can still be generated below.
				 */
				if (fakeif->iff_tx_exp_policy ==
				    IFF_TX_EXP_POLICY_DISABLED) {
					FETH_DPRINTF("%s peer fakeif %s channel not connected, expn: %d\n",
					    fakeif->iff_name, peer_fakeif->iff_name,
					    fakeif->iff_tx_exp_policy);
					goto done;
				}
			}
		} else {
			FETH_DPRINTF("%s no peer fakeif (peer %p)\n", fakeif->iff_name, peer_ifp);
			goto done;
		}
	} else {
		FETH_DPRINTF("%s no peer\n", fakeif->iff_name);
		goto done;
	}
	tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
	while (tx_slot != NULL) {
		uint16_t off;
		kern_packet_t sph;

		/* detach the packet from the TX ring */
		sph = kern_channel_slot_get_packet(tx_ring, tx_slot);
		VERIFY(sph != 0);
		kern_channel_slot_detach_packet(tx_ring, tx_slot, sph);

		/* bpf tap output */
		off = kern_packet_get_headroom(sph);
		VERIFY(off >= fakeif->iff_tx_headroom);
		kern_packet_set_link_header_length(sph, ETHER_HDR_LEN);
		feth_packet_set_trace_tag(sph, IFF_TT_OUTPUT);
		bpf_tap_packet_out(ifp, DLT_EN10MB, sph, NULL, 0);

		/* drop packets, if requested */
		fakeif->iff_tx_pkts_count++;
		if (feth_tx_expired_error(fakeif, sph) ||
		    feth_tx_complete_error(fakeif, sph) ||
		    !peer_fakeif->iff_channel_connected) {
			fakeif->iff_tx_pkts_count = 0;
			kern_pbufpool_free(fakeif->iff_tx_pp, sph);
			STATS_INC(nifs, NETIF_STATS_DROP);
			goto next_tx_slot;
		}

		ASSERT(sph != 0);
		STATS_INC(nifs, NETIF_STATS_TX_COPY_DIRECT);
		STATS_INC(nifs, NETIF_STATS_TX_PACKETS);

		stats.kcrsi_slots_transferred++;
		stats.kcrsi_bytes_transferred
		        += kern_packet_get_data_length(sph);

		/* prepare batch for receiver */
		pkts[n_pkts++] = sph;
		if (n_pkts == IFF_MAX_BATCH_SIZE) {
			/* batch full: push to the peer and complete */
			feth_rx_submit(fakeif, peer_fakeif, pkts, n_pkts);
			feth_tx_complete(fakeif, pkts, n_pkts);
			n_pkts = 0;
		}

next_tx_slot:
		last_tx_slot = tx_slot;
		tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
	}

	/* catch last batch for receiver */
	if (n_pkts != 0) {
		feth_rx_submit(fakeif, peer_fakeif, pkts, n_pkts);
		feth_tx_complete(fakeif, pkts, n_pkts);
		n_pkts = 0;
	}

	/* advance past the last consumed slot and account ring stats */
	if (last_tx_slot != NULL) {
		kern_channel_advance_slot(tx_ring, last_tx_slot);
		kern_channel_increment_ring_net_stats(tx_ring, ifp, &stats);
	}
done:
	feth_unlock();
	return 0;
}
2150
2151 static errno_t
feth_nx_sync_rx(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring,uint32_t flags)2152 feth_nx_sync_rx(kern_nexus_provider_t nxprov,
2153 kern_nexus_t nexus, kern_channel_ring_t ring, uint32_t flags)
2154 {
2155 #pragma unused(nxprov, ring, flags)
2156 if_fake_ref fakeif;
2157 struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
2158
2159 STATS_INC(nifs, NETIF_STATS_RX_SYNC);
2160 fakeif = feth_nexus_context(nexus);
2161 FETH_DPRINTF("%s:\n", fakeif->iff_name);
2162 return 0;
2163 }
2164
2165 static errno_t
feth_nx_tx_dequeue_driver_managed(if_fake_ref fakeif,boolean_t doorbell_ctxt)2166 feth_nx_tx_dequeue_driver_managed(if_fake_ref fakeif, boolean_t doorbell_ctxt)
2167 {
2168 int i;
2169 errno_t error = 0;
2170 boolean_t more;
2171
2172 for (i = 0; i < IFF_NUM_TX_RINGS_WMM_MODE; i++) {
2173 kern_channel_ring_t ring = fakeif->iff_tx_ring[i];
2174 if (ring != NULL) {
2175 error = kern_channel_tx_refill(ring, UINT32_MAX,
2176 UINT32_MAX, doorbell_ctxt, &more);
2177 }
2178 if (error != 0) {
2179 FETH_DPRINTF("%s: TX refill ring %d (%s) %d\n",
2180 fakeif->iff_name, ring->ckr_ring_id,
2181 doorbell_ctxt ? "sync" : "async", error);
2182 if (!((error == EAGAIN) || (error == EBUSY))) {
2183 break;
2184 }
2185 } else {
2186 FETH_DPRINTF("%s: TX refilled ring %d (%s)\n",
2187 fakeif->iff_name, ring->ckr_ring_id,
2188 doorbell_ctxt ? "sync" : "async");
2189 }
2190 }
2191 return error;
2192 }
2193
/*
 * feth_async_doorbell
 * - thread call body for the deferred TX refill scheduled by
 *   feth_schedule_async_doorbell()
 * - skips the refill if the interface is detaching, the channel has
 *   dropped, or there is no TX ring
 * - wakes any thread sleeping on fakeif waiting for this call to finish
 */
static void
feth_async_doorbell(thread_call_param_t arg0, thread_call_param_t arg1)
{
#pragma unused(arg1)
	errno_t error;
	if_fake_ref fakeif = (if_fake_ref)arg0;
	kern_channel_ring_t ring;
	boolean_t more;

	feth_lock();
	ring = fakeif->iff_tx_ring[0];
	if (feth_is_detaching(fakeif) ||
	    !fakeif->iff_channel_connected ||
	    ring == NULL) {
		/* jump with the lock still held; 'done' expects that */
		goto done;
	}
	fakeif->iff_doorbell_tcall_active = TRUE;
	/* drop the lock across the potentially long refill */
	feth_unlock();
	if (feth_in_wmm_mode(fakeif)) {
		/* WMM mode: refill all driver-managed TX rings */
		error = feth_nx_tx_dequeue_driver_managed(fakeif, FALSE);
	} else {
		error = kern_channel_tx_refill(ring, UINT32_MAX,
		    UINT32_MAX, FALSE, &more);
	}
	if (error != 0) {
		FETH_DPRINTF("%s: TX refill failed %d\n",
		    fakeif->iff_name, error);
	} else {
		FETH_DPRINTF("%s: TX refilled\n", fakeif->iff_name);
	}

	feth_lock();
done:
	/* reached with the feth lock held on both paths */
	fakeif->iff_doorbell_tcall_active = FALSE;
	if (fakeif->iff_waiting_for_tcall) {
		FETH_DPRINTF("%s: threadcall waking up waiter\n",
		    fakeif->iff_name);
		wakeup((caddr_t)fakeif);
	}
	feth_unlock();
}
2235
/*
 * feth_schedule_async_doorbell
 * - arranges for feth_async_doorbell() to run as a one-shot thread call
 * - the thread call is allocated lazily on first use; the reference
 *   taken here pairs with the feth_release() performed when the thread
 *   call is eventually freed
 * - no-op while detaching or when the channel is gone
 */
static void
feth_schedule_async_doorbell(if_fake_ref fakeif)
{
	thread_call_t tcall;

	feth_lock();
	if (feth_is_detaching(fakeif) || !fakeif->iff_channel_connected) {
		feth_unlock();
		return;
	}
	tcall = fakeif->iff_doorbell_tcall;
	if (tcall != NULL) {
		thread_call_enter(tcall);
	} else {
		/* first doorbell: create the one-shot thread call */
		tcall = thread_call_allocate_with_options(feth_async_doorbell,
		    (thread_call_param_t)fakeif,
		    THREAD_CALL_PRIORITY_KERNEL,
		    THREAD_CALL_OPTIONS_ONCE);
		if (tcall == NULL) {
			printf("%s: %s tcall alloc failed\n",
			    __func__, fakeif->iff_name);
		} else {
			fakeif->iff_doorbell_tcall = tcall;
			/* released when the thread call is freed */
			feth_retain(fakeif);
			thread_call_enter(tcall);
		}
	}
	feth_unlock();
}
2265
2266 static errno_t
feth_nx_tx_doorbell(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring,uint32_t flags)2267 feth_nx_tx_doorbell(kern_nexus_provider_t nxprov,
2268 kern_nexus_t nexus, kern_channel_ring_t ring, uint32_t flags)
2269 {
2270 #pragma unused(nxprov, ring, flags)
2271 errno_t error;
2272 if_fake_ref fakeif;
2273
2274 fakeif = feth_nexus_context(nexus);
2275 FETH_DPRINTF("%s\n", fakeif->iff_name);
2276
2277 if ((flags & KERN_NEXUS_TXDOORBELLF_ASYNC_REFILL) == 0) {
2278 boolean_t more;
2279 /* synchronous tx refill */
2280 if (feth_in_wmm_mode(fakeif)) {
2281 error = feth_nx_tx_dequeue_driver_managed(fakeif, TRUE);
2282 } else {
2283 error = kern_channel_tx_refill(ring, UINT32_MAX,
2284 UINT32_MAX, TRUE, &more);
2285 }
2286 if (error != 0) {
2287 FETH_DPRINTF("%s: TX refill (sync) %d\n",
2288 fakeif->iff_name, error);
2289 } else {
2290 FETH_DPRINTF("%s: TX refilled (sync)\n",
2291 fakeif->iff_name);
2292 }
2293 } else {
2294 FETH_DPRINTF("%s: schedule async refill\n", fakeif->iff_name);
2295 feth_schedule_async_doorbell(fakeif);
2296 }
2297 return 0;
2298 }
2299
2300 static errno_t
feth_netif_prepare(kern_nexus_t nexus,ifnet_t ifp)2301 feth_netif_prepare(kern_nexus_t nexus, ifnet_t ifp)
2302 {
2303 if_fake_ref fakeif;
2304
2305 fakeif = (if_fake_ref)kern_nexus_get_context(nexus);
2306 feth_ifnet_set_attrs(fakeif, ifp);
2307 return 0;
2308 }
2309
2310 static errno_t
feth_nx_intf_adv_config(void * prov_ctx,bool enable)2311 feth_nx_intf_adv_config(void *prov_ctx, bool enable)
2312 {
2313 if_fake_ref fakeif = prov_ctx;
2314
2315 feth_lock();
2316 fakeif->iff_intf_adv_enabled = enable;
2317 feth_unlock();
2318 FETH_DPRINTF("%s enable %d\n", fakeif->iff_name, enable);
2319 return 0;
2320 }
2321
2322 static errno_t
fill_capab_interface_advisory(if_fake_ref fakeif,void * contents,uint32_t * len)2323 fill_capab_interface_advisory(if_fake_ref fakeif, void *contents, uint32_t *len)
2324 {
2325 struct kern_nexus_capab_interface_advisory *capab = contents;
2326
2327 if (*len != sizeof(*capab)) {
2328 return EINVAL;
2329 }
2330 if (capab->kncia_version !=
2331 KERN_NEXUS_CAPAB_INTERFACE_ADVISORY_VERSION_1) {
2332 return EINVAL;
2333 }
2334 if (!feth_has_intf_advisory_configured(fakeif)) {
2335 return ENOTSUP;
2336 }
2337 VERIFY(capab->kncia_notify != NULL);
2338 fakeif->iff_intf_adv_kern_ctx = capab->kncia_kern_context;
2339 fakeif->iff_intf_adv_notify = capab->kncia_notify;
2340 capab->kncia_provider_context = fakeif;
2341 capab->kncia_config = feth_nx_intf_adv_config;
2342 return 0;
2343 }
2344
2345 static errno_t
feth_notify_steering_info(void * prov_ctx,void * qset_ctx,struct ifnet_traffic_descriptor_common * td,bool add)2346 feth_notify_steering_info(void *prov_ctx, void *qset_ctx,
2347 struct ifnet_traffic_descriptor_common *td, bool add)
2348 {
2349 #pragma unused(td)
2350 if_fake_ref fakeif = prov_ctx;
2351 fake_qset *qset = qset_ctx;
2352
2353 FETH_DPRINTF("%s: notify_steering_info: qset_id 0x%llx, %s\n",
2354 fakeif->iff_name, qset->fqs_id, add ? "add" : "remove");
2355 return 0;
2356 }
2357
2358 static errno_t
fill_capab_qset_extensions(if_fake_ref fakeif,void * contents,uint32_t * len)2359 fill_capab_qset_extensions(if_fake_ref fakeif, void *contents, uint32_t *len)
2360 {
2361 struct kern_nexus_capab_qset_extensions *capab = contents;
2362
2363 if (*len != sizeof(*capab)) {
2364 return EINVAL;
2365 }
2366 if (capab->cqe_version !=
2367 KERN_NEXUS_CAPAB_QSET_EXTENSIONS_VERSION_1) {
2368 return EINVAL;
2369 }
2370 capab->cqe_prov_ctx = fakeif;
2371 capab->cqe_notify_steering_info = feth_notify_steering_info;
2372 return 0;
2373 }
2374
2375 static errno_t
feth_nx_capab_config(kern_nexus_provider_t nxprov,kern_nexus_t nx,kern_nexus_capab_t capab,void * contents,uint32_t * len)2376 feth_nx_capab_config(kern_nexus_provider_t nxprov, kern_nexus_t nx,
2377 kern_nexus_capab_t capab, void *contents, uint32_t *len)
2378 {
2379 #pragma unused(nxprov)
2380 errno_t error;
2381 if_fake_ref fakeif;
2382
2383 fakeif = feth_nexus_context(nx);
2384 FETH_DPRINTF("%s\n", fakeif->iff_name);
2385
2386 switch (capab) {
2387 case KERN_NEXUS_CAPAB_INTERFACE_ADVISORY:
2388 error = fill_capab_interface_advisory(fakeif, contents, len);
2389 break;
2390 case KERN_NEXUS_CAPAB_QSET_EXTENSIONS:
2391 error = fill_capab_qset_extensions(fakeif, contents, len);
2392 break;
2393 default:
2394 error = ENOTSUP;
2395 break;
2396 }
2397 return error;
2398 }
2399
2400 static int
feth_set_tso(ifnet_t ifp)2401 feth_set_tso(ifnet_t ifp)
2402 {
2403 ifnet_offload_t offload;
2404 uint32_t tso_v4_mtu, tso_v6_mtu;
2405 int error;
2406
2407 offload = IFNET_TSO_IPV4 | IFNET_TSO_IPV6;
2408 tso_v4_mtu = if_fake_tso_buffer_size;
2409 tso_v6_mtu = if_fake_tso_buffer_size;
2410 error = ifnet_set_offload(ifp, offload);
2411 if (error != 0) {
2412 printf("%s: set TSO offload failed on %s, err %d\n", __func__,
2413 if_name(ifp), error);
2414 return error;
2415 }
2416 error = ifnet_set_tso_mtu(ifp, AF_INET, tso_v4_mtu);
2417 if (error != 0) {
2418 printf("%s: set TSO MTU IPv4 failed on %s, err %d\n", __func__,
2419 if_name(ifp), error);
2420 return error;
2421 }
2422 error = ifnet_set_tso_mtu(ifp, AF_INET6, tso_v6_mtu);
2423 if (error != 0) {
2424 printf("%s: set TSO MTU IPv6 failed on %s, err %d\n", __func__,
2425 if_name(ifp), error);
2426 return error;
2427 }
2428 return 0;
2429 }
2430
/*
 * create_netif_provider_and_instance
 * - registers a netif nexus provider named "com.apple.netif.<ifname>"
 *   and allocates a provider instance, which creates the ifnet
 * - in WMM mode, requests multiple TX/RX rings with WMM queue mapping
 * - on success returns the new ifnet in *ifp and the provider/instance
 *   UUIDs in *provider / *instance; on failure returns non-zero with
 *   *provider cleared (if it was registered)
 */
static errno_t
create_netif_provider_and_instance(if_fake_ref fakeif,
    struct ifnet_init_eparams * init_params, ifnet_t *ifp,
    uuid_t * provider, uuid_t * instance)
{
	errno_t err;
	nexus_controller_t controller = kern_nexus_shared_controller();
	struct kern_nexus_net_init net_init;
	nexus_name_t provider_name;
	nexus_attr_t nexus_attr = NULL;
	/* callbacks wired to the feth_nx_* handlers above */
	struct kern_nexus_provider_init prov_init = {
		.nxpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
		.nxpi_flags = NXPIF_VIRTUAL_DEVICE,
		.nxpi_pre_connect = feth_nx_pre_connect,
		.nxpi_connected = feth_nx_connected,
		.nxpi_pre_disconnect = feth_nx_pre_disconnect,
		.nxpi_disconnected = feth_nx_disconnected,
		.nxpi_ring_init = feth_nx_ring_init,
		.nxpi_ring_fini = feth_nx_ring_fini,
		.nxpi_slot_init = feth_nx_slot_init,
		.nxpi_slot_fini = feth_nx_slot_fini,
		.nxpi_sync_tx = feth_nx_sync_tx,
		.nxpi_sync_rx = feth_nx_sync_rx,
		.nxpi_tx_doorbell = feth_nx_tx_doorbell,
		.nxpi_config_capab = feth_nx_capab_config,
	};

	_CASSERT(IFF_MAX_RX_RINGS == 1);
	err = kern_nexus_attr_create(&nexus_attr);
	if (err != 0) {
		printf("%s nexus attribute creation failed, error %d\n",
		    __func__, err);
		goto failed;
	}
	if (feth_in_wmm_mode(fakeif)) {
		/* WMM mode: multiple rings with a WMM queue mapping */
		err = kern_nexus_attr_set(nexus_attr, NEXUS_ATTR_TX_RINGS,
		    IFF_NUM_TX_RINGS_WMM_MODE);
		VERIFY(err == 0);
		err = kern_nexus_attr_set(nexus_attr, NEXUS_ATTR_RX_RINGS,
		    IFF_NUM_RX_RINGS_WMM_MODE);
		VERIFY(err == 0);
		err = kern_nexus_attr_set(nexus_attr, NEXUS_ATTR_QMAP,
		    NEXUS_QMAP_TYPE_WMM);
		VERIFY(err == 0);
	}

	err = kern_nexus_attr_set(nexus_attr, NEXUS_ATTR_ANONYMOUS, 1);
	VERIFY(err == 0);
	snprintf((char *)provider_name, sizeof(provider_name),
	    "com.apple.netif.%s", fakeif->iff_name);
	err = kern_nexus_controller_register_provider(controller,
	    feth_nx_dom_prov,
	    provider_name,
	    &prov_init,
	    sizeof(prov_init),
	    nexus_attr,
	    provider);
	if (err != 0) {
		printf("%s register provider failed, error %d\n",
		    __func__, err);
		goto failed;
	}
	bzero(&net_init, sizeof(net_init));
	net_init.nxneti_version = KERN_NEXUS_NET_CURRENT_VERSION;
	net_init.nxneti_flags = 0;
	net_init.nxneti_eparams = init_params;
	net_init.nxneti_lladdr = NULL;
	net_init.nxneti_prepare = feth_netif_prepare;
	net_init.nxneti_rx_pbufpool = fakeif->iff_rx_pp;
	net_init.nxneti_tx_pbufpool = fakeif->iff_tx_pp;
	err = kern_nexus_controller_alloc_net_provider_instance(controller,
	    *provider,
	    fakeif,
	    NULL,
	    instance,
	    &net_init,
	    ifp);
	if (err != 0) {
		printf("%s alloc_net_provider_instance failed, %d\n",
		    __func__, err);
		/* undo the provider registration */
		kern_nexus_controller_deregister_provider(controller,
		    *provider);
		uuid_clear(*provider);
		goto failed;
	}
	if (feth_supports_tso(fakeif)) {
		/*
		 * NOTE(review): if feth_set_tso() fails the provider
		 * instance is not torn down here -- presumably the caller
		 * cleans up on a non-zero return; verify.
		 */
		if ((err = feth_set_tso(*ifp)) != 0) {
			goto failed;
		}
	}

failed:
	/* common exit: also reached on success, with err == 0 */
	if (nexus_attr != NULL) {
		kern_nexus_attr_destroy(nexus_attr);
	}
	return err;
}
2528
2529 /*
2530 * The nif_stats need to be referenced because we don't want it set
2531 * to NULL until the last llink is removed.
2532 */
2533 static void
get_nexus_stats(if_fake_ref fakeif,kern_nexus_t nexus)2534 get_nexus_stats(if_fake_ref fakeif, kern_nexus_t nexus)
2535 {
2536 if (++fakeif->iff_nifs_ref == 1) {
2537 ASSERT(fakeif->iff_nifs == NULL);
2538 fakeif->iff_nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
2539 }
2540 }
2541
2542 static void
clear_nexus_stats(if_fake_ref fakeif)2543 clear_nexus_stats(if_fake_ref fakeif)
2544 {
2545 if (--fakeif->iff_nifs_ref == 0) {
2546 ASSERT(fakeif->iff_nifs != NULL);
2547 fakeif->iff_nifs = NULL;
2548 }
2549 }
2550
/*
 * feth_nx_qset_init
 * - qset init callback for logical-link mode
 * - binds the kernel qset handle to our fake_qset bookkeeping slot and
 *   takes a reference on the shared netif stats
 * - returns ENXIO while detaching, EINVAL for an out-of-range qset index
 */
static errno_t
feth_nx_qset_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    void *llink_ctx, uint8_t qset_idx, uint64_t qset_id, kern_netif_qset_t qset,
    void **qset_ctx)
{
#pragma unused(nxprov)
	if_fake_ref fakeif;
	fake_llink *fl = llink_ctx;
	fake_qset *fqs;

	feth_lock();
	fakeif = feth_nexus_context(nexus);
	if (feth_is_detaching(fakeif)) {
		feth_unlock();
		printf("%s: %s: detaching\n", __func__, fakeif->iff_name);
		return ENXIO;
	}
	if (qset_idx >= fl->fl_qset_cnt) {
		feth_unlock();
		printf("%s: %s: invalid qset_idx %d\n", __func__,
		    fakeif->iff_name, qset_idx);
		return EINVAL;
	}
	fqs = &fl->fl_qset[qset_idx];
	ASSERT(fqs->fqs_qset == NULL);
	/* record the kernel's qset handle and id for later lookups */
	fqs->fqs_qset = qset;
	fqs->fqs_id = qset_id;
	*qset_ctx = fqs;

	/* XXX This should really be done during registration */
	get_nexus_stats(fakeif, nexus);
	feth_unlock();
	return 0;
}
2585
2586 static void
feth_nx_qset_fini(kern_nexus_provider_t nxprov,kern_nexus_t nexus,void * qset_ctx)2587 feth_nx_qset_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
2588 void *qset_ctx)
2589 {
2590 #pragma unused(nxprov)
2591 if_fake_ref fakeif;
2592 fake_qset *fqs = qset_ctx;
2593
2594 feth_lock();
2595 fakeif = feth_nexus_context(nexus);
2596 clear_nexus_stats(fakeif);
2597 ASSERT(fqs->fqs_qset != NULL);
2598 fqs->fqs_qset = NULL;
2599 fqs->fqs_id = 0;
2600 feth_unlock();
2601 }
2602
/*
 * feth_nx_queue_init
 * - queue init callback for logical-link mode
 * - binds the kernel queue handle to the matching fake_queue slot in the
 *   qset's TX or RX queue array (selected by `tx`)
 * - returns ENXIO while detaching, EINVAL for an out-of-range queue index
 */
static errno_t
feth_nx_queue_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    void *qset_ctx, uint8_t qidx, bool tx, kern_netif_queue_t queue,
    void **queue_ctx)
{
#pragma unused(nxprov)
	if_fake_ref fakeif;
	fake_qset *fqs = qset_ctx;
	fake_queue *fq;

	feth_lock();
	fakeif = feth_nexus_context(nexus);
	if (feth_is_detaching(fakeif)) {
		printf("%s: %s: detaching\n", __func__, fakeif->iff_name);
		feth_unlock();
		return ENXIO;
	}
	if (tx) {
		if (qidx >= fqs->fqs_tx_queue_cnt) {
			printf("%s: %s: invalid tx qidx %d\n", __func__,
			    fakeif->iff_name, qidx);
			feth_unlock();
			return EINVAL;
		}
		fq = &fqs->fqs_tx_queue[qidx];
	} else {
		if (qidx >= fqs->fqs_rx_queue_cnt) {
			printf("%s: %s: invalid rx qidx %d\n", __func__,
			    fakeif->iff_name, qidx);
			feth_unlock();
			return EINVAL;
		}
		fq = &fqs->fqs_rx_queue[qidx];
	}
	ASSERT(fq->fq_queue == NULL);
	/* remember the kernel queue handle; hand back our slot as context */
	fq->fq_queue = queue;
	*queue_ctx = fq;
	feth_unlock();
	return 0;
}
2643
2644 static void
feth_nx_queue_fini(kern_nexus_provider_t nxprov,kern_nexus_t nexus,void * queue_ctx)2645 feth_nx_queue_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
2646 void *queue_ctx)
2647 {
2648 #pragma unused(nxprov, nexus)
2649 fake_queue *fq = queue_ctx;
2650
2651 feth_lock();
2652 ASSERT(fq->fq_queue != NULL);
2653 fq->fq_queue = NULL;
2654 feth_unlock();
2655 }
2656
/*
 * feth_nx_tx_queue_deliver_pkt_chain
 * - walks a chain of dequeued TX packets, taps each for bpf, applies the
 *   configured drop simulation, and delivers survivors to the peer's RX
 *   queue in batches of at most IFF_MAX_BATCH_SIZE
 * - llink_idx/qset_idx locate the matching qset on the peer interface
 * - called with the feth lock held (from feth_nx_tx_qset_notify())
 */
static void
feth_nx_tx_queue_deliver_pkt_chain(if_fake_ref fakeif, kern_packet_t sph,
    struct netif_stats *nifs, if_fake_ref peer_fakeif,
    uint32_t llink_idx, uint32_t qset_idx)
{
	kern_packet_t pkts[IFF_MAX_BATCH_SIZE];
	uint32_t n_pkts = 0;

	FETH_DPRINTF("%s -> %s\n", fakeif->iff_name, peer_fakeif->iff_name);

	while (sph != 0) {
		uint16_t off;
		kern_packet_t next;

		/* unlink the packet from the chain before handing it off */
		next = kern_packet_get_next(sph);
		kern_packet_set_next(sph, 0);

		/* bpf tap output */
		off = kern_packet_get_headroom(sph);
		VERIFY(off >= fakeif->iff_tx_headroom);
		kern_packet_set_link_header_length(sph, ETHER_HDR_LEN);
		feth_packet_set_trace_tag(sph, IFF_TT_OUTPUT);
		bpf_tap_packet_out(fakeif->iff_ifp, DLT_EN10MB, sph, NULL, 0);

		/* drop packets, if requested */
		fakeif->iff_tx_pkts_count++;
		if (feth_tx_expired_error(fakeif, sph) ||
		    feth_tx_complete_error(fakeif, sph)) {
			fakeif->iff_tx_pkts_count = 0;
			kern_pbufpool_free(fakeif->iff_tx_pp, sph);
			STATS_INC(nifs, NETIF_STATS_DROP);
			goto next_pkt;
		}
		ASSERT(sph != 0);
		STATS_INC(nifs, NETIF_STATS_TX_COPY_DIRECT);
		STATS_INC(nifs, NETIF_STATS_TX_PACKETS);

		/* prepare batch for receiver */
		pkts[n_pkts++] = sph;
		if (n_pkts == IFF_MAX_BATCH_SIZE) {
			/* batch full: push to the peer and complete */
			feth_rx_queue_submit(fakeif, peer_fakeif, llink_idx,
			    qset_idx, pkts, n_pkts);
			feth_tx_complete(fakeif, pkts, n_pkts);
			n_pkts = 0;
		}
next_pkt:
		sph = next;
	}
	/* catch last batch for receiver */
	if (n_pkts != 0) {
		feth_rx_queue_submit(fakeif, peer_fakeif, llink_idx, qset_idx,
		    pkts, n_pkts);
		feth_tx_complete(fakeif, pkts, n_pkts);
		n_pkts = 0;
	}
}
2713
/*
 * feth_nx_tx_qset_notify
 * - logical-link mode TX notification: dequeues from every TX queue of
 *   the qset and delivers the resulting packet chains to the peer
 * - bails out quietly when either side is detaching or disconnected
 * - optionally flips the qset between combined and separate mode when
 *   the if_fake_switch_combined_mode sysctl is enabled
 * - runs entirely under feth_lock()
 */
static errno_t
feth_nx_tx_qset_notify(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    void *qset_ctx, uint32_t flags)
{
#pragma unused(nxprov)
	if_fake_ref fakeif;
	ifnet_t ifp;
	ifnet_t peer_ifp;
	if_fake_ref peer_fakeif = NULL;
	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
	fake_qset *qset = qset_ctx;
	boolean_t detaching, connected;
	uint32_t i;
	errno_t err;

	STATS_INC(nifs, NETIF_STATS_TX_SYNC);
	fakeif = feth_nexus_context(nexus);
	FETH_DPRINTF("%s qset %p, idx %d, flags 0x%x\n", fakeif->iff_name, qset,
	    qset->fqs_idx, flags);

	feth_lock();
	detaching = feth_is_detaching(fakeif);
	connected = fakeif->iff_channel_connected;
	if (detaching || !connected) {
		FETH_DPRINTF("%s: %s: detaching %s, channel connected %s\n",
		    __func__, fakeif->iff_name,
		    (detaching ? "true" : "false"),
		    (connected ? "true" : "false"));
		feth_unlock();
		return 0;
	}
	ifp = fakeif->iff_ifp;
	peer_ifp = fakeif->iff_peer;
	/* the peer must exist and be up before we can deliver anything */
	if (peer_ifp != NULL) {
		peer_fakeif = ifnet_get_if_fake(peer_ifp);
		if (peer_fakeif != NULL) {
			detaching = feth_is_detaching(peer_fakeif);
			connected = peer_fakeif->iff_channel_connected;
			if (detaching || !connected) {
				FETH_DPRINTF("%s: peer %s: detaching %s, "
				    "channel connected %s\n",
				    __func__, peer_fakeif->iff_name,
				    (detaching ? "true" : "false"),
				    (connected ? "true" : "false"));
				goto done;
			}
		} else {
			FETH_DPRINTF("%s: peer_fakeif is NULL\n", __func__);
			goto done;
		}
	} else {
		printf("%s: peer_ifp is NULL\n", __func__);
		goto done;
	}

	/*
	 * Periodically toggle combined/separate qset mode for testing.
	 * NOTE(review): fqs_dequeue_cnt is not incremented in this
	 * function -- presumably updated elsewhere; verify.
	 */
	if (if_fake_switch_combined_mode &&
	    qset->fqs_dequeue_cnt >= if_fake_switch_mode_frequency) {
		if (qset->fqs_combined_mode) {
			kern_netif_set_qset_separate(qset->fqs_qset);
		} else {
			kern_netif_set_qset_combined(qset->fqs_qset);
		}
		qset->fqs_combined_mode = !qset->fqs_combined_mode;
		qset->fqs_dequeue_cnt = 0;
	}

	/* drain each TX queue and hand its chain to the peer */
	for (i = 0; i < qset->fqs_tx_queue_cnt; i++) {
		kern_packet_t sph = 0;
		kern_netif_queue_t queue = qset->fqs_tx_queue[i].fq_queue;
		boolean_t more = FALSE;

		err = kern_netif_queue_tx_dequeue(queue, UINT32_MAX, UINT32_MAX,
		    &more, &sph);
		if (err != 0 && err != EAGAIN) {
			FETH_DPRINTF("%s queue %p dequeue failed: err "
			    "%d\n", fakeif->iff_name, queue, err);
		}
		/* sph may be 0 (empty chain); the delivery loop handles it */
		feth_nx_tx_queue_deliver_pkt_chain(fakeif, sph, nifs,
		    peer_fakeif, qset->fqs_llink_idx, qset->fqs_idx);
	}

done:
	feth_unlock();
	return 0;
}
2799
2800 static void
fill_qset_info_and_params(if_fake_ref fakeif,fake_llink * llink_info,uint32_t qset_idx,struct kern_nexus_netif_llink_qset_init * qset_init,bool is_def,bool is_low_latency)2801 fill_qset_info_and_params(if_fake_ref fakeif, fake_llink *llink_info,
2802 uint32_t qset_idx, struct kern_nexus_netif_llink_qset_init *qset_init,
2803 bool is_def, bool is_low_latency)
2804 {
2805 fake_qset *qset_info = &llink_info->fl_qset[qset_idx];
2806
2807 qset_init->nlqi_flags =
2808 (is_def ? KERN_NEXUS_NET_LLINK_QSET_DEFAULT : 0) |
2809 (is_low_latency ? KERN_NEXUS_NET_LLINK_QSET_LOW_LATENCY : 0) |
2810 KERN_NEXUS_NET_LLINK_QSET_AQM;
2811
2812 if (feth_in_wmm_mode(fakeif)) {
2813 qset_init->nlqi_flags |= KERN_NEXUS_NET_LLINK_QSET_WMM_MODE;
2814 qset_init->nlqi_num_txqs = IFF_NUM_TX_QUEUES_WMM_MODE;
2815 qset_init->nlqi_num_rxqs = IFF_NUM_RX_QUEUES_WMM_MODE;
2816 } else {
2817 qset_init->nlqi_num_txqs = 1;
2818 qset_init->nlqi_num_rxqs = 1;
2819 }
2820 qset_info->fqs_tx_queue_cnt = qset_init->nlqi_num_txqs;
2821 qset_info->fqs_rx_queue_cnt = qset_init->nlqi_num_rxqs;
2822
2823 /* These are needed for locating the peer qset */
2824 qset_info->fqs_llink_idx = llink_info->fl_idx;
2825 qset_info->fqs_idx = qset_idx;
2826 }
2827
2828 static void
fill_llink_info_and_params(if_fake_ref fakeif,uint32_t llink_idx,struct kern_nexus_netif_llink_init * llink_init,uint32_t llink_id,struct kern_nexus_netif_llink_qset_init * qset_init,uint32_t qset_cnt,uint32_t flags)2829 fill_llink_info_and_params(if_fake_ref fakeif, uint32_t llink_idx,
2830 struct kern_nexus_netif_llink_init *llink_init, uint32_t llink_id,
2831 struct kern_nexus_netif_llink_qset_init *qset_init, uint32_t qset_cnt,
2832 uint32_t flags)
2833 {
2834 fake_llink *llink_info = &fakeif->iff_llink[llink_idx];
2835 uint32_t i;
2836 bool create_ll_qset = if_fake_low_latency && (llink_idx != 0);
2837
2838 for (i = 0; i < qset_cnt; i++) {
2839 fill_qset_info_and_params(fakeif, llink_info, i,
2840 &qset_init[i], i == 0, create_ll_qset && i == 1);
2841 }
2842 llink_info->fl_idx = llink_idx;
2843
2844 /* This doesn't have to be the same as llink_idx */
2845 llink_info->fl_id = llink_id;
2846 llink_info->fl_qset_cnt = qset_cnt;
2847
2848 llink_init->nli_link_id = llink_id;
2849 llink_init->nli_num_qsets = qset_cnt;
2850 llink_init->nli_qsets = qset_init;
2851 llink_init->nli_flags = flags;
2852 llink_init->nli_ctx = llink_info;
2853 }
2854
/*
 * create_non_default_llinks
 * - adds logical links 1 .. if_fake_llink_cnt-1 to the nexus (llink 0,
 *   the default, is created with the provider instance)
 * - on any failure, removes every llink counted in iff_llink_cnt and
 *   resets the count to zero before returning the error
 */
static errno_t
create_non_default_llinks(if_fake_ref fakeif)
{
	struct kern_nexus *nx;
	fake_nx_t fnx = &fakeif->iff_nx;
	struct kern_nexus_netif_llink_init llink_init;
	struct kern_nexus_netif_llink_qset_init qset_init[FETH_MAX_QSETS];
	errno_t err;
	uint64_t llink_id;
	uint32_t i;

	nx = nx_find(fnx->fnx_instance, FALSE);
	if (nx == NULL) {
		printf("%s: %s: nx not found\n", __func__, fakeif->iff_name);
		return ENXIO;
	}
	/* Default llink starts at index 0 */
	for (i = 1; i < if_fake_llink_cnt; i++) {
		llink_id = (uint64_t)i;

		/*
		 * The llink_init and qset_init structures are reused for
		 * each llink creation.
		 */
		fill_llink_info_and_params(fakeif, i, &llink_init,
		    llink_id, qset_init, if_fake_qset_cnt, 0);
		err = kern_nexus_netif_llink_add(nx, &llink_init);
		if (err != 0) {
			printf("%s: %s: llink add failed, error %d\n",
			    __func__, fakeif->iff_name, err);
			goto fail;
		}
		fakeif->iff_llink_cnt++;
	}
	nx_release(nx);
	return 0;

fail:
	/*
	 * Unwind every llink counted so far. NOTE(review): this starts at
	 * index 0, which includes the default llink created with the
	 * provider instance -- presumably intentional since the caller
	 * tears down the whole nexus on failure; verify.
	 */
	for (i = 0; i < fakeif->iff_llink_cnt; i++) {
		int e;

		e = kern_nexus_netif_llink_remove(nx, fakeif->
		    iff_llink[i].fl_id);
		if (e != 0) {
			printf("%s: %s: llink remove failed, llink_id 0x%llx, "
			    "error %d\n", __func__, fakeif->iff_name,
			    fakeif->iff_llink[i].fl_id, e);
		}
		fakeif->iff_llink[i].fl_id = 0;
	}
	fakeif->iff_llink_cnt = 0;
	nx_release(nx);
	return err;
}
2909
/*
 * create_netif_llink_provider_and_instance
 * - logical-link variant of create_netif_provider_and_instance():
 *   registers a netif provider using the qset/queue callbacks, creates
 *   the default llink (id 0) with the provider instance, then adds any
 *   additional llinks
 * - on success returns the new ifnet in *ifp and the provider/instance
 *   UUIDs in *provider / *instance
 */
static errno_t
create_netif_llink_provider_and_instance(if_fake_ref fakeif,
    struct ifnet_init_eparams * init_params, ifnet_t *ifp,
    uuid_t * provider, uuid_t * instance)
{
	errno_t err;
	nexus_controller_t controller = kern_nexus_shared_controller();
	struct kern_nexus_net_init net_init;
	struct kern_nexus_netif_llink_init llink_init;
	struct kern_nexus_netif_llink_qset_init qsets[FETH_MAX_QSETS];

	nexus_name_t provider_name;
	nexus_attr_t nexus_attr = NULL;
	/* llink-mode callbacks: qset/queue init instead of ring/slot */
	struct kern_nexus_netif_provider_init prov_init = {
		.nxnpi_version = KERN_NEXUS_DOMAIN_PROVIDER_NETIF,
		.nxnpi_flags = NXPIF_VIRTUAL_DEVICE,
		.nxnpi_pre_connect = feth_nx_pre_connect,
		.nxnpi_connected = feth_nx_connected,
		.nxnpi_pre_disconnect = feth_nx_pre_disconnect,
		.nxnpi_disconnected = feth_nx_disconnected,
		.nxnpi_qset_init = feth_nx_qset_init,
		.nxnpi_qset_fini = feth_nx_qset_fini,
		.nxnpi_queue_init = feth_nx_queue_init,
		.nxnpi_queue_fini = feth_nx_queue_fini,
		.nxnpi_tx_qset_notify = feth_nx_tx_qset_notify,
		.nxnpi_config_capab = feth_nx_capab_config,
	};

	err = kern_nexus_attr_create(&nexus_attr);
	if (err != 0) {
		printf("%s nexus attribute creation failed, error %d\n",
		    __func__, err);
		goto failed;
	}

	err = kern_nexus_attr_set(nexus_attr, NEXUS_ATTR_ANONYMOUS, 1);
	VERIFY(err == 0);

	snprintf((char *)provider_name, sizeof(provider_name),
	    "com.apple.netif.%s", fakeif->iff_name);
	err = kern_nexus_controller_register_provider(controller,
	    feth_nx_dom_prov,
	    provider_name,
	    (struct kern_nexus_provider_init *)&prov_init,
	    sizeof(prov_init),
	    nexus_attr,
	    provider);
	if (err != 0) {
		printf("%s register provider failed, error %d\n",
		    __func__, err);
		goto failed;
	}
	bzero(&net_init, sizeof(net_init));
	net_init.nxneti_version = KERN_NEXUS_NET_CURRENT_VERSION;
	net_init.nxneti_flags = 0;
	net_init.nxneti_eparams = init_params;
	net_init.nxneti_lladdr = NULL;
	net_init.nxneti_prepare = feth_netif_prepare;
	net_init.nxneti_rx_pbufpool = fakeif->iff_rx_pp;
	net_init.nxneti_tx_pbufpool = fakeif->iff_tx_pp;

	/*
	 * Assume llink id is same as the index for if_fake.
	 * This is not required for other drivers.
	 */
	_CASSERT(NETIF_LLINK_ID_DEFAULT == 0);
	fill_llink_info_and_params(fakeif, 0, &llink_init,
	    NETIF_LLINK_ID_DEFAULT, qsets, if_fake_qset_cnt,
	    KERN_NEXUS_NET_LLINK_DEFAULT);

	net_init.nxneti_llink = &llink_init;

	err = kern_nexus_controller_alloc_net_provider_instance(controller,
	    *provider, fakeif, NULL, instance, &net_init, ifp);
	if (err != 0) {
		printf("%s alloc_net_provider_instance failed, %d\n",
		    __func__, err);
		/* undo the provider registration */
		kern_nexus_controller_deregister_provider(controller,
		    *provider);
		uuid_clear(*provider);
		goto failed;
	}
	/* count the default llink created with the instance */
	fakeif->iff_llink_cnt++;

	if (if_fake_llink_cnt > 1) {
		err = create_non_default_llinks(fakeif);
		if (err != 0) {
			printf("%s create_non_default_llinks failed, %d\n",
			    __func__, err);
			/* tear down everything built so far */
			feth_detach_netif_nexus(fakeif);
			goto failed;
		}
	}
	if (feth_supports_tso(fakeif)) {
		/*
		 * NOTE(review): if feth_set_tso() fails the nexus is not
		 * detached here (unlike the llink failure above) --
		 * presumably the caller cleans up; verify.
		 */
		if ((err = feth_set_tso(*ifp)) != 0) {
			goto failed;
		}
	}
failed:
	/* common exit: also reached on success, with err == 0 */
	if (nexus_attr != NULL) {
		kern_nexus_attr_destroy(nexus_attr);
	}
	return err;
}
3014
3015 static errno_t
feth_attach_netif_nexus(if_fake_ref fakeif,struct ifnet_init_eparams * init_params,ifnet_t * ifp)3016 feth_attach_netif_nexus(if_fake_ref fakeif,
3017 struct ifnet_init_eparams * init_params, ifnet_t *ifp)
3018 {
3019 errno_t error;
3020 fake_nx_t nx = &fakeif->iff_nx;
3021
3022 error = feth_packet_pool_make(fakeif);
3023 if (error != 0) {
3024 return error;
3025 }
3026 if (if_fake_llink_cnt == 0) {
3027 return create_netif_provider_and_instance(fakeif, init_params,
3028 ifp, &nx->fnx_provider, &nx->fnx_instance);
3029 } else {
3030 return create_netif_llink_provider_and_instance(fakeif,
3031 init_params, ifp, &nx->fnx_provider,
3032 &nx->fnx_instance);
3033 }
3034 }
3035
3036 static void
remove_non_default_llinks(if_fake_ref fakeif)3037 remove_non_default_llinks(if_fake_ref fakeif)
3038 {
3039 struct kern_nexus *nx;
3040 fake_nx_t fnx = &fakeif->iff_nx;
3041 uint32_t i;
3042
3043 if (fakeif->iff_llink_cnt <= 1) {
3044 return;
3045 }
3046 nx = nx_find(fnx->fnx_instance, FALSE);
3047 if (nx == NULL) {
3048 printf("%s: %s: nx not found\n", __func__,
3049 fakeif->iff_name);
3050 return;
3051 }
3052 /* Default llink (at index 0) is freed separately */
3053 for (i = 1; i < fakeif->iff_llink_cnt; i++) {
3054 int err;
3055
3056 err = kern_nexus_netif_llink_remove(nx, fakeif->
3057 iff_llink[i].fl_id);
3058 if (err != 0) {
3059 printf("%s: %s: llink remove failed, llink_id 0x%llx, "
3060 "error %d\n", __func__, fakeif->iff_name,
3061 fakeif->iff_llink[i].fl_id, err);
3062 }
3063 fakeif->iff_llink[i].fl_id = 0;
3064 }
3065 fakeif->iff_llink_cnt = 0;
3066 nx_release(nx);
3067 }
3068
3069 static void
detach_provider_and_instance(uuid_t provider,uuid_t instance)3070 detach_provider_and_instance(uuid_t provider, uuid_t instance)
3071 {
3072 nexus_controller_t controller = kern_nexus_shared_controller();
3073 errno_t err;
3074
3075 if (!uuid_is_null(instance)) {
3076 err = kern_nexus_controller_free_provider_instance(controller,
3077 instance);
3078 if (err != 0) {
3079 printf("%s free_provider_instance failed %d\n",
3080 __func__, err);
3081 }
3082 uuid_clear(instance);
3083 }
3084 if (!uuid_is_null(provider)) {
3085 err = kern_nexus_controller_deregister_provider(controller,
3086 provider);
3087 if (err != 0) {
3088 printf("%s deregister_provider %d\n", __func__, err);
3089 }
3090 uuid_clear(provider);
3091 }
3092 return;
3093 }
3094
3095 static void
feth_detach_netif_nexus(if_fake_ref fakeif)3096 feth_detach_netif_nexus(if_fake_ref fakeif)
3097 {
3098 fake_nx_t fnx = &fakeif->iff_nx;
3099
3100 remove_non_default_llinks(fakeif);
3101 detach_provider_and_instance(fnx->fnx_provider, fnx->fnx_instance);
3102 }
3103
3104 #endif /* SKYWALK */
3105
3106 /**
3107 ** feth interface routines
3108 **/
3109 static void
feth_ifnet_set_attrs(if_fake_ref fakeif,ifnet_t ifp)3110 feth_ifnet_set_attrs(if_fake_ref fakeif, ifnet_t ifp)
3111 {
3112 uint32_t cap;
3113
3114 cap = ((fakeif->iff_flags & IFF_FLAGS_LRO) != 0) ? IFCAP_LRO : 0;
3115 if (cap != 0) {
3116 errno_t error;
3117
3118 error = ifnet_set_capabilities_supported(ifp, cap, IFCAP_VALID);
3119 if (error != 0) {
3120 printf("%s: failed to enable LRO, %d\n",
3121 ifp->if_xname, error);
3122 }
3123 }
3124 (void)ifnet_set_capabilities_enabled(ifp, cap, IFCAP_VALID);
3125 ifnet_set_addrlen(ifp, ETHER_ADDR_LEN);
3126 ifnet_set_baudrate(ifp, 0);
3127 ifnet_set_mtu(ifp, ETHERMTU);
3128 ifnet_set_flags(ifp,
3129 IFF_BROADCAST | IFF_MULTICAST | IFF_SIMPLEX,
3130 0xffff);
3131 ifnet_set_hdrlen(ifp, sizeof(struct ether_header));
3132 if ((fakeif->iff_flags & IFF_FLAGS_HWCSUM) != 0) {
3133 ifnet_set_offload(ifp,
3134 IFNET_CSUM_IP | IFNET_CSUM_TCP | IFNET_CSUM_UDP |
3135 IFNET_CSUM_TCPIPV6 | IFNET_CSUM_UDPIPV6);
3136 } else {
3137 ifnet_set_offload(ifp, 0);
3138 }
3139 }
3140
3141 static void
interface_link_event(ifnet_t ifp,u_int32_t event_code)3142 interface_link_event(ifnet_t ifp, u_int32_t event_code)
3143 {
3144 struct event {
3145 u_int32_t ifnet_family;
3146 u_int32_t unit;
3147 char if_name[IFNAMSIZ];
3148 };
3149 _Alignas(struct kern_event_msg) char message[sizeof(struct kern_event_msg) + sizeof(struct event)] = { 0 };
3150 struct kern_event_msg *header = (struct kern_event_msg*)message;
3151 struct event *data = (struct event *)(header + 1);
3152
3153 header->total_size = sizeof(message);
3154 header->vendor_code = KEV_VENDOR_APPLE;
3155 header->kev_class = KEV_NETWORK_CLASS;
3156 header->kev_subclass = KEV_DL_SUBCLASS;
3157 header->event_code = event_code;
3158 data->ifnet_family = ifnet_family(ifp);
3159 data->unit = (u_int32_t)ifnet_unit(ifp);
3160 strlcpy(data->if_name, ifnet_name(ifp), IFNAMSIZ);
3161 ifnet_event(ifp, header);
3162 }
3163
3164 static if_fake_ref
ifnet_get_if_fake(ifnet_t ifp)3165 ifnet_get_if_fake(ifnet_t ifp)
3166 {
3167 return (if_fake_ref)ifnet_softc(ifp);
3168 }
3169
3170 static int
feth_clone_create(struct if_clone * ifc,u_int32_t unit,__unused void * params)3171 feth_clone_create(struct if_clone *ifc, u_int32_t unit, __unused void *params)
3172 {
3173 int error;
3174 if_fake_ref fakeif;
3175 struct ifnet_init_eparams feth_init;
3176 ifnet_t ifp;
3177 uint8_t mac_address[ETHER_ADDR_LEN];
3178 fake_llink *iff_llink;
3179
3180 iff_llink = kalloc_type(fake_llink, FETH_MAX_LLINKS, Z_WAITOK_ZERO);
3181 if (iff_llink == NULL) {
3182 return ENOBUFS;
3183 }
3184 fakeif = kalloc_type(struct if_fake, Z_WAITOK_ZERO_NOFAIL);
3185 fakeif->iff_llink = iff_llink;
3186 fakeif->iff_retain_count = 1;
3187 #define FAKE_ETHER_NAME_LEN (sizeof(FAKE_ETHER_NAME) - 1)
3188 _CASSERT(FAKE_ETHER_NAME_LEN == 4);
3189 bcopy(FAKE_ETHER_NAME, mac_address, FAKE_ETHER_NAME_LEN);
3190 mac_address[ETHER_ADDR_LEN - 2] = (unit & 0xff00) >> 8;
3191 mac_address[ETHER_ADDR_LEN - 1] = unit & 0xff;
3192 if (if_fake_bsd_mode != 0) {
3193 fakeif->iff_flags |= IFF_FLAGS_BSD_MODE;
3194 }
3195 if (if_fake_hwcsum != 0) {
3196 fakeif->iff_flags |= IFF_FLAGS_HWCSUM;
3197 }
3198 if (if_fake_lro != 0) {
3199 fakeif->iff_flags |= IFF_FLAGS_LRO;
3200 }
3201 fakeif->iff_max_mtu = get_max_mtu(if_fake_bsd_mode, if_fake_max_mtu);
3202 fakeif->iff_fcs = if_fake_fcs;
3203 fakeif->iff_trailer_length = if_fake_trailer_length;
3204
3205 /* use the interface name as the unique id for ifp recycle */
3206 if ((unsigned int)
3207 snprintf(fakeif->iff_name, sizeof(fakeif->iff_name), "%s%d",
3208 ifc->ifc_name, unit) >= sizeof(fakeif->iff_name)) {
3209 feth_release(fakeif);
3210 return EINVAL;
3211 }
3212 bzero(&feth_init, sizeof(feth_init));
3213 feth_init.ver = IFNET_INIT_CURRENT_VERSION;
3214 feth_init.len = sizeof(feth_init);
3215 if (feth_in_bsd_mode(fakeif)) {
3216 if (if_fake_txstart != 0) {
3217 feth_init.start = feth_start;
3218 } else {
3219 feth_init.flags |= IFNET_INIT_LEGACY;
3220 feth_init.output = feth_output;
3221 }
3222 }
3223 #if SKYWALK
3224 else {
3225 feth_init.flags |= IFNET_INIT_SKYWALK_NATIVE;
3226 /*
3227 * Currently we support WMM mode only for Skywalk native
3228 * interface.
3229 */
3230 if (if_fake_wmm_mode != 0) {
3231 fakeif->iff_flags |= IFF_FLAGS_WMM_MODE;
3232 }
3233
3234 if (if_fake_multibuflet != 0) {
3235 fakeif->iff_flags |= IFF_FLAGS_MULTIBUFLETS;
3236 }
3237
3238 if (if_fake_multibuflet != 0 &&
3239 if_fake_pktpool_mode == IFF_PP_MODE_PRIVATE_SPLIT) {
3240 printf("%s: multi-buflet not supported for split rx &"
3241 " tx pool", __func__);
3242 feth_release(fakeif);
3243 return EINVAL;
3244 }
3245
3246 fakeif->iff_pp_mode = if_fake_pktpool_mode;
3247 if (if_fake_tso_support != 0) {
3248 if (fakeif->iff_pp_mode != IFF_PP_MODE_GLOBAL) {
3249 printf("%s: TSO mode requires global packet"
3250 " pool mode\n", __func__);
3251 return EINVAL;
3252 }
3253 fakeif->iff_flags |= IFF_FLAGS_TSO_SUPPORT;
3254 }
3255
3256 fakeif->iff_tx_headroom = if_fake_tx_headroom;
3257 fakeif->iff_adv_interval = if_fake_if_adv_interval;
3258 if (fakeif->iff_adv_interval > 0) {
3259 feth_init.flags |= IFNET_INIT_IF_ADV;
3260 }
3261 fakeif->iff_tx_drop_rate = if_fake_tx_drops;
3262 fakeif->iff_tx_completion_mode = if_tx_completion_mode;
3263 fakeif->iff_tx_exp_policy = if_fake_tx_exp_policy;
3264 }
3265 feth_init.tx_headroom = fakeif->iff_tx_headroom;
3266 #endif /* SKYWALK */
3267 if (if_fake_nxattach == 0) {
3268 feth_init.flags |= IFNET_INIT_NX_NOAUTO;
3269 }
3270 feth_init.uniqueid = fakeif->iff_name;
3271 feth_init.uniqueid_len = strlen(fakeif->iff_name);
3272 feth_init.name = ifc->ifc_name;
3273 feth_init.unit = unit;
3274 feth_init.family = IFNET_FAMILY_ETHERNET;
3275 feth_init.type = IFT_ETHER;
3276 feth_init.demux = ether_demux;
3277 feth_init.add_proto = ether_add_proto;
3278 feth_init.del_proto = ether_del_proto;
3279 feth_init.check_multi = ether_check_multi;
3280 feth_init.framer_extended = ether_frameout_extended;
3281 feth_init.softc = fakeif;
3282 feth_init.ioctl = feth_ioctl;
3283 feth_init.set_bpf_tap = NULL;
3284 feth_init.detach = feth_if_free;
3285 feth_init.broadcast_addr = etherbroadcastaddr;
3286 feth_init.broadcast_len = ETHER_ADDR_LEN;
3287 if (feth_in_bsd_mode(fakeif)) {
3288 error = ifnet_allocate_extended(&feth_init, &ifp);
3289 if (error) {
3290 feth_release(fakeif);
3291 return error;
3292 }
3293 feth_ifnet_set_attrs(fakeif, ifp);
3294 }
3295 #if SKYWALK
3296 else {
3297 if (feth_in_wmm_mode(fakeif)) {
3298 feth_init.output_sched_model =
3299 IFNET_SCHED_MODEL_DRIVER_MANAGED;
3300 }
3301 error = feth_attach_netif_nexus(fakeif, &feth_init, &ifp);
3302 if (error != 0) {
3303 feth_release(fakeif);
3304 return error;
3305 }
3306 /* take an additional reference to ensure that it doesn't go away */
3307 feth_retain(fakeif);
3308 fakeif->iff_ifp = ifp;
3309 }
3310 #endif /* SKYWALK */
3311 fakeif->iff_media_count = MIN(default_media_words_count, IF_FAKE_MEDIA_LIST_MAX);
3312 bcopy(default_media_words, fakeif->iff_media_list,
3313 fakeif->iff_media_count * sizeof(fakeif->iff_media_list[0]));
3314 if (feth_in_bsd_mode(fakeif)) {
3315 error = ifnet_attach(ifp, NULL);
3316 if (error) {
3317 ifnet_release(ifp);
3318 feth_release(fakeif);
3319 return error;
3320 }
3321 fakeif->iff_ifp = ifp;
3322 }
3323
3324 ifnet_set_lladdr(ifp, mac_address, sizeof(mac_address));
3325
3326 /* attach as ethernet */
3327 bpfattach(ifp, DLT_EN10MB, sizeof(struct ether_header));
3328 return 0;
3329 }
3330
/*
 * feth_clone_destroy
 * - if_clone destroy entry point: tears down one feth instance
 * - marks the softc detaching under the lock so only one destroyer
 *   proceeds; concurrent/repeat destroys return 0 harmlessly
 */
static int
feth_clone_destroy(ifnet_t ifp)
{
	if_fake_ref fakeif;
#if SKYWALK
	boolean_t nx_attached = FALSE;
#endif /* SKYWALK */

	feth_lock();
	fakeif = ifnet_get_if_fake(ifp);
	if (fakeif == NULL || feth_is_detaching(fakeif)) {
		/* already gone, or another destroy is in progress */
		feth_unlock();
		return 0;
	}
	feth_set_detaching(fakeif);
#if SKYWALK
	/* remember the mode before dropping the lock */
	nx_attached = !feth_in_bsd_mode(fakeif);
#endif /* SKYWALK */
	feth_unlock();

#if SKYWALK
	if (nx_attached) {
		/* detach the nexus and drop the extra reference taken in
		 * feth_clone_create */
		feth_detach_netif_nexus(fakeif);
		feth_release(fakeif);
	}
#endif /* SKYWALK */
	/* disconnect from the peer (posts link-down events) */
	feth_config(ifp, NULL);
	ifnet_detach(ifp);
	return 0;
}
3361
3362 static void
feth_enqueue_input(ifnet_t ifp,struct mbuf * m)3363 feth_enqueue_input(ifnet_t ifp, struct mbuf * m)
3364 {
3365 struct ifnet_stat_increment_param stats = {};
3366
3367 stats.packets_in = 1;
3368 stats.bytes_in = (uint32_t)mbuf_pkthdr_len(m) + ETHER_HDR_LEN;
3369 ifnet_input(ifp, m, &stats);
3370 }
3371
/*
 * copy_mbuf
 * - deep-copies a packet mbuf chain into a single contiguous mbuf,
 *   picking a cluster size (regular / big / 16K) to fit the packet
 * - preserves the packet length and service class; returns NULL on
 *   allocation failure, on a non-pkthdr mbuf, or if the chain's
 *   fragment lengths are inconsistent with the packet length
 */
static struct mbuf *
copy_mbuf(struct mbuf *m)
{
	struct mbuf * copy_m;
	uint32_t pkt_len;
	uint32_t offset;

	if ((m->m_flags & M_PKTHDR) == 0) {
		return NULL;
	}
	pkt_len = m->m_pkthdr.len;
	MGETHDR(copy_m, M_DONTWAIT, MT_DATA);
	if (copy_m == NULL) {
		goto failed;
	}
	if (pkt_len > MHLEN) {
		/* choose the smallest cluster that holds the whole packet */
		if (pkt_len <= MCLBYTES) {
			MCLGET(copy_m, M_DONTWAIT);
		} else if (pkt_len <= MBIGCLBYTES) {
			copy_m = m_mbigget(copy_m, M_DONTWAIT);
		} else if (pkt_len <= M16KCLBYTES && njcl > 0) {
			copy_m = m_m16kget(copy_m, M_DONTWAIT);
		} else {
			printf("if_fake: copy_mbuf(): packet too large %d\n",
			    pkt_len);
			goto failed;
		}
		if (copy_m == NULL || (copy_m->m_flags & M_EXT) == 0) {
			/* cluster attach failed */
			goto failed;
		}
	}
	mbuf_setlen(copy_m, pkt_len);
	copy_m->m_pkthdr.len = pkt_len;
	copy_m->m_pkthdr.pkt_svc = m->m_pkthdr.pkt_svc;
	offset = 0;
	/* walk the source chain, appending each fragment at offset */
	while (m != NULL && offset < pkt_len) {
		uint32_t frag_len;

		frag_len = m->m_len;
		if (frag_len > (pkt_len - offset)) {
			/* fragment lengths exceed the declared packet length */
			printf("if_fake_: Large mbuf fragment %d > %d\n",
			    frag_len, (pkt_len - offset));
			goto failed;
		}
		m_copydata(m, 0, frag_len, mtodo(copy_m, offset));
		offset += frag_len;
		m = m->m_next;
	}
	return copy_m;

failed:
	if (copy_m != NULL) {
		m_freem(copy_m);
	}
	return NULL;
}
3428
3429 static int
feth_add_mbuf_trailer(struct mbuf * m,void * trailer,size_t trailer_len)3430 feth_add_mbuf_trailer(struct mbuf *m, void *trailer, size_t trailer_len)
3431 {
3432 int ret;
3433 ASSERT(trailer_len <= FETH_TRAILER_LENGTH_MAX);
3434
3435 ret = m_append(m, trailer_len, (caddr_t)trailer);
3436 if (ret == 1) {
3437 FETH_DPRINTF("%s %zuB trailer added\n", __func__, trailer_len);
3438 return 0;
3439 }
3440 printf("%s m_append failed\n", __func__);
3441 return ENOTSUP;
3442 }
3443
3444 static int
feth_add_mbuf_fcs(struct mbuf * m)3445 feth_add_mbuf_fcs(struct mbuf *m)
3446 {
3447 uint32_t pkt_len, offset = 0;
3448 uint32_t crc = 0;
3449 int err = 0;
3450
3451 ASSERT(sizeof(crc) == ETHER_CRC_LEN);
3452
3453 pkt_len = m->m_pkthdr.len;
3454 struct mbuf *iter = m;
3455 while (iter != NULL && offset < pkt_len) {
3456 uint32_t frag_len = iter->m_len;
3457 ASSERT(frag_len <= (pkt_len - offset));
3458 crc = crc32(crc, mtod(iter, void *), frag_len);
3459 offset += frag_len;
3460 iter = m->m_next;
3461 }
3462
3463 err = feth_add_mbuf_trailer(m, &crc, ETHER_CRC_LEN);
3464 if (err != 0) {
3465 return err;
3466 }
3467
3468 m->m_flags |= M_HASFCS;
3469
3470 return 0;
3471 }
3472
/*
 * feth_output_common
 * - transmit path shared by feth_start and feth_output
 * - accounts and taps the packet on the sender, optionally appends a
 *   trailer and/or Ethernet FCS, then strips the Ethernet header and
 *   injects the packet into the peer's input path as if received
 */
static void
feth_output_common(ifnet_t ifp, struct mbuf * m, ifnet_t peer,
    iff_flags_t flags, bool fcs, void *trailer, size_t trailer_len)
{
	void * frame_header;

	frame_header = mbuf_data(m);
	if ((flags & IFF_FLAGS_HWCSUM) != 0) {
		/* pretend the "hardware" verified all checksums */
		m->m_pkthdr.csum_data = 0xffff;
		m->m_pkthdr.csum_flags =
		    CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
		    CSUM_IP_CHECKED | CSUM_IP_VALID;
	}

	(void)ifnet_stat_increment_out(ifp, 1, m->m_pkthdr.len, 0);
	bpf_tap_out(ifp, DLT_EN10MB, m, NULL, 0);

	if (trailer != 0) {
		feth_add_mbuf_trailer(m, trailer, trailer_len);
	}
	if (fcs) {
		feth_add_mbuf_fcs(m);
	}

	/* re-target at the peer and strip the Ethernet header before
	 * the input tap/enqueue, mirroring real receive processing */
	(void)mbuf_pkthdr_setrcvif(m, peer);
	mbuf_pkthdr_setheader(m, frame_header);
	mbuf_pkthdr_adjustlen(m, -ETHER_HDR_LEN);
	(void)mbuf_setdata(m, (char *)mbuf_data(m) + ETHER_HDR_LEN,
	    mbuf_len(m) - ETHER_HDR_LEN);
	bpf_tap_in(peer, DLT_EN10MB, m, frame_header,
	    sizeof(struct ether_header));
	feth_enqueue_input(peer, m);
}
3506
/*
 * feth_start
 * - ifnet start callback (TX-start model, BSD mode with
 *   if_fake_txstart set)
 * - drains the interface output queue, copying each packet and
 *   injecting the copy into the peer's input path
 * - iff_start_busy serializes concurrent starts; a packet whose copy
 *   fails is parked in iff_pending_tx_packet and retried next start
 */
static void
feth_start(ifnet_t ifp)
{
	struct mbuf * copy_m = NULL;
	if_fake_ref fakeif;
	iff_flags_t flags = 0;
	bool fcs;
	size_t trailer_len;
	ifnet_t peer = NULL;
	struct mbuf * m;
	struct mbuf * save_m;

	feth_lock();
	fakeif = ifnet_get_if_fake(ifp);
	if (fakeif == NULL) {
		feth_unlock();
		return;
	}

	if (fakeif->iff_start_busy) {
		/* another start is already draining the queue */
		feth_unlock();
		printf("if_fake: start is busy\n");
		return;
	}

	/* snapshot configuration while the lock is held */
	peer = fakeif->iff_peer;
	flags = fakeif->iff_flags;
	fcs = fakeif->iff_fcs;
	trailer_len = fakeif->iff_trailer_length;

	/* check for pending TX */
	m = fakeif->iff_pending_tx_packet;
	if (m != NULL) {
		if (peer != NULL) {
			copy_m = copy_mbuf(m);
			if (copy_m == NULL) {
				/* still can't copy; keep it parked */
				feth_unlock();
				return;
			}
		}
		fakeif->iff_pending_tx_packet = NULL;
		m_freem(m);
		m = NULL;
	}
	fakeif->iff_start_busy = TRUE;
	feth_unlock();
	save_m = NULL;
	/* the drain loop runs unlocked; iff_start_busy keeps it exclusive */
	for (;;) {
		if (copy_m != NULL) {
			VERIFY(peer != NULL);
			feth_output_common(ifp, copy_m, peer, flags, fcs,
			    feth_trailer, trailer_len);
			copy_m = NULL;
		}
		if (ifnet_dequeue(ifp, &m) != 0) {
			break;
		}
		if (peer == NULL) {
			/* no peer attached: drop */
			m_freem(m);
		} else {
			copy_m = copy_mbuf(m);
			if (copy_m == NULL) {
				/* copy failed; park the original for retry */
				save_m = m;
				break;
			}
			m_freem(m);
		}
	}
	peer = NULL;
	feth_lock();
	fakeif = ifnet_get_if_fake(ifp);
	if (fakeif != NULL) {
		fakeif->iff_start_busy = FALSE;
		if (save_m != NULL && fakeif->iff_peer != NULL) {
			/* save it for next time */
			fakeif->iff_pending_tx_packet = save_m;
			save_m = NULL;
		}
	}
	feth_unlock();
	if (save_m != NULL) {
		/* didn't save packet, so free it */
		m_freem(save_m);
	}
}
3592
3593 static int
feth_output(ifnet_t ifp,struct mbuf * m)3594 feth_output(ifnet_t ifp, struct mbuf * m)
3595 {
3596 struct mbuf * copy_m;
3597 if_fake_ref fakeif;
3598 iff_flags_t flags;
3599 bool fcs;
3600 size_t trailer_len;
3601 ifnet_t peer = NULL;
3602
3603 if (m == NULL) {
3604 return 0;
3605 }
3606 copy_m = copy_mbuf(m);
3607 m_freem(m);
3608 m = NULL;
3609 if (copy_m == NULL) {
3610 /* count this as an output error */
3611 ifnet_stat_increment_out(ifp, 0, 0, 1);
3612 return 0;
3613 }
3614 feth_lock();
3615 fakeif = ifnet_get_if_fake(ifp);
3616 if (fakeif != NULL) {
3617 peer = fakeif->iff_peer;
3618 flags = fakeif->iff_flags;
3619 fcs = fakeif->iff_fcs;
3620 trailer_len = fakeif->iff_trailer_length;
3621 }
3622 feth_unlock();
3623 if (peer == NULL) {
3624 m_freem(copy_m);
3625 ifnet_stat_increment_out(ifp, 0, 0, 1);
3626 return 0;
3627 }
3628 feth_output_common(ifp, copy_m, peer, flags, fcs, feth_trailer,
3629 trailer_len);
3630 return 0;
3631 }
3632
/*
 * feth_config
 * - connects ifp to peer (peer != NULL) or disconnects it from its
 *   current peer (peer == NULL)
 * - both softcs are updated atomically under the feth lock; link
 *   on/off kernel events are posted after the lock is dropped
 * - returns 0, EINVAL (bad/missing softc, pool-mode mismatch), or
 *   EBUSY (either side detaching or already connected)
 */
static int
feth_config(ifnet_t ifp, ifnet_t peer)
{
	int connected = FALSE;
	int disconnected = FALSE;
	int error = 0;
	if_fake_ref fakeif = NULL;

	feth_lock();
	fakeif = ifnet_get_if_fake(ifp);
	if (fakeif == NULL) {
		error = EINVAL;
		goto done;
	}
	if (peer != NULL) {
		/* connect to peer */
		if_fake_ref peer_fakeif;

		peer_fakeif = ifnet_get_if_fake(peer);
		if (peer_fakeif == NULL) {
			error = EINVAL;
			goto done;
		}
		/* both sides must be stable and unattached */
		if (feth_is_detaching(fakeif) ||
		    feth_is_detaching(peer_fakeif) ||
		    peer_fakeif->iff_peer != NULL ||
		    fakeif->iff_peer != NULL) {
			error = EBUSY;
			goto done;
		}
#if SKYWALK
		/* packet pool modes must match to cross-over traffic */
		if (fakeif->iff_pp_mode !=
		    peer_fakeif->iff_pp_mode) {
			error = EINVAL;
			goto done;
		}
#endif /* SKYWALK */
		fakeif->iff_peer = peer;
		peer_fakeif->iff_peer = ifp;
		connected = TRUE;
	} else if (fakeif->iff_peer != NULL) {
		/* disconnect from peer */
		if_fake_ref peer_fakeif;

		peer = fakeif->iff_peer;
		peer_fakeif = ifnet_get_if_fake(peer);
		if (peer_fakeif == NULL) {
			/* should not happen */
			error = EINVAL;
			goto done;
		}
		fakeif->iff_peer = NULL;
		peer_fakeif->iff_peer = NULL;
		disconnected = TRUE;
	}

done:
	feth_unlock();

	/* generate link status event if we connect or disconnect */
	if (connected) {
		interface_link_event(ifp, KEV_DL_LINK_ON);
		interface_link_event(peer, KEV_DL_LINK_ON);
	} else if (disconnected) {
		interface_link_event(ifp, KEV_DL_LINK_OFF);
		interface_link_event(peer, KEV_DL_LINK_OFF);
	}
	return error;
}
3702
3703 static int
feth_set_media(ifnet_t ifp,struct if_fake_request * iffr)3704 feth_set_media(ifnet_t ifp, struct if_fake_request * iffr)
3705 {
3706 if_fake_ref fakeif;
3707 int error;
3708
3709 if (iffr->iffr_media.iffm_count > IF_FAKE_MEDIA_LIST_MAX) {
3710 /* list is too long */
3711 return EINVAL;
3712 }
3713 feth_lock();
3714 fakeif = ifnet_get_if_fake(ifp);
3715 if (fakeif == NULL) {
3716 error = EINVAL;
3717 goto done;
3718 }
3719 fakeif->iff_media_count = iffr->iffr_media.iffm_count;
3720 bcopy(iffr->iffr_media.iffm_list, fakeif->iff_media_list,
3721 iffr->iffr_media.iffm_count * sizeof(fakeif->iff_media_list[0]));
3722 #if 0
3723 /* XXX: "auto-negotiate" active with peer? */
3724 /* generate link status event? */
3725 fakeif->iff_media_current = iffr->iffr_media.iffm_current;
3726 #endif
3727 error = 0;
3728 done:
3729 feth_unlock();
3730 return error;
3731 }
3732
3733 static int
if_fake_request_copyin(user_addr_t user_addr,struct if_fake_request * iffr,u_int32_t len)3734 if_fake_request_copyin(user_addr_t user_addr,
3735 struct if_fake_request *iffr, u_int32_t len)
3736 {
3737 int error;
3738
3739 if (user_addr == USER_ADDR_NULL || len < sizeof(*iffr)) {
3740 error = EINVAL;
3741 goto done;
3742 }
3743 error = copyin(user_addr, iffr, sizeof(*iffr));
3744 if (error != 0) {
3745 goto done;
3746 }
3747 if (iffr->iffr_reserved[0] != 0 || iffr->iffr_reserved[1] != 0 ||
3748 iffr->iffr_reserved[2] != 0 || iffr->iffr_reserved[3] != 0) {
3749 error = EINVAL;
3750 goto done;
3751 }
3752 done:
3753 return error;
3754 }
3755
/*
 * feth_set_drvspec
 * - handles SIOCSDRVSPEC sub-commands for feth: set/clear peer,
 *   set media list, enable/disable dequeue stall
 * - user_addr points at a struct if_fake_request of length len
 */
static int
feth_set_drvspec(ifnet_t ifp, uint32_t cmd, u_int32_t len,
    user_addr_t user_addr)
{
	int error;
	struct if_fake_request iffr;
	ifnet_t peer;

	switch (cmd) {
	case IF_FAKE_S_CMD_SET_PEER:
		error = if_fake_request_copyin(user_addr, &iffr, len);
		if (error != 0) {
			break;
		}
		if (iffr.iffr_peer_name[0] == '\0') {
			/* empty name means disconnect */
			error = feth_config(ifp, NULL);
			break;
		}

		/* ensure nul termination */
		iffr.iffr_peer_name[IFNAMSIZ - 1] = '\0';
		peer = ifunit(iffr.iffr_peer_name);
		if (peer == NULL) {
			error = ENXIO;
			break;
		}
		/* the peer must itself be a feth ethernet interface */
		if (ifnet_type(peer) != IFT_ETHER) {
			error = EINVAL;
			break;
		}
		if (strcmp(ifnet_name(peer), FAKE_ETHER_NAME) != 0) {
			error = EINVAL;
			break;
		}
		error = feth_config(ifp, peer);
		break;
	case IF_FAKE_S_CMD_SET_MEDIA:
		error = if_fake_request_copyin(user_addr, &iffr, len);
		if (error != 0) {
			break;
		}
		error = feth_set_media(ifp, &iffr);
		break;
	case IF_FAKE_S_CMD_SET_DEQUEUE_STALL:
		error = if_fake_request_copyin(user_addr, &iffr, len);
		if (error != 0) {
			break;
		}
		error = feth_enable_dequeue_stall(ifp,
		    iffr.iffr_dequeue_stall);
		break;
	default:
		error = EOPNOTSUPP;
		break;
	}
	return error;
}
3813
/*
 * feth_get_drvspec
 * - handles SIOCGDRVSPEC sub-commands for feth; currently only
 *   IF_FAKE_G_CMD_GET_PEER, which copies out the peer's name (empty
 *   when unconnected)
 */
static int
feth_get_drvspec(ifnet_t ifp, u_int32_t cmd, u_int32_t len,
    user_addr_t user_addr)
{
	int error = EOPNOTSUPP;
	if_fake_ref fakeif;
	struct if_fake_request iffr;
	ifnet_t peer;

	switch (cmd) {
	case IF_FAKE_G_CMD_GET_PEER:
		if (len < sizeof(iffr)) {
			error = EINVAL;
			break;
		}
		feth_lock();
		fakeif = ifnet_get_if_fake(ifp);
		if (fakeif == NULL) {
			feth_unlock();
			error = EOPNOTSUPP;
			break;
		}
		/* snapshot the peer pointer, then copy out unlocked */
		peer = fakeif->iff_peer;
		feth_unlock();
		bzero(&iffr, sizeof(iffr));
		if (peer != NULL) {
			strlcpy(iffr.iffr_peer_name,
			    if_name(peer),
			    sizeof(iffr.iffr_peer_name));
		}
		error = copyout(&iffr, user_addr, sizeof(iffr));
		break;
	default:
		break;
	}
	return error;
}
3851
/*
 * union ifdrvu
 * - overlays the 32-bit and 64-bit ifdrv request layouts so the
 *   SIOC[SG]DRVSPEC handling in feth_ioctl can pick the right view
 *   of the user-supplied structure
 */
union ifdrvu {
	struct ifdrv32 *ifdrvu_32;
	struct ifdrv64 *ifdrvu_64;
	void *ifdrvu_p;
};
3857
/*
 * feth_ioctl
 * - ifnet ioctl handler for feth interfaces
 * - supports address/MTU/flags/capabilities ioctls, media queries,
 *   multicast no-ops, and the driver-specific SIOC[SG]DRVSPEC
 *   commands (dispatched to feth_set_drvspec / feth_get_drvspec)
 */
static int
feth_ioctl(ifnet_t ifp, u_long cmd, void * data)
{
	unsigned int count;
	struct ifdevmtu * devmtu_p;
	union ifdrvu drv;
	uint32_t drv_cmd;
	uint32_t drv_len;
	boolean_t drv_set_command = FALSE;
	int error = 0;
	struct ifmediareq * ifmr;
	struct ifreq * ifr;
	if_fake_ref fakeif;
	int status;
	user_addr_t user_addr;

	ifr = (struct ifreq *)data;
	switch (cmd) {
	case SIOCSIFADDR:
		ifnet_set_flags(ifp, IFF_UP, IFF_UP);
		break;

	case SIOCGIFMEDIA32:
	case SIOCGIFMEDIA64:
		feth_lock();
		fakeif = ifnet_get_if_fake(ifp);
		if (fakeif == NULL) {
			feth_unlock();
			return EOPNOTSUPP;
		}
		/* link is "active" only while connected to a peer */
		status = (fakeif->iff_peer != NULL)
		    ? (IFM_AVALID | IFM_ACTIVE) : IFM_AVALID;
		ifmr = (struct ifmediareq *)data;
		user_addr = (cmd == SIOCGIFMEDIA64) ?
		    ((struct ifmediareq64 *)ifmr)->ifmu_ulist :
		    CAST_USER_ADDR_T(((struct ifmediareq32 *)ifmr)->ifmu_ulist);
		count = ifmr->ifm_count;
		ifmr->ifm_active = IFM_ETHER;
		ifmr->ifm_current = IFM_ETHER;
		ifmr->ifm_mask = 0;
		ifmr->ifm_status = status;
		if (user_addr == USER_ADDR_NULL) {
			/* caller only wants the media count */
			ifmr->ifm_count = fakeif->iff_media_count;
		} else if (count > 0) {
			if (count > fakeif->iff_media_count) {
				count = fakeif->iff_media_count;
			}
			ifmr->ifm_count = count;
			/* NOTE(review): copyout while holding the feth
			 * lock — presumably acceptable here; confirm the
			 * lock may be held across a fault */
			error = copyout(&fakeif->iff_media_list, user_addr,
			    count * sizeof(int));
		}
		feth_unlock();
		break;

	case SIOCGIFDEVMTU:
		devmtu_p = &ifr->ifr_devmtu;
		devmtu_p->ifdm_current = ifnet_mtu(ifp);
		devmtu_p->ifdm_max = feth_max_mtu(ifp);
		devmtu_p->ifdm_min = IF_MINMTU;
		break;

	case SIOCSIFMTU:
		if ((unsigned int)ifr->ifr_mtu > feth_max_mtu(ifp) ||
		    ifr->ifr_mtu < IF_MINMTU) {
			error = EINVAL;
		} else {
			error = ifnet_set_mtu(ifp, ifr->ifr_mtu);
		}
		break;

	case SIOCSDRVSPEC32:
	case SIOCSDRVSPEC64:
		/* set commands require superuser */
		error = proc_suser(current_proc());
		if (error != 0) {
			break;
		}
		drv_set_command = TRUE;
		OS_FALLTHROUGH;
	case SIOCGDRVSPEC32:
	case SIOCGDRVSPEC64:
		/* unpack the 32-bit or 64-bit ifdrv layout */
		drv.ifdrvu_p = data;
		if (cmd == SIOCGDRVSPEC32 || cmd == SIOCSDRVSPEC32) {
			drv_cmd = drv.ifdrvu_32->ifd_cmd;
			drv_len = drv.ifdrvu_32->ifd_len;
			user_addr = CAST_USER_ADDR_T(drv.ifdrvu_32->ifd_data);
		} else {
			drv_cmd = drv.ifdrvu_64->ifd_cmd;
			drv_len = drv.ifdrvu_64->ifd_len;
			user_addr = drv.ifdrvu_64->ifd_data;
		}
		if (drv_set_command) {
			error = feth_set_drvspec(ifp, drv_cmd, drv_len,
			    user_addr);
		} else {
			error = feth_get_drvspec(ifp, drv_cmd, drv_len,
			    user_addr);
		}
		break;

	case SIOCSIFLLADDR:
		error = ifnet_set_lladdr(ifp, ifr->ifr_addr.sa_data,
		    ifr->ifr_addr.sa_len);
		break;

	case SIOCSIFFLAGS:
		if ((ifp->if_flags & IFF_UP) != 0) {
			/* marked up, set running if not already set */
			if ((ifp->if_flags & IFF_RUNNING) == 0) {
				/* set running */
				error = ifnet_set_flags(ifp, IFF_RUNNING,
				    IFF_RUNNING);
			}
		} else if ((ifp->if_flags & IFF_RUNNING) != 0) {
			/* marked down, clear running */
			error = ifnet_set_flags(ifp, 0, IFF_RUNNING);
		}
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		/* multicast filtering is a no-op on feth */
		error = 0;
		break;
	case SIOCSIFCAP: {
		uint32_t cap;

		feth_lock();
		fakeif = ifnet_get_if_fake(ifp);
		/* only interfaces created with LRO support honor this */
		if (fakeif == NULL ||
		    (fakeif->iff_flags & IFF_FLAGS_LRO) == 0) {
			feth_unlock();
			return EOPNOTSUPP;
		}
		feth_unlock();
		cap = (ifr->ifr_reqcap & IFCAP_LRO) != 0 ? IFCAP_LRO : 0;
		error = ifnet_set_capabilities_enabled(ifp, cap, IFCAP_LRO);
		break;
	}
	default:
		error = EOPNOTSUPP;
		break;
	}
	return error;
}
4001
4002 static void
feth_if_free(ifnet_t ifp)4003 feth_if_free(ifnet_t ifp)
4004 {
4005 if_fake_ref fakeif;
4006
4007 if (ifp == NULL) {
4008 return;
4009 }
4010 feth_lock();
4011 fakeif = ifnet_get_if_fake(ifp);
4012 if (fakeif == NULL) {
4013 feth_unlock();
4014 return;
4015 }
4016 ifp->if_softc = NULL;
4017 #if SKYWALK
4018 VERIFY(fakeif->iff_doorbell_tcall == NULL);
4019 #endif /* SKYWALK */
4020 feth_unlock();
4021 feth_release(fakeif);
4022 ifnet_release(ifp);
4023 return;
4024 }
4025
4026 __private_extern__ void
if_fake_init(void)4027 if_fake_init(void)
4028 {
4029 int error;
4030
4031 #if SKYWALK
4032 (void)feth_register_nexus_domain_provider();
4033 #endif /* SKYWALK */
4034 error = if_clone_attach(&feth_cloner);
4035 if (error != 0) {
4036 return;
4037 }
4038 return;
4039 }
4040