1 /*
2 * Copyright (c) 2015-2025 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 /*
30 * if_fake.c
31 * - fake network interface used for testing
32 * - "feth" (e.g. "feth0", "feth1") is a virtual ethernet interface that allows
33 * two instances to have their output/input paths "crossed-over" so that
34 * output on one is input on the other
35 */
36
37 /*
38 * Modification History:
39 *
40 * September 9, 2015 Dieter Siegmund ([email protected])
41 * - created
42 */
43
44 #include <sys/param.h>
45 #include <sys/kernel.h>
46 #include <sys/malloc.h>
47 #include <sys/mbuf.h>
48 #include <sys/queue.h>
49 #include <sys/socket.h>
50 #include <sys/sockio.h>
51 #include <sys/sysctl.h>
52 #include <sys/systm.h>
53 #include <sys/kern_event.h>
54 #include <sys/mcache.h>
55 #include <sys/syslog.h>
56
57 #include <net/bpf.h>
58 #include <net/ethernet.h>
59 #include <net/if.h>
60 #include <net/if_vlan_var.h>
61 #include <net/if_fake_var.h>
62 #include <net/if_arp.h>
63 #include <net/if_dl.h>
64 #include <net/if_ether.h>
65 #include <net/if_types.h>
66 #include <libkern/OSAtomic.h>
67
68 #include <net/dlil.h>
69
70 #include <net/kpi_interface.h>
71 #include <net/kpi_protocol.h>
72
73 #include <kern/locks.h>
74 #include <kern/zalloc.h>
75
76 #include <mach/mach_time.h>
77
78 #include <os/log.h>
79
80 #ifdef INET
81 #include <netinet/in.h>
82 #include <netinet/if_ether.h>
83 #endif
84
85 #include <net/if_media.h>
86 #include <net/ether_if_module.h>
87 #if SKYWALK
88 #include <skywalk/os_skywalk_private.h>
89 #include <skywalk/nexus/netif/nx_netif.h>
90 #include <skywalk/channel/channel_var.h>
91 #endif /* SKYWALK */
92
93 /*
94 * if_fake_debug, FE_DBGF_*
95 * - 'if_fake_debug' is a bitmask of FE_DBGF_* flags that can be set
96 * to enable additional logs for the corresponding fake function
97 * - "sysctl net.link.fake.debug" controls the value of
98 * 'if_fake_debug'
99 */
100 static uint32_t if_fake_debug = 0;
101
102 #define FE_DBGF_LIFECYCLE 0x0001
103 #define FE_DBGF_INPUT 0x0002
104 #define FE_DBGF_OUTPUT 0x0004
105 #define FE_DBGF_CONTROL 0x0008
106 #define FE_DBGF_MISC 0x0010
107
108 /*
109 * if_fake_log_level
110 * - 'if_fake_log_level' ensures that by default important logs are
111 * logged regardless of if_fake_debug by comparing the log level
112 * in FAKE_LOG to if_fake_log_level
113 * - use "sysctl net.link.fake.log_level" controls the value of
114 * 'if_fake_log_level'
115 * - the default value of 'if_fake_log_level' is LOG_NOTICE; important
116 * logs must use LOG_NOTICE to ensure they appear by default
117 */
118 #define FAKE_DBGF_ENABLED(__flag) ((if_fake_debug & __flag) != 0)
119
120 /*
121 * FAKE_LOG
122 * - macro to generate the specified log conditionally based on
123 * the specified log level and debug flags
124 */
125 #define FAKE_LOG(__level, __dbgf, __string, ...) \
126 do { \
127 if (__level <= if_fake_log_level || \
128 FAKE_DBGF_ENABLED(__dbgf)) { \
129 os_log(OS_LOG_DEFAULT, "%s: " __string, \
130 __func__, ## __VA_ARGS__); \
131 } \
132 } while (0)
133
134 static boolean_t
is_power_of_two(unsigned int val)135 is_power_of_two(unsigned int val)
136 {
137 return (val & (val - 1)) == 0;
138 }
139
140 #define FAKE_ETHER_NAME "feth"
141
142 SYSCTL_DECL(_net_link);
143 SYSCTL_NODE(_net_link, OID_AUTO, fake, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
144 "Fake interface");
145
146 static int if_fake_txstart = 1;
147 SYSCTL_INT(_net_link_fake, OID_AUTO, txstart, CTLFLAG_RW | CTLFLAG_LOCKED,
148 &if_fake_txstart, 0, "Fake interface TXSTART mode");
149
150 static int if_fake_hwcsum = 0;
151 SYSCTL_INT(_net_link_fake, OID_AUTO, hwcsum, CTLFLAG_RW | CTLFLAG_LOCKED,
152 &if_fake_hwcsum, 0, "Fake interface simulate hardware checksum");
153
154 static int if_fake_vlan_tagging = 1;
155 SYSCTL_INT(_net_link_fake, OID_AUTO, vlan_tagging, CTLFLAG_RW | CTLFLAG_LOCKED,
156 &if_fake_vlan_tagging, 0, "Fake interface VLAN tagging");
157
158 static int if_fake_nxattach = 0;
159 SYSCTL_INT(_net_link_fake, OID_AUTO, nxattach, CTLFLAG_RW | CTLFLAG_LOCKED,
160 &if_fake_nxattach, 0, "Fake interface auto-attach nexus");
161
162 static int if_fake_bsd_mode = 1;
163 SYSCTL_INT(_net_link_fake, OID_AUTO, bsd_mode, CTLFLAG_RW | CTLFLAG_LOCKED,
164 &if_fake_bsd_mode, 0, "Fake interface attach as BSD interface");
165
166 static int if_fake_log_level = LOG_NOTICE;
167 SYSCTL_INT(_net_link_fake, OID_AUTO, log_level, CTLFLAG_RW | CTLFLAG_LOCKED,
168 &if_fake_log_level, 0, "Fake interface log level");
169
170 SYSCTL_INT(_net_link_fake, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
171 &if_fake_debug, 0, "Fake interface debug flags");
172
173 static int if_fake_wmm_mode = 0;
174 SYSCTL_INT(_net_link_fake, OID_AUTO, wmm_mode, CTLFLAG_RW | CTLFLAG_LOCKED,
175 &if_fake_wmm_mode, 0, "Fake interface in 802.11 WMM mode");
176
177 static int if_fake_multibuflet = 0;
178 SYSCTL_INT(_net_link_fake, OID_AUTO, multibuflet, CTLFLAG_RW | CTLFLAG_LOCKED,
179 &if_fake_multibuflet, 0, "Fake interface using multi-buflet packets");
180
181 static int if_fake_low_latency = 0;
182 SYSCTL_INT(_net_link_fake, OID_AUTO, low_latency, CTLFLAG_RW | CTLFLAG_LOCKED,
183 &if_fake_low_latency, 0, "Fake interface with a low latency qset");
184
185 static int if_fake_tso_support = 0;
186 SYSCTL_INT(_net_link_fake, OID_AUTO, tso_support, CTLFLAG_RW | CTLFLAG_LOCKED,
187 &if_fake_tso_support, 0, "Fake interface with support for TSO offload");
188
189 #define DEFAULT_EXPIRATION_THRESHOLD 500 /* usec */
190 static int if_fake_expiration_threshold_us = DEFAULT_EXPIRATION_THRESHOLD;
191 SYSCTL_INT(_net_link_fake, OID_AUTO, expiration_threshold, CTLFLAG_RW | CTLFLAG_LOCKED,
192 &if_fake_expiration_threshold_us, DEFAULT_EXPIRATION_THRESHOLD,
193 "Expiration threshold (usec) for expiration testing");
194
195 static int if_fake_lro = 0;
196 SYSCTL_INT(_net_link_fake, OID_AUTO, lro, CTLFLAG_RW | CTLFLAG_LOCKED,
197 &if_fake_lro, 0, "Fake interface report LRO capability");
198
199 static int if_fake_separate_frame_header = 0;
200 SYSCTL_INT(_net_link_fake, OID_AUTO, separate_frame_header,
201 CTLFLAG_RW | CTLFLAG_LOCKED,
202 &if_fake_separate_frame_header, 0, "Put frame header in separate mbuf");
203
204 static int if_fake_fail_ioctl = 0;
205 SYSCTL_INT(_net_link_fake, OID_AUTO, fail_ioctl, CTLFLAG_RW | CTLFLAG_LOCKED,
206 &if_fake_fail_ioctl, 0, "Fake interface fail ioctl");
207
208 typedef enum {
209 IFF_PP_MODE_GLOBAL = 0, /* share a global pool */
210 IFF_PP_MODE_PRIVATE = 1, /* creates its own rx/tx pool */
211 IFF_PP_MODE_PRIVATE_SPLIT = 2, /* creates its own split rx & tx pool */
212 } iff_pktpool_mode_t;
213 static iff_pktpool_mode_t if_fake_pktpool_mode = IFF_PP_MODE_GLOBAL;
214 SYSCTL_INT(_net_link_fake, OID_AUTO, pktpool_mode, CTLFLAG_RW | CTLFLAG_LOCKED,
215 &if_fake_pktpool_mode, IFF_PP_MODE_GLOBAL,
216 "Fake interface packet pool mode (0 global, 1 private, 2 private split");
217
218 static int if_fake_rx_flow_steering_support = 0;
219 SYSCTL_INT(_net_link_fake, OID_AUTO, rx_flow_steering_support, CTLFLAG_RW | CTLFLAG_LOCKED,
220 &if_fake_rx_flow_steering_support, 0, "Fake interface with support for Rx flow steering");
221
222 #define FETH_LINK_LAYER_AGGRETATION_FACTOR_MAX 512
223 #define FETH_LINK_LAYER_AGGRETATION_FACTOR_DEF 96
224 static int if_fake_link_layer_aggregation_factor =
225 FETH_LINK_LAYER_AGGRETATION_FACTOR_DEF;
226 static int
227 feth_link_layer_aggregation_factor_sysctl SYSCTL_HANDLER_ARGS
228 {
229 #pragma unused(oidp, arg1, arg2)
230 unsigned int new_value;
231 int changed;
232 int error;
233
234 error = sysctl_io_number(req, if_fake_link_layer_aggregation_factor,
235 sizeof(if_fake_link_layer_aggregation_factor), &new_value,
236 &changed);
237 if (error == 0 && changed != 0) {
238 if (new_value <= 0 ||
239 new_value > FETH_LINK_LAYER_AGGRETATION_FACTOR_MAX) {
240 return EINVAL;
241 }
242 if_fake_link_layer_aggregation_factor = new_value;
243 }
244 return error;
245 }
246
247 SYSCTL_PROC(_net_link_fake, OID_AUTO, link_layer_aggregation_factor,
248 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
249 0, 0, feth_link_layer_aggregation_factor_sysctl, "IU",
250 "Fake interface link layer aggregation factor");
251
252 #define FETH_TX_HEADROOM_MAX 32
253 static unsigned int if_fake_tx_headroom = FETH_TX_HEADROOM_MAX;
254 static int
255 feth_tx_headroom_sysctl SYSCTL_HANDLER_ARGS
256 {
257 #pragma unused(oidp, arg1, arg2)
258 unsigned int new_value;
259 int changed;
260 int error;
261
262 error = sysctl_io_number(req, if_fake_tx_headroom,
263 sizeof(if_fake_tx_headroom), &new_value, &changed);
264 if (error == 0 && changed != 0) {
265 if (new_value > FETH_TX_HEADROOM_MAX ||
266 (new_value % 8) != 0) {
267 return EINVAL;
268 }
269 if_fake_tx_headroom = new_value;
270 }
271 return 0;
272 }
273
274 SYSCTL_PROC(_net_link_fake, OID_AUTO, tx_headroom,
275 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
276 0, 0, feth_tx_headroom_sysctl, "IU", "Fake ethernet Tx headroom");
277
278 static int if_fake_fcs = 0;
279 SYSCTL_INT(_net_link_fake, OID_AUTO, fcs, CTLFLAG_RW | CTLFLAG_LOCKED,
280 &if_fake_fcs, 0, "Fake interface using frame check sequence");
281
282 #define FETH_TRAILER_LENGTH_MAX 28
283 char feth_trailer[FETH_TRAILER_LENGTH_MAX + 1] = "trailertrailertrailertrailer";
284 static unsigned int if_fake_trailer_length = 0;
285 static int
286 feth_trailer_length_sysctl SYSCTL_HANDLER_ARGS
287 {
288 #pragma unused(oidp, arg1, arg2)
289 unsigned int new_value;
290 int changed;
291 int error;
292
293 error = sysctl_io_number(req, if_fake_trailer_length,
294 sizeof(if_fake_trailer_length), &new_value, &changed);
295 if (error == 0 && changed != 0) {
296 if (new_value > FETH_TRAILER_LENGTH_MAX) {
297 return EINVAL;
298 }
299 if_fake_trailer_length = new_value;
300 }
301 return 0;
302 }
303
304 SYSCTL_PROC(_net_link_fake, OID_AUTO, trailer_length,
305 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
306 feth_trailer_length_sysctl, "IU", "Fake interface frame trailer length");
307
308 /* sysctl net.link.fake.max_mtu */
309 #define FETH_MAX_MTU_DEFAULT 2048
310 #define FETH_MAX_MTU_MAX ((16 * 1024) - ETHER_HDR_LEN)
311
312 static unsigned int if_fake_max_mtu = FETH_MAX_MTU_DEFAULT;
313
314 /* sysctl net.link.fake.buflet_size */
315 #define FETH_BUFLET_SIZE_MIN 512
316 #define FETH_BUFLET_SIZE_MAX (32 * 1024)
317 #define FETH_TSO_BUFLET_SIZE (16 * 1024)
318
319 static unsigned int if_fake_buflet_size = FETH_BUFLET_SIZE_MIN;
320 static unsigned int if_fake_tso_buffer_size = FETH_TSO_BUFLET_SIZE;
321
322 static int
323 feth_tso_buffer_size_sysctl SYSCTL_HANDLER_ARGS
324 {
325 #pragma unused(oidp, arg1, arg2)
326 unsigned int new_value;
327 int changed;
328 int error;
329
330 error = sysctl_io_number(req, if_fake_tso_buffer_size,
331 sizeof(if_fake_tso_buffer_size), &new_value, &changed);
332 if (error == 0 && changed != 0) {
333 /* must be a power of 2 between min and max */
334 if (new_value > FETH_BUFLET_SIZE_MAX ||
335 new_value < FETH_BUFLET_SIZE_MIN ||
336 !is_power_of_two(new_value)) {
337 return EINVAL;
338 }
339 if_fake_tso_buffer_size = new_value;
340 }
341 return 0;
342 }
343
344 SYSCTL_PROC(_net_link_fake, OID_AUTO, tso_buf_size,
345 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
346 0, 0, feth_tso_buffer_size_sysctl, "IU", "Fake interface TSO buffer size");
347
348 static int
349 feth_max_mtu_sysctl SYSCTL_HANDLER_ARGS
350 {
351 #pragma unused(oidp, arg1, arg2)
352 unsigned int new_value;
353 int changed;
354 int error;
355
356 error = sysctl_io_number(req, if_fake_max_mtu,
357 sizeof(if_fake_max_mtu), &new_value, &changed);
358 if (error == 0 && changed != 0) {
359 if (new_value > FETH_MAX_MTU_MAX ||
360 new_value < ETHERMTU ||
361 new_value <= if_fake_buflet_size) {
362 return EINVAL;
363 }
364 if_fake_max_mtu = new_value;
365 }
366 return 0;
367 }
368
369 SYSCTL_PROC(_net_link_fake, OID_AUTO, max_mtu,
370 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
371 0, 0, feth_max_mtu_sysctl, "IU", "Fake interface maximum MTU");
372
373 static int
374 feth_buflet_size_sysctl SYSCTL_HANDLER_ARGS
375 {
376 #pragma unused(oidp, arg1, arg2)
377 unsigned int new_value;
378 int changed;
379 int error;
380
381 error = sysctl_io_number(req, if_fake_buflet_size,
382 sizeof(if_fake_buflet_size), &new_value, &changed);
383 if (error == 0 && changed != 0) {
384 /* must be a power of 2 between min and max */
385 if (new_value > FETH_BUFLET_SIZE_MAX ||
386 new_value < FETH_BUFLET_SIZE_MIN ||
387 !is_power_of_two(new_value) ||
388 new_value >= if_fake_max_mtu) {
389 return EINVAL;
390 }
391 if_fake_buflet_size = new_value;
392 }
393 return 0;
394 }
395
396 SYSCTL_PROC(_net_link_fake, OID_AUTO, buflet_size,
397 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
398 0, 0, feth_buflet_size_sysctl, "IU", "Fake interface buflet size");
399
400 static unsigned int if_fake_user_access = 0;
401
402 static int
403 feth_user_access_sysctl SYSCTL_HANDLER_ARGS
404 {
405 #pragma unused(oidp, arg1, arg2)
406 unsigned int new_value;
407 int changed;
408 int error;
409
410 error = sysctl_io_number(req, if_fake_user_access,
411 sizeof(if_fake_user_access), &new_value, &changed);
412 if (error == 0 && changed != 0) {
413 if (new_value != 0) {
414 if (new_value != 1) {
415 return EINVAL;
416 }
417 }
418 if_fake_user_access = new_value;
419 }
420 return 0;
421 }
422
423 SYSCTL_PROC(_net_link_fake, OID_AUTO, user_access,
424 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
425 0, 0, feth_user_access_sysctl, "IU", "Fake interface user access");
426
427 /* sysctl net.link.fake.if_adv_intvl (unit: millisecond) */
428 #define FETH_IF_ADV_INTVL_MIN 10
429 #define FETH_IF_ADV_INTVL_MAX INT_MAX
430
431 static int if_fake_if_adv_interval = 0; /* no interface advisory */
432 static int
433 feth_if_adv_interval_sysctl SYSCTL_HANDLER_ARGS
434 {
435 #pragma unused(oidp, arg1, arg2)
436 unsigned int new_value;
437 int changed;
438 int error;
439
440 error = sysctl_io_number(req, if_fake_if_adv_interval,
441 sizeof(if_fake_if_adv_interval), &new_value, &changed);
442 if (error == 0 && changed != 0) {
443 if ((new_value != 0) && (new_value > FETH_IF_ADV_INTVL_MAX ||
444 new_value < FETH_IF_ADV_INTVL_MIN)) {
445 return EINVAL;
446 }
447 if_fake_if_adv_interval = new_value;
448 }
449 return 0;
450 }
451
452 SYSCTL_PROC(_net_link_fake, OID_AUTO, if_adv_intvl,
453 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
454 feth_if_adv_interval_sysctl, "IU",
455 "Fake interface will generate interface advisories reports at the specified interval in ms");
456
457 /* sysctl net.link.fake.tx_drops */
458 /*
459 * Fake ethernet will drop packet on the transmit path at the specified
460 * rate, i.e drop one in every if_fake_tx_drops number of packets.
461 */
462 #define FETH_TX_DROPS_MIN 0
463 #define FETH_TX_DROPS_MAX INT_MAX
464 static int if_fake_tx_drops = 0; /* no packets are dropped */
465 static int
466 feth_fake_tx_drops_sysctl SYSCTL_HANDLER_ARGS
467 {
468 #pragma unused(oidp, arg1, arg2)
469 unsigned int new_value;
470 int changed;
471 int error;
472
473 error = sysctl_io_number(req, if_fake_tx_drops,
474 sizeof(if_fake_tx_drops), &new_value, &changed);
475 if (error == 0 && changed != 0) {
476 if (new_value > FETH_TX_DROPS_MAX ||
477 new_value < FETH_TX_DROPS_MIN) {
478 return EINVAL;
479 }
480 if_fake_tx_drops = new_value;
481 }
482 return 0;
483 }
484
485 SYSCTL_PROC(_net_link_fake, OID_AUTO, tx_drops,
486 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
487 feth_fake_tx_drops_sysctl, "IU",
488 "Fake interface will intermittently drop packets on Tx path");
489
490 /* sysctl.net.link.fake.tx_exp_policy */
491
492 typedef enum {
493 IFF_TX_EXP_POLICY_DISABLED = 0, /* Expiry notification disabled */
494 IFF_TX_EXP_POLICY_DROP_AND_NOTIFY = 1, /* Expiry notification enabled; drop + notify mode */
495 IFF_TX_EXP_POLICY_NOTIFY_ONLY = 2, /* Expiry notification enabled; notify only mode */
496 IFF_TX_EXP_POLICY_METADATA = 3, /* Expiry notification enabled; use packet metadata */
497 } iff_tx_exp_policy_t;
498 static iff_tx_exp_policy_t if_fake_tx_exp_policy = IFF_TX_EXP_POLICY_DISABLED;
499
500 static int
501 feth_fake_tx_exp_policy_sysctl SYSCTL_HANDLER_ARGS
502 {
503 #pragma unused(oidp, arg1, arg2)
504 unsigned int new_value;
505 int changed;
506 int error;
507
508 error = sysctl_io_number(req, if_fake_tx_exp_policy,
509 sizeof(if_fake_tx_exp_policy), &new_value, &changed);
510 FAKE_LOG(LOG_DEBUG, FE_DBGF_CONTROL,
511 "if_fake_tx_exp_policy: %u -> %u (%d)",
512 if_fake_tx_exp_policy, new_value, changed);
513 if (error == 0 && changed != 0) {
514 if (new_value > IFF_TX_EXP_POLICY_METADATA ||
515 new_value < IFF_TX_EXP_POLICY_DISABLED) {
516 return EINVAL;
517 }
518 if_fake_tx_exp_policy = new_value;
519 }
520 return 0;
521 }
522 SYSCTL_PROC(_net_link_fake, OID_AUTO, tx_exp_policy,
523 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
524 feth_fake_tx_exp_policy_sysctl, "IU",
525 "Fake interface handling policy for expired TX attempts "
526 "(0 disabled, 1 drop and notify, 2 notify only, 3 packet metadata)");
527
528 /* sysctl net.link.fake.tx_completion_mode */
529 typedef enum {
530 IFF_TX_COMPL_MODE_SYNC = 0,
531 IFF_TX_COMPL_MODE_ASYNC = 1,
532 } iff_tx_completion_mode_t;
533 static iff_tx_completion_mode_t if_tx_completion_mode = IFF_TX_COMPL_MODE_SYNC;
534 static int
535 feth_fake_tx_completion_mode_sysctl SYSCTL_HANDLER_ARGS
536 {
537 #pragma unused(oidp, arg1, arg2)
538 unsigned int new_value;
539 int changed;
540 int error;
541
542 error = sysctl_io_number(req, if_tx_completion_mode,
543 sizeof(if_tx_completion_mode), &new_value, &changed);
544 if (error == 0 && changed != 0) {
545 if (new_value > IFF_TX_COMPL_MODE_ASYNC ||
546 new_value < IFF_TX_COMPL_MODE_SYNC) {
547 return EINVAL;
548 }
549 if_tx_completion_mode = new_value;
550 }
551 return 0;
552 }
553 SYSCTL_PROC(_net_link_fake, OID_AUTO, tx_completion_mode,
554 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
555 feth_fake_tx_completion_mode_sysctl, "IU",
556 "Fake interface tx completion mode (0 synchronous, 1 asynchronous)");
557
558 /* sysctl net.link.fake.llink_cnt */
559
560 /* The maximum number of logical links (including default link) */
561 #define FETH_MAX_LLINKS 16
562 /*
563 * The default number of logical links (including default link).
564 * Zero means logical link mode is disabled.
565 */
566 #define FETH_DEF_LLINKS 0
567
568 static uint32_t if_fake_llink_cnt = FETH_DEF_LLINKS;
569 static int
570 feth_fake_llink_cnt_sysctl SYSCTL_HANDLER_ARGS
571 {
572 #pragma unused(oidp, arg1, arg2)
573 unsigned int new_value;
574 int changed;
575 int error;
576
577 error = sysctl_io_number(req, if_fake_llink_cnt,
578 sizeof(if_fake_llink_cnt), &new_value, &changed);
579 if (error == 0 && changed != 0) {
580 if (new_value > FETH_MAX_LLINKS) {
581 return EINVAL;
582 }
583 if_fake_llink_cnt = new_value;
584 }
585 return 0;
586 }
587
588 SYSCTL_PROC(_net_link_fake, OID_AUTO, llink_cnt,
589 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
590 feth_fake_llink_cnt_sysctl, "IU",
591 "Fake interface logical link count");
592
593 /* sysctl net.link.fake.qset_cnt */
594
595 /* The maximum number of qsets for each logical link */
596 #define FETH_MAX_QSETS 16
597 /* The default number of qsets for each logical link */
598 #define FETH_DEF_QSETS 4
599
600 static uint32_t if_fake_qset_cnt = FETH_DEF_QSETS;
601 static int
602 feth_fake_qset_cnt_sysctl SYSCTL_HANDLER_ARGS
603 {
604 #pragma unused(oidp, arg1, arg2)
605 unsigned int new_value;
606 int changed;
607 int error;
608
609 error = sysctl_io_number(req, if_fake_qset_cnt,
610 sizeof(if_fake_qset_cnt), &new_value, &changed);
611 if (error == 0 && changed != 0) {
612 if (new_value == 0 ||
613 new_value > FETH_MAX_QSETS) {
614 return EINVAL;
615 }
616 if_fake_qset_cnt = new_value;
617 }
618 return 0;
619 }
620
621 SYSCTL_PROC(_net_link_fake, OID_AUTO, qset_cnt,
622 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
623 feth_fake_qset_cnt_sysctl, "IU",
624 "Fake interface queue set count");
625
626
/*
 * _mbuf_adjust_pkthdr_and_data
 * - advance the mbuf's data pointer by 'len' bytes and shrink both the
 *   mbuf data length and the packet header length by the same amount
 * - a negative 'len' moves the data pointer back and grows the lengths
 *   (caller must ensure the headroom/length is actually available)
 */
static void
_mbuf_adjust_pkthdr_and_data(mbuf_t m, int len)
{
	mbuf_setdata(m, mtod(m, char *) + len, mbuf_len(m) - len);
	mbuf_pkthdr_adjustlen(m, -len);
}
633
/*
 * get_bpf_header
 * - return the link-layer header to hand to BPF for mbuf 'm'
 * - if the mbuf carries no VLAN tag (CSUM_VLAN_TAG_VALID clear), the
 *   caller-supplied ethernet header 'eh_p' is used as-is
 * - otherwise, the caller-supplied scratch buffer 'evl_p' is filled in
 *   as an 802.1Q encapsulated header (dst/src from 'eh_p', the tag from
 *   m_pkthdr.vlan_tag, original ethertype as the inner proto)
 * - '*header_len' is set to the length of the returned header
 */
static inline void *__indexable
get_bpf_header(mbuf_t m, struct ether_header * eh_p,
    struct ether_vlan_header * evl_p, size_t * header_len)
{
	void * header;

	/* no VLAN tag, just use the ethernet header */
	if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
		header = (struct ether_header *__bidi_indexable)eh_p;
		*header_len = sizeof(*eh_p);
		goto done;
	}

	/* has VLAN tag, populate the ether VLAN header */
	bcopy(eh_p, evl_p,
	    offsetof(struct ether_header, ether_type)); /* dst+src ether */
	evl_p->evl_encap_proto = htons(ETHERTYPE_VLAN); /* VLAN encap */
	evl_p->evl_tag = htons(m->m_pkthdr.vlan_tag);   /* tag */
	evl_p->evl_proto = eh_p->ether_type;            /* proto */
	*header_len = sizeof(*evl_p);
	header = (struct ether_vlan_header *__bidi_indexable)evl_p;

done:
	return header;
}
659
660 typedef void (*_tap_func)(ifnet_t interface, u_int32_t dlt, mbuf_t packet,
661 void *__sized_by(header_len) header, size_t header_len);
662
663 static void
fake_bpf_tap_common(ifnet_t ifp,mbuf_t m,struct ether_header * eh_p,_tap_func func)664 fake_bpf_tap_common(ifnet_t ifp, mbuf_t m, struct ether_header * eh_p,
665 _tap_func func)
666 {
667 struct ether_vlan_header evl;
668 void * header;
669 size_t header_len;
670
671 header = get_bpf_header(m, eh_p, &evl, &header_len);
672 (*func)(ifp, DLT_EN10MB, m, header, header_len);
673 }
674
/* tap an inbound packet on 'ifp' to BPF listeners */
static inline void
fake_bpf_tap_in(ifnet_t ifp, mbuf_t m, struct ether_header * eh_p)
{
	fake_bpf_tap_common(ifp, m, eh_p, bpf_tap_in);
}
680
681
/* tap an outbound packet on 'ifp' to BPF listeners */
static inline void
fake_bpf_tap_out(ifnet_t ifp, mbuf_t m, struct ether_header * eh_p)
{
	fake_bpf_tap_common(ifp, m, eh_p, bpf_tap_out);
}
687
688 /**
689 ** virtual ethernet structures, types
690 **/
691
692 #define IFF_NUM_TX_RINGS_WMM_MODE 4
693 #define IFF_NUM_RX_RINGS_WMM_MODE 1
694 #define IFF_MAX_TX_RINGS IFF_NUM_TX_RINGS_WMM_MODE
695 #define IFF_MAX_RX_RINGS IFF_NUM_RX_RINGS_WMM_MODE
696 #define IFF_NUM_TX_QUEUES_WMM_MODE 4
697 #define IFF_NUM_RX_QUEUES_WMM_MODE 1
698 #define IFF_MAX_TX_QUEUES IFF_NUM_TX_QUEUES_WMM_MODE
699 #define IFF_MAX_RX_QUEUES IFF_NUM_RX_QUEUES_WMM_MODE
700
701 #define IFF_MAX_BATCH_SIZE 32
702
703 typedef uint16_t iff_flags_t;
704 #define IFF_FLAGS_HWCSUM 0x0001
705 #define IFF_FLAGS_BSD_MODE 0x0002
706 #define IFF_FLAGS_DETACHING 0x0004
707 #define IFF_FLAGS_WMM_MODE 0x0008
708 #define IFF_FLAGS_MULTIBUFLETS 0x0010
709 #define IFF_FLAGS_TSO_SUPPORT 0x0020
710 #define IFF_FLAGS_LRO 0x0040
711 #define IFF_FLAGS_VLAN_MTU 0x0080
712 #define IFF_FLAGS_VLAN_TAGGING 0x0100
713 #define IFF_FLAGS_SEPARATE_FRAME_HEADER 0x0200
714 #define IFF_FLAGS_NX_ATTACHED 0x0400
715 #define IFF_FLAGS_RX_FLOW_STEERING 0x0800
716
717 #if SKYWALK
718
/* provider/instance UUIDs identifying this interface's netif nexus */
typedef struct {
	uuid_t fnx_provider;
	uuid_t fnx_instance;
} fake_nx, *fake_nx_t;

/* a single netif queue */
typedef struct {
	kern_netif_queue_t fq_queue;
} fake_queue;

/* a queue set (rx + tx queues) belonging to one logical link */
typedef struct {
	kern_netif_qset_t fqs_qset; /* provided by xnu */
	fake_queue fqs_rx_queue[IFF_MAX_RX_QUEUES];
	fake_queue fqs_tx_queue[IFF_MAX_TX_QUEUES];
	uint32_t fqs_rx_queue_cnt;  /* valid entries in fqs_rx_queue */
	uint32_t fqs_tx_queue_cnt;  /* valid entries in fqs_tx_queue */
	uint32_t fqs_llink_idx;     /* index of the owning logical link */
	uint32_t fqs_idx;           /* index within the logical link */
	uint32_t fqs_dequeue_cnt;
	uint64_t fqs_id;
} fake_qset;

/* a logical link and its queue sets */
typedef struct {
	uint64_t fl_id;
	uint32_t fl_idx;
	uint32_t fl_qset_cnt;       /* valid entries in fl_qset */
	fake_qset fl_qset[FETH_MAX_QSETS];
} fake_llink, * fake_llink_t;
746
747 static kern_pbufpool_t S_pp;
748
749 #define IFF_TT_OUTPUT 0x01 /* generate trace_tag on output */
750 #define IFF_TT_INPUT 0x02 /* generate trace_tag on input */
751 static int if_fake_trace_tag_flags = 0;
752 SYSCTL_INT(_net_link_fake, OID_AUTO, trace_tag, CTLFLAG_RW | CTLFLAG_LOCKED,
753 &if_fake_trace_tag_flags, 0, "Fake interface generate trace_tag");
754 static packet_trace_tag_t if_fake_trace_tag_current = 1;
755
756 #endif /* SKYWALK */
757
/*
 * struct if_fake
 * - per-instance state for a fake ethernet ("feth") interface
 */
struct if_fake {
	char iff_name[IFNAMSIZ]; /* our unique id */
	ifnet_t iff_ifp;         /* the attached interface */
	iff_flags_t iff_flags;   /* IFF_FLAGS_* bits */
	uint32_t iff_retain_count; /* must be 0 when feth_free() runs */
	ifnet_t iff_peer;        /* the other end */
	int iff_media_current;
	int iff_media_active;
	uint32_t iff_media_count; /* valid entries in iff_media_list */
	int iff_media_list[IF_FAKE_MEDIA_LIST_MAX];
	boolean_t iff_start_busy;
	unsigned int iff_max_mtu; /* see feth_max_mtu() */
	uint32_t iff_fcs;         /* append frame check sequence */
	uint32_t iff_trailer_length; /* bytes of frame trailer to append */
#if SKYWALK
	fake_nx iff_nx;           /* netif nexus provider/instance UUIDs */
	struct netif_stats *iff_nifs;
	uint32_t iff_nifs_ref;
	uint32_t iff_llink_cnt;   /* number of logical links */
	kern_channel_ring_t iff_rx_ring[IFF_MAX_RX_RINGS];
	kern_channel_ring_t iff_tx_ring[IFF_MAX_TX_RINGS];
	fake_llink_t iff_llink __counted_by_or_null(FETH_MAX_LLINKS);
	thread_call_t iff_doorbell_tcall;
	thread_call_t iff_if_adv_tcall; /* interface advisory timer */
	boolean_t iff_doorbell_tcall_active;
	boolean_t iff_waiting_for_tcall;
	boolean_t iff_channel_connected;
	iff_pktpool_mode_t iff_pp_mode; /* global vs private packet pools */
	kern_pbufpool_t iff_rx_pp; /* == S_pp in IFF_PP_MODE_GLOBAL */
	kern_pbufpool_t iff_tx_pp; /* == S_pp in IFF_PP_MODE_GLOBAL */
	uint32_t iff_tx_headroom;
	unsigned int iff_adv_interval; /* advisory interval, 0 = disabled */
	uint32_t iff_tx_drop_rate; /* drop 1 in every iff_tx_drop_rate pkts */
	uint32_t iff_tx_pkts_count;
	iff_tx_completion_mode_t iff_tx_completion_mode;
	bool iff_intf_adv_enabled;
	void *iff_intf_adv_kern_ctx;
	kern_nexus_capab_interface_advisory_notify_fn_t iff_intf_adv_notify;
	iff_tx_exp_policy_t iff_tx_exp_policy;
#endif /* SKYWALK */
};
799
800 typedef struct if_fake * __single if_fake_ref;
801
802 static if_fake_ref
803 ifnet_get_if_fake(ifnet_t ifp);
804
805 static inline boolean_t
feth_in_bsd_mode(if_fake_ref fakeif)806 feth_in_bsd_mode(if_fake_ref fakeif)
807 {
808 return (fakeif->iff_flags & IFF_FLAGS_BSD_MODE) != 0;
809 }
810
811 static inline void
feth_set_detaching(if_fake_ref fakeif)812 feth_set_detaching(if_fake_ref fakeif)
813 {
814 fakeif->iff_flags |= IFF_FLAGS_DETACHING;
815 }
816
817 static inline boolean_t
feth_is_detaching(if_fake_ref fakeif)818 feth_is_detaching(if_fake_ref fakeif)
819 {
820 return (fakeif->iff_flags & IFF_FLAGS_DETACHING) != 0;
821 }
822
823 static int
feth_enable_dequeue_stall(ifnet_t ifp,uint32_t enable)824 feth_enable_dequeue_stall(ifnet_t ifp, uint32_t enable)
825 {
826 int error;
827
828 if (enable != 0) {
829 error = ifnet_disable_output(ifp);
830 } else {
831 error = ifnet_enable_output(ifp);
832 }
833
834 return error;
835 }
836
837 #if SKYWALK
838 static inline boolean_t
feth_in_wmm_mode(if_fake_ref fakeif)839 feth_in_wmm_mode(if_fake_ref fakeif)
840 {
841 return (fakeif->iff_flags & IFF_FLAGS_WMM_MODE) != 0;
842 }
843
844 static inline boolean_t
feth_using_multibuflets(if_fake_ref fakeif)845 feth_using_multibuflets(if_fake_ref fakeif)
846 {
847 return (fakeif->iff_flags & IFF_FLAGS_MULTIBUFLETS) != 0;
848 }
849 static void feth_detach_netif_nexus(if_fake_ref fakeif);
850
851 static inline boolean_t
feth_has_intf_advisory_configured(if_fake_ref fakeif)852 feth_has_intf_advisory_configured(if_fake_ref fakeif)
853 {
854 return fakeif->iff_adv_interval > 0;
855 }
856 #endif /* SKYWALK */
857
858 static inline bool
feth_supports_tso(if_fake_ref fakeif)859 feth_supports_tso(if_fake_ref fakeif)
860 {
861 return (fakeif->iff_flags & IFF_FLAGS_TSO_SUPPORT) != 0;
862 }
863
864 static inline void
feth_set_supports_tso(if_fake_ref fakeif)865 feth_set_supports_tso(if_fake_ref fakeif)
866 {
867 fakeif->iff_flags |= IFF_FLAGS_TSO_SUPPORT;
868 }
869
870 static inline bool
feth_supports_vlan_mtu(if_fake_ref fakeif)871 feth_supports_vlan_mtu(if_fake_ref fakeif)
872 {
873 return (fakeif->iff_flags & IFF_FLAGS_VLAN_MTU) != 0;
874 }
875
876 static inline void
feth_set_supports_vlan_mtu(if_fake_ref fakeif)877 feth_set_supports_vlan_mtu(if_fake_ref fakeif)
878 {
879 fakeif->iff_flags |= IFF_FLAGS_VLAN_MTU;
880 }
881
882 static inline bool
feth_supports_vlan_tagging(if_fake_ref fakeif)883 feth_supports_vlan_tagging(if_fake_ref fakeif)
884 {
885 return (fakeif->iff_flags & IFF_FLAGS_VLAN_TAGGING) != 0;
886 }
887
888 static inline void
feth_set_supports_vlan_tagging(if_fake_ref fakeif)889 feth_set_supports_vlan_tagging(if_fake_ref fakeif)
890 {
891 fakeif->iff_flags |= IFF_FLAGS_VLAN_TAGGING;
892 }
893
894 static inline void
feth_set_supports_rx_flow_steering(if_fake_ref fakeif)895 feth_set_supports_rx_flow_steering(if_fake_ref fakeif)
896 {
897 fakeif->iff_flags |= IFF_FLAGS_RX_FLOW_STEERING;
898 }
899
900 static inline bool
feth_supports_rx_flow_steering(if_fake_ref fakeif)901 feth_supports_rx_flow_steering(if_fake_ref fakeif)
902 {
903 return (fakeif->iff_flags & IFF_FLAGS_RX_FLOW_STEERING) != 0;
904 }
905
906 #define FETH_MAXUNIT IF_MAXUNIT
907 #define FETH_ZONE_MAX_ELEM MIN(IFNETS_MAX, FETH_MAXUNIT)
908
909 static int feth_clone_create(struct if_clone *, u_int32_t, void *);
910 static int feth_clone_destroy(ifnet_t);
911 static int feth_output(ifnet_t ifp, struct mbuf *m);
912 static void feth_start(ifnet_t ifp);
913 static int feth_ioctl(ifnet_t ifp, u_long cmd, void * addr);
914 static int feth_config(ifnet_t ifp, ifnet_t peer);
915 static void feth_if_free(ifnet_t ifp);
916 static void feth_ifnet_set_attrs(if_fake_ref fakeif, ifnet_t ifp);
917 static void feth_free(if_fake_ref fakeif);
918
919 static struct if_clone
920 feth_cloner = IF_CLONE_INITIALIZER(FAKE_ETHER_NAME,
921 feth_clone_create,
922 feth_clone_destroy,
923 0,
924 FETH_MAXUNIT);
925 static void interface_link_event(ifnet_t ifp, u_int32_t event_code);
926
/* some media words to pretend to be ethernet */
/* default: 10G full duplex */
#define FAKE_DEFAULT_MEDIA      IFM_MAKEWORD(IFM_ETHER, IFM_10G_T, IFM_FDX, 0)
static int default_media_words[] = {
	IFM_MAKEWORD(IFM_ETHER, 0, 0, 0),
	FAKE_DEFAULT_MEDIA,
	IFM_MAKEWORD(IFM_ETHER, IFM_2500_T, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_5000_T, IFM_FDX, 0),

	IFM_MAKEWORD(IFM_ETHER, IFM_10G_KX4, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_20G_KR2, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_2500_SX, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_25G_KR, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_40G_SR4, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_50G_CR2, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_56G_R4, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_100G_CR4, IFM_FDX, 0),
	IFM_MAKEWORD(IFM_ETHER, IFM_400G_AUI8, IFM_FDX, 0),
};
/* number of entries in default_media_words */
#define default_media_words_count (sizeof(default_media_words) \
	/ sizeof (default_media_words[0]))
947
948 /**
949 ** veth locks
950 **/
951
952 static LCK_GRP_DECLARE(feth_lck_grp, "fake");
953 static LCK_MTX_DECLARE(feth_lck_mtx, &feth_lck_grp);
954
/* Acquire the global feth mutex. */
static inline void
feth_lock(void)
{
	lck_mtx_lock(&feth_lck_mtx);
}
960
/* Release the global feth mutex. */
static inline void
feth_unlock(void)
{
	lck_mtx_unlock(&feth_lck_mtx);
}
966
967 static inline int
get_max_mtu(int bsd_mode,unsigned int max_mtu)968 get_max_mtu(int bsd_mode, unsigned int max_mtu)
969 {
970 unsigned int mtu;
971
972 if (bsd_mode != 0) {
973 mtu = M16KCLBYTES - ETHER_HDR_LEN;
974 if (mtu > max_mtu) {
975 mtu = max_mtu;
976 }
977 } else {
978 mtu = max_mtu;
979 }
980 return mtu;
981 }
982
983 static inline unsigned int
feth_max_mtu(ifnet_t ifp)984 feth_max_mtu(ifnet_t ifp)
985 {
986 if_fake_ref fakeif;
987 unsigned int max_mtu = ETHERMTU;
988
989 feth_lock();
990 fakeif = ifnet_get_if_fake(ifp);
991 if (fakeif != NULL) {
992 max_mtu = fakeif->iff_max_mtu;
993 }
994 feth_unlock();
995 return max_mtu;
996 }
997
/*
 * feth_free
 * Final destructor, called when the last reference is dropped
 * (see feth_release).  Releases packet-pool references, the logical
 * link array, and the if_fake structure itself.
 */
static void
feth_free(if_fake_ref fakeif)
{
	VERIFY(fakeif->iff_retain_count == 0);
#if SKYWALK
	if (!feth_in_bsd_mode(fakeif)) {
		if (fakeif->iff_pp_mode == IFF_PP_MODE_GLOBAL) {
			/* both directions must reference the shared pool */
			VERIFY(fakeif->iff_rx_pp == S_pp);
			VERIFY(fakeif->iff_tx_pp == S_pp);
			pp_release(fakeif->iff_rx_pp);
			fakeif->iff_rx_pp = NULL;
			pp_release(fakeif->iff_tx_pp);
			fakeif->iff_tx_pp = NULL;
			/*
			 * Drop the shared pool itself when this was the last
			 * user (only the creation reference remains).
			 */
			feth_lock();
			if (S_pp != NULL && S_pp->pp_refcnt == 1) {
				pp_release(S_pp);
				S_pp = NULL;
			}
			feth_unlock();
		} else {
			/* private pools: RX and TX may be the same or split */
			if (fakeif->iff_rx_pp != NULL) {
				pp_release(fakeif->iff_rx_pp);
				fakeif->iff_rx_pp = NULL;
			}
			if (fakeif->iff_tx_pp != NULL) {
				pp_release(fakeif->iff_tx_pp);
				fakeif->iff_tx_pp = NULL;
			}
		}
	}
#endif /* SKYWALK */

	FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE, "%s", fakeif->iff_name);
	if (fakeif->iff_llink != NULL) {
		fake_llink_t llink;
		llink = fakeif->iff_llink;
		fakeif->iff_llink = NULL;
		kfree_type(fake_llink, FETH_MAX_LLINKS, llink);
	}
	kfree_type(struct if_fake, fakeif);
}
1039
1040 static void
feth_release(if_fake_ref fakeif)1041 feth_release(if_fake_ref fakeif)
1042 {
1043 u_int32_t old_retain_count;
1044
1045 old_retain_count = OSDecrementAtomic(&fakeif->iff_retain_count);
1046 switch (old_retain_count) {
1047 case 0:
1048 VERIFY(old_retain_count != 0);
1049 break;
1050 case 1:
1051 feth_free(fakeif);
1052 break;
1053 default:
1054 break;
1055 }
1056 return;
1057 }
1058
1059 #if SKYWALK
1060
/* Take an additional reference on the if_fake instance. */
static void
feth_retain(if_fake_ref fakeif)
{
	OSIncrementAtomic(&fakeif->iff_retain_count);
}
1066
/*
 * feth_packet_pool_init_prepare
 * Fill in a kern_pbufpool_init describing this interface's packet pool:
 * buflet size, max fragments per packet, and flags derived from the
 * interface's TSO / multi-buflet / user-access configuration.
 */
static void
feth_packet_pool_init_prepare(if_fake_ref fakeif,
    struct kern_pbufpool_init *pp_init)
{
	uint32_t max_mtu = fakeif->iff_max_mtu;
	uint32_t buflet_size = if_fake_buflet_size;

	bzero(pp_init, sizeof(*pp_init));
	pp_init->kbi_version = KERN_PBUFPOOL_CURRENT_VERSION;
	pp_init->kbi_flags |= KBIF_VIRTUAL_DEVICE;
	pp_init->kbi_packets = 1024; /* TBD configurable */
	if (feth_supports_tso(fakeif)) {
		/* TSO needs room for a full super-packet in one buflet */
		buflet_size = if_fake_tso_buffer_size;
	}
	if (feth_using_multibuflets(fakeif)) {
		/* chain enough buflets to cover the max MTU */
		pp_init->kbi_bufsize = buflet_size;
		pp_init->kbi_max_frags = howmany(max_mtu, buflet_size);
		pp_init->kbi_buflets = pp_init->kbi_packets *
		    pp_init->kbi_max_frags;
		pp_init->kbi_flags |= KBIF_BUFFER_ON_DEMAND;
	} else {
		/* single buflet large enough for the max MTU */
		pp_init->kbi_bufsize = max(max_mtu, buflet_size);
		pp_init->kbi_max_frags = 1;
		pp_init->kbi_buflets = pp_init->kbi_packets;
	}
	pp_init->kbi_buf_seg_size = skmem_usr_buf_seg_size;
	if (if_fake_user_access != 0) {
		pp_init->kbi_flags |= KBIF_USER_ACCESS;
	}
	pp_init->kbi_ctx = NULL;
	pp_init->kbi_ctx_retain = NULL;
	pp_init->kbi_ctx_release = NULL;
}
1100
1101 static errno_t
feth_packet_pool_make(if_fake_ref fakeif)1102 feth_packet_pool_make(if_fake_ref fakeif)
1103 {
1104 struct kern_pbufpool_init pp_init;
1105 errno_t err;
1106
1107 feth_packet_pool_init_prepare(fakeif, &pp_init);
1108
1109 switch (fakeif->iff_pp_mode) {
1110 case IFF_PP_MODE_GLOBAL:
1111 feth_lock();
1112 if (S_pp == NULL) {
1113 (void)snprintf((char *)pp_init.kbi_name,
1114 sizeof(pp_init.kbi_name), "%s", "feth shared pp");
1115 err = kern_pbufpool_create(&pp_init, &S_pp, NULL);
1116 }
1117 pp_retain(S_pp);
1118 feth_unlock();
1119 fakeif->iff_rx_pp = S_pp;
1120 pp_retain(S_pp);
1121 fakeif->iff_tx_pp = S_pp;
1122 break;
1123 case IFF_PP_MODE_PRIVATE:
1124 (void)snprintf((char *)pp_init.kbi_name,
1125 sizeof(pp_init.kbi_name), "%s pp", fakeif->iff_name);
1126 err = kern_pbufpool_create(&pp_init, &fakeif->iff_rx_pp, NULL);
1127 pp_retain(fakeif->iff_rx_pp);
1128 fakeif->iff_tx_pp = fakeif->iff_rx_pp;
1129 break;
1130 case IFF_PP_MODE_PRIVATE_SPLIT:
1131 (void)snprintf((char *)pp_init.kbi_name,
1132 sizeof(pp_init.kbi_name), "%s rx pp", fakeif->iff_name);
1133 pp_init.kbi_flags &= ~(KBIF_IODIR_IN | KBIF_IODIR_OUT |
1134 KBIF_BUFFER_ON_DEMAND | KBIF_KERNEL_READONLY);
1135 pp_init.kbi_flags |= (KBIF_IODIR_IN | KBIF_BUFFER_ON_DEMAND);
1136 pp_init.kbi_packets = 1024;
1137 pp_init.kbi_bufsize = if_fake_link_layer_aggregation_factor * 1024;
1138 err = kern_pbufpool_create(&pp_init, &fakeif->iff_rx_pp, NULL);
1139 if (err != 0) {
1140 FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
1141 "rx pp create failed %d", err);
1142 return err;
1143 }
1144 pp_init.kbi_flags &= ~(KBIF_IODIR_IN | KBIF_IODIR_OUT |
1145 KBIF_BUFFER_ON_DEMAND | KBIF_KERNEL_READONLY);
1146 pp_init.kbi_flags |= KBIF_IODIR_OUT;
1147 pp_init.kbi_packets = 1024; /* TBD configurable */
1148 pp_init.kbi_bufsize = fakeif->iff_max_mtu;
1149 (void)snprintf((char *)pp_init.kbi_name,
1150 sizeof(pp_init.kbi_name), "%s tx pp", fakeif->iff_name);
1151 err = kern_pbufpool_create(&pp_init, &fakeif->iff_tx_pp, NULL);
1152 if (err != 0) {
1153 FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
1154 "tx pp create failed %d", err);
1155 pp_release(fakeif->iff_rx_pp);
1156 return err;
1157 }
1158 break;
1159 default:
1160 VERIFY(0);
1161 __builtin_unreachable();
1162 }
1163
1164 return 0;
1165 }
1166
1167 static void
feth_packet_set_trace_tag(kern_packet_t ph,int flag)1168 feth_packet_set_trace_tag(kern_packet_t ph, int flag)
1169 {
1170 if (if_fake_trace_tag_flags & flag) {
1171 if (++if_fake_trace_tag_current == 0) {
1172 if_fake_trace_tag_current = 1;
1173 }
1174 kern_packet_set_trace_tag(ph, if_fake_trace_tag_current);
1175 }
1176 }
1177
/*
 * feth_clone_packet
 * Produce a packet for the RX side from source packet `sph' using the
 * destination interface's RX pool.  If the previous destination packet
 * (*pdph) still has room past its data limit, the new packet shares that
 * buffer via a light clone and the payload is copied in after the
 * previous packet's data; otherwise a fresh packet is allocated.
 * On success *pdph is replaced with the new packet handle.
 * Only single-buflet sources are supported.
 */
static errno_t
feth_clone_packet(if_fake_ref dif, kern_packet_t sph, kern_packet_t *pdph)
{
	errno_t err = 0;
	kern_pbufpool_t pp = dif->iff_rx_pp;
	kern_packet_t dph = 0, dph0 = 0;
	kern_buflet_t sbuf, dbuf0 = NULL, dbuf;
	caddr_t saddr, daddr;
	uint32_t soff, doff;
	uint32_t slen, dlen;
	uint32_t dlim0, dlim;

	sbuf = kern_packet_get_next_buflet(sph, NULL);
	saddr = __unsafe_forge_bidi_indexable(caddr_t,
	    kern_buflet_get_data_address(sbuf),
	    kern_buflet_get_data_limit(sbuf));
	doff = soff = kern_buflet_get_data_offset(sbuf);
	dlen = slen = kern_buflet_get_data_length(sbuf);

	/* packet clone is only supported for single-buflet */
	ASSERT(kern_packet_get_buflet_count(sph) == 1);
	ASSERT(soff == kern_packet_get_headroom(sph));
	ASSERT(slen == kern_packet_get_data_length(sph));

	dph0 = *pdph;
	if (dph0 == 0) {
		/* no previous packet: force the allocation path below */
		dlim0 = 0;
	} else {
		/* room left in the previous packet's buffer object */
		dbuf0 = kern_packet_get_next_buflet(dph0, NULL);
		ASSERT(kern_buflet_get_object_limit(dbuf0) ==
		    PP_BUF_OBJ_SIZE_DEF(pp));
		ASSERT(kern_buflet_get_data_limit(dbuf0) % 16 == 0);
		dlim0 = ((size_t)kern_buflet_get_object_address(dbuf0) +
		    kern_buflet_get_object_limit(dbuf0)) -
		    ((size_t)kern_buflet_get_data_address(dbuf0) +
		    kern_buflet_get_data_limit(dbuf0));
	}

	if (doff + dlen > dlim0) {
		/* does not fit in the leftover space: allocate a new packet */
		err = kern_pbufpool_alloc_nosleep(pp, 1, &dph);
		if (err != 0) {
			STATS_INC(dif->iff_nifs, NETIF_STATS_DROP);
			STATS_INC(dif->iff_nifs, NETIF_STATS_DROP_NOMEM_PKT);
			return err;
		}
		dbuf = kern_packet_get_next_buflet(dph, NULL);
		ASSERT(kern_buflet_get_data_address(dbuf) ==
		    kern_buflet_get_object_address(dbuf));
		daddr = __unsafe_forge_bidi_indexable(caddr_t,
		    kern_buflet_get_data_address(dbuf),
		    kern_buflet_get_data_limit(dbuf));
		dlim = kern_buflet_get_object_limit(dbuf);
		ASSERT(dlim == PP_BUF_OBJ_SIZE_DEF(pp));
	} else {
		/* fits: share the previous buffer via a light clone */
		err = kern_packet_clone_nosleep(dph0, &dph, KPKT_COPY_LIGHT);
		if (err != 0) {
			FAKE_LOG(LOG_INFO, FE_DBGF_OUTPUT,
			    "packet clone err %d", err);
			return err;
		}
		dbuf = kern_packet_get_next_buflet(dph, NULL);
		ASSERT(kern_buflet_get_object_address(dbuf) ==
		    kern_buflet_get_object_address(dbuf0));
		/* data region starts right after the previous data limit */
		daddr = __unsafe_forge_bidi_indexable(caddr_t,
		    kern_buflet_get_data_address(dbuf0),
		    kern_buflet_get_object_limit(dbuf0)) + kern_buflet_get_data_limit(dbuf0);
		dlim = dlim0;
	}

	ASSERT(doff + dlen <= dlim);

	/* data limit is kept 16-byte aligned, so daddr stays aligned */
	ASSERT((uintptr_t)daddr % 16 == 0);
	bcopy(saddr + soff, daddr + doff, slen);

	/* shrink the limit to the aligned end of data for the next clone */
	dlim = MIN(dlim, P2ROUNDUP(doff + dlen, 16));
	err = kern_buflet_set_data_address(dbuf, daddr);
	VERIFY(err == 0);
	err = kern_buflet_set_data_limit(dbuf, dlim);
	VERIFY(err == 0);
	err = kern_buflet_set_data_length(dbuf, dlen);
	VERIFY(err == 0);
	err = kern_buflet_set_data_offset(dbuf, doff);
	VERIFY(err == 0);
	err = kern_packet_set_headroom(dph, doff);
	VERIFY(err == 0);
	err = kern_packet_set_link_header_length(dph,
	    kern_packet_get_link_header_length(sph));
	VERIFY(err == 0);
	err = kern_packet_set_service_class(dph,
	    kern_packet_get_service_class(sph));
	VERIFY(err == 0);
	err = kern_packet_finalize(dph);
	VERIFY(err == 0);
	*pdph = dph;

	return err;
}
1275
1276 static inline void
feth_copy_buflet(kern_buflet_t sbuf,kern_buflet_t dbuf)1277 feth_copy_buflet(kern_buflet_t sbuf, kern_buflet_t dbuf)
1278 {
1279 errno_t err;
1280 uint32_t off, len;
1281 caddr_t saddr, daddr;
1282
1283 saddr = __unsafe_forge_bidi_indexable(caddr_t,
1284 kern_buflet_get_data_address(sbuf),
1285 kern_buflet_get_data_limit(sbuf));
1286 off = kern_buflet_get_data_offset(sbuf);
1287 len = kern_buflet_get_data_length(sbuf);
1288 daddr = __unsafe_forge_bidi_indexable(caddr_t,
1289 kern_buflet_get_data_address(dbuf),
1290 kern_buflet_get_data_limit(dbuf));
1291 bcopy(saddr + off, daddr + off, len);
1292 err = kern_buflet_set_data_offset(dbuf, off);
1293 VERIFY(err == 0);
1294 err = kern_buflet_set_data_length(dbuf, len);
1295 VERIFY(err == 0);
1296 }
1297
/*
 * feth_add_packet_trailer
 * Append `trailer_len' bytes of `trailer' after the existing data in the
 * packet's last buflet, then re-finalize the packet.
 * Returns 0 on success or ERANGE if the last buflet has no room.
 */
static int
feth_add_packet_trailer(kern_packet_t ph, void * __sized_by(trailer_len) trailer, size_t trailer_len)
{
	errno_t err = 0;

	ASSERT(trailer_len <= FETH_TRAILER_LENGTH_MAX);

	/* walk to the last buflet of the packet */
	kern_buflet_t buf = NULL, iter = NULL;
	while ((iter = kern_packet_get_next_buflet(ph, iter)) != NULL) {
		buf = iter;
	}
	ASSERT(buf != NULL);

	uint32_t dlim = kern_buflet_get_data_limit(buf);
	uint32_t doff = kern_buflet_get_data_offset(buf);
	uint32_t dlen = kern_buflet_get_data_length(buf);

	/* free space between end of data and the buflet's data limit */
	size_t trailer_room = dlim - doff - dlen;

	if (trailer_room < trailer_len) {
		FAKE_LOG(LOG_INFO, FE_DBGF_OUTPUT, "not enough room");
		return ERANGE;
	}

	void *data = __unsafe_forge_bidi_indexable(caddr_t,
	    kern_buflet_get_data_address(buf),
	    kern_buflet_get_data_limit(buf)) + doff + dlen;
	memcpy(data, trailer, trailer_len);

	err = kern_buflet_set_data_length(buf, dlen + trailer_len);
	VERIFY(err == 0);

	err = kern_packet_finalize(ph);
	VERIFY(err == 0);

	FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT, "%zuB trailer added", trailer_len);

	return 0;
}
1337
1338 static int
feth_add_packet_fcs(kern_packet_t ph)1339 feth_add_packet_fcs(kern_packet_t ph)
1340 {
1341 uint32_t crc = 0;
1342 int err;
1343
1344 ASSERT(sizeof(crc) == ETHER_CRC_LEN);
1345
1346 kern_buflet_t buf = NULL;
1347 while ((buf = kern_packet_get_next_buflet(ph, buf)) != NULL) {
1348 uint32_t doff = kern_buflet_get_data_offset(buf);
1349 uint32_t dlen = kern_buflet_get_data_length(buf);
1350 void *data = __unsafe_forge_bidi_indexable(caddr_t,
1351 kern_buflet_get_data_address(buf),
1352 kern_buflet_get_data_limit(buf)) + doff;
1353 crc = crc32(crc, data, dlen);
1354 }
1355
1356 err = feth_add_packet_trailer(ph, &crc, ETHER_CRC_LEN);
1357 if (!err) {
1358 return err;
1359 }
1360
1361 err = kern_packet_set_link_ethfcs(ph);
1362 VERIFY(err == 0);
1363
1364 return 0;
1365 }
1366
/*
 * feth_copy_packet
 * Deep-copy source packet `sph' into a new packet allocated from the
 * destination interface's RX pool, buflet by buflet, carrying over
 * headroom, link-header length and service class.  On success *pdph
 * holds the finalized copy; on allocation failure the partial copy is
 * freed and an error is returned.
 */
static errno_t
feth_copy_packet(if_fake_ref dif, kern_packet_t sph, kern_packet_t *pdph)
{
	errno_t err = 0;
	uint16_t i, bufcnt;
	mach_vm_address_t baddr;
	kern_buflet_t sbuf = NULL, dbuf = NULL;
	kern_pbufpool_t pp = dif->iff_rx_pp;
	kern_packet_t dph;
	boolean_t multi_buflet = feth_using_multibuflets(dif);

	bufcnt = kern_packet_get_buflet_count(sph);
	ASSERT((bufcnt == 1) || multi_buflet);
	*pdph = 0;

	err = kern_pbufpool_alloc_nosleep(pp, 1, &dph);
	if (err != 0) {
		STATS_INC(dif->iff_nifs, NETIF_STATS_DROP);
		STATS_INC(dif->iff_nifs, NETIF_STATS_DROP_NOMEM_PKT);
		return err;
	}

	/* pre-constructed single buflet packet copy */
	sbuf = kern_packet_get_next_buflet(sph, NULL);
	dbuf = kern_packet_get_next_buflet(dph, NULL);
	feth_copy_buflet(sbuf, dbuf);

	if (!multi_buflet) {
		goto done;
	}

	/* un-constructed multi-buflet packet copy */
	for (i = 1; i < bufcnt; i++) {
		kern_buflet_t __single dbuf_next = NULL;

		sbuf = kern_packet_get_next_buflet(sph, sbuf);
		VERIFY(sbuf != NULL);
		err = kern_pbufpool_alloc_buflet_nosleep(pp, &dbuf_next);
		if (err != 0) {
			STATS_INC(dif->iff_nifs, NETIF_STATS_DROP);
			STATS_INC(dif->iff_nifs, NETIF_STATS_DROP_NOMEM_BUF);
			break;
		}
		ASSERT(dbuf_next != NULL);
		feth_copy_buflet(sbuf, dbuf_next);
		err = kern_packet_add_buflet(dph, dbuf, dbuf_next);
		VERIFY(err == 0);
		dbuf = dbuf_next;
	}
	if (__improbable(err != 0)) {
		/*
		 * Allocation failed mid-chain: walk the buflets already
		 * attached (sanity-checking their addresses) and free the
		 * whole partial packet back to the pool.
		 */
		dbuf = NULL;
		while (i-- != 0) {
			dbuf = kern_packet_get_next_buflet(dph, dbuf);
			VERIFY(dbuf != NULL);
			baddr = (mach_vm_address_t)
			    kern_buflet_get_data_address(dbuf);
			VERIFY(baddr != 0);
		}
		kern_pbufpool_free(pp, dph);
		dph = 0;
	}

done:
	if (__probable(err == 0)) {
		/* propagate metadata and finalize the copy */
		err = kern_packet_set_headroom(dph,
		    kern_packet_get_headroom(sph));
		VERIFY(err == 0);
		err = kern_packet_set_link_header_length(dph,
		    kern_packet_get_link_header_length(sph));
		VERIFY(err == 0);
		err = kern_packet_set_service_class(dph,
		    kern_packet_get_service_class(sph));
		VERIFY(err == 0);
		err = kern_packet_finalize(dph);
		VERIFY(err == 0);
		VERIFY(bufcnt == kern_packet_get_buflet_count(dph));
		*pdph = dph;
	}
	return err;
}
1447
1448 static inline void
feth_update_pkt_tso_metadata_for_rx(kern_packet_t ph)1449 feth_update_pkt_tso_metadata_for_rx(kern_packet_t ph)
1450 {
1451 /*
1452 * Nothing to do if not a TSO offloaded packet.
1453 */
1454 uint16_t seg_sz = 0;
1455 seg_sz = kern_packet_get_protocol_segment_size(ph);
1456 if (seg_sz == 0) {
1457 return;
1458 }
1459 /*
1460 * For RX, make the packet appear as a fully validated LRO packet.
1461 */
1462 packet_csum_flags_t csum_flags = PACKET_CSUM_IP_CHECKED |
1463 PACKET_CSUM_IP_VALID | PACKET_CSUM_DATA_VALID |
1464 PACKET_CSUM_PSEUDO_HDR;
1465 (void) kern_packet_set_inet_checksum(ph, csum_flags, 0, 0xFFFF, FALSE);
1466 return;
1467 }
1468
/*
 * feth_rx_submit
 * Deliver `n_pkts' packets transmitted by `sif' into the peer `dif's
 * default RX channel ring.  Depending on the destination's pool mode the
 * packet is handed over directly (GLOBAL, clearing the source slot),
 * deep-copied (PRIVATE), or clone-copied (PRIVATE_SPLIT).  Optional
 * trailers/FCS configured on the source are appended, packets are bpf-
 * tapped, attached to RX slots, and the ring is advanced and notified.
 * Packets that don't fit in the ring, or fail to copy, are dropped.
 */
static void
feth_rx_submit(if_fake_ref sif, if_fake_ref dif, kern_packet_t * __counted_by(n_pkts) sphs,
    uint32_t n_pkts)
{
	errno_t err = 0;
	struct kern_channel_ring_stat_increment stats;
	kern_channel_ring_t rx_ring = NULL;
	kern_channel_slot_t rx_slot = NULL, last_rx_slot = NULL;
	kern_packet_t sph = 0, dph = 0;

	memset(&stats, 0, sizeof(stats));

	rx_ring = dif->iff_rx_ring[0];
	if (rx_ring == NULL) {
		/* peer has no RX ring attached; nothing to deliver to */
		return;
	}

	kr_enter(rx_ring, TRUE);
	kern_channel_reclaim(rx_ring);
	rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);

	for (uint32_t i = 0; i < n_pkts && rx_slot != NULL; i++) {
		sph = sphs[i];

		switch (dif->iff_pp_mode) {
		case IFF_PP_MODE_GLOBAL:
			/* shared pool: transfer ownership of the packet */
			sphs[i] = 0;
			dph = sph;
			feth_update_pkt_tso_metadata_for_rx(dph);
			err = kern_packet_finalize(dph);
			VERIFY(err == 0);
			break;
		case IFF_PP_MODE_PRIVATE:
			err = feth_copy_packet(dif, sph, &dph);
			break;
		case IFF_PP_MODE_PRIVATE_SPLIT:
			err = feth_clone_packet(dif, sph, &dph);
			break;
		default:
			VERIFY(0);
			__builtin_unreachable();
		}
		if (__improbable(err != 0)) {
			/* copy/clone failed; drop this packet and continue */
			continue;
		}

		if (sif->iff_trailer_length != 0) {
			feth_add_packet_trailer(dph, feth_trailer,
			    sif->iff_trailer_length);
		}
		if (sif->iff_fcs != 0) {
			feth_add_packet_fcs(dph);
		}
		feth_packet_set_trace_tag(dph, IFF_TT_INPUT);
		bpf_tap_packet_in(dif->iff_ifp, DLT_EN10MB, dph, NULL, 0);
		stats.kcrsi_slots_transferred++;
		stats.kcrsi_bytes_transferred
		        += kern_packet_get_data_length(dph);

		/* attach the packet to the RX ring */
		err = kern_channel_slot_attach_packet(rx_ring, rx_slot, dph);
		VERIFY(err == 0);
		last_rx_slot = rx_slot;
		rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
	}

	if (last_rx_slot != NULL) {
		/* publish the attached packets and account the stats */
		kern_channel_advance_slot(rx_ring, last_rx_slot);
		kern_channel_increment_ring_net_stats(rx_ring, dif->iff_ifp,
		    &stats);
	}

	/* rx_ring is non-NULL here (checked above); kept for symmetry */
	if (rx_ring != NULL) {
		kr_exit(rx_ring);
		kern_channel_notify(rx_ring, 0);
	}
}
1546
/*
 * feth_rx_queue_submit
 * Logical-link variant of feth_rx_submit(): deliver `n_pkts' packets
 * from `sif' into the default RX netif queue of the peer `dif's
 * (llink_idx, qset_idx) queue set.  Out-of-range indices or a missing
 * default queue cause the whole batch to be silently dropped (logged).
 * The last packet of the batch carries the FLUSH flag.
 */
static void
feth_rx_queue_submit(if_fake_ref sif, if_fake_ref dif, uint32_t llink_idx,
    uint32_t qset_idx, kern_packet_t * __counted_by(n_pkts) sphs, uint32_t n_pkts)
{
	errno_t err = 0;
	kern_netif_queue_t queue;
	kern_packet_t sph = 0, dph = 0;
	fake_llink *llink;
	fake_qset *qset;

	if (llink_idx >= dif->iff_llink_cnt) {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_INPUT,
		    "invalid llink_idx idx %d (max %d) on peer %s",
		    llink_idx, dif->iff_llink_cnt, dif->iff_name);
		return;
	}
	llink = &dif->iff_llink[llink_idx];
	if (qset_idx >= llink->fl_qset_cnt) {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_INPUT,
		    "invalid qset_idx %d (max %d) on peer %s",
		    qset_idx, llink->fl_qset_cnt, dif->iff_name);
		return;
	}
	qset = &dif->iff_llink[llink_idx].fl_qset[qset_idx];
	/* queue 0 is the default RX queue of the queue set */
	queue = qset->fqs_rx_queue[0].fq_queue;
	if (queue == NULL) {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_INPUT,
		    "NULL default queue (llink_idx %d, qset_idx %d) on peer %s",
		    llink_idx, qset_idx, dif->iff_name);
		return;
	}
	for (uint32_t i = 0; i < n_pkts; i++) {
		uint32_t flags;

		sph = sphs[i];

		switch (dif->iff_pp_mode) {
		case IFF_PP_MODE_GLOBAL:
			/* shared pool: transfer ownership of the packet */
			sphs[i] = 0;
			dph = sph;
			feth_update_pkt_tso_metadata_for_rx(dph);
			break;
		case IFF_PP_MODE_PRIVATE:
			err = feth_copy_packet(dif, sph, &dph);
			break;
		case IFF_PP_MODE_PRIVATE_SPLIT:
			err = feth_clone_packet(dif, sph, &dph);
			break;
		default:
			VERIFY(0);
			__builtin_unreachable();
		}
		if (__improbable(err != 0)) {
			/* copy/clone failed; drop this packet and continue */
			continue;
		}

		if (sif->iff_trailer_length != 0) {
			feth_add_packet_trailer(dph, feth_trailer,
			    sif->iff_trailer_length);
		}
		if (sif->iff_fcs != 0) {
			feth_add_packet_fcs(dph);
		}
		feth_packet_set_trace_tag(dph, IFF_TT_INPUT);
		bpf_tap_packet_in(dif->iff_ifp, DLT_EN10MB, dph, NULL, 0);

		/* flush the queue only on the final packet of the batch */
		flags = (i == n_pkts - 1) ?
		    KERN_NETIF_QUEUE_RX_ENQUEUE_FLAG_FLUSH : 0;
		kern_netif_queue_rx_enqueue(queue, dph, 1, flags);
	}
}
1618
1619 static void
feth_tx_complete(if_fake_ref fakeif,kern_packet_t * __counted_by (nphs)phs,uint32_t nphs)1620 feth_tx_complete(if_fake_ref fakeif, kern_packet_t * __counted_by(nphs) phs, uint32_t nphs)
1621 {
1622 for (uint32_t i = 0; i < nphs; i++) {
1623 kern_packet_t ph = phs[i];
1624 if (ph == 0) {
1625 continue;
1626 }
1627 int err = kern_packet_set_tx_completion_status(ph, 0);
1628 VERIFY(err == 0);
1629 kern_packet_tx_completion(ph, fakeif->iff_ifp);
1630 kern_pbufpool_free(fakeif->iff_tx_pp, phs[i]);
1631 phs[i] = 0;
1632 }
1633 }
1634
1635 #define NSEC_PER_USEC 1000ull
1636 /*
1637 * Calculate the time delta that passed from `since' to `until'.
1638 * If `until' happens before `since', returns negative value.
1639 */
1640 static bool
feth_packet_has_expired(if_fake_ref __unused fakeif,kern_packet_t ph,uint64_t * out_deadline)1641 feth_packet_has_expired(if_fake_ref __unused fakeif, kern_packet_t ph,
1642 uint64_t *out_deadline)
1643 {
1644 uint64_t now;
1645 uint64_t packet_expire_time_mach;
1646 int64_t time_until_expiration;
1647 errno_t err;
1648 bool expired = false;
1649
1650 static mach_timebase_info_data_t clock_timebase = {0, 0};
1651
1652 if (clock_timebase.denom == 0) {
1653 clock_timebase_info(&clock_timebase);
1654 VERIFY(clock_timebase.denom != 0);
1655 }
1656
1657 err = kern_packet_get_expire_time(ph, &packet_expire_time_mach);
1658 if (err) {
1659 goto out;
1660 }
1661
1662 now = mach_absolute_time();
1663 time_until_expiration = packet_expire_time_mach - now;
1664 if (time_until_expiration < 0) {
1665 /* The packet had expired */
1666 expired = true;
1667 goto out;
1668 }
1669
1670 /* Convert the time_delta from mach ticks to nanoseconds */
1671 time_until_expiration *= clock_timebase.numer;
1672 time_until_expiration /= clock_timebase.denom;
1673 /* convert from nanoseconds to microseconds */
1674 time_until_expiration /= 1000ull;
1675
1676 if (if_fake_expiration_threshold_us < time_until_expiration) {
1677 /* packet has some life ahead of it */
1678 FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
1679 "Packet has %llu usec until expiration",
1680 time_until_expiration);
1681 goto out;
1682 }
1683
1684 out:
1685 if (expired && out_deadline) {
1686 *out_deadline = packet_expire_time_mach;
1687 }
1688
1689 return expired;
1690 }
1691
1692 static errno_t
feth_get_packet_notification_details(if_fake_ref fakeif,kern_packet_t ph,packet_id_t * pkt_id,uint32_t * nx_port_id)1693 feth_get_packet_notification_details(if_fake_ref fakeif, kern_packet_t ph,
1694 packet_id_t *pkt_id, uint32_t *nx_port_id)
1695 {
1696 errno_t err = 0;
1697
1698 err = kern_packet_get_packetid(ph, pkt_id);
1699 if (err != 0) {
1700 FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
1701 "%s err=%d getting packetid", fakeif->iff_name, err);
1702 return err;
1703 }
1704
1705 err = kern_packet_get_tx_nexus_port_id(ph, nx_port_id);
1706 if (err != 0) {
1707 FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
1708 "%s err=%d getting nx_port_id", fakeif->iff_name, err);
1709 return err;
1710 }
1711
1712 return 0;
1713 }
1714
/*
 * feth_get_effective_expn_action
 * Map the interface's TX expiration policy (sysctl-controlled) to the
 * expiry action to take for this packet.  In METADATA mode the action
 * is read from the packet itself; a missing value (ENOENT) silently
 * means "no action".
 */
static packet_expiry_action_t
feth_get_effective_expn_action(if_fake_ref fakeif, kern_packet_t ph)
{
	errno_t err;
	packet_expiry_action_t expiry_action;

	switch (fakeif->iff_tx_exp_policy) {
	case IFF_TX_EXP_POLICY_DISABLED:
		expiry_action = PACKET_EXPIRY_ACTION_NONE;
		break;
	case IFF_TX_EXP_POLICY_NOTIFY_ONLY:
		expiry_action = PACKET_EXPIRY_ACTION_NOTIFY;
		break;
	case IFF_TX_EXP_POLICY_DROP_AND_NOTIFY:
		expiry_action = PACKET_EXPIRY_ACTION_DROP;
		break;
	case IFF_TX_EXP_POLICY_METADATA:
		/* per-packet action carried in the packet metadata */
		err = kern_packet_get_expiry_action(ph, &expiry_action);
		if (err != 0) {
			if (err != ENOENT) {
				FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
				    "Error %d when getting expiry action",
				    err);
			}
			expiry_action = PACKET_EXPIRY_ACTION_NONE;
		}
		break;
	default:
		FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
		    "Unrecognized value %d for \"net.link.fake.tx_exp_policy\"",
		    fakeif->iff_tx_exp_policy);
		expiry_action = PACKET_EXPIRY_ACTION_NONE;
	}

	return expiry_action;
}
1751
/* returns true if the packet is selected for expiration and should be dropped */
static bool
feth_tx_expired_error(if_fake_ref fakeif, kern_packet_t ph)
{
	int err = 0;
	uint32_t nx_port_id = 0;
	os_channel_event_packet_transmit_expired_t expn = {0};
	packet_expiry_action_t expiry_action = PACKET_EXPIRY_ACTION_NONE;

	FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC, "%s", fakeif->iff_name);

	/* only consult the policy for packets that actually expired */
	if (feth_packet_has_expired(fakeif, ph, &expn.packet_tx_expiration_deadline)) {
		expiry_action = feth_get_effective_expn_action(fakeif, ph);
	}

	bool drop_packet = (expiry_action == PACKET_EXPIRY_ACTION_DROP);
	if (expiry_action != PACKET_EXPIRY_ACTION_NONE) {
		/* set the expiration status code */
		expn.packet_tx_expiration_status = drop_packet ?
		    CHANNEL_EVENT_PKT_TRANSMIT_EXPIRED_ERR_EXPIRED_DROPPED :
		    CHANNEL_EVENT_PKT_TRANSMIT_EXPIRED_ERR_EXPIRED_NOT_DROPPED;

		/* Mark the expiration timestamp */
		expn.packet_tx_expiration_timestamp = mach_absolute_time();

		err = feth_get_packet_notification_details(fakeif, ph,
		    &expn.packet_id, &nx_port_id);

		if (err == 0) {
			/* deliver the expiry event to the originating port */
			err = kern_channel_event_transmit_expired(
				fakeif->iff_ifp, &expn, nx_port_id);
			FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
			    "%s sent expiry notification on nexus port "
			    "%u notif code %u",
			    fakeif->iff_name, nx_port_id,
			    expn.packet_tx_expiration_status);
		}
		if (err != 0) {
			FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
			    "%s err=%d, nx_port_id: 0x%x",
			    fakeif->iff_name, err, nx_port_id);
		}
	}

	return drop_packet;
}
1798
/* returns true if the packet is selected for TX error & dropped */
static bool
feth_tx_complete_error(if_fake_ref fakeif, kern_packet_t ph)
{
	int err;

	/*
	 * Only simulate an error when drop simulation is enabled and the
	 * running TX packet count is exactly at the configured drop rate
	 * (the counter is presumably reset elsewhere — managed by the caller).
	 */
	if (fakeif->iff_tx_drop_rate == 0 ||
	    fakeif->iff_tx_pkts_count != fakeif->iff_tx_drop_rate) {
		return false;
	}
	/* simulate TX completion error on the packet */
	if (fakeif->iff_tx_completion_mode == IFF_TX_COMPL_MODE_SYNC) {
		/* synchronous mode: report via the packet's own status */
		err = kern_packet_set_tx_completion_status(ph,
		    CHANNEL_EVENT_PKT_TRANSMIT_STATUS_ERR_RETRY_FAILED);
		VERIFY(err == 0);
		kern_packet_tx_completion(ph, fakeif->iff_ifp);
	} else {
		/* async mode: post a channel event to the originating port */
		uint32_t nx_port_id = 0;
		os_channel_event_packet_transmit_status_t pkt_tx_status = {0};

		pkt_tx_status.packet_status =
		    CHANNEL_EVENT_PKT_TRANSMIT_STATUS_ERR_RETRY_FAILED;
		err = feth_get_packet_notification_details(fakeif, ph,
		    &pkt_tx_status.packet_id, &nx_port_id);
		if (err == 0) {
			err = kern_channel_event_transmit_status(
				fakeif->iff_ifp, &pkt_tx_status, nx_port_id);
		}
		if (err != 0) {
			FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
			    "%s err=%d, nx_port_id: 0x%x",
			    fakeif->iff_name, err, nx_port_id);
		}
	}

	return true;
}
1836
/*
 * feth_if_adv
 * Thread-call worker that periodically emits a synthetic interface
 * advisory (pretending to be a 1Gbps WiFi link) while the channel stays
 * connected, then re-arms itself for the next interval.
 * NOTE(review): iff_intf_adv_enabled and the notify callback are read
 * after dropping the lock — presumably stable while connected; confirm.
 */
static void
feth_if_adv(thread_call_param_t arg0, thread_call_param_t arg1)
{
#pragma unused(arg1)
	errno_t error;
	if_fake_ref fakeif = (if_fake_ref)arg0;
	struct ifnet_interface_advisory if_adv;
	struct ifnet_stats_param if_stat;

	/* bail out if the interface is going away or disconnected */
	feth_lock();
	if (feth_is_detaching(fakeif) || !fakeif->iff_channel_connected) {
		feth_unlock();
		return;
	}
	feth_unlock();

	if (!fakeif->iff_intf_adv_enabled) {
		goto done;
	}

	error = ifnet_stat(fakeif->iff_ifp, &if_stat);
	if (error != 0) {
		FAKE_LOG(LOG_NOTICE, 0, "%s: ifnet_stat() failed %d",
		    fakeif->iff_name, error);
		goto done;
	}
	/* fabricate a plausible-looking capacity report */
	if_adv.header.version = IF_INTERFACE_ADVISORY_VERSION_CURRENT;
	if_adv.header.direction = IF_INTERFACE_ADVISORY_DIRECTION_TX;
	if_adv.header.interface_type =
	    IF_INTERFACE_ADVISORY_INTERFACE_TYPE_WIFI;
	if_adv.capacity.timestamp = mach_absolute_time();
	if_adv.capacity.rate_trend_suggestion =
	    IF_INTERFACE_ADVISORY_RATE_SUGGESTION_RAMP_NEUTRAL;
	if_adv.capacity.max_bandwidth = 1000 * 1000 * 1000; /* 1Gbps */
	if_adv.capacity.total_byte_count = if_stat.packets_out;
	if_adv.capacity.average_throughput = 1000 * 1000 * 1000; /* 1Gbps */
	if_adv.capacity.flushable_queue_size = UINT32_MAX;
	if_adv.capacity.non_flushable_queue_size = UINT32_MAX;
	if_adv.capacity.average_delay = 1; /* ms */

	error = fakeif->iff_intf_adv_notify(fakeif->iff_intf_adv_kern_ctx,
	    &if_adv);
	if (error != 0) {
		FAKE_LOG(LOG_NOTICE, 0,
		    "%s: interface advisory report failed %d",
		    fakeif->iff_name, error);
	}

done:
	/* re-arm the timer unless the interface went away in the meantime */
	feth_lock();
	if (!feth_is_detaching(fakeif) && fakeif->iff_channel_connected) {
		uint64_t deadline;
		clock_interval_to_deadline(fakeif->iff_adv_interval,
		    NSEC_PER_MSEC, &deadline);
		thread_call_enter_delayed(fakeif->iff_if_adv_tcall, deadline);
	}
	feth_unlock();
}
1895
/*
 * feth_if_adv_tcall_create
 * Allocate the interface-advisory thread call and schedule its first
 * run.  Takes a reference on `fakeif' that belongs to the thread call.
 * Returns 0 on success, ENXIO if the thread call cannot be allocated.
 */
static int
feth_if_adv_tcall_create(if_fake_ref fakeif)
{
	uint64_t deadline;

	feth_lock();
	ASSERT(fakeif->iff_if_adv_tcall == NULL);
	ASSERT(fakeif->iff_adv_interval > 0);
	ASSERT(fakeif->iff_channel_connected);
	fakeif->iff_if_adv_tcall =
	    thread_call_allocate_with_options(feth_if_adv,
	    (thread_call_param_t)fakeif, THREAD_CALL_PRIORITY_KERNEL,
	    THREAD_CALL_OPTIONS_ONCE);
	if (fakeif->iff_if_adv_tcall == NULL) {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
		    "%s if_adv tcall alloc failed",
		    fakeif->iff_name);
		feth_unlock();
		return ENXIO;
	}
	/* retain for the interface advisory thread call */
	feth_retain(fakeif);
	clock_interval_to_deadline(fakeif->iff_adv_interval,
	    NSEC_PER_MSEC, &deadline);
	thread_call_enter_delayed(fakeif->iff_if_adv_tcall, deadline);
	feth_unlock();
	return 0;
}
1924
1925 /**
1926 ** nexus netif domain provider
1927 **/
1928 static errno_t
feth_nxdp_init(kern_nexus_domain_provider_t domprov)1929 feth_nxdp_init(kern_nexus_domain_provider_t domprov)
1930 {
1931 #pragma unused(domprov)
1932 return 0;
1933 }
1934
1935 static void
feth_nxdp_fini(kern_nexus_domain_provider_t domprov)1936 feth_nxdp_fini(kern_nexus_domain_provider_t domprov)
1937 {
1938 #pragma unused(domprov)
1939 }
1940
/* UUID of the registered feth netif domain provider */
static uuid_t feth_nx_dom_prov;

/*
 * feth_register_nexus_domain_provider
 * Register "com.apple.feth" as a NET_IF nexus domain provider.
 * Returns 0 on success or the registration error.
 */
static errno_t
feth_register_nexus_domain_provider(void)
{
	const struct kern_nexus_domain_provider_init dp_init = {
		.nxdpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
		.nxdpi_flags = 0,
		.nxdpi_init = feth_nxdp_init,
		.nxdpi_fini = feth_nxdp_fini
	};
	errno_t err = 0;

	nexus_domain_provider_name_t feth_provider_name = "com.apple.feth";

	/* feth_nxdp_init() is called before this function returns */
	err = kern_nexus_register_domain_provider(NEXUS_TYPE_NET_IF,
	    feth_provider_name,
	    &dp_init, sizeof(dp_init),
	    &feth_nx_dom_prov);
	if (err != 0) {
		FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
		    "failed to register domain provider");
		return err;
	}
	return 0;
}
1968
1969 /**
1970 ** netif nexus routines
1971 **/
1972 static if_fake_ref
feth_nexus_context(kern_nexus_t nexus)1973 feth_nexus_context(kern_nexus_t nexus)
1974 {
1975 if_fake_ref fakeif;
1976
1977 fakeif = (if_fake_ref)kern_nexus_get_context(nexus);
1978 assert(fakeif != NULL);
1979 return fakeif;
1980 }
1981
1982 static uint8_t
feth_find_tx_ring_by_svc(kern_packet_svc_class_t svc_class)1983 feth_find_tx_ring_by_svc(kern_packet_svc_class_t svc_class)
1984 {
1985 switch (svc_class) {
1986 case KPKT_SC_VO:
1987 return 0;
1988 case KPKT_SC_VI:
1989 return 1;
1990 case KPKT_SC_BE:
1991 return 2;
1992 case KPKT_SC_BK:
1993 return 3;
1994 default:
1995 VERIFY(0);
1996 return 0;
1997 }
1998 }
1999
/*
 * Ring init callback: record the new TX/RX ring in the appropriate slot
 * of the fake interface and latch the netif stats pointer. In WMM mode
 * the TX ring index is derived from the ring's service class.
 */
static errno_t
feth_nx_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_t channel, kern_channel_ring_t ring, boolean_t is_tx_ring,
    void **ring_ctx)
{
	if_fake_ref fakeif;
	int err;
#pragma unused(nxprov, channel, ring_ctx)
	feth_lock();
	fakeif = feth_nexus_context(nexus);
	if (feth_is_detaching(fakeif)) {
		/* interface is going away; quietly ignore the ring */
		feth_unlock();
		return 0;
	}
	if (is_tx_ring) {
		if (feth_in_wmm_mode(fakeif)) {
			/* WMM mode: one TX ring per service class */
			kern_packet_svc_class_t svc_class;
			uint8_t ring_idx;

			err = kern_channel_get_service_class(ring, &svc_class);
			VERIFY(err == 0);
			ring_idx = feth_find_tx_ring_by_svc(svc_class);
			VERIFY(ring_idx < IFF_NUM_TX_RINGS_WMM_MODE);
			VERIFY(fakeif->iff_tx_ring[ring_idx] == NULL);
			fakeif->iff_tx_ring[ring_idx] = ring;
		} else {
			/* default mode: a single TX ring in slot 0 */
			VERIFY(fakeif->iff_tx_ring[0] == NULL);
			fakeif->iff_tx_ring[0] = ring;
		}
	} else {
		VERIFY(fakeif->iff_rx_ring[0] == NULL);
		fakeif->iff_rx_ring[0] = ring;
	}
	fakeif->iff_nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
	feth_unlock();
	FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE, "%s: %s ring init",
	    fakeif->iff_name, is_tx_ring ? "TX" : "RX");
	return 0;
}
2039
/*
 * Ring fini callback: clear the ring's slot, and when the last TX ring
 * goes away, take ownership of the doorbell thread call and destroy it
 * outside the lock (waiting for an in-flight callout if necessary).
 */
static void
feth_nx_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t ring)
{
#pragma unused(nxprov, ring)
	if_fake_ref fakeif;
	thread_call_t __single tcall = NULL;

	feth_lock();
	fakeif = feth_nexus_context(nexus);
	if (fakeif->iff_rx_ring[0] == ring) {
		fakeif->iff_rx_ring[0] = NULL;
		FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
		    "%s: RX ring fini", fakeif->iff_name);
	} else if (feth_in_wmm_mode(fakeif)) {
		int i;
		/* clear the matching TX ring slot */
		for (i = 0; i < IFF_MAX_TX_RINGS; i++) {
			if (fakeif->iff_tx_ring[i] == ring) {
				fakeif->iff_tx_ring[i] = NULL;
				break;
			}
		}
		/* check whether any TX ring is still registered */
		for (i = 0; i < IFF_MAX_TX_RINGS; i++) {
			if (fakeif->iff_tx_ring[i] != NULL) {
				break;
			}
		}
		if (i == IFF_MAX_TX_RINGS) {
			/* last TX ring gone: detach the doorbell callout */
			tcall = fakeif->iff_doorbell_tcall;
			fakeif->iff_doorbell_tcall = NULL;
		}
		FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
		    "%s: TX ring fini", fakeif->iff_name);
	} else if (fakeif->iff_tx_ring[0] == ring) {
		/* single-ring mode: detach the doorbell callout too */
		tcall = fakeif->iff_doorbell_tcall;
		fakeif->iff_doorbell_tcall = NULL;
		fakeif->iff_tx_ring[0] = NULL;
	}
	fakeif->iff_nifs = NULL;
	feth_unlock();
	/* destroy the doorbell thread call without holding the lock */
	if (tcall != NULL) {
		boolean_t success;

		success = thread_call_cancel_wait(tcall);
		FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
		    "%s: thread_call_cancel %s", fakeif->iff_name,
		    success ? "SUCCESS" : "FAILURE");
		if (!success) {
			/*
			 * Cancel failed: the callout may be running right
			 * now. Flag iff_waiting_for_tcall and msleep()
			 * until feth_async_doorbell() clears
			 * iff_doorbell_tcall_active and wakes us up.
			 */
			feth_lock();
			if (fakeif->iff_doorbell_tcall_active) {
				fakeif->iff_waiting_for_tcall = TRUE;
				FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
				    "%s: *waiting for threadcall",
				    fakeif->iff_name);
				do {
					msleep(fakeif, &feth_lck_mtx,
					    PZERO, "feth threadcall", 0);
				} while (fakeif->iff_doorbell_tcall_active);
				FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
				    "%s: ^threadcall done",
				    fakeif->iff_name);
				fakeif->iff_waiting_for_tcall = FALSE;
			}
			feth_unlock();
		}
		success = thread_call_free(tcall);
		FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
		    "%s: thread_call_free %s",
		    fakeif->iff_name,
		    success ? "SUCCESS" : "FAILURE");
		/* drop the retain taken when the doorbell callout was allocated */
		feth_release(fakeif);
		VERIFY(success == TRUE);
	}
}
2114
/*
 * Channel pre-connect hook: feth accepts every connection attempt and
 * keeps no per-channel context, so this is a no-op that returns success.
 */
static errno_t
feth_nx_pre_connect(kern_nexus_provider_t nxprov,
    proc_t proc, kern_nexus_t nexus, nexus_port_t port, kern_channel_t channel,
    void **channel_context)
{
#pragma unused(nxprov, proc, nexus, port, channel, channel_context)
	return 0;
}
2123
/*
 * Channel connected hook: mark the channel up, take a reference for the
 * connection, and start the interface advisory callout if configured.
 * Returns EBUSY while the interface is detaching.
 */
static errno_t
feth_nx_connected(kern_nexus_provider_t nxprov,
    kern_nexus_t nexus, kern_channel_t channel)
{
#pragma unused(nxprov, channel)
	int err;
	if_fake_ref fakeif;

	fakeif = feth_nexus_context(nexus);
	feth_lock();
	if (feth_is_detaching(fakeif)) {
		feth_unlock();
		return EBUSY;
	}
	/* retain for the connected channel; released in pre-disconnect */
	feth_retain(fakeif);
	fakeif->iff_channel_connected = TRUE;
	feth_unlock();
	if (feth_has_intf_advisory_configured(fakeif)) {
		err = feth_if_adv_tcall_create(fakeif);
		if (err != 0) {
			/*
			 * NOTE(review): on this failure path the channel is
			 * left marked connected with its retain still held —
			 * presumably pre-disconnect undoes both; confirm.
			 */
			return err;
		}
	}
	FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE, "%s: connected channel %p",
	    fakeif->iff_name, channel);
	return 0;
}
2151
/*
 * Channel pre-disconnect hook: bring the interface down, mark the
 * channel disconnected, and tear down the interface advisory thread
 * call outside of the lock. Drops the connect-time reference last.
 */
static void
feth_nx_pre_disconnect(kern_nexus_provider_t nxprov,
    kern_nexus_t nexus, kern_channel_t channel)
{
#pragma unused(nxprov, channel)
	if_fake_ref fakeif;
	thread_call_t __single tcall;
	boolean_t connected;

	fakeif = feth_nexus_context(nexus);
	FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
	    "%s: pre-disconnect channel %p",
	    fakeif->iff_name, channel);
	/* Quiesce the interface and flush any pending outbound packets. */
	if_down(fakeif->iff_ifp);
	/* detach state under the lock; destroy the tcall without it */
	feth_lock();
	connected = fakeif->iff_channel_connected;
	fakeif->iff_channel_connected = FALSE;
	tcall = fakeif->iff_if_adv_tcall;
	fakeif->iff_if_adv_tcall = NULL;
	feth_unlock();
	if (tcall != NULL) {
		(void) thread_call_cancel_wait(tcall);
		if (!thread_call_free(tcall)) {
			/*
			 * Free can fail if the call is still pending
			 * (presumably it re-armed itself in the meantime);
			 * cancel once more, after which the free must work.
			 */
			boolean_t freed;
			(void) thread_call_cancel_wait(tcall);
			freed = thread_call_free(tcall);
			VERIFY(freed);
		}
		/* release for the interface advisory thread call */
		feth_release(fakeif);
	}
	if (connected) {
		/* drop the reference taken in feth_nx_connected() */
		feth_release(fakeif);
	}
}
2188
2189 static void
feth_nx_disconnected(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel)2190 feth_nx_disconnected(kern_nexus_provider_t nxprov,
2191 kern_nexus_t nexus, kern_channel_t channel)
2192 {
2193 #pragma unused(nxprov, channel)
2194 if_fake_ref fakeif;
2195
2196 fakeif = feth_nexus_context(nexus);
2197 FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE, "%s: disconnected channel %p",
2198 fakeif->iff_name, channel);
2199 }
2200
/*
 * Slot init hook: feth attaches no per-slot state or properties,
 * so this is a no-op that returns success.
 */
static errno_t
feth_nx_slot_init(kern_nexus_provider_t nxprov,
    kern_nexus_t nexus, kern_channel_ring_t ring, kern_channel_slot_t slot,
    uint32_t slot_index, struct kern_slot_prop **slot_prop_addr,
    void **slot_context)
{
#pragma unused(nxprov, nexus, ring, slot, slot_index, slot_prop_addr, slot_context)
	return 0;
}
2210
/*
 * Slot fini hook: nothing was attached in feth_nx_slot_init(),
 * so there is nothing to release.
 */
static void
feth_nx_slot_fini(kern_nexus_provider_t nxprov,
    kern_nexus_t nexus, kern_channel_ring_t ring, kern_channel_slot_t slot,
    uint32_t slot_index)
{
#pragma unused(nxprov, nexus, ring, slot, slot_index)
}
2218
/*
 * TX sync handler: drain every packet queued on `tx_ring' and deliver
 * it directly to the peer interface's RX path (the "crossed-over"
 * wiring), applying the configured expire/complete-error drop policies
 * along the way. Packets are handed to the peer in batches of up to
 * IFF_MAX_BATCH_SIZE.
 */
static errno_t
feth_nx_sync_tx(kern_nexus_provider_t nxprov,
    kern_nexus_t nexus, kern_channel_ring_t tx_ring, uint32_t flags)
{
#pragma unused(nxprov)
	if_fake_ref fakeif;
	ifnet_t ifp;
	kern_channel_slot_t last_tx_slot = NULL;
	ifnet_t peer_ifp;
	if_fake_ref peer_fakeif = NULL;
	struct kern_channel_ring_stat_increment stats;
	kern_channel_slot_t tx_slot;
	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
	kern_packet_t pkts[IFF_MAX_BATCH_SIZE];
	uint32_t n_pkts = 0;

	memset(&stats, 0, sizeof(stats));

	STATS_INC(nifs, NETIF_STATS_TX_SYNC);
	fakeif = feth_nexus_context(nexus);
	FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
	    "%s ring %d flags 0x%x", fakeif->iff_name,
	    tx_ring->ckr_ring_id, flags);
	/* keep `flags' referenced even when FAKE_LOG compiles away */
	(void)flags;
	feth_lock();
	if (feth_is_detaching(fakeif) || !fakeif->iff_channel_connected) {
		feth_unlock();
		return 0;
	}
	ifp = fakeif->iff_ifp;
	/* locate the peer; without one there is nowhere to deliver */
	peer_ifp = fakeif->iff_peer;
	if (peer_ifp != NULL) {
		peer_fakeif = ifnet_get_if_fake(peer_ifp);
		if (peer_fakeif != NULL) {
			if (feth_is_detaching(peer_fakeif)) {
				FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
				    "%s peer fakeif %s is detaching",
				    fakeif->iff_name, peer_fakeif->iff_name);
				goto done;
			}
			if (!peer_fakeif->iff_channel_connected) {
				/*
				 * Peer not connected: bail out only when TX
				 * expiration is disabled; otherwise keep
				 * draining so the expiry policy still runs
				 * (such packets are dropped in the loop).
				 */
				if (fakeif->iff_tx_exp_policy ==
				    IFF_TX_EXP_POLICY_DISABLED) {
					FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
					    "%s peer fakeif %s channel not connected, expn: %d",
					    fakeif->iff_name, peer_fakeif->iff_name,
					    fakeif->iff_tx_exp_policy);
					goto done;
				}
			}
		} else {
			FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
			    "%s no peer fakeif (peer %p)",
			    fakeif->iff_name, peer_ifp);
			goto done;
		}
	} else {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
		    "%s no peer", fakeif->iff_name);
		goto done;
	}
	tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
	while (tx_slot != NULL) {
		uint16_t off;
		kern_packet_t sph;

		/* detach the packet from the TX ring */
		sph = kern_channel_slot_get_packet(tx_ring, tx_slot);
		VERIFY(sph != 0);
		kern_channel_slot_detach_packet(tx_ring, tx_slot, sph);

		/* bpf tap output */
		off = kern_packet_get_headroom(sph);
		VERIFY(off >= fakeif->iff_tx_headroom);
		kern_packet_set_link_header_length(sph, ETHER_HDR_LEN);
		feth_packet_set_trace_tag(sph, IFF_TT_OUTPUT);
		bpf_tap_packet_out(ifp, DLT_EN10MB, sph, NULL, 0);

		/* drop packets, if requested */
		fakeif->iff_tx_pkts_count++;
		if (feth_tx_expired_error(fakeif, sph) ||
		    feth_tx_complete_error(fakeif, sph) ||
		    !peer_fakeif->iff_channel_connected) {
			fakeif->iff_tx_pkts_count = 0;
			kern_pbufpool_free(fakeif->iff_tx_pp, sph);
			STATS_INC(nifs, NETIF_STATS_DROP);
			goto next_tx_slot;
		}

		ASSERT(sph != 0);
		STATS_INC(nifs, NETIF_STATS_TX_COPY_DIRECT);
		STATS_INC(nifs, NETIF_STATS_TX_PACKETS);

		stats.kcrsi_slots_transferred++;
		stats.kcrsi_bytes_transferred
		    += kern_packet_get_data_length(sph);

		/* prepare batch for receiver */
		pkts[n_pkts++] = sph;
		if (n_pkts == IFF_MAX_BATCH_SIZE) {
			/* full batch: hand to the peer, then complete TX */
			feth_rx_submit(fakeif, peer_fakeif, pkts, n_pkts);
			feth_tx_complete(fakeif, pkts, n_pkts);
			n_pkts = 0;
		}

next_tx_slot:
		last_tx_slot = tx_slot;
		tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
	}

	/* catch last batch for receiver */
	if (n_pkts != 0) {
		feth_rx_submit(fakeif, peer_fakeif, pkts, n_pkts);
		feth_tx_complete(fakeif, pkts, n_pkts);
		n_pkts = 0;
	}

	/* advance past everything consumed and publish ring stats */
	if (last_tx_slot != NULL) {
		kern_channel_advance_slot(tx_ring, last_tx_slot);
		kern_channel_increment_ring_net_stats(tx_ring, ifp, &stats);
	}
done:
	feth_unlock();
	return 0;
}
2344
2345 static errno_t
feth_nx_sync_rx(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring,uint32_t flags)2346 feth_nx_sync_rx(kern_nexus_provider_t nxprov,
2347 kern_nexus_t nexus, kern_channel_ring_t ring, uint32_t flags)
2348 {
2349 #pragma unused(nxprov, ring, flags)
2350 if_fake_ref fakeif;
2351 struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
2352
2353 STATS_INC(nifs, NETIF_STATS_RX_SYNC);
2354 fakeif = feth_nexus_context(nexus);
2355 FAKE_LOG(LOG_DEBUG, FE_DBGF_INPUT, "%s", fakeif->iff_name);
2356 return 0;
2357 }
2358
2359 static errno_t
feth_nx_tx_dequeue_driver_managed(if_fake_ref fakeif,boolean_t doorbell_ctxt)2360 feth_nx_tx_dequeue_driver_managed(if_fake_ref fakeif, boolean_t doorbell_ctxt)
2361 {
2362 int i;
2363 errno_t error = 0;
2364 boolean_t more;
2365
2366 for (i = 0; i < IFF_NUM_TX_RINGS_WMM_MODE; i++) {
2367 kern_channel_ring_t ring = fakeif->iff_tx_ring[i];
2368 if (ring != NULL) {
2369 error = kern_channel_tx_refill(ring, UINT32_MAX,
2370 UINT32_MAX, doorbell_ctxt, &more);
2371 }
2372 if (error != 0) {
2373 FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
2374 "%s: TX refill ring %d (%s) %d",
2375 fakeif->iff_name, ring->ckr_ring_id,
2376 doorbell_ctxt ? "sync" : "async", error);
2377 if (!((error == EAGAIN) || (error == EBUSY))) {
2378 break;
2379 }
2380 } else {
2381 FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
2382 "%s: TX refilled ring %d (%s)",
2383 fakeif->iff_name, ring->ckr_ring_id,
2384 doorbell_ctxt ? "sync" : "async");
2385 }
2386 }
2387 return error;
2388 }
2389
/*
 * Async doorbell thread call: perform the deferred TX refill scheduled
 * by feth_schedule_async_doorbell(). Marks itself active so that
 * feth_nx_ring_fini() can msleep() until it finishes, and wakes any
 * such waiter on the way out.
 */
static void
feth_async_doorbell(thread_call_param_t arg0, thread_call_param_t arg1)
{
#pragma unused(arg1)
	errno_t error;
	if_fake_ref fakeif = (if_fake_ref)arg0;
	kern_channel_ring_t ring;
	boolean_t more;

	feth_lock();
	ring = fakeif->iff_tx_ring[0];
	if (feth_is_detaching(fakeif) ||
	    !fakeif->iff_channel_connected ||
	    ring == NULL) {
		/* note: jumps to `done' with the lock still held */
		goto done;
	}
	/* flag the callout as running so ring-fini can wait for it */
	fakeif->iff_doorbell_tcall_active = TRUE;
	feth_unlock();
	if (feth_in_wmm_mode(fakeif)) {
		error = feth_nx_tx_dequeue_driver_managed(fakeif, FALSE);
	} else {
		error = kern_channel_tx_refill(ring, UINT32_MAX,
		    UINT32_MAX, FALSE, &more);
	}
	if (error != 0) {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT, "%s: TX refill failed %d",
		    fakeif->iff_name, error);
	} else {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT, "%s: TX refilled",
		    fakeif->iff_name);
	}

	feth_lock();
done:
	fakeif->iff_doorbell_tcall_active = FALSE;
	if (fakeif->iff_waiting_for_tcall) {
		/* feth_nx_ring_fini() is msleep()ing on fakeif; wake it */
		FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
		    "%s: threadcall waking up waiter", fakeif->iff_name);
		wakeup((caddr_t)fakeif);
	}
	feth_unlock();
}
2432
/*
 * Schedule the asynchronous TX-refill doorbell, lazily allocating the
 * thread call on first use. No-op while the interface is detaching or
 * the channel is not connected.
 */
static void
feth_schedule_async_doorbell(if_fake_ref fakeif)
{
	thread_call_t __single tcall;

	feth_lock();
	if (feth_is_detaching(fakeif) || !fakeif->iff_channel_connected) {
		feth_unlock();
		return;
	}
	tcall = fakeif->iff_doorbell_tcall;
	if (tcall != NULL) {
		thread_call_enter(tcall);
	} else {
		/* first use: allocate a one-shot doorbell callout */
		tcall = thread_call_allocate_with_options(feth_async_doorbell,
		    (thread_call_param_t)fakeif,
		    THREAD_CALL_PRIORITY_KERNEL,
		    THREAD_CALL_OPTIONS_ONCE);
		if (tcall == NULL) {
			FAKE_LOG(LOG_NOTICE, FE_DBGF_OUTPUT,
			    "%s tcall alloc failed", fakeif->iff_name);
		} else {
			fakeif->iff_doorbell_tcall = tcall;
			/* retain for the callout; released in ring-fini */
			feth_retain(fakeif);
			thread_call_enter(tcall);
		}
	}
	feth_unlock();
}
2462
2463 static errno_t
feth_nx_tx_doorbell(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring,uint32_t flags)2464 feth_nx_tx_doorbell(kern_nexus_provider_t nxprov,
2465 kern_nexus_t nexus, kern_channel_ring_t ring, uint32_t flags)
2466 {
2467 #pragma unused(nxprov, ring, flags)
2468 errno_t error;
2469 if_fake_ref fakeif;
2470
2471 fakeif = feth_nexus_context(nexus);
2472 FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT, "%s", fakeif->iff_name);
2473
2474 if ((flags & KERN_NEXUS_TXDOORBELLF_ASYNC_REFILL) == 0) {
2475 boolean_t more;
2476 /* synchronous tx refill */
2477 if (feth_in_wmm_mode(fakeif)) {
2478 error = feth_nx_tx_dequeue_driver_managed(fakeif, TRUE);
2479 } else {
2480 error = kern_channel_tx_refill(ring, UINT32_MAX,
2481 UINT32_MAX, TRUE, &more);
2482 }
2483 if (error != 0) {
2484 FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
2485 "%s: TX refill (sync) %d", fakeif->iff_name, error);
2486 } else {
2487 FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
2488 "%s: TX refilled (sync)", fakeif->iff_name);
2489 }
2490 } else {
2491 FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
2492 "%s: schedule async refill", fakeif->iff_name);
2493 feth_schedule_async_doorbell(fakeif);
2494 }
2495 return 0;
2496 }
2497
2498 static errno_t
feth_netif_prepare(kern_nexus_t nexus,ifnet_t ifp)2499 feth_netif_prepare(kern_nexus_t nexus, ifnet_t ifp)
2500 {
2501 if_fake_ref fakeif;
2502
2503 fakeif = (if_fake_ref)kern_nexus_get_context(nexus);
2504 feth_ifnet_set_attrs(fakeif, ifp);
2505 return 0;
2506 }
2507
2508 static errno_t
feth_nx_intf_adv_config(void * prov_ctx,bool enable)2509 feth_nx_intf_adv_config(void *prov_ctx, bool enable)
2510 {
2511 if_fake_ref fakeif = prov_ctx;
2512
2513 feth_lock();
2514 fakeif->iff_intf_adv_enabled = enable;
2515 feth_unlock();
2516 FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
2517 "%s enable %d", fakeif->iff_name, enable);
2518 return 0;
2519 }
2520
2521 static errno_t
fill_capab_interface_advisory(if_fake_ref fakeif,void * contents,uint32_t * len)2522 fill_capab_interface_advisory(if_fake_ref fakeif, void *contents, uint32_t *len)
2523 {
2524 struct kern_nexus_capab_interface_advisory * __single capab = contents;
2525
2526 if (*len != sizeof(*capab)) {
2527 return EINVAL;
2528 }
2529 if (capab->kncia_version !=
2530 KERN_NEXUS_CAPAB_INTERFACE_ADVISORY_VERSION_1) {
2531 return EINVAL;
2532 }
2533 if (!feth_has_intf_advisory_configured(fakeif)) {
2534 return ENOTSUP;
2535 }
2536 VERIFY(capab->kncia_notify != NULL);
2537 fakeif->iff_intf_adv_kern_ctx = capab->kncia_kern_context;
2538 fakeif->iff_intf_adv_notify = capab->kncia_notify;
2539 capab->kncia_provider_context = fakeif;
2540 capab->kncia_config = feth_nx_intf_adv_config;
2541 return 0;
2542 }
2543
2544 static errno_t
feth_notify_steering_info(void * prov_ctx,void * qset_ctx,struct ifnet_traffic_descriptor_common * td,bool add)2545 feth_notify_steering_info(void *prov_ctx, void *qset_ctx,
2546 struct ifnet_traffic_descriptor_common *td, bool add)
2547 {
2548 #pragma unused(td)
2549 if_fake_ref fakeif = prov_ctx;
2550 fake_qset * __single qset = qset_ctx;
2551
2552 FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
2553 "%s: notify_steering_info: qset_id 0x%llx, %s",
2554 fakeif->iff_name, qset->fqs_id, add ? "add" : "remove");
2555 return 0;
2556 }
2557
2558 static errno_t
fill_capab_qset_extensions(if_fake_ref fakeif,void * contents,uint32_t * len)2559 fill_capab_qset_extensions(if_fake_ref fakeif, void *contents, uint32_t *len)
2560 {
2561 struct kern_nexus_capab_qset_extensions * __single capab = contents;
2562
2563 if (*len != sizeof(*capab)) {
2564 return EINVAL;
2565 }
2566 if (capab->cqe_version !=
2567 KERN_NEXUS_CAPAB_QSET_EXTENSIONS_VERSION_1) {
2568 return EINVAL;
2569 }
2570 capab->cqe_prov_ctx = fakeif;
2571 capab->cqe_notify_steering_info = feth_notify_steering_info;
2572 return 0;
2573 }
2574
2575 static errno_t
feth_nx_rx_flow_steering_config(void * prov_ctx,uint32_t id,struct ifnet_traffic_descriptor_common * td,uint32_t action)2576 feth_nx_rx_flow_steering_config(void *prov_ctx, uint32_t id,
2577 struct ifnet_traffic_descriptor_common *td, uint32_t action)
2578 {
2579 #pragma unused(td)
2580 if_fake_ref fakeif = prov_ctx;
2581
2582 FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
2583 "%s: nx_rx_flow_steering_config: id 0x%x, action %u",
2584 fakeif->iff_name, id, action);
2585 return 0;
2586 }
2587
2588 static errno_t
fill_capab_rx_flow_steering(if_fake_ref fakeif,void * contents,uint32_t * len)2589 fill_capab_rx_flow_steering(if_fake_ref fakeif, void *contents, uint32_t *len)
2590 {
2591 struct kern_nexus_capab_rx_flow_steering * __single capab = contents;
2592
2593 if (*len != sizeof(*capab)) {
2594 return EINVAL;
2595 }
2596 if (capab->kncrxfs_version !=
2597 KERN_NEXUS_CAPAB_RX_FLOW_STEERING_VERSION_1) {
2598 return EINVAL;
2599 }
2600
2601 capab->kncrxfs_prov_ctx = fakeif;
2602 capab->kncrxfs_config = feth_nx_rx_flow_steering_config;
2603 return 0;
2604 }
2605
2606 static errno_t
feth_nx_capab_config(kern_nexus_provider_t nxprov,kern_nexus_t nx,kern_nexus_capab_t capab,void * contents,uint32_t * len)2607 feth_nx_capab_config(kern_nexus_provider_t nxprov, kern_nexus_t nx,
2608 kern_nexus_capab_t capab, void *contents, uint32_t *len)
2609 {
2610 #pragma unused(nxprov)
2611 errno_t error;
2612 if_fake_ref fakeif;
2613
2614 fakeif = feth_nexus_context(nx);
2615 FAKE_LOG(LOG_DEBUG, FE_DBGF_CONTROL, "%s", fakeif->iff_name);
2616
2617 switch (capab) {
2618 case KERN_NEXUS_CAPAB_INTERFACE_ADVISORY:
2619 error = fill_capab_interface_advisory(fakeif, contents, len);
2620 break;
2621 case KERN_NEXUS_CAPAB_QSET_EXTENSIONS:
2622 error = fill_capab_qset_extensions(fakeif, contents, len);
2623 break;
2624 case KERN_NEXUS_CAPAB_RX_FLOW_STEERING:
2625 error = fill_capab_rx_flow_steering(fakeif, contents, len);
2626 break;
2627 default:
2628 error = ENOTSUP;
2629 break;
2630 }
2631 return error;
2632 }
2633
2634 static int
feth_set_tso_mtu(ifnet_t ifp,uint32_t tso_v4_mtu,uint32_t tso_v6_mtu)2635 feth_set_tso_mtu(ifnet_t ifp, uint32_t tso_v4_mtu, uint32_t tso_v6_mtu)
2636 {
2637 int error;
2638
2639 error = ifnet_set_tso_mtu(ifp, AF_INET, tso_v4_mtu);
2640 if (error != 0) {
2641 FAKE_LOG(LOG_NOTICE, FE_DBGF_CONTROL,
2642 "set TSO MTU IPv4 failed on %s, err %d",
2643 if_name(ifp), error);
2644 return error;
2645 }
2646 error = ifnet_set_tso_mtu(ifp, AF_INET6, tso_v6_mtu);
2647 if (error != 0) {
2648 FAKE_LOG(LOG_NOTICE, FE_DBGF_CONTROL,
2649 "set TSO MTU IPv6 failed on %s, err %d",
2650 if_name(ifp), error);
2651 return error;
2652 }
2653 return 0;
2654 }
2655
2656 static int
feth_set_tso_offload(ifnet_t ifp)2657 feth_set_tso_offload(ifnet_t ifp)
2658 {
2659 ifnet_offload_t offload;
2660 int error;
2661
2662 offload = IFNET_TSO_IPV4 | IFNET_TSO_IPV6;
2663 error = ifnet_set_offload(ifp, offload);
2664 if (error != 0) {
2665 FAKE_LOG(LOG_NOTICE, FE_DBGF_CONTROL,
2666 "set TSO offload failed on %s, err %d",
2667 if_name(ifp), error);
2668 goto done;
2669 }
2670 error = feth_set_tso_mtu(ifp, if_fake_tso_buffer_size,
2671 if_fake_tso_buffer_size);
2672 done:
2673 return error;
2674 }
2675
/*
 * Register a netif nexus provider for this fake interface and allocate
 * a provider instance bound to a new ifnet. On success `*ifp',
 * `*provider' and `*instance' are filled in; on provider-instance
 * failure the registered provider is deregistered and `*provider' is
 * cleared. The nexus attribute object is destroyed on every path.
 */
static errno_t
create_netif_provider_and_instance(if_fake_ref fakeif,
    struct ifnet_init_eparams * init_params, ifnet_t *ifp,
    uuid_t * provider, uuid_t * instance)
{
	errno_t err;
	nexus_controller_t controller = kern_nexus_shared_controller();
	struct kern_nexus_net_init net_init;
	nexus_name_t provider_name;
	nexus_attr_t __single nexus_attr = NULL;
	struct kern_nexus_provider_init prov_init = {
		.nxpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
		.nxpi_flags = NXPIF_VIRTUAL_DEVICE,
		.nxpi_pre_connect = feth_nx_pre_connect,
		.nxpi_connected = feth_nx_connected,
		.nxpi_pre_disconnect = feth_nx_pre_disconnect,
		.nxpi_disconnected = feth_nx_disconnected,
		.nxpi_ring_init = feth_nx_ring_init,
		.nxpi_ring_fini = feth_nx_ring_fini,
		.nxpi_slot_init = feth_nx_slot_init,
		.nxpi_slot_fini = feth_nx_slot_fini,
		.nxpi_sync_tx = feth_nx_sync_tx,
		.nxpi_sync_rx = feth_nx_sync_rx,
		.nxpi_tx_doorbell = feth_nx_tx_doorbell,
		.nxpi_config_capab = feth_nx_capab_config,
	};

	static_assert(IFF_MAX_RX_RINGS == 1);
	err = kern_nexus_attr_create(&nexus_attr);
	if (err != 0) {
		FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
		    "nexus attribute creation failed, error %d", err);
		goto failed;
	}
	if (feth_in_wmm_mode(fakeif)) {
		/* WMM mode: per-access-category TX rings + WMM queue map */
		err = kern_nexus_attr_set(nexus_attr, NEXUS_ATTR_TX_RINGS,
		    IFF_NUM_TX_RINGS_WMM_MODE);
		VERIFY(err == 0);
		err = kern_nexus_attr_set(nexus_attr, NEXUS_ATTR_RX_RINGS,
		    IFF_NUM_RX_RINGS_WMM_MODE);
		VERIFY(err == 0);
		err = kern_nexus_attr_set(nexus_attr, NEXUS_ATTR_QMAP,
		    NEXUS_QMAP_TYPE_WMM);
		VERIFY(err == 0);
	}

	err = kern_nexus_attr_set(nexus_attr, NEXUS_ATTR_ANONYMOUS, 1);
	VERIFY(err == 0);
	snprintf((char *)provider_name, sizeof(provider_name),
	    "com.apple.netif.%s", fakeif->iff_name);
	err = kern_nexus_controller_register_provider(controller,
	    feth_nx_dom_prov,
	    provider_name,
	    &prov_init,
	    sizeof(prov_init),
	    nexus_attr,
	    provider);
	if (err != 0) {
		FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
		    "register provider failed, error %d", err);
		goto failed;
	}
	bzero(&net_init, sizeof(net_init));
	net_init.nxneti_version = KERN_NEXUS_NET_CURRENT_VERSION;
	net_init.nxneti_flags = 0;
	net_init.nxneti_eparams = init_params;
	net_init.nxneti_lladdr = NULL;
	net_init.nxneti_prepare = feth_netif_prepare;
	net_init.nxneti_rx_pbufpool = fakeif->iff_rx_pp;
	net_init.nxneti_tx_pbufpool = fakeif->iff_tx_pp;
	err = kern_nexus_controller_alloc_net_provider_instance(controller,
	    *provider,
	    fakeif,
	    NULL,
	    instance,
	    &net_init,
	    ifp);
	if (err != 0) {
		/* undo the provider registration on instance failure */
		FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
		    "alloc_net_provider_instance failed, %d", err);
		kern_nexus_controller_deregister_provider(controller,
		    *provider);
		uuid_clear(*provider);
		goto failed;
	}
	if (feth_supports_tso(fakeif)) {
		/*
		 * NOTE(review): if TSO setup fails here, the allocated
		 * provider instance is not torn down on this path —
		 * presumably the caller cleans up; confirm.
		 */
		if ((err = feth_set_tso_offload(*ifp)) != 0) {
			goto failed;
		}
	}

failed:
	/* reached on success too; only the attr object needs destroying */
	if (nexus_attr != NULL) {
		kern_nexus_attr_destroy(nexus_attr);
	}
	return err;
}
2773
2774 /*
2775 * The nif_stats need to be referenced because we don't want it set
2776 * to NULL until the last llink is removed.
2777 */
2778 static void
get_nexus_stats(if_fake_ref fakeif,kern_nexus_t nexus)2779 get_nexus_stats(if_fake_ref fakeif, kern_nexus_t nexus)
2780 {
2781 if (++fakeif->iff_nifs_ref == 1) {
2782 ASSERT(fakeif->iff_nifs == NULL);
2783 fakeif->iff_nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
2784 }
2785 }
2786
2787 static void
clear_nexus_stats(if_fake_ref fakeif)2788 clear_nexus_stats(if_fake_ref fakeif)
2789 {
2790 if (--fakeif->iff_nifs_ref == 0) {
2791 ASSERT(fakeif->iff_nifs != NULL);
2792 fakeif->iff_nifs = NULL;
2793 }
2794 }
2795
/*
 * qset init callback: bind the kernel qset to the per-llink slot at
 * `qset_idx' and take a netif stats reference. Fails with ENXIO while
 * detaching and EINVAL for an out-of-range index.
 */
static errno_t
feth_nx_qset_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    void *llink_ctx, uint8_t qset_idx, uint64_t qset_id, kern_netif_qset_t qset,
    void **qset_ctx)
{
#pragma unused(nxprov)
	if_fake_ref fakeif;
	fake_llink * __single fl = llink_ctx;
	fake_qset *fqs;

	feth_lock();
	fakeif = feth_nexus_context(nexus);
	if (feth_is_detaching(fakeif)) {
		feth_unlock();
		FAKE_LOG(LOG_DEBUG, FE_DBGF_CONTROL,
		    "%s: detaching", fakeif->iff_name);
		return ENXIO;
	}
	if (qset_idx >= fl->fl_qset_cnt) {
		feth_unlock();
		FAKE_LOG(LOG_DEBUG, FE_DBGF_CONTROL,
		    "%s: invalid qset_idx %d", fakeif->iff_name, qset_idx);
		return EINVAL;
	}
	/* each slot is bound at most once */
	fqs = &fl->fl_qset[qset_idx];
	ASSERT(fqs->fqs_qset == NULL);
	fqs->fqs_qset = qset;
	fqs->fqs_id = qset_id;
	*qset_ctx = fqs;

	/* XXX This should really be done during registration */
	get_nexus_stats(fakeif, nexus);
	feth_unlock();
	return 0;
}
2831
2832 static void
feth_nx_qset_fini(kern_nexus_provider_t nxprov,kern_nexus_t nexus,void * qset_ctx)2833 feth_nx_qset_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
2834 void *qset_ctx)
2835 {
2836 #pragma unused(nxprov)
2837 if_fake_ref fakeif;
2838 fake_qset * __single fqs = qset_ctx;
2839
2840 feth_lock();
2841 fakeif = feth_nexus_context(nexus);
2842 clear_nexus_stats(fakeif);
2843 ASSERT(fqs->fqs_qset != NULL);
2844 fqs->fqs_qset = NULL;
2845 fqs->fqs_id = 0;
2846 feth_unlock();
2847 }
2848
/*
 * Queue init callback: bind a kernel netif queue to the fake_queue slot
 * at `qidx' within the qset, for the requested direction (tx/rx).
 * Fails with ENXIO while detaching and EINVAL for an out-of-range
 * index.
 */
static errno_t
feth_nx_queue_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    void *qset_ctx, uint8_t qidx, bool tx, kern_netif_queue_t queue,
    void **queue_ctx)
{
#pragma unused(nxprov)
	if_fake_ref fakeif;
	fake_qset *__single fqs = qset_ctx;
	fake_queue *fq;

	feth_lock();
	fakeif = feth_nexus_context(nexus);
	if (feth_is_detaching(fakeif)) {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_CONTROL,
		    "%s: detaching", fakeif->iff_name);
		feth_unlock();
		return ENXIO;
	}
	if (tx) {
		if (qidx >= fqs->fqs_tx_queue_cnt) {
			FAKE_LOG(LOG_DEBUG, FE_DBGF_CONTROL,
			    "%s: invalid tx qidx %d", fakeif->iff_name, qidx);
			feth_unlock();
			return EINVAL;
		}
		fq = &fqs->fqs_tx_queue[qidx];
	} else {
		if (qidx >= fqs->fqs_rx_queue_cnt) {
			FAKE_LOG(LOG_DEBUG, FE_DBGF_CONTROL,
			    "%s: invalid rx qidx %d", fakeif->iff_name, qidx);
			feth_unlock();
			return EINVAL;
		}
		fq = &fqs->fqs_rx_queue[qidx];
	}
	/* each slot is bound at most once */
	ASSERT(fq->fq_queue == NULL);
	fq->fq_queue = queue;
	*queue_ctx = fq;
	feth_unlock();
	return 0;
}
2890
2891 static void
feth_nx_queue_fini(kern_nexus_provider_t nxprov,kern_nexus_t nexus,void * queue_ctx)2892 feth_nx_queue_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
2893 void *queue_ctx)
2894 {
2895 #pragma unused(nxprov, nexus)
2896 fake_queue *__single fq = queue_ctx;
2897
2898 feth_lock();
2899 ASSERT(fq->fq_queue != NULL);
2900 fq->fq_queue = NULL;
2901 feth_unlock();
2902 }
2903
/*
 * Walk a TX packet chain, bpf-tap and policy-check each packet, and
 * deliver survivors to the peer's RX queue (identified by llink/qset
 * index) in batches of up to IFF_MAX_BATCH_SIZE. Packets hit by the
 * expire/complete-error policy are freed and counted as drops.
 */
static void
feth_nx_tx_queue_deliver_pkt_chain(if_fake_ref fakeif, kern_packet_t sph,
    struct netif_stats *nifs, if_fake_ref peer_fakeif,
    uint32_t llink_idx, uint32_t qset_idx)
{
	kern_packet_t pkts[IFF_MAX_BATCH_SIZE];
	uint32_t n_pkts = 0;

	FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
	    "%s -> %s", fakeif->iff_name, peer_fakeif->iff_name);

	while (sph != 0) {
		uint16_t off;
		kern_packet_t next;

		/* unlink the packet before handing it off */
		next = kern_packet_get_next(sph);
		kern_packet_set_next(sph, 0);

		/* bpf tap output */
		off = kern_packet_get_headroom(sph);
		VERIFY(off >= fakeif->iff_tx_headroom);
		kern_packet_set_link_header_length(sph, ETHER_HDR_LEN);
		feth_packet_set_trace_tag(sph, IFF_TT_OUTPUT);
		bpf_tap_packet_out(fakeif->iff_ifp, DLT_EN10MB, sph, NULL, 0);

		/* drop packets, if requested */
		fakeif->iff_tx_pkts_count++;
		if (feth_tx_expired_error(fakeif, sph) ||
		    feth_tx_complete_error(fakeif, sph)) {
			fakeif->iff_tx_pkts_count = 0;
			kern_pbufpool_free(fakeif->iff_tx_pp, sph);
			STATS_INC(nifs, NETIF_STATS_DROP);
			goto next_pkt;
		}
		ASSERT(sph != 0);
		STATS_INC(nifs, NETIF_STATS_TX_COPY_DIRECT);
		STATS_INC(nifs, NETIF_STATS_TX_PACKETS);

		/* prepare batch for receiver */
		pkts[n_pkts++] = sph;
		if (n_pkts == IFF_MAX_BATCH_SIZE) {
			/* full batch: hand to the peer, then complete TX */
			feth_rx_queue_submit(fakeif, peer_fakeif, llink_idx,
			    qset_idx, pkts, n_pkts);
			feth_tx_complete(fakeif, pkts, n_pkts);
			n_pkts = 0;
		}
next_pkt:
		sph = next;
	}
	/* catch last batch for receiver */
	if (n_pkts != 0) {
		feth_rx_queue_submit(fakeif, peer_fakeif, llink_idx, qset_idx,
		    pkts, n_pkts);
		feth_tx_complete(fakeif, pkts, n_pkts);
		n_pkts = 0;
	}
}
2961
static errno_t
feth_nx_tx_qset_notify(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    void *qset_ctx, uint32_t flags)
{
#pragma unused(nxprov)
	/*
	 * TX doorbell for a queue set: drain every TX queue in the qset and
	 * deliver the dequeued chains to the peer interface's matching RX
	 * qset.  Bails out (returning 0) when either endpoint is detaching
	 * or its channel is not connected.  Always returns 0.
	 */
	if_fake_ref fakeif;
	ifnet_t peer_ifp;
	if_fake_ref peer_fakeif = NULL;
	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
	fake_qset * __single qset = qset_ctx;
	boolean_t detaching, connected;
	uint32_t i;
	errno_t err;

	STATS_INC(nifs, NETIF_STATS_TX_SYNC);
	fakeif = feth_nexus_context(nexus);
	FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
	    "%s qset %p, idx %d, flags 0x%x", fakeif->iff_name, qset,
	    qset->fqs_idx, flags);

	feth_lock();
	/* local endpoint must be attached and channel-connected */
	detaching = feth_is_detaching(fakeif);
	connected = fakeif->iff_channel_connected;
	if (detaching || !connected) {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
		    "%s: detaching %s, channel connected %s",
		    fakeif->iff_name,
		    (detaching ? "true" : "false"),
		    (connected ? "true" : "false"));
		feth_unlock();
		return 0;
	}
	/* peer endpoint must exist and be attached/connected as well */
	peer_ifp = fakeif->iff_peer;
	if (peer_ifp != NULL) {
		peer_fakeif = ifnet_get_if_fake(peer_ifp);
		if (peer_fakeif != NULL) {
			detaching = feth_is_detaching(peer_fakeif);
			connected = peer_fakeif->iff_channel_connected;
			if (detaching || !connected) {
				FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
				    "peer %s: detaching %s, "
				    "channel connected %s",
				    peer_fakeif->iff_name,
				    (detaching ? "true" : "false"),
				    (connected ? "true" : "false"));
				goto done;
			}
		} else {
			FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
			    "peer_fakeif is NULL");
			goto done;
		}
	} else {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT, "peer_ifp is NULL");
		goto done;
	}

	/* drain each TX queue; sph stays 0 on dequeue failure, which the
	 * delivery routine treats as an empty chain */
	for (i = 0; i < qset->fqs_tx_queue_cnt; i++) {
		kern_packet_t sph = 0;
		kern_netif_queue_t queue = qset->fqs_tx_queue[i].fq_queue;
		boolean_t more = FALSE;

		err = kern_netif_queue_tx_dequeue(queue, UINT32_MAX, UINT32_MAX,
		    &more, &sph);
		if (err != 0 && err != EAGAIN) {
			FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
			    "%s queue %p dequeue failed: err "
			    "%d", fakeif->iff_name, queue, err);
		}
		feth_nx_tx_queue_deliver_pkt_chain(fakeif, sph, nifs,
		    peer_fakeif, qset->fqs_llink_idx, qset->fqs_idx);
	}

done:
	feth_unlock();
	return 0;
}
3039
3040
static errno_t
feth_nx_queue_tx_push(kern_nexus_provider_t nxprov,
    kern_nexus_t nexus, void *queue_ctx, kern_packet_t *ph,
    uint32_t *packetCount, uint32_t *byteCount)
{
#pragma unused(nxprov)
	/*
	 * Direct TX push: deliver the caller-supplied packet chain *ph to
	 * the peer's default RX qset (llink 0, qset 0), reporting back how
	 * many packets/bytes were taken.  On success *ph is consumed and
	 * cleared.
	 *
	 * NOTE(review): on the early-bail paths (detaching / no peer) *ph,
	 * *packetCount and *byteCount are left untouched — presumably the
	 * caller retains ownership of the chain in that case; confirm
	 * against the netif contract.
	 */
	if_fake_ref fakeif;
	ifnet_t peer_ifp;
	if_fake_ref peer_fakeif = NULL;
	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
	fake_queue *__single fq = queue_ctx;
	boolean_t detaching, connected;

	STATS_INC(nifs, NETIF_STATS_TX_SYNC);
	fakeif = feth_nexus_context(nexus);
	FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT, "%s queue %p", fakeif->iff_name, fq);

	feth_lock();

	/* local endpoint must be attached and channel-connected */
	detaching = feth_is_detaching(fakeif);
	connected = fakeif->iff_channel_connected;
	if (detaching || !connected) {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
		    "%s: detaching %s, channel connected %s",
		    fakeif->iff_name,
		    (detaching ? "true" : "false"),
		    (connected ? "true" : "false"));
		goto done;
	}
	/* peer endpoint must exist and be attached/connected as well */
	peer_ifp = fakeif->iff_peer;
	if (peer_ifp != NULL) {
		peer_fakeif = ifnet_get_if_fake(peer_ifp);
		if (peer_fakeif != NULL) {
			detaching = feth_is_detaching(peer_fakeif);
			connected = peer_fakeif->iff_channel_connected;
			if (detaching || !connected) {
				FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
				    "peer %s: detaching %s, "
				    "channel connected %s",
				    peer_fakeif->iff_name,
				    (detaching ? "true" : "false"),
				    (connected ? "true" : "false"));
				goto done;
			}
		} else {
			FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
			    "peer_fakeif is NULL");
			goto done;
		}
	} else {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT, "peer_ifp is NULL");
		goto done;
	}

	*packetCount = *byteCount = 0;

	/* tally the chain before it is consumed by delivery */
	kern_packet_t sph = *ph;
	while (sph != 0) {
		(*packetCount)++;
		*byteCount += kern_packet_get_data_length(sph);
		sph = kern_packet_get_next(sph);
	}

	feth_nx_tx_queue_deliver_pkt_chain(fakeif, *ph, nifs,
	    peer_fakeif, 0, 0);

	/* chain fully consumed */
	*ph = 0;

done:
	feth_unlock();
	return 0;
}
3113
3114
static void
fill_qset_info_and_params(if_fake_ref fakeif, fake_llink *llink_info,
    uint32_t qset_idx, struct kern_nexus_netif_llink_qset_init *qset_init,
    bool is_def, bool is_low_latency)
{
	/*
	 * Populate one queue-set init descriptor (`qset_init') for the
	 * nexus along with our mirror bookkeeping (`fqs_*' fields) in the
	 * llink's qset array.  WMM mode uses multiple TX/RX queues per
	 * qset; otherwise one of each.
	 */
	fake_qset *qset_info = &llink_info->fl_qset[qset_idx];

	qset_init->nlqi_flags =
	    (is_def ? KERN_NEXUS_NET_LLINK_QSET_DEFAULT : 0) |
	    (is_low_latency ? KERN_NEXUS_NET_LLINK_QSET_LOW_LATENCY : 0) |
	    KERN_NEXUS_NET_LLINK_QSET_AQM;

	if (feth_in_wmm_mode(fakeif)) {
		qset_init->nlqi_flags |= KERN_NEXUS_NET_LLINK_QSET_WMM_MODE;
		qset_init->nlqi_num_txqs = IFF_NUM_TX_QUEUES_WMM_MODE;
		qset_init->nlqi_num_rxqs = IFF_NUM_RX_QUEUES_WMM_MODE;
	} else {
		qset_init->nlqi_num_txqs = 1;
		qset_init->nlqi_num_rxqs = 1;
	}
	qset_info->fqs_tx_queue_cnt = qset_init->nlqi_num_txqs;
	qset_info->fqs_rx_queue_cnt = qset_init->nlqi_num_rxqs;

	/* These are needed for locating the peer qset */
	qset_info->fqs_llink_idx = llink_info->fl_idx;
	qset_info->fqs_idx = qset_idx;
}
3142
static void
fill_llink_info_and_params(if_fake_ref fakeif, uint32_t llink_idx,
    struct kern_nexus_netif_llink_init *llink_init, uint32_t llink_id,
    struct kern_nexus_netif_llink_qset_init * __counted_by(qset_cnt) qset_init, uint32_t qset_cnt,
    uint32_t flags)
{
	/*
	 * Populate a logical-link init descriptor (`llink_init') plus our
	 * mirror bookkeeping in iff_llink[llink_idx], filling one qset init
	 * entry per qset.  When the low-latency tunable is set, non-default
	 * llinks mark their second qset (index 1) as low-latency.
	 */
	fake_llink *llink_info = &fakeif->iff_llink[llink_idx];
	uint32_t i;
	bool create_ll_qset = if_fake_low_latency && (llink_idx != 0);

	for (i = 0; i < qset_cnt; i++) {
		/* qset 0 is the default qset of this llink */
		fill_qset_info_and_params(fakeif, llink_info, i,
		    &qset_init[i], i == 0, create_ll_qset && i == 1);
	}
	llink_info->fl_idx = llink_idx;

	/* This doesn't have to be the same as llink_idx */
	llink_info->fl_id = llink_id;
	llink_info->fl_qset_cnt = qset_cnt;

	llink_init->nli_link_id = llink_id;
	llink_init->nli_num_qsets = qset_cnt;
	llink_init->nli_qsets = qset_init;
	llink_init->nli_flags = flags;
	llink_init->nli_ctx = llink_info;
}
3169
static errno_t
create_non_default_llinks(if_fake_ref fakeif)
{
	/*
	 * Add logical links 1..if_fake_llink_cnt-1 to the already-created
	 * nexus instance (the default llink at index 0 was created with the
	 * provider instance).  On any failure, removes every llink added so
	 * far and returns the error.
	 */
	struct kern_nexus *nx;
	fake_nx_t fnx = &fakeif->iff_nx;
	struct kern_nexus_netif_llink_init llink_init;
	struct kern_nexus_netif_llink_qset_init qset_init[FETH_MAX_QSETS];
	errno_t err;
	uint64_t llink_id;
	uint32_t i;

	nx = nx_find(fnx->fnx_instance, FALSE);
	if (nx == NULL) {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
		    "%s: nx not found", fakeif->iff_name);
		return ENXIO;
	}
	/* Default llink starts at index 0 */
	for (i = 1; i < if_fake_llink_cnt; i++) {
		llink_id = (uint64_t)i;

		/*
		 * The llink_init and qset_init structures are reused for
		 * each llink creation.
		 */
		fill_llink_info_and_params(fakeif, i, &llink_init,
		    llink_id, qset_init, if_fake_qset_cnt, 0);
		err = kern_nexus_netif_llink_add(nx, &llink_init);
		if (err != 0) {
			FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
			    "%s: llink add failed, error %d",
			    fakeif->iff_name, err);
			goto fail;
		}
		fakeif->iff_llink_cnt++;
	}
	nx_release(nx);
	return 0;

fail:
	/*
	 * NOTE(review): this cleanup loop starts at index 0, so it also
	 * issues a remove for the default llink created by our caller —
	 * confirm that is intended (remove_non_default_llinks deliberately
	 * skips index 0).
	 */
	for (i = 0; i < fakeif->iff_llink_cnt; i++) {
		int error;
		fake_llink * __single ll = &fakeif->iff_llink[i];

		error = kern_nexus_netif_llink_remove(nx, ll->fl_id);
		if (error != 0) {
			FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
			    "%s: llink remove failed, llink_id 0x%llx, "
			    "error %d", fakeif->iff_name,
			    ll->fl_id, error);
		}
		ll->fl_id = 0;
	}
	fakeif->iff_llink_cnt = 0;
	nx_release(nx);
	return err;
}
3227
static errno_t
create_netif_llink_provider_and_instance(if_fake_ref fakeif,
    struct ifnet_init_eparams * init_params, ifnet_t *ifp,
    uuid_t * provider, uuid_t * instance)
{
	/*
	 * Register a netif nexus provider for this feth interface, allocate
	 * its provider instance (which also creates the ifnet and the
	 * default logical link), then add any additional logical links and
	 * optional TSO offload.  Returns 0 on success; on failure the
	 * provider registration is rolled back where applicable.
	 *
	 * Note: the success path also falls through the `failed:' label —
	 * it only destroys the temporary nexus attribute and returns err
	 * (0 on success).
	 */
	errno_t err;
	nexus_controller_t controller = kern_nexus_shared_controller();
	struct kern_nexus_net_init net_init;
	struct kern_nexus_netif_llink_init llink_init;
	struct kern_nexus_netif_llink_qset_init qsets[FETH_MAX_QSETS];

	nexus_name_t provider_name;
	nexus_attr_t __single nexus_attr = NULL;
	struct kern_nexus_netif_provider_init prov_init = {
		.nxnpi_version = KERN_NEXUS_DOMAIN_PROVIDER_NETIF,
		.nxnpi_flags = NXPIF_VIRTUAL_DEVICE,
		.nxnpi_pre_connect = feth_nx_pre_connect,
		.nxnpi_connected = feth_nx_connected,
		.nxnpi_pre_disconnect = feth_nx_pre_disconnect,
		.nxnpi_disconnected = feth_nx_disconnected,
		.nxnpi_qset_init = feth_nx_qset_init,
		.nxnpi_qset_fini = feth_nx_qset_fini,
		.nxnpi_queue_init = feth_nx_queue_init,
		.nxnpi_queue_fini = feth_nx_queue_fini,
		.nxnpi_tx_qset_notify = feth_nx_tx_qset_notify,
		.nxnpi_config_capab = feth_nx_capab_config,
		.nxnpi_queue_tx_push = feth_nx_queue_tx_push
	};

	err = kern_nexus_attr_create(&nexus_attr);
	if (err != 0) {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
		    "nexus attribute creation failed, error %d", err);
		goto failed;
	}

	err = kern_nexus_attr_set(nexus_attr, NEXUS_ATTR_ANONYMOUS, 1);
	VERIFY(err == 0);

	/* provider name is derived from the interface name */
	snprintf((char *)provider_name, sizeof(provider_name),
	    "com.apple.netif.%s", fakeif->iff_name);
	err = kern_nexus_controller_register_provider(controller,
	    feth_nx_dom_prov,
	    provider_name,
	    (struct kern_nexus_provider_init *)&prov_init,
	    sizeof(prov_init),
	    nexus_attr,
	    provider);
	if (err != 0) {
		FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
		    "register provider failed, error %d", err);
		goto failed;
	}
	bzero(&net_init, sizeof(net_init));
	net_init.nxneti_version = KERN_NEXUS_NET_CURRENT_VERSION;
	net_init.nxneti_flags = 0;
	net_init.nxneti_eparams = init_params;
	net_init.nxneti_lladdr = NULL;
	net_init.nxneti_prepare = feth_netif_prepare;
	net_init.nxneti_rx_pbufpool = fakeif->iff_rx_pp;
	net_init.nxneti_tx_pbufpool = fakeif->iff_tx_pp;

	/*
	 * Assume llink id is same as the index for if_fake.
	 * This is not required for other drivers.
	 */
	static_assert(NETIF_LLINK_ID_DEFAULT == 0);
	fill_llink_info_and_params(fakeif, 0, &llink_init,
	    NETIF_LLINK_ID_DEFAULT, qsets, if_fake_qset_cnt,
	    KERN_NEXUS_NET_LLINK_DEFAULT);

	net_init.nxneti_llink = &llink_init;

	err = kern_nexus_controller_alloc_net_provider_instance(controller,
	    *provider, fakeif, NULL, instance, &net_init, ifp);
	if (err != 0) {
		FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
		    "alloc_net_provider_instance failed, %d", err);
		/* roll back the provider registration */
		kern_nexus_controller_deregister_provider(controller,
		    *provider);
		uuid_clear(*provider);
		goto failed;
	}
	/* default llink now exists */
	fakeif->iff_llink_cnt++;

	if (if_fake_llink_cnt > 1) {
		err = create_non_default_llinks(fakeif);
		if (err != 0) {
			FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
			    "create_non_default_llinks failed, %d", err);
			feth_detach_netif_nexus(fakeif);
			goto failed;
		}
	}
	if (feth_supports_tso(fakeif)) {
		if ((err = feth_set_tso_offload(*ifp)) != 0) {
			goto failed;
		}
	}
failed:
	if (nexus_attr != NULL) {
		kern_nexus_attr_destroy(nexus_attr);
	}
	return err;
}
3333
static errno_t
feth_attach_netif_nexus(if_fake_ref fakeif,
    struct ifnet_init_eparams * init_params, ifnet_t *ifp)
{
	/*
	 * Create this interface's packet pools, then create the netif nexus
	 * provider + instance.  The logical-link flavor is used when the
	 * if_fake_llink_cnt tunable is non-zero; otherwise the plain netif
	 * provider is used.  Returns 0 or an errno.
	 */
	errno_t error;
	fake_nx_t nx = &fakeif->iff_nx;

	error = feth_packet_pool_make(fakeif);
	if (error != 0) {
		return error;
	}
	if (if_fake_llink_cnt == 0) {
		return create_netif_provider_and_instance(fakeif, init_params,
		    ifp, &nx->fnx_provider, &nx->fnx_instance);
	} else {
		return create_netif_llink_provider_and_instance(fakeif,
		    init_params, ifp, &nx->fnx_provider,
		    &nx->fnx_instance);
	}
}
3354
3355 static void
remove_non_default_llinks(const char * name,fake_nx_t fnx,fake_llink_t llink __counted_by (FETH_MAX_LLINKS),uint32_t llink_cnt)3356 remove_non_default_llinks(const char * name, fake_nx_t fnx,
3357 fake_llink_t llink __counted_by(FETH_MAX_LLINKS),
3358 uint32_t llink_cnt)
3359 {
3360 struct kern_nexus *nx;
3361 uint32_t i;
3362
3363 if (llink_cnt <= 1) {
3364 goto done;
3365 }
3366 nx = nx_find(fnx->fnx_instance, FALSE);
3367 if (nx == NULL) {
3368 FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
3369 "%s: nx not found", name);
3370 goto done;
3371 }
3372 /* Default llink (at index 0) is freed separately */
3373 for (i = 1; i < llink_cnt; i++) {
3374 int err;
3375
3376 err = kern_nexus_netif_llink_remove(nx, llink[i].fl_id);
3377 if (err != 0) {
3378 FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
3379 "%s: llink remove failed, llink_id 0x%llx, "
3380 "error %d", name,
3381 llink[i].fl_id, err);
3382 }
3383 }
3384 nx_release(nx);
3385 done:
3386 return;
3387 }
3388
3389 static void
detach_provider_and_instance(uuid_t provider,uuid_t instance)3390 detach_provider_and_instance(uuid_t provider, uuid_t instance)
3391 {
3392 nexus_controller_t controller = kern_nexus_shared_controller();
3393 errno_t err;
3394
3395 if (!uuid_is_null(instance)) {
3396 err = kern_nexus_controller_free_provider_instance(controller,
3397 instance);
3398 if (err != 0) {
3399 FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
3400 "free_provider_instance failed %d", err);
3401 } else {
3402 FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
3403 "deregister_instance");
3404 }
3405 }
3406 if (!uuid_is_null(provider)) {
3407 err = kern_nexus_controller_deregister_provider(controller,
3408 provider);
3409 if (err != 0) {
3410 FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
3411 "deregister_provider %d", err);
3412 } else {
3413 FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
3414 "deregister_provider");
3415 }
3416 }
3417 return;
3418 }
3419
static void
feth_detach_netif_nexus(if_fake_ref fakeif)
{
	/*
	 * Detach the netif nexus from this interface.  Snapshot and clear
	 * the nexus/llink state under the feth lock so concurrent callbacks
	 * see a detached interface, then perform the (potentially blocking)
	 * teardown outside the lock.
	 */
	fake_nx fnx;
	fake_llink_t llink;
	uint32_t llink_cnt;

	feth_lock();
	fnx = fakeif->iff_nx;
	bzero(&fakeif->iff_nx, sizeof(fakeif->iff_nx));
	llink = fakeif->iff_llink;
	fakeif->iff_llink = NULL;
	llink_cnt = fakeif->iff_llink_cnt;
	fakeif->iff_llink_cnt = 0;
	feth_unlock();
	remove_non_default_llinks(__unsafe_null_terminated_from_indexable(fakeif->iff_name), &fnx, llink, llink_cnt);
	detach_provider_and_instance(fnx.fnx_provider, fnx.fnx_instance);
	if (llink != NULL) {
		kfree_type(fake_llink, FETH_MAX_LLINKS, llink);
	}
	return;
}
3442 #endif /* SKYWALK */
3443
3444 /**
3445 ** feth interface routines
3446 **/
static void
feth_ifnet_set_attrs(if_fake_ref fakeif, ifnet_t ifp)
{
	/*
	 * Apply Ethernet-style attributes to a newly allocated ifnet:
	 * address/header lengths, MTU, interface flags, and the offload
	 * capabilities selected by the feth tunables (LRO, HW checksum,
	 * TSO, VLAN tagging/MTU, RX flow steering).
	 */
	errno_t error;
	ifnet_offload_t offload = 0;

	ifnet_set_addrlen(ifp, ETHER_ADDR_LEN);
	ifnet_set_baudrate(ifp, 0);
	ifnet_set_mtu(ifp, ETHERMTU);
	ifnet_set_flags(ifp,
	    IFF_BROADCAST | IFF_MULTICAST | IFF_SIMPLEX,
	    0xffff);
	ifnet_set_hdrlen(ifp, sizeof(struct ether_header));
	if ((fakeif->iff_flags & IFF_FLAGS_LRO) != 0) {
		offload |= IFNET_LRO;
	}
	if ((fakeif->iff_flags & IFF_FLAGS_HWCSUM) != 0) {
		offload |= IFNET_CSUM_IP | IFNET_CSUM_TCP | IFNET_CSUM_UDP |
		    IFNET_CSUM_TCPIPV6 | IFNET_CSUM_UDPIPV6;
	}
	if (feth_supports_tso(fakeif)) {
		offload |= IFNET_TSO_IPV4 | IFNET_TSO_IPV6;
	}
	/* VLAN tagging and VLAN MTU are mutually exclusive here */
	if (feth_supports_vlan_tagging(fakeif)) {
		offload |= IFNET_VLAN_TAGGING;
	} else if (feth_supports_vlan_mtu(fakeif)) {
		offload |= IFNET_VLAN_MTU;
	}
	error = ifnet_set_offload(ifp, offload);
	if (error != 0) {
		FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
		    "ifnet_set_offload(%s, 0x%x) failed, %d",
		    ifp->if_xname, offload, error);
	} else {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
		    "ifnet_set_offload(%s, 0x%x) succeeded",
		    ifp->if_xname, offload);
	}
	if (feth_supports_rx_flow_steering(fakeif)) {
		ifnet_set_rx_flow_steering(ifp, true);
	}
}
3489
static void
interface_link_event(ifnet_t ifp, u_int32_t event_code)
{
	/*
	 * Post a KEV_DL_SUBCLASS kernel event (e.g. KEV_DL_LINK_ON/OFF) for
	 * the interface.  The event payload identifies the interface by
	 * family, unit and name.  The message buffer is a properly aligned
	 * kern_event_msg with the payload placed at event_data.
	 */
	struct event {
		u_int32_t ifnet_family;
		u_int32_t unit;
		char if_name[IFNAMSIZ];
	};
	_Alignas(struct kern_event_msg) char message[sizeof(struct kern_event_msg) + sizeof(struct event)] = { 0 };
	struct kern_event_msg *__single header = (struct kern_event_msg*)message;
	struct event *data = (struct event *)(message + offsetof(struct kern_event_msg, event_data));

	header->total_size = sizeof(message);
	header->vendor_code = KEV_VENDOR_APPLE;
	header->kev_class = KEV_NETWORK_CLASS;
	header->kev_subclass = KEV_DL_SUBCLASS;
	header->event_code = event_code;
	data->ifnet_family = ifnet_family(ifp);
	data->unit = (u_int32_t)ifnet_unit(ifp);
	strlcpy(data->if_name, ifnet_name(ifp), IFNAMSIZ);
	ifnet_event(ifp, header);
}
3512
3513 static if_fake_ref
ifnet_get_if_fake(ifnet_t ifp)3514 ifnet_get_if_fake(ifnet_t ifp)
3515 {
3516 return (if_fake_ref)ifnet_softc(ifp);
3517 }
3518
static int
feth_clone_create(struct if_clone *ifc, u_int32_t unit, __unused void *params)
{
	/*
	 * if_clone create callback: construct a new feth<unit> interface.
	 * Depending on the if_fake_bsd_mode tunable the interface is either
	 * a classic BSD-style ifnet (legacy output or TXSTART model) or a
	 * Skywalk-native netif-backed interface.  Returns 0 or an errno.
	 */
	bool bsd_mode;
	int error;
	if_fake_ref fakeif;
	struct ifnet_init_eparams feth_init;
	fake_llink_t iff_llink __counted_by_or_null(FETH_MAX_LLINKS) = NULL;
	ifnet_t __single ifp;
	char mac_address[ETHER_ADDR_LEN];
	bool multi_buflet;
	iff_pktpool_mode_t pktpool_mode;
	bool tso_support;
	bool rx_flow_steering_support;

	/* make local copy of globals needed to make consistency checks below */
	bsd_mode = (if_fake_bsd_mode != 0);
	multi_buflet = (if_fake_multibuflet != 0);
	tso_support = (if_fake_tso_support != 0);
	pktpool_mode = if_fake_pktpool_mode;
	rx_flow_steering_support = (if_fake_rx_flow_steering_support != 0);

	if (!bsd_mode) {
		/* consistency checks */
		if (if_fake_llink_cnt == 0 &&
		    strbufcmp(sk_ll_prefix, FAKE_ETHER_NAME) == 0) {
			FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
			    "feth used as ifname prefix but logical link "
			    "support in feth is disabled.");
			return EINVAL;
		}
		if (tso_support && pktpool_mode != IFF_PP_MODE_GLOBAL) {
			FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
			    "TSO mode requires global packet pool mode");
			return EINVAL;
		}
		if (multi_buflet && pktpool_mode == IFF_PP_MODE_PRIVATE_SPLIT) {
			FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
			    "multi-buflet not supported for split rx & tx pool");
			return EINVAL;
		}
		/* llink bookkeeping array is used only in native mode */
		iff_llink = kalloc_type(fake_llink,
		    FETH_MAX_LLINKS, Z_WAITOK_ZERO);
		if (iff_llink == NULL) {
			return ENOBUFS;
		}
	}
	fakeif = kalloc_type(struct if_fake, Z_WAITOK_ZERO_NOFAIL);
	fakeif->iff_llink = iff_llink;	/* NULL in bsd mode */
	fakeif->iff_retain_count = 1;
#define FAKE_ETHER_NAME_LEN     (sizeof(FAKE_ETHER_NAME) - 1)
	static_assert(FAKE_ETHER_NAME_LEN == 4);
	/* MAC address = "feth" prefix + unit number in the last two bytes */
	strbufcpy(mac_address, FAKE_ETHER_NAME);
	mac_address[ETHER_ADDR_LEN - 2] = (unit & 0xff00) >> 8;
	mac_address[ETHER_ADDR_LEN - 1] = unit & 0xff;
	if (bsd_mode) {
		fakeif->iff_flags |= IFF_FLAGS_BSD_MODE;
	}
	if (if_fake_hwcsum != 0) {
		fakeif->iff_flags |= IFF_FLAGS_HWCSUM;
	}
	if (if_fake_lro != 0) {
		fakeif->iff_flags |= IFF_FLAGS_LRO;
	}
	if (if_fake_vlan_tagging != 0) {
		/* support VLAN tagging in hardware */
		feth_set_supports_vlan_tagging(fakeif);
	} else {
		/* support VLAN mtu-sized packets */
		feth_set_supports_vlan_mtu(fakeif);
	}
	if (if_fake_separate_frame_header != 0) {
		fakeif->iff_flags |= IFF_FLAGS_SEPARATE_FRAME_HEADER;
	}
	fakeif->iff_max_mtu = get_max_mtu(bsd_mode, if_fake_max_mtu);
	fakeif->iff_fcs = if_fake_fcs;
	fakeif->iff_trailer_length = if_fake_trailer_length;

	/* use the interface name as the unique id for ifp recycle */
	if ((unsigned int)
	    snprintf(fakeif->iff_name, sizeof(fakeif->iff_name), "%s%d",
	    ifc->ifc_name, unit) >= sizeof(fakeif->iff_name)) {
		feth_release(fakeif);
		return EINVAL;
	}
	bzero(&feth_init, sizeof(feth_init));
	feth_init.ver = IFNET_INIT_CURRENT_VERSION;
	feth_init.len = sizeof(feth_init);
	if (feth_in_bsd_mode(fakeif)) {
		/* choose the TXSTART or legacy-output model */
		if (if_fake_txstart != 0) {
			feth_init.start = feth_start;
		} else {
			feth_init.flags |= IFNET_INIT_LEGACY;
			feth_init.output = feth_output;
		}
		if (tso_support) {
			feth_set_supports_tso(fakeif);
		}
	}
#if SKYWALK
	else {
		feth_init.flags |= IFNET_INIT_SKYWALK_NATIVE;
		/*
		 * Currently we support WMM mode only for Skywalk native
		 * interface.
		 */
		if (if_fake_wmm_mode != 0) {
			fakeif->iff_flags |= IFF_FLAGS_WMM_MODE;
		}

		if (multi_buflet) {
			fakeif->iff_flags |= IFF_FLAGS_MULTIBUFLETS;
		}

		fakeif->iff_pp_mode = pktpool_mode;
		if (tso_support) {
			feth_set_supports_tso(fakeif);
		}

		fakeif->iff_tx_headroom = if_fake_tx_headroom;
		fakeif->iff_adv_interval = if_fake_if_adv_interval;
		if (fakeif->iff_adv_interval > 0) {
			feth_init.flags |= IFNET_INIT_IF_ADV;
		}
		fakeif->iff_tx_drop_rate = if_fake_tx_drops;
		fakeif->iff_tx_completion_mode = if_tx_completion_mode;
		fakeif->iff_tx_exp_policy = if_fake_tx_exp_policy;

		if (rx_flow_steering_support) {
			feth_set_supports_rx_flow_steering(fakeif);
		}
	}
	feth_init.tx_headroom = fakeif->iff_tx_headroom;
#endif /* SKYWALK */
	if (if_fake_nxattach == 0) {
		feth_init.flags |= IFNET_INIT_NX_NOAUTO;
	}
	feth_init.uniqueid_len = (uint32_t)strbuflen(fakeif->iff_name);
	feth_init.uniqueid = fakeif->iff_name;
	feth_init.name = __unsafe_null_terminated_from_indexable(ifc->ifc_name);
	feth_init.unit = unit;
	feth_init.family = IFNET_FAMILY_ETHERNET;
	feth_init.type = IFT_ETHER;
	feth_init.demux = ether_demux;
	feth_init.add_proto = ether_add_proto;
	feth_init.del_proto = ether_del_proto;
	feth_init.check_multi = ether_check_multi;
	feth_init.framer_extended = ether_frameout_extended;
	feth_init.softc = fakeif;
	feth_init.ioctl = feth_ioctl;
	feth_init.set_bpf_tap = NULL;
	feth_init.detach = feth_if_free;
	feth_init.broadcast_addr = etherbroadcastaddr;
	feth_init.broadcast_len = ETHER_ADDR_LEN;
	if (feth_in_bsd_mode(fakeif)) {
		error = ifnet_allocate_extended(&feth_init, &ifp);
		if (error) {
			feth_release(fakeif);
			return error;
		}
		feth_ifnet_set_attrs(fakeif, ifp);
		if (feth_supports_tso(fakeif)) {
			feth_set_tso_mtu(ifp, IP_MAXPACKET, IP_MAXPACKET);
		}
	}
#if SKYWALK
	else {
		if (feth_in_wmm_mode(fakeif)) {
			feth_init.output_sched_model =
			    IFNET_SCHED_MODEL_DRIVER_MANAGED;
		}
		error = feth_attach_netif_nexus(fakeif, &feth_init, &ifp);
		if (error != 0) {
			feth_release(fakeif);
			return error;
		}
		/* take an additional reference to ensure that it doesn't go away */
		feth_retain(fakeif);
		fakeif->iff_flags |= IFF_FLAGS_NX_ATTACHED;
		fakeif->iff_ifp = ifp;
	}
#endif /* SKYWALK */
	fakeif->iff_media_count = MIN(default_media_words_count, IF_FAKE_MEDIA_LIST_MAX);
	bcopy(default_media_words, fakeif->iff_media_list,
	    fakeif->iff_media_count * sizeof(fakeif->iff_media_list[0]));
	if (feth_in_bsd_mode(fakeif)) {
		error = ifnet_attach(ifp, NULL);
		if (error) {
			ifnet_release(ifp);
			feth_release(fakeif);
			return error;
		}
		fakeif->iff_ifp = ifp;
	}

	ifnet_set_lladdr(ifp, mac_address, sizeof(mac_address));

	/* attach as ethernet */
	bpfattach(ifp, DLT_EN10MB, sizeof(struct ether_header));
	return 0;
}
3720
static int
feth_clone_destroy(ifnet_t ifp)
{
	/*
	 * if_clone destroy callback: mark the interface as detaching (under
	 * the feth lock, so the operation is idempotent), disconnect it
	 * from its peer, tear down the netif nexus if one was attached,
	 * then detach the ifnet.  Always returns 0.
	 */
	if_fake_ref fakeif;
#if SKYWALK
	boolean_t nx_attached = FALSE;
#endif /* SKYWALK */

	feth_lock();
	fakeif = ifnet_get_if_fake(ifp);
	if (fakeif == NULL || feth_is_detaching(fakeif)) {
		/* already gone or already being destroyed */
		feth_unlock();
		return 0;
	}
	feth_set_detaching(fakeif);
#if SKYWALK
	nx_attached = (fakeif->iff_flags & IFF_FLAGS_NX_ATTACHED) != 0;
#endif /* SKYWALK */
	feth_unlock();
	/* disconnect from the peer (NULL peer means disconnect) */
	feth_config(ifp, NULL);
#if SKYWALK
	if (nx_attached) {
		feth_detach_netif_nexus(fakeif);
		/* drop the extra reference taken at create time */
		feth_release(fakeif);
	}
#endif /* SKYWALK */
	ifnet_detach(ifp);
	return 0;
}
3750
3751 static void
feth_enqueue_input(ifnet_t ifp,struct mbuf * m)3752 feth_enqueue_input(ifnet_t ifp, struct mbuf * m)
3753 {
3754 struct ifnet_stat_increment_param stats = {};
3755
3756 stats.packets_in = 1;
3757 stats.bytes_in = (uint32_t)mbuf_pkthdr_len(m) + ETHER_HDR_LEN;
3758 ifnet_input(ifp, m, &stats);
3759 }
3760
3761
3762 static int
feth_add_mbuf_trailer(struct mbuf * m,void * trailer __sized_by (trailer_len),size_t trailer_len)3763 feth_add_mbuf_trailer(struct mbuf *m, void *trailer __sized_by(trailer_len), size_t trailer_len)
3764 {
3765 int ret;
3766 ASSERT(trailer_len <= FETH_TRAILER_LENGTH_MAX);
3767
3768 ret = m_append(m, trailer_len, (caddr_t)trailer);
3769 if (ret == 1) {
3770 FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
3771 "%zuB trailer added", trailer_len);
3772 return 0;
3773 }
3774 FAKE_LOG(LOG_NOTICE, FE_DBGF_OUTPUT, "m_append failed");
3775 return ENOTSUP;
3776 }
3777
3778 static int
feth_add_mbuf_fcs(struct mbuf * m)3779 feth_add_mbuf_fcs(struct mbuf *m)
3780 {
3781 uint32_t pkt_len, offset = 0;
3782 uint32_t crc = 0;
3783 int err = 0;
3784
3785 ASSERT(sizeof(crc) == ETHER_CRC_LEN);
3786
3787 pkt_len = m->m_pkthdr.len;
3788 struct mbuf *iter = m;
3789 while (iter != NULL && offset < pkt_len) {
3790 uint32_t frag_len = iter->m_len;
3791 ASSERT(frag_len <= (pkt_len - offset));
3792 crc = crc32(crc, mtod(iter, void *), frag_len);
3793 offset += frag_len;
3794 iter = iter->m_next;
3795 }
3796
3797 err = feth_add_mbuf_trailer(m, &crc, ETHER_CRC_LEN);
3798 if (err != 0) {
3799 return err;
3800 }
3801
3802 m->m_flags |= M_HASFCS;
3803
3804 return 0;
3805 }
3806
static void
feth_output_common(ifnet_t ifp, struct mbuf * m, ifnet_t peer,
    iff_flags_t flags, bool fcs, void *trailer __sized_by(trailer_len), size_t trailer_len)
{
	/*
	 * Core BSD-mode transmit path: fake hardware-checksum results if
	 * enabled, optionally append trailer/FCS bytes, strip the Ethernet
	 * header into the packet-header frame pointer, bpf-tap both
	 * directions, and enqueue the packet on the peer's input path.
	 * Consumes `m' in all cases (on m_copyup failure the chain is
	 * freed by m_copyup itself).
	 */
	void * frame_header;

	if ((flags & IFF_FLAGS_HWCSUM) != 0) {
		/* pretend the hardware verified all checksums */
		m->m_pkthdr.csum_data = 0xffff;
		m->m_pkthdr.csum_flags =
		    CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
		    CSUM_IP_CHECKED | CSUM_IP_VALID;
	}

	(void)ifnet_stat_increment_out(ifp, 1, m->m_pkthdr.len, 0);
	if (trailer_len != 0 && trailer != NULL) {
		feth_add_mbuf_trailer(m, trailer, trailer_len);
	}
	if (fcs) {
		feth_add_mbuf_fcs(m);
	}
	if ((flags & IFF_FLAGS_SEPARATE_FRAME_HEADER) != 0) {
		/* pull the Ethernet header into contiguous storage first */
		m = m_copyup(m, ETHER_HDR_LEN, 0);
		if (m == NULL) {
			FAKE_LOG(LOG_NOTICE, FE_DBGF_OUTPUT, "m_copyup failed");
			goto done;
		}
		frame_header = mtod(m, void *);
		mbuf_pkthdr_setheader(m, frame_header);
		m_adj(m, ETHER_HDR_LEN);
		FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
		    "%s: frame 0x%llx data 0x%llx len %ld",
		    ifp->if_xname,
		    (uint64_t)VM_KERNEL_ADDRPERM(frame_header),
		    (uint64_t)VM_KERNEL_ADDRPERM(mtod(m, void *)),
		    mbuf_len(m));
	} else {
		/* header stays in place; just advance data past it */
		frame_header = mtod(m, void *);
		mbuf_pkthdr_setheader(m, frame_header);
		_mbuf_adjust_pkthdr_and_data(m, ETHER_HDR_LEN);
	}

	/* tap it out */
	if (ifp->if_bpf != NULL) {
		fake_bpf_tap_out(ifp, m, frame_header);
	}

	/* tap it in */
	if (peer->if_bpf != NULL) {
		fake_bpf_tap_in(peer, m, frame_header);
	}

	(void)mbuf_pkthdr_setrcvif(m, peer);
	feth_enqueue_input(peer, m);
done:
	return;
}
3863
static void
feth_start(ifnet_t ifp)
{
	/*
	 * TXSTART-model start callback: drain the interface send queue and
	 * forward each packet to the peer via feth_output_common().  The
	 * iff_start_busy flag (set/cleared under the feth lock) prevents
	 * concurrent drains; peer and config are snapshotted once before
	 * the drain loop.
	 */
	if_fake_ref fakeif;
	iff_flags_t flags = 0;
	bool fcs;
	struct mbuf * __single m;
	ifnet_t peer = NULL;
	size_t trailer_len;

	feth_lock();
	fakeif = ifnet_get_if_fake(ifp);
	if (fakeif == NULL) {
		feth_unlock();
		return;
	}

	if (fakeif->iff_start_busy) {
		/* another thread is already draining this interface */
		feth_unlock();
		return;
	}

	peer = fakeif->iff_peer;
	flags = fakeif->iff_flags;
	fcs = fakeif->iff_fcs;
	trailer_len = fakeif->iff_trailer_length;

	fakeif->iff_start_busy = TRUE;
	feth_unlock();
	for (;;) {
		if (ifnet_dequeue(ifp, &m) != 0) {
			break;
		}
		if (peer == NULL) {
			/* no peer: drop the packet */
			m_freem(m);
			continue;
		}
		if (m != NULL) {
			feth_output_common(ifp, m, peer, flags, fcs,
			    feth_trailer, trailer_len);
		}
	}
	feth_lock();
	/* re-fetch: the interface may have been torn down while unlocked */
	fakeif = ifnet_get_if_fake(ifp);
	if (fakeif != NULL) {
		fakeif->iff_start_busy = FALSE;
	}
	feth_unlock();
}
3913
3914 static int
feth_output(ifnet_t ifp,struct mbuf * m)3915 feth_output(ifnet_t ifp, struct mbuf * m)
3916 {
3917 if_fake_ref fakeif;
3918 iff_flags_t flags;
3919 bool fcs;
3920 size_t trailer_len;
3921 ifnet_t peer = NULL;
3922
3923 if (m == NULL) {
3924 return 0;
3925 }
3926 feth_lock();
3927 fakeif = ifnet_get_if_fake(ifp);
3928 if (fakeif != NULL) {
3929 peer = fakeif->iff_peer;
3930 flags = fakeif->iff_flags;
3931 fcs = fakeif->iff_fcs;
3932 trailer_len = fakeif->iff_trailer_length;
3933 }
3934 feth_unlock();
3935 if (peer == NULL) {
3936 m_freem(m);
3937 ifnet_stat_increment_out(ifp, 0, 0, 1);
3938 return 0;
3939 }
3940 feth_output_common(ifp, m, peer, flags, fcs, feth_trailer, trailer_len);
3941 return 0;
3942 }
3943
/*
 * feth_config
 * - connect 'ifp' to 'peer' (peer != NULL), or disconnect 'ifp' from
 *   its current peer (peer == NULL)
 * - a connect requires both endpoints to have fake state, neither to
 *   be detaching, and neither to already have a peer (EBUSY otherwise)
 * - link ON/OFF events are generated for both endpoints after the
 *   lock is dropped
 * - returns 0 on success, EINVAL/EBUSY on failure; a no-op disconnect
 *   (no current peer) returns 0 without generating events
 */
static int
feth_config(ifnet_t ifp, ifnet_t peer)
{
	int connected = FALSE;
	int disconnected = FALSE;
	int error = 0;
	if_fake_ref fakeif = NULL;

	feth_lock();
	fakeif = ifnet_get_if_fake(ifp);
	if (fakeif == NULL) {
		error = EINVAL;
		goto done;
	}
	if (peer != NULL) {
		/* connect to peer */
		if_fake_ref peer_fakeif;

		peer_fakeif = ifnet_get_if_fake(peer);
		if (peer_fakeif == NULL) {
			error = EINVAL;
			goto done;
		}
		/* both endpoints must be alive and currently unpaired */
		if (feth_is_detaching(fakeif) ||
		    feth_is_detaching(peer_fakeif) ||
		    peer_fakeif->iff_peer != NULL ||
		    fakeif->iff_peer != NULL) {
			error = EBUSY;
			goto done;
		}
#if SKYWALK
		/* both sides must use the same packet-pool mode to pair */
		if (fakeif->iff_pp_mode !=
		    peer_fakeif->iff_pp_mode) {
			error = EINVAL;
			goto done;
		}
#endif /* SKYWALK */
		/* cross-link the two interfaces */
		fakeif->iff_peer = peer;
		peer_fakeif->iff_peer = ifp;
		connected = TRUE;
	} else if (fakeif->iff_peer != NULL) {
		/* disconnect from peer */
		if_fake_ref peer_fakeif;

		/* note: 'peer' is reused to name the old peer for the event below */
		peer = fakeif->iff_peer;
		peer_fakeif = ifnet_get_if_fake(peer);
		if (peer_fakeif == NULL) {
			/* should not happen */
			error = EINVAL;
			goto done;
		}
		fakeif->iff_peer = NULL;
		peer_fakeif->iff_peer = NULL;
		disconnected = TRUE;
	}

done:
	feth_unlock();

	/* generate link status event if we connect or disconnect */
	if (connected) {
		interface_link_event(ifp, KEV_DL_LINK_ON);
		interface_link_event(peer, KEV_DL_LINK_ON);
	} else if (disconnected) {
		interface_link_event(ifp, KEV_DL_LINK_OFF);
		interface_link_event(peer, KEV_DL_LINK_OFF);
	}
	return error;
}
4013
4014 static int
feth_set_media(ifnet_t ifp,struct if_fake_request * iffr)4015 feth_set_media(ifnet_t ifp, struct if_fake_request * iffr)
4016 {
4017 if_fake_ref fakeif;
4018 int error;
4019
4020 if (iffr->iffr_media.iffm_count > IF_FAKE_MEDIA_LIST_MAX) {
4021 /* list is too long */
4022 return EINVAL;
4023 }
4024 feth_lock();
4025 fakeif = ifnet_get_if_fake(ifp);
4026 if (fakeif == NULL) {
4027 error = EINVAL;
4028 goto done;
4029 }
4030 fakeif->iff_media_count = iffr->iffr_media.iffm_count;
4031 bcopy(iffr->iffr_media.iffm_list, fakeif->iff_media_list,
4032 iffr->iffr_media.iffm_count * sizeof(fakeif->iff_media_list[0]));
4033 #if 0
4034 /* XXX: "auto-negotiate" active with peer? */
4035 /* generate link status event? */
4036 fakeif->iff_media_current = iffr->iffr_media.iffm_current;
4037 #endif
4038 error = 0;
4039 done:
4040 feth_unlock();
4041 return error;
4042 }
4043
4044 static int
if_fake_request_copyin(user_addr_t user_addr,struct if_fake_request * iffr,u_int32_t len)4045 if_fake_request_copyin(user_addr_t user_addr,
4046 struct if_fake_request *iffr, u_int32_t len)
4047 {
4048 int error;
4049
4050 if (user_addr == USER_ADDR_NULL || len < sizeof(*iffr)) {
4051 error = EINVAL;
4052 goto done;
4053 }
4054 error = copyin(user_addr, iffr, sizeof(*iffr));
4055 if (error != 0) {
4056 goto done;
4057 }
4058 if (iffr->iffr_reserved[0] != 0 || iffr->iffr_reserved[1] != 0 ||
4059 iffr->iffr_reserved[2] != 0 || iffr->iffr_reserved[3] != 0) {
4060 error = EINVAL;
4061 goto done;
4062 }
4063 done:
4064 return error;
4065 }
4066
4067 static int
feth_set_drvspec(ifnet_t ifp,uint32_t cmd,u_int32_t len,user_addr_t user_addr)4068 feth_set_drvspec(ifnet_t ifp, uint32_t cmd, u_int32_t len,
4069 user_addr_t user_addr)
4070 {
4071 int error;
4072 struct if_fake_request iffr;
4073 ifnet_t peer;
4074
4075 switch (cmd) {
4076 case IF_FAKE_S_CMD_SET_PEER:
4077 error = if_fake_request_copyin(user_addr, &iffr, len);
4078 if (error != 0) {
4079 break;
4080 }
4081 if (iffr.iffr_peer_name[0] == '\0') {
4082 error = feth_config(ifp, NULL);
4083 break;
4084 }
4085
4086 /* ensure nul termination */
4087 iffr.iffr_peer_name[IFNAMSIZ - 1] = '\0';
4088 peer = ifunit(__unsafe_null_terminated_from_indexable(iffr.iffr_peer_name));
4089 if (peer == NULL) {
4090 error = ENXIO;
4091 break;
4092 }
4093 if (ifnet_type(peer) != IFT_ETHER) {
4094 error = EINVAL;
4095 break;
4096 }
4097 if (strcmp(ifnet_name(peer), FAKE_ETHER_NAME) != 0) {
4098 error = EINVAL;
4099 break;
4100 }
4101 error = feth_config(ifp, peer);
4102 break;
4103 case IF_FAKE_S_CMD_SET_MEDIA:
4104 error = if_fake_request_copyin(user_addr, &iffr, len);
4105 if (error != 0) {
4106 break;
4107 }
4108 error = feth_set_media(ifp, &iffr);
4109 break;
4110 case IF_FAKE_S_CMD_SET_DEQUEUE_STALL:
4111 error = if_fake_request_copyin(user_addr, &iffr, len);
4112 if (error != 0) {
4113 break;
4114 }
4115 error = feth_enable_dequeue_stall(ifp,
4116 iffr.iffr_dequeue_stall);
4117 break;
4118 default:
4119 error = EOPNOTSUPP;
4120 break;
4121 }
4122 return error;
4123 }
4124
4125 static int
feth_get_drvspec(ifnet_t ifp,u_int32_t cmd,u_int32_t len,user_addr_t user_addr)4126 feth_get_drvspec(ifnet_t ifp, u_int32_t cmd, u_int32_t len,
4127 user_addr_t user_addr)
4128 {
4129 int error = EOPNOTSUPP;
4130 if_fake_ref fakeif;
4131 struct if_fake_request iffr;
4132 ifnet_t peer;
4133
4134 switch (cmd) {
4135 case IF_FAKE_G_CMD_GET_PEER:
4136 if (len < sizeof(iffr)) {
4137 error = EINVAL;
4138 break;
4139 }
4140 feth_lock();
4141 fakeif = ifnet_get_if_fake(ifp);
4142 if (fakeif == NULL) {
4143 feth_unlock();
4144 error = EOPNOTSUPP;
4145 break;
4146 }
4147 peer = fakeif->iff_peer;
4148 feth_unlock();
4149 bzero(&iffr, sizeof(iffr));
4150 if (peer != NULL) {
4151 strlcpy(iffr.iffr_peer_name,
4152 if_name(peer),
4153 sizeof(iffr.iffr_peer_name));
4154 }
4155 error = copyout(&iffr, user_addr, sizeof(iffr));
4156 break;
4157 default:
4158 break;
4159 }
4160 return error;
4161 }
4162
/*
 * ifdrvu
 * - convenience union for viewing an SIOC[GS]DRVSPEC argument as
 *   either the 32-bit or 64-bit ifdrv layout (see feth_ioctl)
 */
union ifdrvu {
	struct ifdrv32 *ifdrvu_32;
	struct ifdrv64 *ifdrvu_64;
	void *ifdrvu_p;
};
4168
/*
 * feth_ioctl
 * - interface ioctl handler for feth
 * - covers address/MTU/flags housekeeping, media reporting
 *   (SIOCGIFMEDIA{32,64}), link-layer address changes, LRO capability
 *   toggling, and the driver-private get/set commands
 *   (SIOC[GS]DRVSPEC{32,64}) dispatched to feth_{get,set}_drvspec()
 * - returns 0 on success or an errno; unhandled commands return
 *   EOPNOTSUPP
 */
static int
feth_ioctl(ifnet_t ifp, u_long cmd, void * data)
{
	unsigned int count;
	struct ifdevmtu * devmtu_p;
	union ifdrvu drv;
	uint32_t drv_cmd;
	uint32_t drv_len;
	boolean_t drv_set_command = FALSE;
	int error = 0;
	struct ifmediareq32 * ifmr;
	struct ifreq * ifr;
	if_fake_ref fakeif;
	int status;
	user_addr_t user_addr;

	ifr = (struct ifreq *)data;
	switch (cmd) {
	case SIOCSIFADDR:
		/* mark the interface up when an address is assigned */
		ifnet_set_flags(ifp, IFF_UP, IFF_UP);
		break;

	case SIOCGIFMEDIA32:
	case SIOCGIFMEDIA64:
		feth_lock();
		fakeif = ifnet_get_if_fake(ifp);
		if (fakeif == NULL) {
			feth_unlock();
			return EOPNOTSUPP;
		}
		/* link is "active" only while connected to a peer */
		status = (fakeif->iff_peer != NULL)
		    ? (IFM_AVALID | IFM_ACTIVE) : IFM_AVALID;
		/*
		 * NOTE(review): the request is accessed through the 32-bit
		 * struct for the fields shared by both variants; only the
		 * user list pointer (ifmu_ulist) differs and is extracted
		 * per-ABI below.  Presumes the common-prefix layout matches
		 * between ifmediareq32 and ifmediareq64 -- confirm.
		 */
		ifmr = (struct ifmediareq32 *)data;
		user_addr = (cmd == SIOCGIFMEDIA64) ?
		    ((struct ifmediareq64 *)data)->ifmu_ulist :
		    CAST_USER_ADDR_T(((struct ifmediareq32 *)data)->ifmu_ulist);
		count = ifmr->ifm_count;
		ifmr->ifm_active = (fakeif->iff_peer != NULL)
		    ? FAKE_DEFAULT_MEDIA : IFM_ETHER;
		ifmr->ifm_current = IFM_ETHER;
		ifmr->ifm_mask = 0;
		ifmr->ifm_status = status;
		if (user_addr == USER_ADDR_NULL) {
			/* caller is sizing its buffer: just report the count */
			ifmr->ifm_count = fakeif->iff_media_count;
		} else if (count > 0) {
			/* copy out at most the number of entries we have */
			if (count > fakeif->iff_media_count) {
				count = fakeif->iff_media_count;
			}
			ifmr->ifm_count = count;
			/*
			 * NOTE(review): copyout() is performed while holding
			 * feth_lock -- presumably safe for this lock type,
			 * but verify it may block here.
			 */
			error = copyout(&fakeif->iff_media_list, user_addr,
			    count * sizeof(int));
		}
		feth_unlock();
		break;

	case SIOCGIFDEVMTU:
		/* report current/min/max MTU for this device */
		devmtu_p = &ifr->ifr_devmtu;
		devmtu_p->ifdm_current = ifnet_mtu(ifp);
		devmtu_p->ifdm_max = feth_max_mtu(ifp);
		devmtu_p->ifdm_min = IF_MINMTU;
		break;

	case SIOCSIFMTU:
		/* bounds-check, then apply the requested MTU */
		if ((unsigned int)ifr->ifr_mtu > feth_max_mtu(ifp) ||
		    ifr->ifr_mtu < IF_MINMTU) {
			error = EINVAL;
		} else {
			error = ifnet_set_mtu(ifp, ifr->ifr_mtu);
		}
		break;

	case SIOCSDRVSPEC32:
	case SIOCSDRVSPEC64:
		/* "set" commands require superuser privileges */
		error = proc_suser(current_proc());
		if (error != 0) {
			break;
		}
		drv_set_command = TRUE;
		OS_FALLTHROUGH;
	case SIOCGDRVSPEC32:
	case SIOCGDRVSPEC64:
		/* unpack the 32- or 64-bit ifdrv layout, then dispatch */
		drv.ifdrvu_p = data;
		if (cmd == SIOCGDRVSPEC32 || cmd == SIOCSDRVSPEC32) {
			drv_cmd = drv.ifdrvu_32->ifd_cmd;
			drv_len = drv.ifdrvu_32->ifd_len;
			user_addr = CAST_USER_ADDR_T(drv.ifdrvu_32->ifd_data);
		} else {
			drv_cmd = drv.ifdrvu_64->ifd_cmd;
			drv_len = drv.ifdrvu_64->ifd_len;
			user_addr = drv.ifdrvu_64->ifd_data;
		}
		if (drv_set_command) {
			error = feth_set_drvspec(ifp, drv_cmd, drv_len,
			    user_addr);
		} else {
			error = feth_get_drvspec(ifp, drv_cmd, drv_len,
			    user_addr);
		}
		break;

	case SIOCSIFLLADDR:
		/* change the link-layer (MAC) address */
		error = ifnet_set_lladdr(ifp, ifr->ifr_addr.sa_data,
		    ifr->ifr_addr.sa_len);
		break;

	case SIOCSIFFLAGS:
		/* keep IFF_RUNNING in sync with IFF_UP */
		if ((ifp->if_flags & IFF_UP) != 0) {
			/* marked up, set running if not already set */
			if ((ifp->if_flags & IFF_RUNNING) == 0) {
				/* set running */
				error = ifnet_set_flags(ifp, IFF_RUNNING,
				    IFF_RUNNING);
			}
		} else if ((ifp->if_flags & IFF_RUNNING) != 0) {
			/* marked down, clear running */
			error = ifnet_set_flags(ifp, 0, IFF_RUNNING);
		}
		break;

	case SIOCDIFADDR:
		/* test hook: optionally fail address deletion */
		if (if_fake_fail_ioctl != 0) {
			FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
			    "%s: failing SIOCDIFADDR with EPWROFF",
			    ifp->if_xname);
			error = EPWROFF;
		}
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		/* multicast membership changes are accepted as no-ops */
		error = 0;
		break;
	case SIOCSIFCAP: {
		uint32_t cap;

		/* LRO is the only toggleable capability, and only when
		 * the interface was created with LRO support */
		feth_lock();
		fakeif = ifnet_get_if_fake(ifp);
		if (fakeif == NULL ||
		    (fakeif->iff_flags & IFF_FLAGS_LRO) == 0) {
			feth_unlock();
			return EOPNOTSUPP;
		}
		feth_unlock();
		cap = (ifr->ifr_reqcap & IFCAP_LRO) != 0 ? IFCAP_LRO : 0;
		error = ifnet_set_capabilities_enabled(ifp, cap, IFCAP_LRO);
		break;
	}
	default:
		error = EOPNOTSUPP;
		break;
	}
	return error;
}
4322
/*
 * feth_if_free
 * - ifnet "free" callback: sever the ifnet -> fake-state link and drop
 *   the fake-state and ifnet references
 * - the lock is released before feth_release()/ifnet_release() are
 *   called; do not reorder
 */
static void
feth_if_free(ifnet_t ifp)
{
	if_fake_ref fakeif;

	if (ifp == NULL) {
		return;
	}
	feth_lock();
	fakeif = ifnet_get_if_fake(ifp);
	if (fakeif == NULL) {
		/* already freed */
		feth_unlock();
		return;
	}
	/* detach the fake state from the ifnet */
	ifp->if_softc = NULL;
#if SKYWALK
	/* the doorbell thread call must already be gone by free time */
	VERIFY(fakeif->iff_doorbell_tcall == NULL);
#endif /* SKYWALK */
	feth_unlock();
	feth_release(fakeif);
	ifnet_release(ifp);
	return;
}
4346
4347 __private_extern__ void
if_fake_init(void)4348 if_fake_init(void)
4349 {
4350 int error;
4351
4352 #if SKYWALK
4353 (void)feth_register_nexus_domain_provider();
4354 #endif /* SKYWALK */
4355 error = if_clone_attach(&feth_cloner);
4356 if (error != 0) {
4357 return;
4358 }
4359 return;
4360 }
4361