1 /*
2 * Copyright (c) 2015-2024 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 /*
30 * if_fake.c
31 * - fake network interface used for testing
32 * - "feth" (e.g. "feth0", "feth1") is a virtual ethernet interface that allows
33 * two instances to have their output/input paths "crossed-over" so that
34 * output on one is input on the other
35 */
36
37 /*
38 * Modification History:
39 *
40 * September 9, 2015 Dieter Siegmund ([email protected])
41 * - created
42 */
43
44 #include <sys/param.h>
45 #include <sys/kernel.h>
46 #include <sys/malloc.h>
47 #include <sys/mbuf.h>
48 #include <sys/queue.h>
49 #include <sys/socket.h>
50 #include <sys/sockio.h>
51 #include <sys/sysctl.h>
52 #include <sys/systm.h>
53 #include <sys/kern_event.h>
54 #include <sys/mcache.h>
55 #include <sys/syslog.h>
56
57 #include <net/bpf.h>
58 #include <net/ethernet.h>
59 #include <net/if.h>
60 #include <net/if_vlan_var.h>
61 #include <net/if_fake_var.h>
62 #include <net/if_arp.h>
63 #include <net/if_dl.h>
64 #include <net/if_ether.h>
65 #include <net/if_types.h>
66 #include <libkern/OSAtomic.h>
67
68 #include <net/dlil.h>
69
70 #include <net/kpi_interface.h>
71 #include <net/kpi_protocol.h>
72
73 #include <kern/locks.h>
74 #include <kern/zalloc.h>
75
76 #include <mach/mach_time.h>
77
78 #include <os/log.h>
79
80 #ifdef INET
81 #include <netinet/in.h>
82 #include <netinet/if_ether.h>
83 #endif
84
85 #include <net/if_media.h>
86 #include <net/ether_if_module.h>
87 #if SKYWALK
88 #include <skywalk/os_skywalk_private.h>
89 #include <skywalk/nexus/netif/nx_netif.h>
90 #include <skywalk/channel/channel_var.h>
91 #endif /* SKYWALK */
92
93 /*
94 * if_fake_debug, FE_DBGF_*
95 * - 'if_fake_debug' is a bitmask of FE_DBGF_* flags that can be set
96 * to enable additional logs for the corresponding fake function
97 * - "sysctl net.link.fake.debug" controls the value of
98 * 'if_fake_debug'
99 */
100 static uint32_t if_fake_debug = 0;
101
102 #define FE_DBGF_LIFECYCLE 0x0001
103 #define FE_DBGF_INPUT 0x0002
104 #define FE_DBGF_OUTPUT 0x0004
105 #define FE_DBGF_CONTROL 0x0008
106 #define FE_DBGF_MISC 0x0010
107
108 /*
109 * if_fake_log_level
110 * - 'if_fake_log_level' ensures that by default important logs are
111 * logged regardless of if_fake_debug by comparing the log level
112 * in FAKE_LOG to if_fake_log_level
113 * - use "sysctl net.link.fake.log_level" controls the value of
114 * 'if_fake_log_level'
115 * - the default value of 'if_fake_log_level' is LOG_NOTICE; important
116 * logs must use LOG_NOTICE to ensure they appear by default
117 */
118 #define FAKE_DBGF_ENABLED(__flag) ((if_fake_debug & __flag) != 0)
119
120 /*
121 * FAKE_LOG
122 * - macro to generate the specified log conditionally based on
123 * the specified log level and debug flags
124 */
125 #define FAKE_LOG(__level, __dbgf, __string, ...) \
126 do { \
127 if (__level <= if_fake_log_level || \
128 FAKE_DBGF_ENABLED(__dbgf)) { \
129 os_log(OS_LOG_DEFAULT, "%s: " __string, \
130 __func__, ## __VA_ARGS__); \
131 } \
132 } while (0)
133
134 static boolean_t
is_power_of_two(unsigned int val)135 is_power_of_two(unsigned int val)
136 {
137 return (val & (val - 1)) == 0;
138 }
139
140 #define FAKE_ETHER_NAME "feth"
141
142 SYSCTL_DECL(_net_link);
143 SYSCTL_NODE(_net_link, OID_AUTO, fake, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
144 "Fake interface");
145
146 static int if_fake_txstart = 1;
147 SYSCTL_INT(_net_link_fake, OID_AUTO, txstart, CTLFLAG_RW | CTLFLAG_LOCKED,
148 &if_fake_txstart, 0, "Fake interface TXSTART mode");
149
150 static int if_fake_hwcsum = 0;
151 SYSCTL_INT(_net_link_fake, OID_AUTO, hwcsum, CTLFLAG_RW | CTLFLAG_LOCKED,
152 &if_fake_hwcsum, 0, "Fake interface simulate hardware checksum");
153
154 static int if_fake_vlan_tagging = 1;
155 SYSCTL_INT(_net_link_fake, OID_AUTO, vlan_tagging, CTLFLAG_RW | CTLFLAG_LOCKED,
156 &if_fake_vlan_tagging, 0, "Fake interface VLAN tagging");
157
158 static int if_fake_nxattach = 0;
159 SYSCTL_INT(_net_link_fake, OID_AUTO, nxattach, CTLFLAG_RW | CTLFLAG_LOCKED,
160 &if_fake_nxattach, 0, "Fake interface auto-attach nexus");
161
162 static int if_fake_bsd_mode = 1;
163 SYSCTL_INT(_net_link_fake, OID_AUTO, bsd_mode, CTLFLAG_RW | CTLFLAG_LOCKED,
164 &if_fake_bsd_mode, 0, "Fake interface attach as BSD interface");
165
166 static int if_fake_log_level = LOG_NOTICE;
167 SYSCTL_INT(_net_link_fake, OID_AUTO, log_level, CTLFLAG_RW | CTLFLAG_LOCKED,
168 &if_fake_log_level, 0, "Fake interface log level");
169
170 SYSCTL_INT(_net_link_fake, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
171 &if_fake_debug, 0, "Fake interface debug flags");
172
173 static int if_fake_wmm_mode = 0;
174 SYSCTL_INT(_net_link_fake, OID_AUTO, wmm_mode, CTLFLAG_RW | CTLFLAG_LOCKED,
175 &if_fake_wmm_mode, 0, "Fake interface in 802.11 WMM mode");
176
177 static int if_fake_multibuflet = 0;
178 SYSCTL_INT(_net_link_fake, OID_AUTO, multibuflet, CTLFLAG_RW | CTLFLAG_LOCKED,
179 &if_fake_multibuflet, 0, "Fake interface using multi-buflet packets");
180
181 static int if_fake_low_latency = 0;
182 SYSCTL_INT(_net_link_fake, OID_AUTO, low_latency, CTLFLAG_RW | CTLFLAG_LOCKED,
183 &if_fake_low_latency, 0, "Fake interface with a low latency qset");
184
185 static int if_fake_switch_combined_mode = 0;
186 SYSCTL_INT(_net_link_fake, OID_AUTO, switch_combined_mode,
187 CTLFLAG_RW | CTLFLAG_LOCKED, &if_fake_switch_combined_mode, 0,
188 "Switch a qset between combined and separate mode during dequeues");
189
190 static int if_fake_switch_mode_frequency = 10;
191 SYSCTL_INT(_net_link_fake, OID_AUTO, switch_mode_frequency,
192 CTLFLAG_RW | CTLFLAG_LOCKED, &if_fake_switch_mode_frequency, 0,
193 "The number of dequeues before we switch between the combined and separated mode");
194
195 static int if_fake_tso_support = 0;
196 SYSCTL_INT(_net_link_fake, OID_AUTO, tso_support, CTLFLAG_RW | CTLFLAG_LOCKED,
197 &if_fake_tso_support, 0, "Fake interface with support for TSO offload");
198
199 #define DEFAULT_EXPIRATION_THRESHOLD 500 /* usec */
200 static int if_fake_expiration_threshold_us = DEFAULT_EXPIRATION_THRESHOLD;
201 SYSCTL_INT(_net_link_fake, OID_AUTO, expiration_threshold, CTLFLAG_RW | CTLFLAG_LOCKED,
202 &if_fake_expiration_threshold_us, DEFAULT_EXPIRATION_THRESHOLD,
203 "Expiration threshold (usec) for expiration testing");
204
205 static int if_fake_lro = 0;
206 SYSCTL_INT(_net_link_fake, OID_AUTO, lro, CTLFLAG_RW | CTLFLAG_LOCKED,
207 &if_fake_lro, 0, "Fake interface report LRO capability");
208
209 static int if_fake_separate_frame_header = 0;
210 SYSCTL_INT(_net_link_fake, OID_AUTO, separate_frame_header,
211 CTLFLAG_RW | CTLFLAG_LOCKED,
212 &if_fake_separate_frame_header, 0, "Put frame header in separate mbuf");
213
214 typedef enum {
215 IFF_PP_MODE_GLOBAL = 0, /* share a global pool */
216 IFF_PP_MODE_PRIVATE = 1, /* creates its own rx/tx pool */
217 IFF_PP_MODE_PRIVATE_SPLIT = 2, /* creates its own split rx & tx pool */
218 } iff_pktpool_mode_t;
219 static iff_pktpool_mode_t if_fake_pktpool_mode = IFF_PP_MODE_GLOBAL;
220 SYSCTL_INT(_net_link_fake, OID_AUTO, pktpool_mode, CTLFLAG_RW | CTLFLAG_LOCKED,
221 &if_fake_pktpool_mode, IFF_PP_MODE_GLOBAL,
222 "Fake interface packet pool mode (0 global, 1 private, 2 private split");
223
224 #define FETH_LINK_LAYER_AGGRETATION_FACTOR_MAX 512
225 #define FETH_LINK_LAYER_AGGRETATION_FACTOR_DEF 96
226 static int if_fake_link_layer_aggregation_factor =
227 FETH_LINK_LAYER_AGGRETATION_FACTOR_DEF;
228 static int
229 feth_link_layer_aggregation_factor_sysctl SYSCTL_HANDLER_ARGS
230 {
231 #pragma unused(oidp, arg1, arg2)
232 unsigned int new_value;
233 int changed;
234 int error;
235
236 error = sysctl_io_number(req, if_fake_link_layer_aggregation_factor,
237 sizeof(if_fake_link_layer_aggregation_factor), &new_value,
238 &changed);
239 if (error == 0 && changed != 0) {
240 if (new_value <= 0 ||
241 new_value > FETH_LINK_LAYER_AGGRETATION_FACTOR_MAX) {
242 return EINVAL;
243 }
244 if_fake_link_layer_aggregation_factor = new_value;
245 }
246 return error;
247 }
248
249 SYSCTL_PROC(_net_link_fake, OID_AUTO, link_layer_aggregation_factor,
250 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
251 0, 0, feth_link_layer_aggregation_factor_sysctl, "IU",
252 "Fake interface link layer aggregation factor");
253
254 #define FETH_TX_HEADROOM_MAX 32
255 static unsigned int if_fake_tx_headroom = FETH_TX_HEADROOM_MAX;
256 static int
257 feth_tx_headroom_sysctl SYSCTL_HANDLER_ARGS
258 {
259 #pragma unused(oidp, arg1, arg2)
260 unsigned int new_value;
261 int changed;
262 int error;
263
264 error = sysctl_io_number(req, if_fake_tx_headroom,
265 sizeof(if_fake_tx_headroom), &new_value, &changed);
266 if (error == 0 && changed != 0) {
267 if (new_value > FETH_TX_HEADROOM_MAX ||
268 (new_value % 8) != 0) {
269 return EINVAL;
270 }
271 if_fake_tx_headroom = new_value;
272 }
273 return 0;
274 }
275
276 SYSCTL_PROC(_net_link_fake, OID_AUTO, tx_headroom,
277 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
278 0, 0, feth_tx_headroom_sysctl, "IU", "Fake ethernet Tx headroom");
279
280 static int if_fake_fcs = 0;
281 SYSCTL_INT(_net_link_fake, OID_AUTO, fcs, CTLFLAG_RW | CTLFLAG_LOCKED,
282 &if_fake_fcs, 0, "Fake interface using frame check sequence");
283
284 #define FETH_TRAILER_LENGTH_MAX 28
285 char feth_trailer[FETH_TRAILER_LENGTH_MAX + 1] = "trailertrailertrailertrailer";
286 static unsigned int if_fake_trailer_length = 0;
287 static int
288 feth_trailer_length_sysctl SYSCTL_HANDLER_ARGS
289 {
290 #pragma unused(oidp, arg1, arg2)
291 unsigned int new_value;
292 int changed;
293 int error;
294
295 error = sysctl_io_number(req, if_fake_trailer_length,
296 sizeof(if_fake_trailer_length), &new_value, &changed);
297 if (error == 0 && changed != 0) {
298 if (new_value > FETH_TRAILER_LENGTH_MAX) {
299 return EINVAL;
300 }
301 if_fake_trailer_length = new_value;
302 }
303 return 0;
304 }
305
306 SYSCTL_PROC(_net_link_fake, OID_AUTO, trailer_length,
307 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
308 feth_trailer_length_sysctl, "IU", "Fake interface frame trailer length");
309
310 /* sysctl net.link.fake.max_mtu */
311 #define FETH_MAX_MTU_DEFAULT 2048
312 #define FETH_MAX_MTU_MAX ((16 * 1024) - ETHER_HDR_LEN)
313
314 static unsigned int if_fake_max_mtu = FETH_MAX_MTU_DEFAULT;
315
316 /* sysctl net.link.fake.buflet_size */
317 #define FETH_BUFLET_SIZE_MIN 512
318 #define FETH_BUFLET_SIZE_MAX (32 * 1024)
319 #define FETH_TSO_BUFLET_SIZE (16 * 1024)
320
321 static unsigned int if_fake_buflet_size = FETH_BUFLET_SIZE_MIN;
322 static unsigned int if_fake_tso_buffer_size = FETH_TSO_BUFLET_SIZE;
323
324 static int
325 feth_tso_buffer_size_sysctl SYSCTL_HANDLER_ARGS
326 {
327 #pragma unused(oidp, arg1, arg2)
328 unsigned int new_value;
329 int changed;
330 int error;
331
332 error = sysctl_io_number(req, if_fake_tso_buffer_size,
333 sizeof(if_fake_tso_buffer_size), &new_value, &changed);
334 if (error == 0 && changed != 0) {
335 /* must be a power of 2 between min and max */
336 if (new_value > FETH_BUFLET_SIZE_MAX ||
337 new_value < FETH_BUFLET_SIZE_MIN ||
338 !is_power_of_two(new_value)) {
339 return EINVAL;
340 }
341 if_fake_tso_buffer_size = new_value;
342 }
343 return 0;
344 }
345
346 SYSCTL_PROC(_net_link_fake, OID_AUTO, tso_buf_size,
347 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
348 0, 0, feth_tso_buffer_size_sysctl, "IU", "Fake interface TSO buffer size");
349
350 static int
351 feth_max_mtu_sysctl SYSCTL_HANDLER_ARGS
352 {
353 #pragma unused(oidp, arg1, arg2)
354 unsigned int new_value;
355 int changed;
356 int error;
357
358 error = sysctl_io_number(req, if_fake_max_mtu,
359 sizeof(if_fake_max_mtu), &new_value, &changed);
360 if (error == 0 && changed != 0) {
361 if (new_value > FETH_MAX_MTU_MAX ||
362 new_value < ETHERMTU ||
363 new_value <= if_fake_buflet_size) {
364 return EINVAL;
365 }
366 if_fake_max_mtu = new_value;
367 }
368 return 0;
369 }
370
371 SYSCTL_PROC(_net_link_fake, OID_AUTO, max_mtu,
372 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
373 0, 0, feth_max_mtu_sysctl, "IU", "Fake interface maximum MTU");
374
375 static int
376 feth_buflet_size_sysctl SYSCTL_HANDLER_ARGS
377 {
378 #pragma unused(oidp, arg1, arg2)
379 unsigned int new_value;
380 int changed;
381 int error;
382
383 error = sysctl_io_number(req, if_fake_buflet_size,
384 sizeof(if_fake_buflet_size), &new_value, &changed);
385 if (error == 0 && changed != 0) {
386 /* must be a power of 2 between min and max */
387 if (new_value > FETH_BUFLET_SIZE_MAX ||
388 new_value < FETH_BUFLET_SIZE_MIN ||
389 !is_power_of_two(new_value) ||
390 new_value >= if_fake_max_mtu) {
391 return EINVAL;
392 }
393 if_fake_buflet_size = new_value;
394 }
395 return 0;
396 }
397
398 SYSCTL_PROC(_net_link_fake, OID_AUTO, buflet_size,
399 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
400 0, 0, feth_buflet_size_sysctl, "IU", "Fake interface buflet size");
401
402 static unsigned int if_fake_user_access = 0;
403
404 static int
405 feth_user_access_sysctl SYSCTL_HANDLER_ARGS
406 {
407 #pragma unused(oidp, arg1, arg2)
408 unsigned int new_value;
409 int changed;
410 int error;
411
412 error = sysctl_io_number(req, if_fake_user_access,
413 sizeof(if_fake_user_access), &new_value, &changed);
414 if (error == 0 && changed != 0) {
415 if (new_value != 0) {
416 if (new_value != 1) {
417 return EINVAL;
418 }
419 }
420 if_fake_user_access = new_value;
421 }
422 return 0;
423 }
424
425 SYSCTL_PROC(_net_link_fake, OID_AUTO, user_access,
426 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
427 0, 0, feth_user_access_sysctl, "IU", "Fake interface user access");
428
429 /* sysctl net.link.fake.if_adv_intvl (unit: millisecond) */
430 #define FETH_IF_ADV_INTVL_MIN 10
431 #define FETH_IF_ADV_INTVL_MAX INT_MAX
432
433 static int if_fake_if_adv_interval = 0; /* no interface advisory */
434 static int
435 feth_if_adv_interval_sysctl SYSCTL_HANDLER_ARGS
436 {
437 #pragma unused(oidp, arg1, arg2)
438 unsigned int new_value;
439 int changed;
440 int error;
441
442 error = sysctl_io_number(req, if_fake_if_adv_interval,
443 sizeof(if_fake_if_adv_interval), &new_value, &changed);
444 if (error == 0 && changed != 0) {
445 if ((new_value != 0) && (new_value > FETH_IF_ADV_INTVL_MAX ||
446 new_value < FETH_IF_ADV_INTVL_MIN)) {
447 return EINVAL;
448 }
449 if_fake_if_adv_interval = new_value;
450 }
451 return 0;
452 }
453
454 SYSCTL_PROC(_net_link_fake, OID_AUTO, if_adv_intvl,
455 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
456 feth_if_adv_interval_sysctl, "IU",
457 "Fake interface will generate interface advisories reports at the specified interval in ms");
458
459 /* sysctl net.link.fake.tx_drops */
460 /*
461 * Fake ethernet will drop packet on the transmit path at the specified
462 * rate, i.e drop one in every if_fake_tx_drops number of packets.
463 */
464 #define FETH_TX_DROPS_MIN 0
465 #define FETH_TX_DROPS_MAX INT_MAX
466 static int if_fake_tx_drops = 0; /* no packets are dropped */
467 static int
468 feth_fake_tx_drops_sysctl SYSCTL_HANDLER_ARGS
469 {
470 #pragma unused(oidp, arg1, arg2)
471 unsigned int new_value;
472 int changed;
473 int error;
474
475 error = sysctl_io_number(req, if_fake_tx_drops,
476 sizeof(if_fake_tx_drops), &new_value, &changed);
477 if (error == 0 && changed != 0) {
478 if (new_value > FETH_TX_DROPS_MAX ||
479 new_value < FETH_TX_DROPS_MIN) {
480 return EINVAL;
481 }
482 if_fake_tx_drops = new_value;
483 }
484 return 0;
485 }
486
487 SYSCTL_PROC(_net_link_fake, OID_AUTO, tx_drops,
488 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
489 feth_fake_tx_drops_sysctl, "IU",
490 "Fake interface will intermittently drop packets on Tx path");
491
492 /* sysctl.net.link.fake.tx_exp_policy */
493
494 typedef enum {
495 IFF_TX_EXP_POLICY_DISABLED = 0, /* Expiry notification disabled */
496 IFF_TX_EXP_POLICY_DROP_AND_NOTIFY = 1, /* Expiry notification enabled; drop + notify mode */
497 IFF_TX_EXP_POLICY_NOTIFY_ONLY = 2, /* Expiry notification enabled; notify only mode */
498 IFF_TX_EXP_POLICY_METADATA = 3, /* Expiry notification enabled; use packet metadata */
499 } iff_tx_exp_policy_t;
500 static iff_tx_exp_policy_t if_fake_tx_exp_policy = IFF_TX_EXP_POLICY_DISABLED;
501
502 static int
503 feth_fake_tx_exp_policy_sysctl SYSCTL_HANDLER_ARGS
504 {
505 #pragma unused(oidp, arg1, arg2)
506 unsigned int new_value;
507 int changed;
508 int error;
509
510 error = sysctl_io_number(req, if_fake_tx_exp_policy,
511 sizeof(if_fake_tx_exp_policy), &new_value, &changed);
512 FAKE_LOG(LOG_DEBUG, FE_DBGF_CONTROL,
513 "if_fake_tx_exp_policy: %u -> %u (%d)",
514 if_fake_tx_exp_policy, new_value, changed);
515 if (error == 0 && changed != 0) {
516 if (new_value > IFF_TX_EXP_POLICY_METADATA ||
517 new_value < IFF_TX_EXP_POLICY_DISABLED) {
518 return EINVAL;
519 }
520 if_fake_tx_exp_policy = new_value;
521 }
522 return 0;
523 }
524 SYSCTL_PROC(_net_link_fake, OID_AUTO, tx_exp_policy,
525 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
526 feth_fake_tx_exp_policy_sysctl, "IU",
527 "Fake interface handling policy for expired TX attempts "
528 "(0 disabled, 1 drop and notify, 2 notify only, 3 packet metadata)");
529
530 /* sysctl net.link.fake.tx_completion_mode */
531 typedef enum {
532 IFF_TX_COMPL_MODE_SYNC = 0,
533 IFF_TX_COMPL_MODE_ASYNC = 1,
534 } iff_tx_completion_mode_t;
535 static iff_tx_completion_mode_t if_tx_completion_mode = IFF_TX_COMPL_MODE_SYNC;
536 static int
537 feth_fake_tx_completion_mode_sysctl SYSCTL_HANDLER_ARGS
538 {
539 #pragma unused(oidp, arg1, arg2)
540 unsigned int new_value;
541 int changed;
542 int error;
543
544 error = sysctl_io_number(req, if_tx_completion_mode,
545 sizeof(if_tx_completion_mode), &new_value, &changed);
546 if (error == 0 && changed != 0) {
547 if (new_value > IFF_TX_COMPL_MODE_ASYNC ||
548 new_value < IFF_TX_COMPL_MODE_SYNC) {
549 return EINVAL;
550 }
551 if_tx_completion_mode = new_value;
552 }
553 return 0;
554 }
555 SYSCTL_PROC(_net_link_fake, OID_AUTO, tx_completion_mode,
556 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
557 feth_fake_tx_completion_mode_sysctl, "IU",
558 "Fake interface tx completion mode (0 synchronous, 1 asynchronous)");
559
560 /* sysctl net.link.fake.llink_cnt */
561
562 /* The maximum number of logical links (including default link) */
563 #define FETH_MAX_LLINKS 16
564 /*
565 * The default number of logical links (including default link).
566 * Zero means logical link mode is disabled.
567 */
568 #define FETH_DEF_LLINKS 0
569
570 static uint32_t if_fake_llink_cnt = FETH_DEF_LLINKS;
571 static int
572 feth_fake_llink_cnt_sysctl SYSCTL_HANDLER_ARGS
573 {
574 #pragma unused(oidp, arg1, arg2)
575 unsigned int new_value;
576 int changed;
577 int error;
578
579 error = sysctl_io_number(req, if_fake_llink_cnt,
580 sizeof(if_fake_llink_cnt), &new_value, &changed);
581 if (error == 0 && changed != 0) {
582 if (new_value > FETH_MAX_LLINKS) {
583 return EINVAL;
584 }
585 if_fake_llink_cnt = new_value;
586 }
587 return 0;
588 }
589
590 SYSCTL_PROC(_net_link_fake, OID_AUTO, llink_cnt,
591 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
592 feth_fake_llink_cnt_sysctl, "IU",
593 "Fake interface logical link count");
594
595 /* sysctl net.link.fake.qset_cnt */
596
597 /* The maximum number of qsets for each logical link */
598 #define FETH_MAX_QSETS 16
599 /* The default number of qsets for each logical link */
600 #define FETH_DEF_QSETS 4
601
602 static uint32_t if_fake_qset_cnt = FETH_DEF_QSETS;
603 static int
604 feth_fake_qset_cnt_sysctl SYSCTL_HANDLER_ARGS
605 {
606 #pragma unused(oidp, arg1, arg2)
607 unsigned int new_value;
608 int changed;
609 int error;
610
611 error = sysctl_io_number(req, if_fake_qset_cnt,
612 sizeof(if_fake_qset_cnt), &new_value, &changed);
613 if (error == 0 && changed != 0) {
614 if (new_value == 0 ||
615 new_value > FETH_MAX_QSETS) {
616 return EINVAL;
617 }
618 if_fake_qset_cnt = new_value;
619 }
620 return 0;
621 }
622
623 SYSCTL_PROC(_net_link_fake, OID_AUTO, qset_cnt,
624 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
625 feth_fake_qset_cnt_sysctl, "IU",
626 "Fake interface queue set count");
627
628
/*
 * _mbuf_adjust_pkthdr_and_data
 * - advance the mbuf data pointer by 'len' bytes (trimming the front of
 *   the buffer) and shrink both the mbuf length and the packet header
 *   length to match; a negative 'len' moves the data pointer back and
 *   grows the lengths
 * - NOTE(review): no bounds checking is done here; callers presumably
 *   guarantee the adjusted range stays within the mbuf's buffer — confirm
 */
static void
_mbuf_adjust_pkthdr_and_data(mbuf_t m, int len)
{
	mbuf_setdata(m, mtod(m, char *) + len, mbuf_len(m) - len);
	mbuf_pkthdr_adjustlen(m, -len);
}
635
/*
 * get_bpf_header
 * - return the link-layer header that should be handed to BPF for mbuf
 *   'm', storing its length in '*header_len'
 * - if the packet header metadata carries a VLAN tag
 *   (CSUM_VLAN_TAG_VALID), synthesize an ether VLAN header in the
 *   caller-supplied 'evl_p': dst/src MACs copied from 'eh_p', encap
 *   protocol ETHERTYPE_VLAN, tag from m_pkthdr.vlan_tag, and the original
 *   ether_type as the inner protocol; otherwise just return 'eh_p'
 */
static inline void *__indexable
get_bpf_header(mbuf_t m, struct ether_header * eh_p,
    struct ether_vlan_header * evl_p, size_t * header_len)
{
	void * header;

	/* no VLAN tag, just use the ethernet header */
	if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
		header = (struct ether_header *__bidi_indexable)eh_p;
		*header_len = sizeof(*eh_p);
		goto done;
	}

	/* has VLAN tag, populate the ether VLAN header */
	bcopy(eh_p, evl_p,
	    offsetof(struct ether_header, ether_type)); /* dst+src ether */
	evl_p->evl_encap_proto = htons(ETHERTYPE_VLAN); /* VLAN encap */
	evl_p->evl_tag = htons(m->m_pkthdr.vlan_tag);   /* tag */
	evl_p->evl_proto = eh_p->ether_type;            /* proto */
	*header_len = sizeof(*evl_p);
	header = (struct ether_vlan_header *__bidi_indexable)evl_p;

done:
	return header;
}
661
662 typedef void (*_tap_func)(ifnet_t interface, u_int32_t dlt, mbuf_t packet,
663 void *__sized_by(header_len) header, size_t header_len);
664
665 static void
fake_bpf_tap_common(ifnet_t ifp,mbuf_t m,struct ether_header * eh_p,_tap_func func)666 fake_bpf_tap_common(ifnet_t ifp, mbuf_t m, struct ether_header * eh_p,
667 _tap_func func)
668 {
669 struct ether_vlan_header evl;
670 void * header;
671 size_t header_len;
672
673 header = get_bpf_header(m, eh_p, &evl, &header_len);
674 (*func)(ifp, DLT_EN10MB, m, header, header_len);
675 }
676
/*
 * fake_bpf_tap_in
 * - tap an inbound packet to BPF (bpf_tap_in direction)
 */
static inline void
fake_bpf_tap_in(ifnet_t ifp, mbuf_t m, struct ether_header * eh_p)
{
	fake_bpf_tap_common(ifp, m, eh_p, bpf_tap_in);
}
682
683
/*
 * fake_bpf_tap_out
 * - tap an outbound packet to BPF (bpf_tap_out direction)
 */
static inline void
fake_bpf_tap_out(ifnet_t ifp, mbuf_t m, struct ether_header * eh_p)
{
	fake_bpf_tap_common(ifp, m, eh_p, bpf_tap_out);
}
689
690 /**
691 ** virtual ethernet structures, types
692 **/
693
694 #define IFF_NUM_TX_RINGS_WMM_MODE 4
695 #define IFF_NUM_RX_RINGS_WMM_MODE 1
696 #define IFF_MAX_TX_RINGS IFF_NUM_TX_RINGS_WMM_MODE
697 #define IFF_MAX_RX_RINGS IFF_NUM_RX_RINGS_WMM_MODE
698 #define IFF_NUM_TX_QUEUES_WMM_MODE 4
699 #define IFF_NUM_RX_QUEUES_WMM_MODE 1
700 #define IFF_MAX_TX_QUEUES IFF_NUM_TX_QUEUES_WMM_MODE
701 #define IFF_MAX_RX_QUEUES IFF_NUM_RX_QUEUES_WMM_MODE
702
703 #define IFF_MAX_BATCH_SIZE 32
704
705 typedef uint16_t iff_flags_t;
706 #define IFF_FLAGS_HWCSUM 0x0001
707 #define IFF_FLAGS_BSD_MODE 0x0002
708 #define IFF_FLAGS_DETACHING 0x0004
709 #define IFF_FLAGS_WMM_MODE 0x0008
710 #define IFF_FLAGS_MULTIBUFLETS 0x0010
711 #define IFF_FLAGS_TSO_SUPPORT 0x0020
712 #define IFF_FLAGS_LRO 0x0040
713 #define IFF_FLAGS_VLAN_MTU 0x0080
714 #define IFF_FLAGS_VLAN_TAGGING 0x0100
715 #define IFF_FLAGS_SEPARATE_FRAME_HEADER 0x0200
716 #define IFF_FLAGS_NX_ATTACHED 0x0400
717
718 #if SKYWALK
719
720 typedef struct {
721 uuid_t fnx_provider;
722 uuid_t fnx_instance;
723 } fake_nx, *fake_nx_t;
724
725 typedef struct {
726 kern_netif_queue_t fq_queue;
727 } fake_queue;
728
729 typedef struct {
730 kern_netif_qset_t fqs_qset; /* provided by xnu */
731 fake_queue fqs_rx_queue[IFF_MAX_RX_QUEUES];
732 fake_queue fqs_tx_queue[IFF_MAX_TX_QUEUES];
733 uint32_t fqs_rx_queue_cnt;
734 uint32_t fqs_tx_queue_cnt;
735 uint32_t fqs_llink_idx;
736 uint32_t fqs_idx;
737 uint32_t fqs_dequeue_cnt;
738 uint64_t fqs_id;
739 boolean_t fqs_combined_mode;
740 } fake_qset;
741
742 typedef struct {
743 uint64_t fl_id;
744 uint32_t fl_idx;
745 uint32_t fl_qset_cnt;
746 fake_qset fl_qset[FETH_MAX_QSETS];
747 } fake_llink, * fake_llink_t;
748
749 static kern_pbufpool_t S_pp;
750
751 #define IFF_TT_OUTPUT 0x01 /* generate trace_tag on output */
752 #define IFF_TT_INPUT 0x02 /* generate trace_tag on input */
753 static int if_fake_trace_tag_flags = 0;
754 SYSCTL_INT(_net_link_fake, OID_AUTO, trace_tag, CTLFLAG_RW | CTLFLAG_LOCKED,
755 &if_fake_trace_tag_flags, 0, "Fake interface generate trace_tag");
756 static packet_trace_tag_t if_fake_trace_tag_current = 1;
757
758 #endif /* SKYWALK */
759
/*
 * struct if_fake
 * - per-instance soft state for a feth interface
 * - reference counted via iff_retain_count (see feth_release/feth_free)
 */
struct if_fake {
	char iff_name[IFNAMSIZ]; /* our unique id */
	ifnet_t iff_ifp;
	iff_flags_t iff_flags; /* IFF_FLAGS_* bits */
	uint32_t iff_retain_count;
	ifnet_t iff_peer; /* the other end */
	int iff_media_current;
	int iff_media_active;
	uint32_t iff_media_count; /* entries used in iff_media_list */
	int iff_media_list[IF_FAKE_MEDIA_LIST_MAX];
	boolean_t iff_start_busy;
	unsigned int iff_max_mtu;
	uint32_t iff_fcs; /* append frame check sequence (see net.link.fake.fcs) */
	uint32_t iff_trailer_length; /* bytes of feth_trailer appended */
#if SKYWALK
	fake_nx iff_nx; /* netif nexus provider/instance UUIDs */
	struct netif_stats *iff_nifs;
	uint32_t iff_nifs_ref;
	uint32_t iff_llink_cnt;
	kern_channel_ring_t iff_rx_ring[IFF_MAX_RX_RINGS];
	kern_channel_ring_t iff_tx_ring[IFF_MAX_TX_RINGS];
	/* array of FETH_MAX_LLINKS entries; freed in feth_free */
	fake_llink_t iff_llink __counted_by_or_null(FETH_MAX_LLINKS);
	thread_call_t iff_doorbell_tcall;
	thread_call_t iff_if_adv_tcall; /* interface advisory timer */
	boolean_t iff_doorbell_tcall_active;
	boolean_t iff_waiting_for_tcall;
	boolean_t iff_channel_connected;
	iff_pktpool_mode_t iff_pp_mode; /* global vs private packet pools */
	kern_pbufpool_t iff_rx_pp; /* == S_pp in IFF_PP_MODE_GLOBAL */
	kern_pbufpool_t iff_tx_pp; /* == S_pp in IFF_PP_MODE_GLOBAL */
	uint32_t iff_tx_headroom;
	/* advisory interval, presumably ms per the sysctl description */
	unsigned int iff_adv_interval;
	uint32_t iff_tx_drop_rate; /* drop 1 in iff_tx_drop_rate packets */
	uint32_t iff_tx_pkts_count;
	iff_tx_completion_mode_t iff_tx_completion_mode;
	bool iff_intf_adv_enabled;
	void *iff_intf_adv_kern_ctx;
	kern_nexus_capab_interface_advisory_notify_fn_t iff_intf_adv_notify;
	iff_tx_exp_policy_t iff_tx_exp_policy;
#endif /* SKYWALK */
};
801
802 typedef struct if_fake * __single if_fake_ref;
803
804 static if_fake_ref
805 ifnet_get_if_fake(ifnet_t ifp);
806
807 static inline boolean_t
feth_in_bsd_mode(if_fake_ref fakeif)808 feth_in_bsd_mode(if_fake_ref fakeif)
809 {
810 return (fakeif->iff_flags & IFF_FLAGS_BSD_MODE) != 0;
811 }
812
/*
 * feth_set_detaching
 * - mark the interface as in the process of detaching
 */
static inline void
feth_set_detaching(if_fake_ref fakeif)
{
	fakeif->iff_flags |= IFF_FLAGS_DETACHING;
}
818
819 static inline boolean_t
feth_is_detaching(if_fake_ref fakeif)820 feth_is_detaching(if_fake_ref fakeif)
821 {
822 return (fakeif->iff_flags & IFF_FLAGS_DETACHING) != 0;
823 }
824
825 static int
feth_enable_dequeue_stall(ifnet_t ifp,uint32_t enable)826 feth_enable_dequeue_stall(ifnet_t ifp, uint32_t enable)
827 {
828 int error;
829
830 if (enable != 0) {
831 error = ifnet_disable_output(ifp);
832 } else {
833 error = ifnet_enable_output(ifp);
834 }
835
836 return error;
837 }
838
839 #if SKYWALK
840 static inline boolean_t
feth_in_wmm_mode(if_fake_ref fakeif)841 feth_in_wmm_mode(if_fake_ref fakeif)
842 {
843 return (fakeif->iff_flags & IFF_FLAGS_WMM_MODE) != 0;
844 }
845
846 static inline boolean_t
feth_using_multibuflets(if_fake_ref fakeif)847 feth_using_multibuflets(if_fake_ref fakeif)
848 {
849 return (fakeif->iff_flags & IFF_FLAGS_MULTIBUFLETS) != 0;
850 }
851 static void feth_detach_netif_nexus(if_fake_ref fakeif);
852
853 static inline boolean_t
feth_has_intf_advisory_configured(if_fake_ref fakeif)854 feth_has_intf_advisory_configured(if_fake_ref fakeif)
855 {
856 return fakeif->iff_adv_interval > 0;
857 }
858 #endif /* SKYWALK */
859
860 static inline bool
feth_supports_tso(if_fake_ref fakeif)861 feth_supports_tso(if_fake_ref fakeif)
862 {
863 return (fakeif->iff_flags & IFF_FLAGS_TSO_SUPPORT) != 0;
864 }
865
/*
 * feth_set_supports_tso
 * - enable TSO offload support on this instance
 */
static inline void
feth_set_supports_tso(if_fake_ref fakeif)
{
	fakeif->iff_flags |= IFF_FLAGS_TSO_SUPPORT;
}
871
872 static inline bool
feth_supports_vlan_mtu(if_fake_ref fakeif)873 feth_supports_vlan_mtu(if_fake_ref fakeif)
874 {
875 return (fakeif->iff_flags & IFF_FLAGS_VLAN_MTU) != 0;
876 }
877
/*
 * feth_set_supports_vlan_mtu
 * - enable VLAN MTU support on this instance
 */
static inline void
feth_set_supports_vlan_mtu(if_fake_ref fakeif)
{
	fakeif->iff_flags |= IFF_FLAGS_VLAN_MTU;
}
883
884 static inline bool
feth_supports_vlan_tagging(if_fake_ref fakeif)885 feth_supports_vlan_tagging(if_fake_ref fakeif)
886 {
887 return (fakeif->iff_flags & IFF_FLAGS_VLAN_TAGGING) != 0;
888 }
889
/*
 * feth_set_supports_vlan_tagging
 * - enable VLAN tagging on this instance
 */
static inline void
feth_set_supports_vlan_tagging(if_fake_ref fakeif)
{
	fakeif->iff_flags |= IFF_FLAGS_VLAN_TAGGING;
}
895
896
897 #define FETH_MAXUNIT IF_MAXUNIT
898 #define FETH_ZONE_MAX_ELEM MIN(IFNETS_MAX, FETH_MAXUNIT)
899
900 static int feth_clone_create(struct if_clone *, u_int32_t, void *);
901 static int feth_clone_destroy(ifnet_t);
902 static int feth_output(ifnet_t ifp, struct mbuf *m);
903 static void feth_start(ifnet_t ifp);
904 static int feth_ioctl(ifnet_t ifp, u_long cmd, void * addr);
905 static int feth_config(ifnet_t ifp, ifnet_t peer);
906 static void feth_if_free(ifnet_t ifp);
907 static void feth_ifnet_set_attrs(if_fake_ref fakeif, ifnet_t ifp);
908 static void feth_free(if_fake_ref fakeif);
909
910 static struct if_clone
911 feth_cloner = IF_CLONE_INITIALIZER(FAKE_ETHER_NAME,
912 feth_clone_create,
913 feth_clone_destroy,
914 0,
915 FETH_MAXUNIT);
916 static void interface_link_event(ifnet_t ifp, u_int32_t event_code);
917
918 /* some media words to pretend to be ethernet */
919 #define FAKE_DEFAULT_MEDIA IFM_MAKEWORD(IFM_ETHER, IFM_10G_T, IFM_FDX, 0)
920 static int default_media_words[] = {
921 IFM_MAKEWORD(IFM_ETHER, 0, 0, 0),
922 FAKE_DEFAULT_MEDIA,
923 IFM_MAKEWORD(IFM_ETHER, IFM_2500_T, IFM_FDX, 0),
924 IFM_MAKEWORD(IFM_ETHER, IFM_5000_T, IFM_FDX, 0),
925
926 IFM_MAKEWORD(IFM_ETHER, IFM_10G_KX4, IFM_FDX, 0),
927 IFM_MAKEWORD(IFM_ETHER, IFM_20G_KR2, IFM_FDX, 0),
928 IFM_MAKEWORD(IFM_ETHER, IFM_2500_SX, IFM_FDX, 0),
929 IFM_MAKEWORD(IFM_ETHER, IFM_25G_KR, IFM_FDX, 0),
930 IFM_MAKEWORD(IFM_ETHER, IFM_40G_SR4, IFM_FDX, 0),
931 IFM_MAKEWORD(IFM_ETHER, IFM_50G_CR2, IFM_FDX, 0),
932 IFM_MAKEWORD(IFM_ETHER, IFM_56G_R4, IFM_FDX, 0),
933 IFM_MAKEWORD(IFM_ETHER, IFM_100G_CR4, IFM_FDX, 0),
934 IFM_MAKEWORD(IFM_ETHER, IFM_400G_AUI8, IFM_FDX, 0),
935 };
936 #define default_media_words_count (sizeof(default_media_words) \
937 / sizeof (default_media_words[0]))
938
939 /**
940 ** veth locks
941 **/
942
943 static LCK_GRP_DECLARE(feth_lck_grp, "fake");
944 static LCK_MTX_DECLARE(feth_lck_mtx, &feth_lck_grp);
945
/*
 * feth_lock
 * - acquire the global feth mutex protecting module-wide state
 *   (e.g. S_pp and per-interface lookups)
 */
static inline void
feth_lock(void)
{
	lck_mtx_lock(&feth_lck_mtx);
}
951
/*
 * feth_unlock
 * - release the global feth mutex
 */
static inline void
feth_unlock(void)
{
	lck_mtx_unlock(&feth_lck_mtx);
}
957
958 static inline int
get_max_mtu(int bsd_mode,unsigned int max_mtu)959 get_max_mtu(int bsd_mode, unsigned int max_mtu)
960 {
961 unsigned int mtu;
962
963 if (bsd_mode != 0) {
964 mtu = (njcl > 0) ? (M16KCLBYTES - ETHER_HDR_LEN)
965 : MBIGCLBYTES - ETHER_HDR_LEN;
966 if (mtu > max_mtu) {
967 mtu = max_mtu;
968 }
969 } else {
970 mtu = max_mtu;
971 }
972 return mtu;
973 }
974
/*
 * feth_max_mtu
 * - return the interface's configured maximum MTU, read under the feth
 *   lock; falls back to ETHERMTU when no feth state is attached
 */
static inline unsigned int
feth_max_mtu(ifnet_t ifp)
{
	if_fake_ref fakeif;
	unsigned int max_mtu = ETHERMTU;

	feth_lock();
	fakeif = ifnet_get_if_fake(ifp);
	if (fakeif != NULL) {
		max_mtu = fakeif->iff_max_mtu;
	}
	feth_unlock();
	return max_mtu;
}
989
/*
 * feth_free
 * - destroy the feth state once the last reference is gone (called only
 *   from feth_release(); iff_retain_count must already be zero)
 * - releases the skywalk packet pool references and, for the shared
 *   global pool, tears down S_pp itself when this was its last user
 */
static void
feth_free(if_fake_ref fakeif)
{
	VERIFY(fakeif->iff_retain_count == 0);
#if SKYWALK
	if (!feth_in_bsd_mode(fakeif)) {
		if (fakeif->iff_pp_mode == IFF_PP_MODE_GLOBAL) {
			/* both RX and TX must point at the shared pool */
			VERIFY(fakeif->iff_rx_pp == S_pp);
			VERIFY(fakeif->iff_tx_pp == S_pp);
			pp_release(fakeif->iff_rx_pp);
			fakeif->iff_rx_pp = NULL;
			pp_release(fakeif->iff_tx_pp);
			fakeif->iff_tx_pp = NULL;
			feth_lock();
			/*
			 * If only the creation reference remains, no other
			 * interface is using the shared pool; drop it.
			 */
			if (S_pp != NULL && S_pp->pp_refcnt == 1) {
				pp_release(S_pp);
				S_pp = NULL;
			}
			feth_unlock();
		} else {
			/* private pool(s): RX and TX each hold a reference */
			if (fakeif->iff_rx_pp != NULL) {
				pp_release(fakeif->iff_rx_pp);
				fakeif->iff_rx_pp = NULL;
			}
			if (fakeif->iff_tx_pp != NULL) {
				pp_release(fakeif->iff_tx_pp);
				fakeif->iff_tx_pp = NULL;
			}
		}
	}
#endif /* SKYWALK */

	FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE, "%s", fakeif->iff_name);
	if (fakeif->iff_llink != NULL) {
		fake_llink_t llink;
		llink = fakeif->iff_llink;
		fakeif->iff_llink = NULL;
		kfree_type(fake_llink, FETH_MAX_LLINKS, llink);
	}
	kfree_type(struct if_fake, fakeif);
}
1031
1032 static void
feth_release(if_fake_ref fakeif)1033 feth_release(if_fake_ref fakeif)
1034 {
1035 u_int32_t old_retain_count;
1036
1037 old_retain_count = OSDecrementAtomic(&fakeif->iff_retain_count);
1038 switch (old_retain_count) {
1039 case 0:
1040 VERIFY(old_retain_count != 0);
1041 break;
1042 case 1:
1043 feth_free(fakeif);
1044 break;
1045 default:
1046 break;
1047 }
1048 return;
1049 }
1050
1051 #if SKYWALK
1052
/* take an additional reference on the feth state */
static void
feth_retain(if_fake_ref fakeif)
{
	OSIncrementAtomic(&fakeif->iff_retain_count);
}
1058
/*
 * feth_packet_pool_init_prepare
 * - fill in a kern_pbufpool_init for this interface's pool parameters
 * - multi-buflet mode chains smaller buflets up to the max MTU with
 *   on-demand buffers; otherwise a single buflet large enough for the
 *   MTU is used
 */
static void
feth_packet_pool_init_prepare(if_fake_ref fakeif,
    struct kern_pbufpool_init *pp_init)
{
	uint32_t max_mtu = fakeif->iff_max_mtu;
	uint32_t buflet_size = if_fake_buflet_size;

	bzero(pp_init, sizeof(*pp_init));
	pp_init->kbi_version = KERN_PBUFPOOL_CURRENT_VERSION;
	pp_init->kbi_flags |= KBIF_VIRTUAL_DEVICE;
	pp_init->kbi_packets = 1024; /* TBD configurable */
	if (feth_supports_tso(fakeif)) {
		/* TSO needs room for large segments per buflet */
		buflet_size = if_fake_tso_buffer_size;
	}
	if (feth_using_multibuflets(fakeif)) {
		/* enough buflets per packet to cover the max MTU */
		pp_init->kbi_bufsize = buflet_size;
		pp_init->kbi_max_frags = howmany(max_mtu, buflet_size);
		pp_init->kbi_buflets = pp_init->kbi_packets *
		    pp_init->kbi_max_frags;
		pp_init->kbi_flags |= KBIF_BUFFER_ON_DEMAND;
	} else {
		/* single buflet sized for the larger of MTU/buflet size */
		pp_init->kbi_bufsize = max(max_mtu, buflet_size);
		pp_init->kbi_max_frags = 1;
		pp_init->kbi_buflets = pp_init->kbi_packets;
	}
	pp_init->kbi_buf_seg_size = skmem_usr_buf_seg_size;
	if (if_fake_user_access != 0) {
		pp_init->kbi_flags |= KBIF_USER_ACCESS;
	}
	pp_init->kbi_ctx = NULL;
	pp_init->kbi_ctx_retain = NULL;
	pp_init->kbi_ctx_release = NULL;
}
1092
1093 static errno_t
feth_packet_pool_make(if_fake_ref fakeif)1094 feth_packet_pool_make(if_fake_ref fakeif)
1095 {
1096 struct kern_pbufpool_init pp_init;
1097 errno_t err;
1098
1099 feth_packet_pool_init_prepare(fakeif, &pp_init);
1100
1101 switch (fakeif->iff_pp_mode) {
1102 case IFF_PP_MODE_GLOBAL:
1103 feth_lock();
1104 if (S_pp == NULL) {
1105 (void)snprintf((char *)pp_init.kbi_name,
1106 sizeof(pp_init.kbi_name), "%s", "feth shared pp");
1107 err = kern_pbufpool_create(&pp_init, &S_pp, NULL);
1108 }
1109 pp_retain(S_pp);
1110 feth_unlock();
1111 fakeif->iff_rx_pp = S_pp;
1112 pp_retain(S_pp);
1113 fakeif->iff_tx_pp = S_pp;
1114 break;
1115 case IFF_PP_MODE_PRIVATE:
1116 (void)snprintf((char *)pp_init.kbi_name,
1117 sizeof(pp_init.kbi_name), "%s pp", fakeif->iff_name);
1118 err = kern_pbufpool_create(&pp_init, &fakeif->iff_rx_pp, NULL);
1119 pp_retain(fakeif->iff_rx_pp);
1120 fakeif->iff_tx_pp = fakeif->iff_rx_pp;
1121 break;
1122 case IFF_PP_MODE_PRIVATE_SPLIT:
1123 (void)snprintf((char *)pp_init.kbi_name,
1124 sizeof(pp_init.kbi_name), "%s rx pp", fakeif->iff_name);
1125 pp_init.kbi_flags &= ~(KBIF_IODIR_IN | KBIF_IODIR_OUT |
1126 KBIF_BUFFER_ON_DEMAND | KBIF_KERNEL_READONLY);
1127 pp_init.kbi_flags |= (KBIF_IODIR_IN | KBIF_BUFFER_ON_DEMAND);
1128 pp_init.kbi_packets = 1024;
1129 pp_init.kbi_bufsize = if_fake_link_layer_aggregation_factor * 1024;
1130 err = kern_pbufpool_create(&pp_init, &fakeif->iff_rx_pp, NULL);
1131 if (err != 0) {
1132 FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
1133 "rx pp create failed %d", err);
1134 return err;
1135 }
1136 pp_init.kbi_flags &= ~(KBIF_IODIR_IN | KBIF_IODIR_OUT |
1137 KBIF_BUFFER_ON_DEMAND | KBIF_KERNEL_READONLY);
1138 pp_init.kbi_flags |= KBIF_IODIR_OUT;
1139 pp_init.kbi_packets = 1024; /* TBD configurable */
1140 pp_init.kbi_bufsize = fakeif->iff_max_mtu;
1141 (void)snprintf((char *)pp_init.kbi_name,
1142 sizeof(pp_init.kbi_name), "%s tx pp", fakeif->iff_name);
1143 err = kern_pbufpool_create(&pp_init, &fakeif->iff_tx_pp, NULL);
1144 if (err != 0) {
1145 FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
1146 "tx pp create failed %d", err);
1147 pp_release(fakeif->iff_rx_pp);
1148 return err;
1149 }
1150 break;
1151 default:
1152 VERIFY(0);
1153 __builtin_unreachable();
1154 }
1155
1156 return 0;
1157 }
1158
1159 static void
feth_packet_set_trace_tag(kern_packet_t ph,int flag)1160 feth_packet_set_trace_tag(kern_packet_t ph, int flag)
1161 {
1162 if (if_fake_trace_tag_flags & flag) {
1163 if (++if_fake_trace_tag_current == 0) {
1164 if_fake_trace_tag_current = 1;
1165 }
1166 kern_packet_set_trace_tag(ph, if_fake_trace_tag_current);
1167 }
1168 }
1169
/*
 * feth_clone_packet
 * - copy the single-buflet source packet `sph' into a packet backed by
 *   the destination interface's RX pool
 * - `*pdph' may carry a previously produced packet; when the new payload
 *   fits in the space remaining past that packet's data limit, the
 *   underlying buffer is shared via a light-weight clone instead of a
 *   fresh pool allocation
 * - on success, `*pdph' is replaced with the finalized destination packet
 */
static errno_t
feth_clone_packet(if_fake_ref dif, kern_packet_t sph, kern_packet_t *pdph)
{
	errno_t err = 0;
	kern_pbufpool_t pp = dif->iff_rx_pp;
	kern_packet_t dph = 0, dph0 = 0;
	kern_buflet_t sbuf, dbuf0 = NULL, dbuf;
	caddr_t saddr, daddr;
	uint32_t soff, doff;
	uint32_t slen, dlen;
	uint32_t dlim0, dlim;

	sbuf = kern_packet_get_next_buflet(sph, NULL);
	saddr = __unsafe_forge_bidi_indexable(caddr_t,
	    kern_buflet_get_data_address(sbuf),
	    kern_buflet_get_data_limit(sbuf));
	doff = soff = kern_buflet_get_data_offset(sbuf);
	dlen = slen = kern_buflet_get_data_length(sbuf);

	/* packet clone is only supported for single-buflet */
	ASSERT(kern_packet_get_buflet_count(sph) == 1);
	ASSERT(soff == kern_packet_get_headroom(sph));
	ASSERT(slen == kern_packet_get_data_length(sph));

	/*
	 * Compute how much room is left in the previous destination
	 * packet's buffer past its data limit (0 when there is none).
	 */
	dph0 = *pdph;
	if (dph0 == 0) {
		dlim0 = 0;
	} else {
		dbuf0 = kern_packet_get_next_buflet(dph0, NULL);
		ASSERT(kern_buflet_get_object_limit(dbuf0) ==
		    PP_BUF_OBJ_SIZE_DEF(pp));
		ASSERT(kern_buflet_get_data_limit(dbuf0) % 16 == 0);
		dlim0 = ((size_t)kern_buflet_get_object_address(dbuf0) +
		    kern_buflet_get_object_limit(dbuf0)) -
		    ((size_t)kern_buflet_get_data_address(dbuf0) +
		    kern_buflet_get_data_limit(dbuf0));
	}

	if (doff + dlen > dlim0) {
		/* doesn't fit: allocate a fresh packet from the RX pool */
		err = kern_pbufpool_alloc_nosleep(pp, 1, &dph);
		if (err != 0) {
			STATS_INC(dif->iff_nifs, NETIF_STATS_DROP);
			STATS_INC(dif->iff_nifs, NETIF_STATS_DROP_NOMEM_PKT);
			return err;
		}
		dbuf = kern_packet_get_next_buflet(dph, NULL);
		ASSERT(kern_buflet_get_data_address(dbuf) ==
		    kern_buflet_get_object_address(dbuf));
		daddr = __unsafe_forge_bidi_indexable(caddr_t,
		    kern_buflet_get_data_address(dbuf),
		    kern_buflet_get_data_limit(dbuf));
		dlim = kern_buflet_get_object_limit(dbuf);
		ASSERT(dlim == PP_BUF_OBJ_SIZE_DEF(pp));
	} else {
		/* fits: share the previous packet's buffer via clone */
		err = kern_packet_clone_nosleep(dph0, &dph, KPKT_COPY_LIGHT);
		if (err != 0) {
			FAKE_LOG(LOG_INFO, FE_DBGF_OUTPUT,
			    "packet clone err %d", err);
			return err;
		}
		dbuf = kern_packet_get_next_buflet(dph, NULL);
		ASSERT(kern_buflet_get_object_address(dbuf) ==
		    kern_buflet_get_object_address(dbuf0));
		/* new data region starts right after dph0's data limit */
		daddr = __unsafe_forge_bidi_indexable(caddr_t,
		    kern_buflet_get_data_address(dbuf0),
		    kern_buflet_get_object_limit(dbuf0)) + kern_buflet_get_data_limit(dbuf0);
		dlim = dlim0;
	}

	ASSERT(doff + dlen <= dlim);

	ASSERT((uintptr_t)daddr % 16 == 0);
	bcopy(saddr + soff, daddr + doff, slen);

	/* keep the data limit 16-byte aligned for the next clone pass */
	dlim = MIN(dlim, P2ROUNDUP(doff + dlen, 16));
	err = kern_buflet_set_data_address(dbuf, daddr);
	VERIFY(err == 0);
	err = kern_buflet_set_data_limit(dbuf, dlim);
	VERIFY(err == 0);
	err = kern_buflet_set_data_length(dbuf, dlen);
	VERIFY(err == 0);
	err = kern_buflet_set_data_offset(dbuf, doff);
	VERIFY(err == 0);
	err = kern_packet_set_headroom(dph, doff);
	VERIFY(err == 0);
	err = kern_packet_set_link_header_length(dph,
	    kern_packet_get_link_header_length(sph));
	VERIFY(err == 0);
	err = kern_packet_set_service_class(dph,
	    kern_packet_get_service_class(sph));
	VERIFY(err == 0);
	err = kern_packet_finalize(dph);
	VERIFY(err == 0);
	*pdph = dph;

	return err;
}
1267
/*
 * feth_copy_buflet
 * - copy the source buflet's data (at its offset/length) into the
 *   destination buflet and mirror the offset and length metadata
 * - assumes the destination buflet has room at the same offset;
 *   not checked here
 */
static inline void
feth_copy_buflet(kern_buflet_t sbuf, kern_buflet_t dbuf)
{
	errno_t err;
	uint32_t off, len;
	caddr_t saddr, daddr;

	saddr = __unsafe_forge_bidi_indexable(caddr_t,
	    kern_buflet_get_data_address(sbuf),
	    kern_buflet_get_data_limit(sbuf));
	off = kern_buflet_get_data_offset(sbuf);
	len = kern_buflet_get_data_length(sbuf);
	daddr = __unsafe_forge_bidi_indexable(caddr_t,
	    kern_buflet_get_data_address(dbuf),
	    kern_buflet_get_data_limit(dbuf));
	bcopy(saddr + off, daddr + off, len);
	err = kern_buflet_set_data_offset(dbuf, off);
	VERIFY(err == 0);
	err = kern_buflet_set_data_length(dbuf, len);
	VERIFY(err == 0);
}
1289
/*
 * feth_add_packet_trailer
 * - append `trailer_len' bytes of `trailer' after the data in the
 *   packet's last buflet and re-finalize the packet
 * - returns ERANGE when the last buflet has insufficient room,
 *   0 on success
 */
static int
feth_add_packet_trailer(kern_packet_t ph, void * __sized_by(trailer_len) trailer, size_t trailer_len)
{
	errno_t err = 0;

	ASSERT(trailer_len <= FETH_TRAILER_LENGTH_MAX);

	/* walk to the last buflet of the packet */
	kern_buflet_t buf = NULL, iter = NULL;
	while ((iter = kern_packet_get_next_buflet(ph, iter)) != NULL) {
		buf = iter;
	}
	ASSERT(buf != NULL);

	uint32_t dlim = kern_buflet_get_data_limit(buf);
	uint32_t doff = kern_buflet_get_data_offset(buf);
	uint32_t dlen = kern_buflet_get_data_length(buf);

	/* free space between the end of data and the data limit */
	size_t trailer_room = dlim - doff - dlen;

	if (trailer_room < trailer_len) {
		FAKE_LOG(LOG_INFO, FE_DBGF_OUTPUT, "not enough room");
		return ERANGE;
	}

	void *data = __unsafe_forge_bidi_indexable(caddr_t,
	    kern_buflet_get_data_address(buf),
	    kern_buflet_get_data_limit(buf)) + doff + dlen;
	memcpy(data, trailer, trailer_len);

	err = kern_buflet_set_data_length(buf, dlen + trailer_len);
	VERIFY(err == 0);

	err = kern_packet_finalize(ph);
	VERIFY(err == 0);

	FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT, "%zuB trailer added", trailer_len);

	return 0;
}
1329
1330 static int
feth_add_packet_fcs(kern_packet_t ph)1331 feth_add_packet_fcs(kern_packet_t ph)
1332 {
1333 uint32_t crc = 0;
1334 int err;
1335
1336 ASSERT(sizeof(crc) == ETHER_CRC_LEN);
1337
1338 kern_buflet_t buf = NULL;
1339 while ((buf = kern_packet_get_next_buflet(ph, buf)) != NULL) {
1340 uint32_t doff = kern_buflet_get_data_offset(buf);
1341 uint32_t dlen = kern_buflet_get_data_length(buf);
1342 void *data = __unsafe_forge_bidi_indexable(caddr_t,
1343 kern_buflet_get_data_address(buf),
1344 kern_buflet_get_data_limit(buf)) + doff;
1345 crc = crc32(crc, data, dlen);
1346 }
1347
1348 err = feth_add_packet_trailer(ph, &crc, ETHER_CRC_LEN);
1349 if (!err) {
1350 return err;
1351 }
1352
1353 err = kern_packet_set_link_ethfcs(ph);
1354 VERIFY(err == 0);
1355
1356 return 0;
1357 }
1358
/*
 * feth_copy_packet
 * - deep-copy `sph' into a new packet allocated from the destination
 *   interface's RX pool, buflet by buflet
 * - single-buflet packets copy into the pre-attached buflet; multi-buflet
 *   packets allocate and chain additional buflets as needed
 * - on success `*pdph' holds the finalized copy; on failure it is 0 and
 *   any partially built packet is returned to the pool
 */
static errno_t
feth_copy_packet(if_fake_ref dif, kern_packet_t sph, kern_packet_t *pdph)
{
	errno_t err = 0;
	uint16_t i, bufcnt;
	mach_vm_address_t baddr;
	kern_buflet_t sbuf = NULL, dbuf = NULL;
	kern_pbufpool_t pp = dif->iff_rx_pp;
	kern_packet_t dph;
	boolean_t multi_buflet = feth_using_multibuflets(dif);

	bufcnt = kern_packet_get_buflet_count(sph);
	ASSERT((bufcnt == 1) || multi_buflet);
	*pdph = 0;

	err = kern_pbufpool_alloc_nosleep(pp, 1, &dph);
	if (err != 0) {
		STATS_INC(dif->iff_nifs, NETIF_STATS_DROP);
		STATS_INC(dif->iff_nifs, NETIF_STATS_DROP_NOMEM_PKT);
		return err;
	}

	/* pre-constructed single buflet packet copy */
	sbuf = kern_packet_get_next_buflet(sph, NULL);
	dbuf = kern_packet_get_next_buflet(dph, NULL);
	feth_copy_buflet(sbuf, dbuf);

	if (!multi_buflet) {
		goto done;
	}

	/* un-constructed multi-buflet packet copy */
	for (i = 1; i < bufcnt; i++) {
		kern_buflet_t __single dbuf_next = NULL;

		sbuf = kern_packet_get_next_buflet(sph, sbuf);
		VERIFY(sbuf != NULL);
		err = kern_pbufpool_alloc_buflet_nosleep(pp, &dbuf_next);
		if (err != 0) {
			STATS_INC(dif->iff_nifs, NETIF_STATS_DROP);
			STATS_INC(dif->iff_nifs, NETIF_STATS_DROP_NOMEM_BUF);
			break;
		}
		ASSERT(dbuf_next != NULL);
		feth_copy_buflet(sbuf, dbuf_next);
		err = kern_packet_add_buflet(dph, dbuf, dbuf_next);
		VERIFY(err == 0);
		dbuf = dbuf_next;
	}
	if (__improbable(err != 0)) {
		/*
		 * Buflet allocation failed mid-copy: sanity-walk the
		 * chain built so far, then free the whole packet (the
		 * pool reclaims its attached buflets).
		 */
		dbuf = NULL;
		while (i-- != 0) {
			dbuf = kern_packet_get_next_buflet(dph, dbuf);
			VERIFY(dbuf != NULL);
			baddr = (mach_vm_address_t)
			    kern_buflet_get_data_address(dbuf);
			VERIFY(baddr != 0);
		}
		kern_pbufpool_free(pp, dph);
		dph = 0;
	}

done:
	if (__probable(err == 0)) {
		/* mirror the source packet's metadata onto the copy */
		err = kern_packet_set_headroom(dph,
		    kern_packet_get_headroom(sph));
		VERIFY(err == 0);
		err = kern_packet_set_link_header_length(dph,
		    kern_packet_get_link_header_length(sph));
		VERIFY(err == 0);
		err = kern_packet_set_service_class(dph,
		    kern_packet_get_service_class(sph));
		VERIFY(err == 0);
		err = kern_packet_finalize(dph);
		VERIFY(err == 0);
		VERIFY(bufcnt == kern_packet_get_buflet_count(dph));
		*pdph = dph;
	}
	return err;
}
1439
1440 static inline void
feth_update_pkt_tso_metadata_for_rx(kern_packet_t ph)1441 feth_update_pkt_tso_metadata_for_rx(kern_packet_t ph)
1442 {
1443 /*
1444 * Nothing to do if not a TSO offloaded packet.
1445 */
1446 uint16_t seg_sz = 0;
1447 seg_sz = kern_packet_get_protocol_segment_size(ph);
1448 if (seg_sz == 0) {
1449 return;
1450 }
1451 /*
1452 * For RX, make the packet appear as a fully validated LRO packet.
1453 */
1454 packet_csum_flags_t csum_flags = PACKET_CSUM_IP_CHECKED |
1455 PACKET_CSUM_IP_VALID | PACKET_CSUM_DATA_VALID |
1456 PACKET_CSUM_PSEUDO_HDR;
1457 (void) kern_packet_set_inet_checksum(ph, csum_flags, 0, 0xFFFF, FALSE);
1458 return;
1459 }
1460
/*
 * feth_rx_submit
 * - deliver `n_pkts' packets from `sif' into `dif''s default RX ring
 * - depending on the destination's pool mode the packet is transferred
 *   as-is (global shared pool), deep-copied (private pool), or cloned
 *   (private split pool)
 * - entries consumed in global-pool mode are zeroed in `sphs' so the
 *   caller doesn't also free them
 * - optional trailer/FCS configured on the sender are appended before
 *   delivery; stops early when the RX ring runs out of slots
 */
static void
feth_rx_submit(if_fake_ref sif, if_fake_ref dif, kern_packet_t * __counted_by(n_pkts) sphs,
    uint32_t n_pkts)
{
	errno_t err = 0;
	struct kern_channel_ring_stat_increment stats;
	kern_channel_ring_t rx_ring = NULL;
	kern_channel_slot_t rx_slot = NULL, last_rx_slot = NULL;
	kern_packet_t sph = 0, dph = 0;

	memset(&stats, 0, sizeof(stats));

	rx_ring = dif->iff_rx_ring[0];
	if (rx_ring == NULL) {
		return;
	}

	kr_enter(rx_ring, TRUE);
	kern_channel_reclaim(rx_ring);
	rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);

	for (uint32_t i = 0; i < n_pkts && rx_slot != NULL; i++) {
		sph = sphs[i];

		switch (dif->iff_pp_mode) {
		case IFF_PP_MODE_GLOBAL:
			/* shared pool: hand the packet over directly */
			sphs[i] = 0;
			dph = sph;
			feth_update_pkt_tso_metadata_for_rx(dph);
			err = kern_packet_finalize(dph);
			VERIFY(err == 0);
			break;
		case IFF_PP_MODE_PRIVATE:
			err = feth_copy_packet(dif, sph, &dph);
			break;
		case IFF_PP_MODE_PRIVATE_SPLIT:
			err = feth_clone_packet(dif, sph, &dph);
			break;
		default:
			VERIFY(0);
			__builtin_unreachable();
		}
		if (__improbable(err != 0)) {
			/* copy/clone failed; drop this packet and continue */
			continue;
		}

		if (sif->iff_trailer_length != 0) {
			feth_add_packet_trailer(dph, feth_trailer,
			    sif->iff_trailer_length);
		}
		if (sif->iff_fcs != 0) {
			feth_add_packet_fcs(dph);
		}
		feth_packet_set_trace_tag(dph, IFF_TT_INPUT);
		bpf_tap_packet_in(dif->iff_ifp, DLT_EN10MB, dph, NULL, 0);
		stats.kcrsi_slots_transferred++;
		stats.kcrsi_bytes_transferred
		    += kern_packet_get_data_length(dph);

		/* attach the packet to the RX ring */
		err = kern_channel_slot_attach_packet(rx_ring, rx_slot, dph);
		VERIFY(err == 0);
		last_rx_slot = rx_slot;
		rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
	}

	if (last_rx_slot != NULL) {
		/* publish the attached packets and update ring stats */
		kern_channel_advance_slot(rx_ring, last_rx_slot);
		kern_channel_increment_ring_net_stats(rx_ring, dif->iff_ifp,
		    &stats);
	}

	if (rx_ring != NULL) {
		kr_exit(rx_ring);
		kern_channel_notify(rx_ring, 0);
	}
}
1538
/*
 * feth_rx_queue_submit
 * - logical-link variant of feth_rx_submit(): deliver `n_pkts' packets
 *   from `sif' into the default RX queue of the given llink/qset on the
 *   peer `dif'
 * - validates llink/qset indices and silently drops the batch if they
 *   are out of range or the queue isn't set up
 * - packet transfer/copy/clone semantics per pool mode are the same as
 *   in feth_rx_submit(); the last packet is enqueued with a flush flag
 */
static void
feth_rx_queue_submit(if_fake_ref sif, if_fake_ref dif, uint32_t llink_idx,
    uint32_t qset_idx, kern_packet_t * __counted_by(n_pkts) sphs, uint32_t n_pkts)
{
	errno_t err = 0;
	kern_netif_queue_t queue;
	kern_packet_t sph = 0, dph = 0;
	fake_llink *llink;
	fake_qset *qset;

	if (llink_idx >= dif->iff_llink_cnt) {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_INPUT,
		    "invalid llink_idx idx %d (max %d) on peer %s",
		    llink_idx, dif->iff_llink_cnt, dif->iff_name);
		return;
	}
	llink = &dif->iff_llink[llink_idx];
	if (qset_idx >= llink->fl_qset_cnt) {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_INPUT,
		    "invalid qset_idx %d (max %d) on peer %s",
		    qset_idx, llink->fl_qset_cnt, dif->iff_name);
		return;
	}
	qset = &dif->iff_llink[llink_idx].fl_qset[qset_idx];
	queue = qset->fqs_rx_queue[0].fq_queue;
	if (queue == NULL) {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_INPUT,
		    "NULL default queue (llink_idx %d, qset_idx %d) on peer %s",
		    llink_idx, qset_idx, dif->iff_name);
		return;
	}
	for (uint32_t i = 0; i < n_pkts; i++) {
		uint32_t flags;

		sph = sphs[i];

		switch (dif->iff_pp_mode) {
		case IFF_PP_MODE_GLOBAL:
			/* shared pool: hand the packet over directly */
			sphs[i] = 0;
			dph = sph;
			feth_update_pkt_tso_metadata_for_rx(dph);
			break;
		case IFF_PP_MODE_PRIVATE:
			err = feth_copy_packet(dif, sph, &dph);
			break;
		case IFF_PP_MODE_PRIVATE_SPLIT:
			err = feth_clone_packet(dif, sph, &dph);
			break;
		default:
			VERIFY(0);
			__builtin_unreachable();
		}
		if (__improbable(err != 0)) {
			/* copy/clone failed; drop this packet and continue */
			continue;
		}

		if (sif->iff_trailer_length != 0) {
			feth_add_packet_trailer(dph, feth_trailer,
			    sif->iff_trailer_length);
		}
		if (sif->iff_fcs != 0) {
			feth_add_packet_fcs(dph);
		}
		feth_packet_set_trace_tag(dph, IFF_TT_INPUT);
		bpf_tap_packet_in(dif->iff_ifp, DLT_EN10MB, dph, NULL, 0);

		/* flush the queue on the final packet of the batch */
		flags = (i == n_pkts - 1) ?
		    KERN_NETIF_QUEUE_RX_ENQUEUE_FLAG_FLUSH : 0;
		kern_netif_queue_rx_enqueue(queue, dph, 1, flags);
	}
}
1610
1611 static void
feth_tx_complete(if_fake_ref fakeif,kern_packet_t * __counted_by (nphs)phs,uint32_t nphs)1612 feth_tx_complete(if_fake_ref fakeif, kern_packet_t * __counted_by(nphs) phs, uint32_t nphs)
1613 {
1614 for (uint32_t i = 0; i < nphs; i++) {
1615 kern_packet_t ph = phs[i];
1616 if (ph == 0) {
1617 continue;
1618 }
1619 int err = kern_packet_set_tx_completion_status(ph, 0);
1620 VERIFY(err == 0);
1621 kern_packet_tx_completion(ph, fakeif->iff_ifp);
1622 kern_pbufpool_free(fakeif->iff_tx_pp, phs[i]);
1623 phs[i] = 0;
1624 }
1625 }
1626
1627 #define NSEC_PER_USEC 1000ull
1628 /*
1629 * Calculate the time delta that passed from `since' to `until'.
1630 * If `until' happens before `since', returns negative value.
1631 */
1632 static bool
feth_packet_has_expired(if_fake_ref __unused fakeif,kern_packet_t ph,uint64_t * out_deadline)1633 feth_packet_has_expired(if_fake_ref __unused fakeif, kern_packet_t ph,
1634 uint64_t *out_deadline)
1635 {
1636 uint64_t now;
1637 uint64_t packet_expire_time_mach;
1638 int64_t time_until_expiration;
1639 errno_t err;
1640 bool expired = false;
1641
1642 static mach_timebase_info_data_t clock_timebase = {0, 0};
1643
1644 if (clock_timebase.denom == 0) {
1645 clock_timebase_info(&clock_timebase);
1646 VERIFY(clock_timebase.denom != 0);
1647 }
1648
1649 err = kern_packet_get_expire_time(ph, &packet_expire_time_mach);
1650 if (err) {
1651 goto out;
1652 }
1653
1654 now = mach_absolute_time();
1655 time_until_expiration = packet_expire_time_mach - now;
1656 if (time_until_expiration < 0) {
1657 /* The packet had expired */
1658 expired = true;
1659 goto out;
1660 }
1661
1662 /* Convert the time_delta from mach ticks to nanoseconds */
1663 time_until_expiration *= clock_timebase.numer;
1664 time_until_expiration /= clock_timebase.denom;
1665 /* convert from nanoseconds to microseconds */
1666 time_until_expiration /= 1000ull;
1667
1668 if (if_fake_expiration_threshold_us < time_until_expiration) {
1669 /* packet has some life ahead of it */
1670 FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
1671 "Packet has %llu usec until expiration",
1672 time_until_expiration);
1673 goto out;
1674 }
1675
1676 out:
1677 if (expired && out_deadline) {
1678 *out_deadline = packet_expire_time_mach;
1679 }
1680
1681 return expired;
1682 }
1683
1684 static errno_t
feth_get_packet_notification_details(if_fake_ref fakeif,kern_packet_t ph,packet_id_t * pkt_id,uint32_t * nx_port_id)1685 feth_get_packet_notification_details(if_fake_ref fakeif, kern_packet_t ph,
1686 packet_id_t *pkt_id, uint32_t *nx_port_id)
1687 {
1688 errno_t err = 0;
1689
1690 err = kern_packet_get_packetid(ph, pkt_id);
1691 if (err != 0) {
1692 FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
1693 "%s err=%d getting packetid", fakeif->iff_name, err);
1694 return err;
1695 }
1696
1697 err = kern_packet_get_tx_nexus_port_id(ph, nx_port_id);
1698 if (err != 0) {
1699 FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
1700 "%s err=%d getting nx_port_id", fakeif->iff_name, err);
1701 return err;
1702 }
1703
1704 return 0;
1705 }
1706
1707 static packet_expiry_action_t
feth_get_effective_expn_action(if_fake_ref fakeif,kern_packet_t ph)1708 feth_get_effective_expn_action(if_fake_ref fakeif, kern_packet_t ph)
1709 {
1710 errno_t err;
1711 packet_expiry_action_t expiry_action;
1712
1713 switch (fakeif->iff_tx_exp_policy) {
1714 case IFF_TX_EXP_POLICY_DISABLED:
1715 expiry_action = PACKET_EXPIRY_ACTION_NONE;
1716 break;
1717 case IFF_TX_EXP_POLICY_NOTIFY_ONLY:
1718 expiry_action = PACKET_EXPIRY_ACTION_NOTIFY;
1719 break;
1720 case IFF_TX_EXP_POLICY_DROP_AND_NOTIFY:
1721 expiry_action = PACKET_EXPIRY_ACTION_DROP;
1722 break;
1723 case IFF_TX_EXP_POLICY_METADATA:
1724 err = kern_packet_get_expiry_action(ph, &expiry_action);
1725 if (err != 0) {
1726 if (err != ENOENT) {
1727 FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
1728 "Error %d when getting expiry action",
1729 err);
1730 }
1731 expiry_action = PACKET_EXPIRY_ACTION_NONE;
1732 }
1733 break;
1734 default:
1735 FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
1736 "Unrecognized value %d for \"net.link.fake.tx_exp_policy\"",
1737 fakeif->iff_tx_exp_policy);
1738 expiry_action = PACKET_EXPIRY_ACTION_NONE;
1739 }
1740
1741 return expiry_action;
1742 }
1743
/* returns true if the packet is selected for expiration and should be dropped */
/*
 * feth_tx_expired_error
 * - check the packet against the interface's TX expiration policy; when
 *   the policy calls for notification, emit a transmit-expired channel
 *   event to the packet's originating nexus port
 * - returns true iff the effective action is DROP, i.e. the caller
 *   should discard the packet
 */
static bool
feth_tx_expired_error(if_fake_ref fakeif, kern_packet_t ph)
{
	int err = 0;
	uint32_t nx_port_id = 0;
	os_channel_event_packet_transmit_expired_t expn = {0};
	packet_expiry_action_t expiry_action = PACKET_EXPIRY_ACTION_NONE;

	FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC, "%s", fakeif->iff_name);

	if (feth_packet_has_expired(fakeif, ph, &expn.packet_tx_expiration_deadline)) {
		expiry_action = feth_get_effective_expn_action(fakeif, ph);
	}

	bool drop_packet = (expiry_action == PACKET_EXPIRY_ACTION_DROP);
	if (expiry_action != PACKET_EXPIRY_ACTION_NONE) {
		/* set the expiration status code */
		expn.packet_tx_expiration_status = drop_packet ?
		    CHANNEL_EVENT_PKT_TRANSMIT_EXPIRED_ERR_EXPIRED_DROPPED :
		    CHANNEL_EVENT_PKT_TRANSMIT_EXPIRED_ERR_EXPIRED_NOT_DROPPED;

		/* Mark the expiration timestamp */
		expn.packet_tx_expiration_timestamp = mach_absolute_time();

		err = feth_get_packet_notification_details(fakeif, ph,
		    &expn.packet_id, &nx_port_id);

		if (err == 0) {
			err = kern_channel_event_transmit_expired(
				fakeif->iff_ifp, &expn, nx_port_id);
			FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
			    "%s sent expiry notification on nexus port "
			    "%u notif code %u",
			    fakeif->iff_name, nx_port_id,
			    expn.packet_tx_expiration_status);
		}
		if (err != 0) {
			FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
			    "%s err=%d, nx_port_id: 0x%x",
			    fakeif->iff_name, err, nx_port_id);
		}
	}

	return drop_packet;
}
1790
1791 /* returns true if the packet is selected for TX error & dropped */
/*
 * feth_tx_complete_error
 * - simulate a TX completion failure when the interface's drop-rate
 *   counter hits the configured rate (iff_tx_pkts_count ==
 *   iff_tx_drop_rate); disabled when iff_tx_drop_rate is 0
 * - SYNC mode reports the failure through the packet's completion
 *   status; otherwise an asynchronous transmit-status channel event is
 *   sent to the packet's nexus port
 * - returns true when the packet was selected for the simulated error
 */
static bool
feth_tx_complete_error(if_fake_ref fakeif, kern_packet_t ph)
{
	int err;

	if (fakeif->iff_tx_drop_rate == 0 ||
	    fakeif->iff_tx_pkts_count != fakeif->iff_tx_drop_rate) {
		return false;
	}
	/* simulate TX completion error on the packet */
	if (fakeif->iff_tx_completion_mode == IFF_TX_COMPL_MODE_SYNC) {
		err = kern_packet_set_tx_completion_status(ph,
		    CHANNEL_EVENT_PKT_TRANSMIT_STATUS_ERR_RETRY_FAILED);
		VERIFY(err == 0);
		kern_packet_tx_completion(ph, fakeif->iff_ifp);
	} else {
		uint32_t nx_port_id = 0;
		os_channel_event_packet_transmit_status_t pkt_tx_status = {0};

		pkt_tx_status.packet_status =
		    CHANNEL_EVENT_PKT_TRANSMIT_STATUS_ERR_RETRY_FAILED;
		err = feth_get_packet_notification_details(fakeif, ph,
		    &pkt_tx_status.packet_id, &nx_port_id);
		if (err == 0) {
			err = kern_channel_event_transmit_status(
				fakeif->iff_ifp, &pkt_tx_status, nx_port_id);
		}
		if (err != 0) {
			FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
			    "%s err=%d, nx_port_id: 0x%x",
			    fakeif->iff_name, err, nx_port_id);
		}
	}

	return true;
}
1828
/*
 * feth_if_adv
 * - thread-call handler that periodically generates a synthetic
 *   interface-advisory report (pretending to be a 1Gbps WiFi link) and
 *   delivers it through the registered advisory callback
 * - re-arms itself every iff_adv_interval milliseconds while the
 *   channel stays connected and the interface isn't detaching
 * - NOTE(review): iff_channel_connected and iff_intf_adv_enabled are
 *   re-read after dropping the feth lock; presumably benign for a test
 *   interface — confirm against the detach path
 */
static void
feth_if_adv(thread_call_param_t arg0, thread_call_param_t arg1)
{
#pragma unused(arg1)
	errno_t error;
	if_fake_ref fakeif = (if_fake_ref)arg0;
	struct ifnet_interface_advisory if_adv;
	struct ifnet_stats_param if_stat;

	feth_lock();
	if (feth_is_detaching(fakeif) || !fakeif->iff_channel_connected) {
		/* interface going away or channel closed; don't re-arm */
		feth_unlock();
		return;
	}
	feth_unlock();

	if (!fakeif->iff_intf_adv_enabled) {
		goto done;
	}

	error = ifnet_stat(fakeif->iff_ifp, &if_stat);
	if (error != 0) {
		FAKE_LOG(LOG_NOTICE, 0, "%s: ifnet_stat() failed %d",
		    fakeif->iff_name, error);
		goto done;
	}
	/* fabricate a plausible-looking advisory report */
	if_adv.header.version = IF_INTERFACE_ADVISORY_VERSION_CURRENT;
	if_adv.header.direction = IF_INTERFACE_ADVISORY_DIRECTION_TX;
	if_adv.header.interface_type =
	    IF_INTERFACE_ADVISORY_INTERFACE_TYPE_WIFI;
	if_adv.capacity.timestamp = mach_absolute_time();
	if_adv.capacity.rate_trend_suggestion =
	    IF_INTERFACE_ADVISORY_RATE_SUGGESTION_RAMP_NEUTRAL;
	if_adv.capacity.max_bandwidth = 1000 * 1000 * 1000; /* 1Gbps */
	if_adv.capacity.total_byte_count = if_stat.packets_out;
	if_adv.capacity.average_throughput = 1000 * 1000 * 1000; /* 1Gbps */
	if_adv.capacity.flushable_queue_size = UINT32_MAX;
	if_adv.capacity.non_flushable_queue_size = UINT32_MAX;
	if_adv.capacity.average_delay = 1; /* ms */

	error = fakeif->iff_intf_adv_notify(fakeif->iff_intf_adv_kern_ctx,
	    &if_adv);
	if (error != 0) {
		FAKE_LOG(LOG_NOTICE, 0,
		    "%s: interface advisory report failed %d",
		    fakeif->iff_name, error);
	}

done:
	/* re-arm the periodic thread call while still connected */
	feth_lock();
	if (!feth_is_detaching(fakeif) && fakeif->iff_channel_connected) {
		uint64_t deadline;
		clock_interval_to_deadline(fakeif->iff_adv_interval,
		    NSEC_PER_MSEC, &deadline);
		thread_call_enter_delayed(fakeif->iff_if_adv_tcall, deadline);
	}
	feth_unlock();
}
1887
/*
 * feth_if_adv_tcall_create
 * - allocate and arm the periodic interface-advisory thread call
 *   (feth_if_adv); takes a feth reference that is owned by the thread
 *   call for its lifetime
 * - returns ENXIO when the thread call cannot be allocated, else 0
 */
static int
feth_if_adv_tcall_create(if_fake_ref fakeif)
{
	uint64_t deadline;

	feth_lock();
	ASSERT(fakeif->iff_if_adv_tcall == NULL);
	ASSERT(fakeif->iff_adv_interval > 0);
	ASSERT(fakeif->iff_channel_connected);
	fakeif->iff_if_adv_tcall =
	    thread_call_allocate_with_options(feth_if_adv,
	    (thread_call_param_t)fakeif, THREAD_CALL_PRIORITY_KERNEL,
	    THREAD_CALL_OPTIONS_ONCE);
	if (fakeif->iff_if_adv_tcall == NULL) {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
		    "%s if_adv tcall alloc failed",
		    fakeif->iff_name);
		feth_unlock();
		return ENXIO;
	}
	/* retain for the interface advisory thread call */
	feth_retain(fakeif);
	clock_interval_to_deadline(fakeif->iff_adv_interval,
	    NSEC_PER_MSEC, &deadline);
	thread_call_enter_delayed(fakeif->iff_if_adv_tcall, deadline);
	feth_unlock();
	return 0;
}
1916
1917 /**
1918 ** nexus netif domain provider
1919 **/
/* domain provider init callback; feth needs no per-provider setup */
static errno_t
feth_nxdp_init(kern_nexus_domain_provider_t domprov)
{
#pragma unused(domprov)
	return 0;
}
1926
/* domain provider teardown callback; nothing to clean up */
static void
feth_nxdp_fini(kern_nexus_domain_provider_t domprov)
{
#pragma unused(domprov)
}
1932
1933 static uuid_t feth_nx_dom_prov;
1934
1935 static errno_t
feth_register_nexus_domain_provider(void)1936 feth_register_nexus_domain_provider(void)
1937 {
1938 const struct kern_nexus_domain_provider_init dp_init = {
1939 .nxdpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
1940 .nxdpi_flags = 0,
1941 .nxdpi_init = feth_nxdp_init,
1942 .nxdpi_fini = feth_nxdp_fini
1943 };
1944 errno_t err = 0;
1945
1946 nexus_domain_provider_name_t feth_provider_name = "com.apple.feth";
1947
1948 /* feth_nxdp_init() is called before this function returns */
1949 err = kern_nexus_register_domain_provider(NEXUS_TYPE_NET_IF,
1950 feth_provider_name,
1951 &dp_init, sizeof(dp_init),
1952 &feth_nx_dom_prov);
1953 if (err != 0) {
1954 FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
1955 "failed to register domain provider");
1956 return err;
1957 }
1958 return 0;
1959 }
1960
1961 /**
1962 ** netif nexus routines
1963 **/
/* recover the feth state stored as the nexus context; never NULL */
static if_fake_ref
feth_nexus_context(kern_nexus_t nexus)
{
	if_fake_ref fakeif;

	fakeif = (if_fake_ref)kern_nexus_get_context(nexus);
	assert(fakeif != NULL);
	return fakeif;
}
1973
1974 static uint8_t
feth_find_tx_ring_by_svc(kern_packet_svc_class_t svc_class)1975 feth_find_tx_ring_by_svc(kern_packet_svc_class_t svc_class)
1976 {
1977 switch (svc_class) {
1978 case KPKT_SC_VO:
1979 return 0;
1980 case KPKT_SC_VI:
1981 return 1;
1982 case KPKT_SC_BE:
1983 return 2;
1984 case KPKT_SC_BK:
1985 return 3;
1986 default:
1987 VERIFY(0);
1988 return 0;
1989 }
1990 }
1991
/*
 * Netif ring init callback: record the newly created ring in the
 * fakeif's TX/RX ring table so the data path can find it later, and
 * cache a pointer to the netif statistics.
 */
static errno_t
feth_nx_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_t channel, kern_channel_ring_t ring, boolean_t is_tx_ring,
    void **ring_ctx)
{
	if_fake_ref fakeif;
	int err;
#pragma unused(nxprov, channel, ring_ctx)
	feth_lock();
	fakeif = feth_nexus_context(nexus);
	if (feth_is_detaching(fakeif)) {
		/* interface is going away; nothing to record */
		feth_unlock();
		return 0;
	}
	if (is_tx_ring) {
		if (feth_in_wmm_mode(fakeif)) {
			/* one TX ring per WMM access category */
			kern_packet_svc_class_t svc_class;
			uint8_t ring_idx;

			err = kern_channel_get_service_class(ring, &svc_class);
			VERIFY(err == 0);
			ring_idx = feth_find_tx_ring_by_svc(svc_class);
			VERIFY(ring_idx < IFF_NUM_TX_RINGS_WMM_MODE);
			VERIFY(fakeif->iff_tx_ring[ring_idx] == NULL);
			fakeif->iff_tx_ring[ring_idx] = ring;
		} else {
			/* single TX ring in non-WMM mode */
			VERIFY(fakeif->iff_tx_ring[0] == NULL);
			fakeif->iff_tx_ring[0] = ring;
		}
	} else {
		VERIFY(fakeif->iff_rx_ring[0] == NULL);
		fakeif->iff_rx_ring[0] = ring;
	}
	/* cache the netif stats pointer for the data path */
	fakeif->iff_nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
	feth_unlock();
	FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE, "%s: %s ring init",
	    fakeif->iff_name, is_tx_ring ? "TX" : "RX");
	return 0;
}
2031
/*
 * Netif ring fini callback: remove the ring from the fakeif's ring
 * table.  When the last TX ring goes away, also tear down the async
 * doorbell thread call, waiting for any in-flight invocation to
 * complete before freeing it.
 */
static void
feth_nx_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t ring)
{
#pragma unused(nxprov, ring)
	if_fake_ref fakeif;
	thread_call_t __single tcall = NULL;

	feth_lock();
	fakeif = feth_nexus_context(nexus);
	if (fakeif->iff_rx_ring[0] == ring) {
		fakeif->iff_rx_ring[0] = NULL;
		FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
		    "%s: RX ring fini", fakeif->iff_name);
	} else if (feth_in_wmm_mode(fakeif)) {
		int i;
		/* clear the slot holding this TX ring */
		for (i = 0; i < IFF_MAX_TX_RINGS; i++) {
			if (fakeif->iff_tx_ring[i] == ring) {
				fakeif->iff_tx_ring[i] = NULL;
				break;
			}
		}
		/* check whether any TX ring is still present */
		for (i = 0; i < IFF_MAX_TX_RINGS; i++) {
			if (fakeif->iff_tx_ring[i] != NULL) {
				break;
			}
		}
		if (i == IFF_MAX_TX_RINGS) {
			/* all TX rings gone: claim the doorbell tcall */
			tcall = fakeif->iff_doorbell_tcall;
			fakeif->iff_doorbell_tcall = NULL;
		}
		FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
		    "%s: TX ring fini", fakeif->iff_name);
	} else if (fakeif->iff_tx_ring[0] == ring) {
		/* single-TX-ring mode: claim the doorbell tcall too */
		tcall = fakeif->iff_doorbell_tcall;
		fakeif->iff_doorbell_tcall = NULL;
		fakeif->iff_tx_ring[0] = NULL;
	}
	fakeif->iff_nifs = NULL;
	feth_unlock();
	if (tcall != NULL) {
		boolean_t success;

		success = thread_call_cancel_wait(tcall);
		FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
		    "%s: thread_call_cancel %s", fakeif->iff_name,
		    success ? "SUCCESS" : "FAILURE");
		if (!success) {
			/*
			 * The cancel did not catch a pending invocation; if
			 * the doorbell callout is currently running, sleep
			 * until it signals completion (feth_async_doorbell()
			 * does the wakeup when iff_waiting_for_tcall is set).
			 */
			feth_lock();
			if (fakeif->iff_doorbell_tcall_active) {
				fakeif->iff_waiting_for_tcall = TRUE;
				FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
				    "%s: *waiting for threadcall",
				    fakeif->iff_name);
				do {
					msleep(fakeif, &feth_lck_mtx,
					    PZERO, "feth threadcall", 0);
				} while (fakeif->iff_doorbell_tcall_active);
				FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
				    "%s: ^threadcall done",
				    fakeif->iff_name);
				fakeif->iff_waiting_for_tcall = FALSE;
			}
			feth_unlock();
		}
		success = thread_call_free(tcall);
		FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
		    "%s: thread_call_free %s",
		    fakeif->iff_name,
		    success ? "SUCCESS" : "FAILURE");
		/* drop the reference held for the doorbell thread call */
		feth_release(fakeif);
		VERIFY(success == TRUE);
	}
}
2106
/*
 * Pre-connect callback.  No per-channel context is needed, so simply
 * accept the connection.
 */
static errno_t
feth_nx_pre_connect(kern_nexus_provider_t nxprov,
    proc_t proc, kern_nexus_t nexus, nexus_port_t port, kern_channel_t channel,
    void **channel_context)
{
#pragma unused(nxprov, proc, nexus, port, channel, channel_context)
	return 0;
}
2115
/*
 * Channel connected callback: take a reference for the lifetime of the
 * connection, mark the channel up, and — if interface advisories are
 * configured — start the periodic advisory thread call.
 */
static errno_t
feth_nx_connected(kern_nexus_provider_t nxprov,
    kern_nexus_t nexus, kern_channel_t channel)
{
#pragma unused(nxprov, channel)
	int err;
	if_fake_ref fakeif;

	fakeif = feth_nexus_context(nexus);
	feth_lock();
	if (feth_is_detaching(fakeif)) {
		feth_unlock();
		return EBUSY;
	}
	/* held while connected; released in feth_nx_pre_disconnect() */
	feth_retain(fakeif);
	fakeif->iff_channel_connected = TRUE;
	feth_unlock();
	if (feth_has_intf_advisory_configured(fakeif)) {
		err = feth_if_adv_tcall_create(fakeif);
		if (err != 0) {
			/*
			 * NOTE(review): on this failure path the connect
			 * retain and iff_channel_connected remain set;
			 * presumably the nexus invokes pre-disconnect after
			 * a failed connect to undo them — confirm.
			 */
			return err;
		}
	}
	FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE, "%s: connected channel %p",
	    fakeif->iff_name, channel);
	return 0;
}
2143
/*
 * Pre-disconnect callback: quiesce the interface, tear down the
 * interface-advisory thread call, and drop the reference taken at
 * connect time.
 */
static void
feth_nx_pre_disconnect(kern_nexus_provider_t nxprov,
    kern_nexus_t nexus, kern_channel_t channel)
{
#pragma unused(nxprov, channel)
	if_fake_ref fakeif;
	thread_call_t __single tcall;
	boolean_t connected;

	fakeif = feth_nexus_context(nexus);
	FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
	    "%s: pre-disconnect channel %p",
	    fakeif->iff_name, channel);
	/* Quiesce the interface and flush any pending outbound packets. */
	if_down(fakeif->iff_ifp);
	feth_lock();
	connected = fakeif->iff_channel_connected;
	fakeif->iff_channel_connected = FALSE;
	/* claim the advisory tcall so nobody else can enter it */
	tcall = fakeif->iff_if_adv_tcall;
	fakeif->iff_if_adv_tcall = NULL;
	feth_unlock();
	if (tcall != NULL) {
		(void) thread_call_cancel_wait(tcall);
		if (!thread_call_free(tcall)) {
			/* retry once; the second cancel/free must succeed */
			boolean_t freed;
			(void) thread_call_cancel_wait(tcall);
			freed = thread_call_free(tcall);
			VERIFY(freed);
		}
		/* release for the interface advisory thread call */
		feth_release(fakeif);
	}
	if (connected) {
		/* drop the reference taken in feth_nx_connected() */
		feth_release(fakeif);
	}
}
2180
2181 static void
feth_nx_disconnected(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel)2182 feth_nx_disconnected(kern_nexus_provider_t nxprov,
2183 kern_nexus_t nexus, kern_channel_t channel)
2184 {
2185 #pragma unused(nxprov, channel)
2186 if_fake_ref fakeif;
2187
2188 fakeif = feth_nexus_context(nexus);
2189 FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE, "%s: disconnected channel %p",
2190 fakeif->iff_name, channel);
2191 }
2192
/*
 * Slot init callback.  feth attaches no per-slot state or properties,
 * so this is a no-op that reports success.
 */
static errno_t
feth_nx_slot_init(kern_nexus_provider_t nxprov,
    kern_nexus_t nexus, kern_channel_ring_t ring, kern_channel_slot_t slot,
    uint32_t slot_index, struct kern_slot_prop **slot_prop_addr,
    void **slot_context)
{
#pragma unused(nxprov, nexus, ring, slot, slot_index, slot_prop_addr, slot_context)
	return 0;
}
2202
/*
 * Slot fini callback.  Nothing was set up in feth_nx_slot_init(), so
 * there is nothing to tear down.
 */
static void
feth_nx_slot_fini(kern_nexus_provider_t nxprov,
    kern_nexus_t nexus, kern_channel_ring_t ring, kern_channel_slot_t slot,
    uint32_t slot_index)
{
#pragma unused(nxprov, nexus, ring, slot, slot_index)
}
2210
/*
 * TX sync callback: detach every packet from the TX ring, tap it for
 * bpf, apply the configured drop/expiry policy, then hand batches of
 * up to IFF_MAX_BATCH_SIZE packets to the peer's RX path and complete
 * them back to the TX pool.
 */
static errno_t
feth_nx_sync_tx(kern_nexus_provider_t nxprov,
    kern_nexus_t nexus, kern_channel_ring_t tx_ring, uint32_t flags)
{
#pragma unused(nxprov)
	if_fake_ref fakeif;
	ifnet_t ifp;
	kern_channel_slot_t last_tx_slot = NULL;
	ifnet_t peer_ifp;
	if_fake_ref peer_fakeif = NULL;
	struct kern_channel_ring_stat_increment stats;
	kern_channel_slot_t tx_slot;
	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
	kern_packet_t pkts[IFF_MAX_BATCH_SIZE];
	uint32_t n_pkts = 0;

	memset(&stats, 0, sizeof(stats));

	STATS_INC(nifs, NETIF_STATS_TX_SYNC);
	fakeif = feth_nexus_context(nexus);
	FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
	    "%s ring %d flags 0x%x", fakeif->iff_name,
	    tx_ring->ckr_ring_id, flags);
	(void)flags;
	feth_lock();
	if (feth_is_detaching(fakeif) || !fakeif->iff_channel_connected) {
		feth_unlock();
		return 0;
	}
	ifp = fakeif->iff_ifp;
	peer_ifp = fakeif->iff_peer;
	/* bail out unless we have a usable, non-detaching peer */
	if (peer_ifp != NULL) {
		peer_fakeif = ifnet_get_if_fake(peer_ifp);
		if (peer_fakeif != NULL) {
			if (feth_is_detaching(peer_fakeif)) {
				FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
				    "%s peer fakeif %s is detaching",
				    fakeif->iff_name, peer_fakeif->iff_name);
				goto done;
			}
			if (!peer_fakeif->iff_channel_connected) {
				/*
				 * With TX expiration disabled there is no
				 * point draining the ring; otherwise keep
				 * going so the expiry policy can run.
				 */
				if (fakeif->iff_tx_exp_policy ==
				    IFF_TX_EXP_POLICY_DISABLED) {
					FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
					    "%s peer fakeif %s channel not connected, expn: %d",
					    fakeif->iff_name, peer_fakeif->iff_name,
					    fakeif->iff_tx_exp_policy);
					goto done;
				}
			}
		} else {
			FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
			    "%s no peer fakeif (peer %p)",
			    fakeif->iff_name, peer_ifp);
			goto done;
		}
	} else {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
		    "%s no peer", fakeif->iff_name);
		goto done;
	}
	tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
	while (tx_slot != NULL) {
		uint16_t off;
		kern_packet_t sph;

		/* detach the packet from the TX ring */
		sph = kern_channel_slot_get_packet(tx_ring, tx_slot);
		VERIFY(sph != 0);
		kern_channel_slot_detach_packet(tx_ring, tx_slot, sph);

		/* bpf tap output */
		off = kern_packet_get_headroom(sph);
		VERIFY(off >= fakeif->iff_tx_headroom);
		kern_packet_set_link_header_length(sph, ETHER_HDR_LEN);
		feth_packet_set_trace_tag(sph, IFF_TT_OUTPUT);
		bpf_tap_packet_out(ifp, DLT_EN10MB, sph, NULL, 0);

		/* drop packets, if requested */
		fakeif->iff_tx_pkts_count++;
		if (feth_tx_expired_error(fakeif, sph) ||
		    feth_tx_complete_error(fakeif, sph) ||
		    !peer_fakeif->iff_channel_connected) {
			/* dropped: return the packet to the pool */
			fakeif->iff_tx_pkts_count = 0;
			kern_pbufpool_free(fakeif->iff_tx_pp, sph);
			STATS_INC(nifs, NETIF_STATS_DROP);
			goto next_tx_slot;
		}

		ASSERT(sph != 0);
		STATS_INC(nifs, NETIF_STATS_TX_COPY_DIRECT);
		STATS_INC(nifs, NETIF_STATS_TX_PACKETS);

		stats.kcrsi_slots_transferred++;
		stats.kcrsi_bytes_transferred
		        += kern_packet_get_data_length(sph);

		/* prepare batch for receiver */
		pkts[n_pkts++] = sph;
		if (n_pkts == IFF_MAX_BATCH_SIZE) {
			feth_rx_submit(fakeif, peer_fakeif, pkts, n_pkts);
			feth_tx_complete(fakeif, pkts, n_pkts);
			n_pkts = 0;
		}

next_tx_slot:
		last_tx_slot = tx_slot;
		tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
	}

	/* catch last batch for receiver */
	if (n_pkts != 0) {
		feth_rx_submit(fakeif, peer_fakeif, pkts, n_pkts);
		feth_tx_complete(fakeif, pkts, n_pkts);
		n_pkts = 0;
	}

	/* advance the ring head and account the transfer */
	if (last_tx_slot != NULL) {
		kern_channel_advance_slot(tx_ring, last_tx_slot);
		kern_channel_increment_ring_net_stats(tx_ring, ifp, &stats);
	}
done:
	feth_unlock();
	return 0;
}
2336
2337 static errno_t
feth_nx_sync_rx(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring,uint32_t flags)2338 feth_nx_sync_rx(kern_nexus_provider_t nxprov,
2339 kern_nexus_t nexus, kern_channel_ring_t ring, uint32_t flags)
2340 {
2341 #pragma unused(nxprov, ring, flags)
2342 if_fake_ref fakeif;
2343 struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
2344
2345 STATS_INC(nifs, NETIF_STATS_RX_SYNC);
2346 fakeif = feth_nexus_context(nexus);
2347 FAKE_LOG(LOG_DEBUG, FE_DBGF_INPUT, "%s", fakeif->iff_name);
2348 return 0;
2349 }
2350
2351 static errno_t
feth_nx_tx_dequeue_driver_managed(if_fake_ref fakeif,boolean_t doorbell_ctxt)2352 feth_nx_tx_dequeue_driver_managed(if_fake_ref fakeif, boolean_t doorbell_ctxt)
2353 {
2354 int i;
2355 errno_t error = 0;
2356 boolean_t more;
2357
2358 for (i = 0; i < IFF_NUM_TX_RINGS_WMM_MODE; i++) {
2359 kern_channel_ring_t ring = fakeif->iff_tx_ring[i];
2360 if (ring != NULL) {
2361 error = kern_channel_tx_refill(ring, UINT32_MAX,
2362 UINT32_MAX, doorbell_ctxt, &more);
2363 }
2364 if (error != 0) {
2365 FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
2366 "%s: TX refill ring %d (%s) %d",
2367 fakeif->iff_name, ring->ckr_ring_id,
2368 doorbell_ctxt ? "sync" : "async", error);
2369 if (!((error == EAGAIN) || (error == EBUSY))) {
2370 break;
2371 }
2372 } else {
2373 FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
2374 "%s: TX refilled ring %d (%s)",
2375 fakeif->iff_name, ring->ckr_ring_id,
2376 doorbell_ctxt ? "sync" : "async");
2377 }
2378 }
2379 return error;
2380 }
2381
/*
 * Async doorbell thread call: refill the TX ring(s) outside of the
 * doorbell context.  On exit, clears the active flag and wakes any
 * thread sleeping in feth_nx_ring_fini() waiting for this callout.
 */
static void
feth_async_doorbell(thread_call_param_t arg0, thread_call_param_t arg1)
{
#pragma unused(arg1)
	errno_t error;
	if_fake_ref fakeif = (if_fake_ref)arg0;
	kern_channel_ring_t ring;
	boolean_t more;

	feth_lock();
	ring = fakeif->iff_tx_ring[0];
	if (feth_is_detaching(fakeif) ||
	    !fakeif->iff_channel_connected ||
	    ring == NULL) {
		/* note: 'done' is reached with the feth lock still held */
		goto done;
	}
	fakeif->iff_doorbell_tcall_active = TRUE;
	feth_unlock();
	/* refill without holding the lock */
	if (feth_in_wmm_mode(fakeif)) {
		error = feth_nx_tx_dequeue_driver_managed(fakeif, FALSE);
	} else {
		error = kern_channel_tx_refill(ring, UINT32_MAX,
		    UINT32_MAX, FALSE, &more);
	}
	if (error != 0) {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT, "%s: TX refill failed %d",
		    fakeif->iff_name, error);
	} else {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT, "%s: TX refilled",
		    fakeif->iff_name);
	}

	feth_lock();
done:
	fakeif->iff_doorbell_tcall_active = FALSE;
	if (fakeif->iff_waiting_for_tcall) {
		/* feth_nx_ring_fini() is msleep()ing on fakeif */
		FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
		    "%s: threadcall waking up waiter", fakeif->iff_name);
		wakeup((caddr_t)fakeif);
	}
	feth_unlock();
}
2424
/*
 * Schedule the async doorbell thread call, allocating it lazily on
 * first use.  A fakeif reference is held for the thread call; it is
 * dropped when the call is freed in feth_nx_ring_fini().
 */
static void
feth_schedule_async_doorbell(if_fake_ref fakeif)
{
	thread_call_t __single tcall;

	feth_lock();
	if (feth_is_detaching(fakeif) || !fakeif->iff_channel_connected) {
		feth_unlock();
		return;
	}
	tcall = fakeif->iff_doorbell_tcall;
	if (tcall != NULL) {
		thread_call_enter(tcall);
	} else {
		tcall = thread_call_allocate_with_options(feth_async_doorbell,
		    (thread_call_param_t)fakeif,
		    THREAD_CALL_PRIORITY_KERNEL,
		    THREAD_CALL_OPTIONS_ONCE);
		if (tcall == NULL) {
			/* best-effort: the doorbell is simply not serviced */
			FAKE_LOG(LOG_NOTICE, FE_DBGF_OUTPUT,
			    "%s tcall alloc failed", fakeif->iff_name);
		} else {
			fakeif->iff_doorbell_tcall = tcall;
			/* retained for the thread call */
			feth_retain(fakeif);
			thread_call_enter(tcall);
		}
	}
	feth_unlock();
}
2454
2455 static errno_t
feth_nx_tx_doorbell(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring,uint32_t flags)2456 feth_nx_tx_doorbell(kern_nexus_provider_t nxprov,
2457 kern_nexus_t nexus, kern_channel_ring_t ring, uint32_t flags)
2458 {
2459 #pragma unused(nxprov, ring, flags)
2460 errno_t error;
2461 if_fake_ref fakeif;
2462
2463 fakeif = feth_nexus_context(nexus);
2464 FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT, "%s", fakeif->iff_name);
2465
2466 if ((flags & KERN_NEXUS_TXDOORBELLF_ASYNC_REFILL) == 0) {
2467 boolean_t more;
2468 /* synchronous tx refill */
2469 if (feth_in_wmm_mode(fakeif)) {
2470 error = feth_nx_tx_dequeue_driver_managed(fakeif, TRUE);
2471 } else {
2472 error = kern_channel_tx_refill(ring, UINT32_MAX,
2473 UINT32_MAX, TRUE, &more);
2474 }
2475 if (error != 0) {
2476 FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
2477 "%s: TX refill (sync) %d", fakeif->iff_name, error);
2478 } else {
2479 FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
2480 "%s: TX refilled (sync)", fakeif->iff_name);
2481 }
2482 } else {
2483 FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
2484 "%s: schedule async refill", fakeif->iff_name);
2485 feth_schedule_async_doorbell(fakeif);
2486 }
2487 return 0;
2488 }
2489
2490 static errno_t
feth_netif_prepare(kern_nexus_t nexus,ifnet_t ifp)2491 feth_netif_prepare(kern_nexus_t nexus, ifnet_t ifp)
2492 {
2493 if_fake_ref fakeif;
2494
2495 fakeif = (if_fake_ref)kern_nexus_get_context(nexus);
2496 feth_ifnet_set_attrs(fakeif, ifp);
2497 return 0;
2498 }
2499
2500 static errno_t
feth_nx_intf_adv_config(void * prov_ctx,bool enable)2501 feth_nx_intf_adv_config(void *prov_ctx, bool enable)
2502 {
2503 if_fake_ref fakeif = prov_ctx;
2504
2505 feth_lock();
2506 fakeif->iff_intf_adv_enabled = enable;
2507 feth_unlock();
2508 FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
2509 "%s enable %d", fakeif->iff_name, enable);
2510 return 0;
2511 }
2512
/*
 * Fill in the interface-advisory capability: validate the request,
 * record the caller's notify callback and context, and hand back our
 * provider context and config callback.
 */
static errno_t
fill_capab_interface_advisory(if_fake_ref fakeif, void *contents, uint32_t *len)
{
	struct kern_nexus_capab_interface_advisory * __single capab = contents;

	if (*len != sizeof(*capab)) {
		return EINVAL;
	}
	if (capab->kncia_version !=
	    KERN_NEXUS_CAPAB_INTERFACE_ADVISORY_VERSION_1) {
		return EINVAL;
	}
	if (!feth_has_intf_advisory_configured(fakeif)) {
		return ENOTSUP;
	}
	VERIFY(capab->kncia_notify != NULL);
	/*
	 * NOTE(review): these fields are written without the feth lock —
	 * presumably capability configuration is serialized by the nexus
	 * layer; confirm.
	 */
	fakeif->iff_intf_adv_kern_ctx = capab->kncia_kern_context;
	fakeif->iff_intf_adv_notify = capab->kncia_notify;
	capab->kncia_provider_context = fakeif;
	capab->kncia_config = feth_nx_intf_adv_config;
	return 0;
}
2535
2536 static errno_t
feth_notify_steering_info(void * prov_ctx,void * qset_ctx,struct ifnet_traffic_descriptor_common * td,bool add)2537 feth_notify_steering_info(void *prov_ctx, void *qset_ctx,
2538 struct ifnet_traffic_descriptor_common *td, bool add)
2539 {
2540 #pragma unused(td)
2541 if_fake_ref fakeif = prov_ctx;
2542 fake_qset * __single qset = qset_ctx;
2543
2544 FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
2545 "%s: notify_steering_info: qset_id 0x%llx, %s",
2546 fakeif->iff_name, qset->fqs_id, add ? "add" : "remove");
2547 return 0;
2548 }
2549
2550 static errno_t
fill_capab_qset_extensions(if_fake_ref fakeif,void * contents,uint32_t * len)2551 fill_capab_qset_extensions(if_fake_ref fakeif, void *contents, uint32_t *len)
2552 {
2553 struct kern_nexus_capab_qset_extensions * __single capab = contents;
2554
2555 if (*len != sizeof(*capab)) {
2556 return EINVAL;
2557 }
2558 if (capab->cqe_version !=
2559 KERN_NEXUS_CAPAB_QSET_EXTENSIONS_VERSION_1) {
2560 return EINVAL;
2561 }
2562 capab->cqe_prov_ctx = fakeif;
2563 capab->cqe_notify_steering_info = feth_notify_steering_info;
2564 return 0;
2565 }
2566
2567 static errno_t
feth_nx_capab_config(kern_nexus_provider_t nxprov,kern_nexus_t nx,kern_nexus_capab_t capab,void * contents,uint32_t * len)2568 feth_nx_capab_config(kern_nexus_provider_t nxprov, kern_nexus_t nx,
2569 kern_nexus_capab_t capab, void *contents, uint32_t *len)
2570 {
2571 #pragma unused(nxprov)
2572 errno_t error;
2573 if_fake_ref fakeif;
2574
2575 fakeif = feth_nexus_context(nx);
2576 FAKE_LOG(LOG_DEBUG, FE_DBGF_CONTROL, "%s", fakeif->iff_name);
2577
2578 switch (capab) {
2579 case KERN_NEXUS_CAPAB_INTERFACE_ADVISORY:
2580 error = fill_capab_interface_advisory(fakeif, contents, len);
2581 break;
2582 case KERN_NEXUS_CAPAB_QSET_EXTENSIONS:
2583 error = fill_capab_qset_extensions(fakeif, contents, len);
2584 break;
2585 default:
2586 error = ENOTSUP;
2587 break;
2588 }
2589 return error;
2590 }
2591
2592 static int
feth_set_tso_mtu(ifnet_t ifp,uint32_t tso_v4_mtu,uint32_t tso_v6_mtu)2593 feth_set_tso_mtu(ifnet_t ifp, uint32_t tso_v4_mtu, uint32_t tso_v6_mtu)
2594 {
2595 int error;
2596
2597 error = ifnet_set_tso_mtu(ifp, AF_INET, tso_v4_mtu);
2598 if (error != 0) {
2599 FAKE_LOG(LOG_NOTICE, FE_DBGF_CONTROL,
2600 "set TSO MTU IPv4 failed on %s, err %d",
2601 if_name(ifp), error);
2602 return error;
2603 }
2604 error = ifnet_set_tso_mtu(ifp, AF_INET6, tso_v6_mtu);
2605 if (error != 0) {
2606 FAKE_LOG(LOG_NOTICE, FE_DBGF_CONTROL,
2607 "set TSO MTU IPv6 failed on %s, err %d",
2608 if_name(ifp), error);
2609 return error;
2610 }
2611 return 0;
2612 }
2613
2614 static int
feth_set_tso_offload(ifnet_t ifp)2615 feth_set_tso_offload(ifnet_t ifp)
2616 {
2617 ifnet_offload_t offload;
2618 int error;
2619
2620 offload = IFNET_TSO_IPV4 | IFNET_TSO_IPV6;
2621 error = ifnet_set_offload(ifp, offload);
2622 if (error != 0) {
2623 FAKE_LOG(LOG_NOTICE, FE_DBGF_CONTROL,
2624 "set TSO offload failed on %s, err %d",
2625 if_name(ifp), error);
2626 goto done;
2627 }
2628 error = feth_set_tso_mtu(ifp, if_fake_tso_buffer_size,
2629 if_fake_tso_buffer_size);
2630 done:
2631 return error;
2632 }
2633
/*
 * Register a netif nexus provider named after this interface and
 * allocate a provider instance, which creates the ifnet.  On success
 * *provider and *instance hold the new UUIDs and *ifp the ifnet.
 * On failure the provider registration (if any) is undone.
 */
static errno_t
create_netif_provider_and_instance(if_fake_ref fakeif,
    struct ifnet_init_eparams * init_params, ifnet_t *ifp,
    uuid_t * provider, uuid_t * instance)
{
	errno_t err;
	nexus_controller_t controller = kern_nexus_shared_controller();
	struct kern_nexus_net_init net_init;
	nexus_name_t provider_name;
	nexus_attr_t __single nexus_attr = NULL;
	struct kern_nexus_provider_init prov_init = {
		.nxpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
		.nxpi_flags = NXPIF_VIRTUAL_DEVICE,
		.nxpi_pre_connect = feth_nx_pre_connect,
		.nxpi_connected = feth_nx_connected,
		.nxpi_pre_disconnect = feth_nx_pre_disconnect,
		.nxpi_disconnected = feth_nx_disconnected,
		.nxpi_ring_init = feth_nx_ring_init,
		.nxpi_ring_fini = feth_nx_ring_fini,
		.nxpi_slot_init = feth_nx_slot_init,
		.nxpi_slot_fini = feth_nx_slot_fini,
		.nxpi_sync_tx = feth_nx_sync_tx,
		.nxpi_sync_rx = feth_nx_sync_rx,
		.nxpi_tx_doorbell = feth_nx_tx_doorbell,
		.nxpi_config_capab = feth_nx_capab_config,
	};

	_CASSERT(IFF_MAX_RX_RINGS == 1);
	err = kern_nexus_attr_create(&nexus_attr);
	if (err != 0) {
		FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
		    "nexus attribute creation failed, error %d", err);
		goto failed;
	}
	if (feth_in_wmm_mode(fakeif)) {
		/* WMM mode: multiple TX rings with WMM queue mapping */
		err = kern_nexus_attr_set(nexus_attr, NEXUS_ATTR_TX_RINGS,
		    IFF_NUM_TX_RINGS_WMM_MODE);
		VERIFY(err == 0);
		err = kern_nexus_attr_set(nexus_attr, NEXUS_ATTR_RX_RINGS,
		    IFF_NUM_RX_RINGS_WMM_MODE);
		VERIFY(err == 0);
		err = kern_nexus_attr_set(nexus_attr, NEXUS_ATTR_QMAP,
		    NEXUS_QMAP_TYPE_WMM);
		VERIFY(err == 0);
	}

	err = kern_nexus_attr_set(nexus_attr, NEXUS_ATTR_ANONYMOUS, 1);
	VERIFY(err == 0);
	snprintf((char *)provider_name, sizeof(provider_name),
	    "com.apple.netif.%s", fakeif->iff_name);
	err = kern_nexus_controller_register_provider(controller,
	    feth_nx_dom_prov,
	    provider_name,
	    &prov_init,
	    sizeof(prov_init),
	    nexus_attr,
	    provider);
	if (err != 0) {
		FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
		    "register provider failed, error %d", err);
		goto failed;
	}
	bzero(&net_init, sizeof(net_init));
	net_init.nxneti_version = KERN_NEXUS_NET_CURRENT_VERSION;
	net_init.nxneti_flags = 0;
	net_init.nxneti_eparams = init_params;
	net_init.nxneti_lladdr = NULL;
	net_init.nxneti_prepare = feth_netif_prepare;
	net_init.nxneti_rx_pbufpool = fakeif->iff_rx_pp;
	net_init.nxneti_tx_pbufpool = fakeif->iff_tx_pp;
	err = kern_nexus_controller_alloc_net_provider_instance(controller,
	    *provider,
	    fakeif,
	    NULL,
	    instance,
	    &net_init,
	    ifp);
	if (err != 0) {
		/* undo the provider registration on instance failure */
		FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
		    "alloc_net_provider_instance failed, %d", err);
		kern_nexus_controller_deregister_provider(controller,
		    *provider);
		uuid_clear(*provider);
		goto failed;
	}
	if (feth_supports_tso(fakeif)) {
		if ((err = feth_set_tso_offload(*ifp)) != 0) {
			/*
			 * NOTE(review): unlike the alloc failure above, this
			 * error path leaves the provider registered and the
			 * instance allocated — verify the caller cleans them
			 * up on a nonzero return.
			 */
			goto failed;
		}
	}

failed:
	if (nexus_attr != NULL) {
		kern_nexus_attr_destroy(nexus_attr);
	}
	return err;
}
2731
2732 /*
2733 * The nif_stats need to be referenced because we don't want it set
2734 * to NULL until the last llink is removed.
2735 */
2736 static void
get_nexus_stats(if_fake_ref fakeif,kern_nexus_t nexus)2737 get_nexus_stats(if_fake_ref fakeif, kern_nexus_t nexus)
2738 {
2739 if (++fakeif->iff_nifs_ref == 1) {
2740 ASSERT(fakeif->iff_nifs == NULL);
2741 fakeif->iff_nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
2742 }
2743 }
2744
2745 static void
clear_nexus_stats(if_fake_ref fakeif)2746 clear_nexus_stats(if_fake_ref fakeif)
2747 {
2748 if (--fakeif->iff_nifs_ref == 0) {
2749 ASSERT(fakeif->iff_nifs != NULL);
2750 fakeif->iff_nifs = NULL;
2751 }
2752 }
2753
/*
 * Qset init callback: record the new qset in the llink's qset table,
 * hand back the fake_qset as the qset context, and take a reference on
 * the netif stats.
 */
static errno_t
feth_nx_qset_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    void *llink_ctx, uint8_t qset_idx, uint64_t qset_id, kern_netif_qset_t qset,
    void **qset_ctx)
{
#pragma unused(nxprov)
	if_fake_ref fakeif;
	fake_llink * __single fl = llink_ctx;
	fake_qset *fqs;

	feth_lock();
	fakeif = feth_nexus_context(nexus);
	if (feth_is_detaching(fakeif)) {
		feth_unlock();
		FAKE_LOG(LOG_DEBUG, FE_DBGF_CONTROL,
		    "%s: detaching", fakeif->iff_name);
		return ENXIO;
	}
	if (qset_idx >= fl->fl_qset_cnt) {
		feth_unlock();
		FAKE_LOG(LOG_DEBUG, FE_DBGF_CONTROL,
		    "%s: invalid qset_idx %d", fakeif->iff_name, qset_idx);
		return EINVAL;
	}
	fqs = &fl->fl_qset[qset_idx];
	ASSERT(fqs->fqs_qset == NULL);
	fqs->fqs_qset = qset;
	fqs->fqs_id = qset_id;
	*qset_ctx = fqs;

	/* XXX This should really be done during registration */
	get_nexus_stats(fakeif, nexus);
	feth_unlock();
	return 0;
}
2789
2790 static void
feth_nx_qset_fini(kern_nexus_provider_t nxprov,kern_nexus_t nexus,void * qset_ctx)2791 feth_nx_qset_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
2792 void *qset_ctx)
2793 {
2794 #pragma unused(nxprov)
2795 if_fake_ref fakeif;
2796 fake_qset * __single fqs = qset_ctx;
2797
2798 feth_lock();
2799 fakeif = feth_nexus_context(nexus);
2800 clear_nexus_stats(fakeif);
2801 ASSERT(fqs->fqs_qset != NULL);
2802 fqs->fqs_qset = NULL;
2803 fqs->fqs_id = 0;
2804 feth_unlock();
2805 }
2806
/*
 * Queue init callback: validate the queue index against the qset's
 * TX or RX queue count and record the queue handle in the matching
 * table slot, handing the fake_queue back as the queue context.
 */
static errno_t
feth_nx_queue_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    void *qset_ctx, uint8_t qidx, bool tx, kern_netif_queue_t queue,
    void **queue_ctx)
{
#pragma unused(nxprov)
	if_fake_ref fakeif;
	fake_qset *__single fqs = qset_ctx;
	fake_queue *fq;

	feth_lock();
	fakeif = feth_nexus_context(nexus);
	if (feth_is_detaching(fakeif)) {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_CONTROL,
		    "%s: detaching", fakeif->iff_name);
		feth_unlock();
		return ENXIO;
	}
	if (tx) {
		if (qidx >= fqs->fqs_tx_queue_cnt) {
			FAKE_LOG(LOG_DEBUG, FE_DBGF_CONTROL,
			    "%s: invalid tx qidx %d", fakeif->iff_name, qidx);
			feth_unlock();
			return EINVAL;
		}
		fq = &fqs->fqs_tx_queue[qidx];
	} else {
		if (qidx >= fqs->fqs_rx_queue_cnt) {
			FAKE_LOG(LOG_DEBUG, FE_DBGF_CONTROL,
			    "%s: invalid rx qidx %d", fakeif->iff_name, qidx);
			feth_unlock();
			return EINVAL;
		}
		fq = &fqs->fqs_rx_queue[qidx];
	}
	ASSERT(fq->fq_queue == NULL);
	fq->fq_queue = queue;
	*queue_ctx = fq;
	feth_unlock();
	return 0;
}
2848
2849 static void
feth_nx_queue_fini(kern_nexus_provider_t nxprov,kern_nexus_t nexus,void * queue_ctx)2850 feth_nx_queue_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
2851 void *queue_ctx)
2852 {
2853 #pragma unused(nxprov, nexus)
2854 fake_queue *__single fq = queue_ctx;
2855
2856 feth_lock();
2857 ASSERT(fq->fq_queue != NULL);
2858 fq->fq_queue = NULL;
2859 feth_unlock();
2860 }
2861
/*
 * Walk a chain of dequeued TX packets: tap each for bpf, apply the
 * configured drop/expiry policy, then deliver batches of up to
 * IFF_MAX_BATCH_SIZE packets to the peer's RX queue and complete them
 * back to the TX pool.
 */
static void
feth_nx_tx_queue_deliver_pkt_chain(if_fake_ref fakeif, kern_packet_t sph,
    struct netif_stats *nifs, if_fake_ref peer_fakeif,
    uint32_t llink_idx, uint32_t qset_idx)
{
	kern_packet_t pkts[IFF_MAX_BATCH_SIZE];
	uint32_t n_pkts = 0;

	FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
	    "%s -> %s", fakeif->iff_name, peer_fakeif->iff_name);

	while (sph != 0) {
		uint16_t off;
		kern_packet_t next;

		/* unlink the packet from the chain */
		next = kern_packet_get_next(sph);
		kern_packet_set_next(sph, 0);

		/* bpf tap output */
		off = kern_packet_get_headroom(sph);
		VERIFY(off >= fakeif->iff_tx_headroom);
		kern_packet_set_link_header_length(sph, ETHER_HDR_LEN);
		feth_packet_set_trace_tag(sph, IFF_TT_OUTPUT);
		bpf_tap_packet_out(fakeif->iff_ifp, DLT_EN10MB, sph, NULL, 0);

		/* drop packets, if requested */
		fakeif->iff_tx_pkts_count++;
		if (feth_tx_expired_error(fakeif, sph) ||
		    feth_tx_complete_error(fakeif, sph)) {
			/* dropped: return the packet to the pool */
			fakeif->iff_tx_pkts_count = 0;
			kern_pbufpool_free(fakeif->iff_tx_pp, sph);
			STATS_INC(nifs, NETIF_STATS_DROP);
			goto next_pkt;
		}
		ASSERT(sph != 0);
		STATS_INC(nifs, NETIF_STATS_TX_COPY_DIRECT);
		STATS_INC(nifs, NETIF_STATS_TX_PACKETS);

		/* prepare batch for receiver */
		pkts[n_pkts++] = sph;
		if (n_pkts == IFF_MAX_BATCH_SIZE) {
			feth_rx_queue_submit(fakeif, peer_fakeif, llink_idx,
			    qset_idx, pkts, n_pkts);
			feth_tx_complete(fakeif, pkts, n_pkts);
			n_pkts = 0;
		}
next_pkt:
		sph = next;
	}
	/* catch last batch for receiver */
	if (n_pkts != 0) {
		feth_rx_queue_submit(fakeif, peer_fakeif, llink_idx, qset_idx,
		    pkts, n_pkts);
		feth_tx_complete(fakeif, pkts, n_pkts);
		n_pkts = 0;
	}
}
2919
/*
 * TX qset notify callback: verify both this interface and its peer are
 * attached and connected, optionally flip the qset between combined
 * and separate mode (test knob), then dequeue each TX queue in the
 * qset and deliver the resulting packet chains to the peer.
 */
static errno_t
feth_nx_tx_qset_notify(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    void *qset_ctx, uint32_t flags)
{
#pragma unused(nxprov)
	if_fake_ref fakeif;
	ifnet_t ifp;
	ifnet_t peer_ifp;
	if_fake_ref peer_fakeif = NULL;
	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
	fake_qset * __single qset = qset_ctx;
	boolean_t detaching, connected;
	uint32_t i;
	errno_t err;

	STATS_INC(nifs, NETIF_STATS_TX_SYNC);
	fakeif = feth_nexus_context(nexus);
	FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
	    "%s qset %p, idx %d, flags 0x%x", fakeif->iff_name, qset,
	    qset->fqs_idx, flags);

	feth_lock();
	detaching = feth_is_detaching(fakeif);
	connected = fakeif->iff_channel_connected;
	if (detaching || !connected) {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
		    "%s: detaching %s, channel connected %s",
		    fakeif->iff_name,
		    (detaching ? "true" : "false"),
		    (connected ? "true" : "false"));
		feth_unlock();
		return 0;
	}
	ifp = fakeif->iff_ifp;
	peer_ifp = fakeif->iff_peer;
	/* bail out unless the peer is attached and connected too */
	if (peer_ifp != NULL) {
		peer_fakeif = ifnet_get_if_fake(peer_ifp);
		if (peer_fakeif != NULL) {
			detaching = feth_is_detaching(peer_fakeif);
			connected = peer_fakeif->iff_channel_connected;
			if (detaching || !connected) {
				FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
				    "peer %s: detaching %s, "
				    "channel connected %s",
				    peer_fakeif->iff_name,
				    (detaching ? "true" : "false"),
				    (connected ? "true" : "false"));
				goto done;
			}
		} else {
			FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
			    "peer_fakeif is NULL");
			goto done;
		}
	} else {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT, "peer_ifp is NULL");
		goto done;
	}

	/*
	 * Test knob: periodically toggle the qset between combined and
	 * separate mode after if_fake_switch_mode_frequency dequeues.
	 */
	if (if_fake_switch_combined_mode &&
	    qset->fqs_dequeue_cnt >= if_fake_switch_mode_frequency) {
		if (qset->fqs_combined_mode) {
			kern_netif_set_qset_separate(qset->fqs_qset);
		} else {
			kern_netif_set_qset_combined(qset->fqs_qset);
		}
		qset->fqs_combined_mode = !qset->fqs_combined_mode;
		qset->fqs_dequeue_cnt = 0;
	}

	/* dequeue every TX queue in the qset and forward to the peer */
	for (i = 0; i < qset->fqs_tx_queue_cnt; i++) {
		kern_packet_t sph = 0;
		kern_netif_queue_t queue = qset->fqs_tx_queue[i].fq_queue;
		boolean_t more = FALSE;

		err = kern_netif_queue_tx_dequeue(queue, UINT32_MAX, UINT32_MAX,
		    &more, &sph);
		if (err != 0 && err != EAGAIN) {
			FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
			    "%s queue %p dequeue failed: err "
			    "%d", fakeif->iff_name, queue, err);
		}
		/* sph may be 0 (empty chain); the walker handles that */
		feth_nx_tx_queue_deliver_pkt_chain(fakeif, sph, nifs,
		    peer_fakeif, qset->fqs_llink_idx, qset->fqs_idx);
	}

done:
	feth_unlock();
	return 0;
}
3010
3011
/*
 * TX push callback for a single netif queue: accept the caller-supplied
 * packet chain (*ph), count packets/bytes, and deliver the chain to the
 * peer feth interface.  On success the chain is consumed and *ph is
 * cleared.  Always returns 0.
 *
 * NOTE(review): on the early-bail paths (detaching / not connected /
 * no peer) *packetCount and *byteCount are left untouched and *ph is
 * not consumed — confirm callers tolerate this.
 */
static errno_t
feth_nx_queue_tx_push(kern_nexus_provider_t nxprov,
    kern_nexus_t nexus, void *queue_ctx, kern_packet_t *ph,
    uint32_t *packetCount, uint32_t *byteCount)
{
#pragma unused(nxprov)
	if_fake_ref fakeif;
	ifnet_t ifp;
	ifnet_t peer_ifp;
	if_fake_ref peer_fakeif = NULL;
	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
	fake_queue *__single fq = queue_ctx;
	boolean_t detaching, connected;

	STATS_INC(nifs, NETIF_STATS_TX_SYNC);
	fakeif = feth_nexus_context(nexus);
	FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT, "%s queue %p", fakeif->iff_name, fq);

	feth_lock();

	/* this side must be up and channel-connected */
	detaching = feth_is_detaching(fakeif);
	connected = fakeif->iff_channel_connected;
	if (detaching || !connected) {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
		    "%s: detaching %s, channel connected %s",
		    fakeif->iff_name,
		    (detaching ? "true" : "false"),
		    (connected ? "true" : "false"));
		goto done;
	}
	/* NOTE: ifp is captured here but not used below in this function */
	ifp = fakeif->iff_ifp;
	peer_ifp = fakeif->iff_peer;
	if (peer_ifp != NULL) {
		/* the peer must likewise be attached and connected */
		peer_fakeif = ifnet_get_if_fake(peer_ifp);
		if (peer_fakeif != NULL) {
			detaching = feth_is_detaching(peer_fakeif);
			connected = peer_fakeif->iff_channel_connected;
			if (detaching || !connected) {
				FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
				    "peer %s: detaching %s, "
				    "channel connected %s",
				    peer_fakeif->iff_name,
				    (detaching ? "true" : "false"),
				    (connected ? "true" : "false"));
				goto done;
			}
		} else {
			FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
			    "peer_fakeif is NULL");
			goto done;
		}
	} else {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT, "peer_ifp is NULL");
		goto done;
	}

	*packetCount = *byteCount = 0;

	/* walk the chain to report how much we are consuming */
	kern_packet_t sph = *ph;
	while (sph != 0) {
		(*packetCount)++;
		*byteCount += kern_packet_get_data_length(sph);
		sph = kern_packet_get_next(sph);
	}

	/* queue-push path has no llink/qset context; use index 0 for both */
	feth_nx_tx_queue_deliver_pkt_chain(fakeif, *ph, nifs,
	    peer_fakeif, 0, 0);

	*ph = 0;

done:
	feth_unlock();
	return 0;
}
3086
3087
3088 static void
fill_qset_info_and_params(if_fake_ref fakeif,fake_llink * llink_info,uint32_t qset_idx,struct kern_nexus_netif_llink_qset_init * qset_init,bool is_def,bool is_low_latency)3089 fill_qset_info_and_params(if_fake_ref fakeif, fake_llink *llink_info,
3090 uint32_t qset_idx, struct kern_nexus_netif_llink_qset_init *qset_init,
3091 bool is_def, bool is_low_latency)
3092 {
3093 fake_qset *qset_info = &llink_info->fl_qset[qset_idx];
3094
3095 qset_init->nlqi_flags =
3096 (is_def ? KERN_NEXUS_NET_LLINK_QSET_DEFAULT : 0) |
3097 (is_low_latency ? KERN_NEXUS_NET_LLINK_QSET_LOW_LATENCY : 0) |
3098 KERN_NEXUS_NET_LLINK_QSET_AQM;
3099
3100 if (feth_in_wmm_mode(fakeif)) {
3101 qset_init->nlqi_flags |= KERN_NEXUS_NET_LLINK_QSET_WMM_MODE;
3102 qset_init->nlqi_num_txqs = IFF_NUM_TX_QUEUES_WMM_MODE;
3103 qset_init->nlqi_num_rxqs = IFF_NUM_RX_QUEUES_WMM_MODE;
3104 } else {
3105 qset_init->nlqi_num_txqs = 1;
3106 qset_init->nlqi_num_rxqs = 1;
3107 }
3108 qset_info->fqs_tx_queue_cnt = qset_init->nlqi_num_txqs;
3109 qset_info->fqs_rx_queue_cnt = qset_init->nlqi_num_rxqs;
3110
3111 /* These are needed for locating the peer qset */
3112 qset_info->fqs_llink_idx = llink_info->fl_idx;
3113 qset_info->fqs_idx = qset_idx;
3114 }
3115
3116 static void
fill_llink_info_and_params(if_fake_ref fakeif,uint32_t llink_idx,struct kern_nexus_netif_llink_init * llink_init,uint32_t llink_id,struct kern_nexus_netif_llink_qset_init * __counted_by (qset_cnt)qset_init,uint32_t qset_cnt,uint32_t flags)3117 fill_llink_info_and_params(if_fake_ref fakeif, uint32_t llink_idx,
3118 struct kern_nexus_netif_llink_init *llink_init, uint32_t llink_id,
3119 struct kern_nexus_netif_llink_qset_init * __counted_by(qset_cnt) qset_init, uint32_t qset_cnt,
3120 uint32_t flags)
3121 {
3122 fake_llink *llink_info = &fakeif->iff_llink[llink_idx];
3123 uint32_t i;
3124 bool create_ll_qset = if_fake_low_latency && (llink_idx != 0);
3125
3126 for (i = 0; i < qset_cnt; i++) {
3127 fill_qset_info_and_params(fakeif, llink_info, i,
3128 &qset_init[i], i == 0, create_ll_qset && i == 1);
3129 }
3130 llink_info->fl_idx = llink_idx;
3131
3132 /* This doesn't have to be the same as llink_idx */
3133 llink_info->fl_id = llink_id;
3134 llink_info->fl_qset_cnt = qset_cnt;
3135
3136 llink_init->nli_link_id = llink_id;
3137 llink_init->nli_num_qsets = qset_cnt;
3138 llink_init->nli_qsets = qset_init;
3139 llink_init->nli_flags = flags;
3140 llink_init->nli_ctx = llink_info;
3141 }
3142
/*
 * Add the non-default logical links (indices 1..if_fake_llink_cnt-1)
 * to the netif nexus.  The default llink at index 0 is created earlier
 * as part of provider-instance allocation.
 *
 * On failure, removes every llink added so far and resets
 * iff_llink_cnt.  Returns 0 on success or the failing errno.
 */
static errno_t
create_non_default_llinks(if_fake_ref fakeif)
{
	struct kern_nexus *nx;
	fake_nx_t fnx = &fakeif->iff_nx;
	struct kern_nexus_netif_llink_init llink_init;
	struct kern_nexus_netif_llink_qset_init qset_init[FETH_MAX_QSETS];
	errno_t err;
	uint64_t llink_id;
	uint32_t i;

	nx = nx_find(fnx->fnx_instance, FALSE);
	if (nx == NULL) {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
		    "%s: nx not found", fakeif->iff_name);
		return ENXIO;
	}
	/* Default llink starts at index 0 */
	for (i = 1; i < if_fake_llink_cnt; i++) {
		/* feth uses the llink index as its id (see caller comment) */
		llink_id = (uint64_t)i;

		/*
		 * The llink_init and qset_init structures are reused for
		 * each llink creation.
		 */
		fill_llink_info_and_params(fakeif, i, &llink_init,
		    llink_id, qset_init, if_fake_qset_cnt, 0);
		err = kern_nexus_netif_llink_add(nx, &llink_init);
		if (err != 0) {
			FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
			    "%s: llink add failed, error %d",
			    fakeif->iff_name, err);
			goto fail;
		}
		fakeif->iff_llink_cnt++;
	}
	nx_release(nx);
	return 0;

fail:
	/*
	 * NOTE(review): iff_llink_cnt includes the default llink, so this
	 * unwind also removes index 0 — confirm that is intended before
	 * the later nexus detach.
	 */
	for (i = 0; i < fakeif->iff_llink_cnt; i++) {
		int error;
		fake_llink * __single ll = &fakeif->iff_llink[i];

		error = kern_nexus_netif_llink_remove(nx, ll->fl_id);
		if (error != 0) {
			FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
			    "%s: llink remove failed, llink_id 0x%llx, "
			    "error %d", fakeif->iff_name,
			    ll->fl_id, error);
		}
		ll->fl_id = 0;
	}
	fakeif->iff_llink_cnt = 0;
	nx_release(nx);
	return err;
}
3200
/*
 * Register a netif nexus provider for this feth interface (llink
 * flavor), allocate the provider instance (which also creates the
 * ifnet and the default llink), then add any non-default llinks and
 * apply TSO offload if configured.
 *
 * On success returns 0 with *provider/*instance/*ifp populated.  On
 * failure the provider is deregistered (instance path) before falling
 * through to common cleanup.  The success path also falls through to
 * `failed:`, which only releases the nexus attribute.
 */
static errno_t
create_netif_llink_provider_and_instance(if_fake_ref fakeif,
    struct ifnet_init_eparams * init_params, ifnet_t *ifp,
    uuid_t * provider, uuid_t * instance)
{
	errno_t err;
	nexus_controller_t controller = kern_nexus_shared_controller();
	struct kern_nexus_net_init net_init;
	struct kern_nexus_netif_llink_init llink_init;
	struct kern_nexus_netif_llink_qset_init qsets[FETH_MAX_QSETS];

	nexus_name_t provider_name;
	nexus_attr_t __single nexus_attr = NULL;
	/* callbacks the netif nexus will invoke on this driver */
	struct kern_nexus_netif_provider_init prov_init = {
		.nxnpi_version = KERN_NEXUS_DOMAIN_PROVIDER_NETIF,
		.nxnpi_flags = NXPIF_VIRTUAL_DEVICE,
		.nxnpi_pre_connect = feth_nx_pre_connect,
		.nxnpi_connected = feth_nx_connected,
		.nxnpi_pre_disconnect = feth_nx_pre_disconnect,
		.nxnpi_disconnected = feth_nx_disconnected,
		.nxnpi_qset_init = feth_nx_qset_init,
		.nxnpi_qset_fini = feth_nx_qset_fini,
		.nxnpi_queue_init = feth_nx_queue_init,
		.nxnpi_queue_fini = feth_nx_queue_fini,
		.nxnpi_tx_qset_notify = feth_nx_tx_qset_notify,
		.nxnpi_config_capab = feth_nx_capab_config,
		.nxnpi_queue_tx_push = feth_nx_queue_tx_push
	};

	err = kern_nexus_attr_create(&nexus_attr);
	if (err != 0) {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
		    "nexus attribute creation failed, error %d", err);
		goto failed;
	}

	err = kern_nexus_attr_set(nexus_attr, NEXUS_ATTR_ANONYMOUS, 1);
	VERIFY(err == 0);

	/* provider name is derived from the interface name */
	snprintf((char *)provider_name, sizeof(provider_name),
	    "com.apple.netif.%s", fakeif->iff_name);
	err = kern_nexus_controller_register_provider(controller,
	    feth_nx_dom_prov,
	    provider_name,
	    (struct kern_nexus_provider_init *)&prov_init,
	    sizeof(prov_init),
	    nexus_attr,
	    provider);
	if (err != 0) {
		FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
		    "register provider failed, error %d", err);
		goto failed;
	}
	bzero(&net_init, sizeof(net_init));
	net_init.nxneti_version = KERN_NEXUS_NET_CURRENT_VERSION;
	net_init.nxneti_flags = 0;
	net_init.nxneti_eparams = init_params;
	net_init.nxneti_lladdr = NULL;
	net_init.nxneti_prepare = feth_netif_prepare;
	net_init.nxneti_rx_pbufpool = fakeif->iff_rx_pp;
	net_init.nxneti_tx_pbufpool = fakeif->iff_tx_pp;

	/*
	 * Assume llink id is same as the index for if_fake.
	 * This is not required for other drivers.
	 */
	_CASSERT(NETIF_LLINK_ID_DEFAULT == 0);
	fill_llink_info_and_params(fakeif, 0, &llink_init,
	    NETIF_LLINK_ID_DEFAULT, qsets, if_fake_qset_cnt,
	    KERN_NEXUS_NET_LLINK_DEFAULT);

	net_init.nxneti_llink = &llink_init;

	err = kern_nexus_controller_alloc_net_provider_instance(controller,
	    *provider, fakeif, NULL, instance, &net_init, ifp);
	if (err != 0) {
		FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
		    "alloc_net_provider_instance failed, %d", err);
		kern_nexus_controller_deregister_provider(controller,
		    *provider);
		uuid_clear(*provider);
		goto failed;
	}
	/* accounts for the default llink created above */
	fakeif->iff_llink_cnt++;

	if (if_fake_llink_cnt > 1) {
		err = create_non_default_llinks(fakeif);
		if (err != 0) {
			FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
			    "create_non_default_llinks failed, %d", err);
			feth_detach_netif_nexus(fakeif);
			goto failed;
		}
	}
	if (feth_supports_tso(fakeif)) {
		/*
		 * NOTE(review): a feth_set_tso_offload() failure returns
		 * without detaching the nexus instance created above —
		 * confirm the caller performs that cleanup.
		 */
		if ((err = feth_set_tso_offload(*ifp)) != 0) {
			goto failed;
		}
	}
failed:
	/* shared exit: reached on success as well; releases the attr only */
	if (nexus_attr != NULL) {
		kern_nexus_attr_destroy(nexus_attr);
	}
	return err;
}
3306
3307 static errno_t
feth_attach_netif_nexus(if_fake_ref fakeif,struct ifnet_init_eparams * init_params,ifnet_t * ifp)3308 feth_attach_netif_nexus(if_fake_ref fakeif,
3309 struct ifnet_init_eparams * init_params, ifnet_t *ifp)
3310 {
3311 errno_t error;
3312 fake_nx_t nx = &fakeif->iff_nx;
3313
3314 error = feth_packet_pool_make(fakeif);
3315 if (error != 0) {
3316 return error;
3317 }
3318 if (if_fake_llink_cnt == 0) {
3319 return create_netif_provider_and_instance(fakeif, init_params,
3320 ifp, &nx->fnx_provider, &nx->fnx_instance);
3321 } else {
3322 return create_netif_llink_provider_and_instance(fakeif,
3323 init_params, ifp, &nx->fnx_provider,
3324 &nx->fnx_instance);
3325 }
3326 }
3327
3328 static void
remove_non_default_llinks(const char * name,fake_nx_t fnx,fake_llink_t llink __counted_by (FETH_MAX_LLINKS),uint32_t llink_cnt)3329 remove_non_default_llinks(const char * name, fake_nx_t fnx,
3330 fake_llink_t llink __counted_by(FETH_MAX_LLINKS),
3331 uint32_t llink_cnt)
3332 {
3333 struct kern_nexus *nx;
3334 uint32_t i;
3335
3336 if (llink_cnt <= 1) {
3337 goto done;
3338 }
3339 nx = nx_find(fnx->fnx_instance, FALSE);
3340 if (nx == NULL) {
3341 FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
3342 "%s: nx not found", name);
3343 goto done;
3344 }
3345 /* Default llink (at index 0) is freed separately */
3346 for (i = 1; i < llink_cnt; i++) {
3347 int err;
3348
3349 err = kern_nexus_netif_llink_remove(nx, llink[i].fl_id);
3350 if (err != 0) {
3351 FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
3352 "%s: llink remove failed, llink_id 0x%llx, "
3353 "error %d", name,
3354 llink[i].fl_id, err);
3355 }
3356 }
3357 nx_release(nx);
3358 done:
3359 return;
3360 }
3361
3362 static void
detach_provider_and_instance(uuid_t provider,uuid_t instance)3363 detach_provider_and_instance(uuid_t provider, uuid_t instance)
3364 {
3365 nexus_controller_t controller = kern_nexus_shared_controller();
3366 errno_t err;
3367
3368 if (!uuid_is_null(instance)) {
3369 err = kern_nexus_controller_free_provider_instance(controller,
3370 instance);
3371 if (err != 0) {
3372 FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
3373 "free_provider_instance failed %d", err);
3374 } else {
3375 FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
3376 "deregister_instance");
3377 }
3378 }
3379 if (!uuid_is_null(provider)) {
3380 err = kern_nexus_controller_deregister_provider(controller,
3381 provider);
3382 if (err != 0) {
3383 FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
3384 "deregister_provider %d", err);
3385 } else {
3386 FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
3387 "deregister_provider");
3388 }
3389 }
3390 return;
3391 }
3392
/*
 * Detach the netif nexus from a feth interface.
 *
 * Snapshots and clears the nexus/llink state under the feth lock so
 * concurrent callers see the interface as already detached, then does
 * the actual teardown (non-default llink removal, provider/instance
 * release, llink array free) outside the lock.
 */
static void
feth_detach_netif_nexus(if_fake_ref fakeif)
{
	fake_nx fnx;
	fake_llink_t llink;
	uint32_t llink_cnt;

	feth_lock();
	/* take ownership of the nexus and llink state under the lock */
	fnx = fakeif->iff_nx;
	bzero(&fakeif->iff_nx, sizeof(fakeif->iff_nx));
	llink = fakeif->iff_llink;
	fakeif->iff_llink = NULL;
	llink_cnt = fakeif->iff_llink_cnt;
	fakeif->iff_llink_cnt = 0;
	feth_unlock();
	/* teardown happens on the snapshot, outside the lock */
	remove_non_default_llinks(__unsafe_null_terminated_from_indexable(fakeif->iff_name), &fnx, llink, llink_cnt);
	detach_provider_and_instance(fnx.fnx_provider, fnx.fnx_instance);
	if (llink != NULL) {
		kfree_type(fake_llink, FETH_MAX_LLINKS, llink);
	}
	return;
}
3415 #endif /* SKYWALK */
3416
3417 /**
3418 ** feth interface routines
3419 **/
3420 static void
feth_ifnet_set_attrs(if_fake_ref fakeif,ifnet_t ifp)3421 feth_ifnet_set_attrs(if_fake_ref fakeif, ifnet_t ifp)
3422 {
3423 errno_t error;
3424 ifnet_offload_t offload = 0;
3425
3426 ifnet_set_addrlen(ifp, ETHER_ADDR_LEN);
3427 ifnet_set_baudrate(ifp, 0);
3428 ifnet_set_mtu(ifp, ETHERMTU);
3429 ifnet_set_flags(ifp,
3430 IFF_BROADCAST | IFF_MULTICAST | IFF_SIMPLEX,
3431 0xffff);
3432 ifnet_set_hdrlen(ifp, sizeof(struct ether_header));
3433 if ((fakeif->iff_flags & IFF_FLAGS_LRO) != 0) {
3434 offload |= IFNET_LRO;
3435 }
3436 if ((fakeif->iff_flags & IFF_FLAGS_HWCSUM) != 0) {
3437 offload |= IFNET_CSUM_IP | IFNET_CSUM_TCP | IFNET_CSUM_UDP |
3438 IFNET_CSUM_TCPIPV6 | IFNET_CSUM_UDPIPV6;
3439 }
3440 if (feth_supports_tso(fakeif)) {
3441 offload |= IFNET_TSO_IPV4 | IFNET_TSO_IPV6;
3442 }
3443 if (feth_supports_vlan_tagging(fakeif)) {
3444 offload |= IFNET_VLAN_TAGGING;
3445 } else if (feth_supports_vlan_mtu(fakeif)) {
3446 offload |= IFNET_VLAN_MTU;
3447 }
3448 error = ifnet_set_offload(ifp, offload);
3449 if (error != 0) {
3450 FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
3451 "ifnet_set_offload(%s, 0x%x) failed, %d",
3452 ifp->if_xname, offload, error);
3453 } else {
3454 FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
3455 "ifnet_set_offload(%s, 0x%x) succeeded",
3456 ifp->if_xname, offload);
3457 }
3458 }
3459
/*
 * Post a kernel event (KEV_DL_SUBCLASS) for a link state change on
 * ifp, e.g. KEV_DL_LINK_ON / KEV_DL_LINK_OFF.  The event payload
 * identifies the interface by family, unit, and name.
 */
static void
interface_link_event(ifnet_t ifp, u_int32_t event_code)
{
	/* payload layout expected by KEV_DL_SUBCLASS consumers */
	struct event {
		u_int32_t ifnet_family;
		u_int32_t unit;
		char if_name[IFNAMSIZ];
	};
	/* header and payload share one properly-aligned buffer */
	_Alignas(struct kern_event_msg) char message[sizeof(struct kern_event_msg) + sizeof(struct event)] = { 0 };
	struct kern_event_msg *__single header = (struct kern_event_msg*)message;
	struct event *data = (struct event *)(message + offsetof(struct kern_event_msg, event_data));

	header->total_size = sizeof(message);
	header->vendor_code = KEV_VENDOR_APPLE;
	header->kev_class = KEV_NETWORK_CLASS;
	header->kev_subclass = KEV_DL_SUBCLASS;
	header->event_code = event_code;
	data->ifnet_family = ifnet_family(ifp);
	data->unit = (u_int32_t)ifnet_unit(ifp);
	strlcpy(data->if_name, ifnet_name(ifp), IFNAMSIZ);
	ifnet_event(ifp, header);
}
3482
3483 static if_fake_ref
ifnet_get_if_fake(ifnet_t ifp)3484 ifnet_get_if_fake(ifnet_t ifp)
3485 {
3486 return (if_fake_ref)ifnet_softc(ifp);
3487 }
3488
/*
 * if_clone create callback: create and attach a new "feth<unit>"
 * interface.
 *
 * In BSD mode the interface uses the classic mbuf start/output path;
 * otherwise (SKYWALK) it is created as a native netif nexus interface,
 * optionally with logical-link support.  Global sysctl knobs are
 * snapshotted up front so the consistency checks and the configuration
 * applied below agree even if the sysctls change concurrently.
 *
 * Returns 0 on success or an errno (EINVAL for inconsistent knobs or
 * an over-long name, ENOBUFS on allocation failure, or the error from
 * ifnet/nexus setup).
 */
static int
feth_clone_create(struct if_clone *ifc, u_int32_t unit, __unused void *params)
{
	bool bsd_mode;
	int error;
	if_fake_ref fakeif;
	struct ifnet_init_eparams feth_init;
	fake_llink_t iff_llink __counted_by_or_null(FETH_MAX_LLINKS) = NULL;
	ifnet_t __single ifp;
	char mac_address[ETHER_ADDR_LEN];
	bool multi_buflet;
	iff_pktpool_mode_t pktpool_mode;
	bool tso_support;

	/* make local copy of globals needed to make consistency checks below */
	bsd_mode = (if_fake_bsd_mode != 0);
	multi_buflet = (if_fake_multibuflet != 0);
	tso_support = (if_fake_tso_support != 0);
	pktpool_mode = if_fake_pktpool_mode;

	if (!bsd_mode) {
		/* consistency checks */
		if (if_fake_llink_cnt == 0 &&
		    strbufcmp(sk_ll_prefix, FAKE_ETHER_NAME) == 0) {
			FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
			    "feth used as ifname prefix but logical link "
			    "support in feth is disabled.");
			return EINVAL;
		}
		if (tso_support && pktpool_mode != IFF_PP_MODE_GLOBAL) {
			FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
			    "TSO mode requires global packet pool mode");
			return EINVAL;
		}
		if (multi_buflet && pktpool_mode == IFF_PP_MODE_PRIVATE_SPLIT) {
			FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
			    "multi-buflet not supported for split rx & tx pool");
			return EINVAL;
		}
		/* llink bookkeeping array, only needed in nexus mode */
		iff_llink = kalloc_type(fake_llink,
		    FETH_MAX_LLINKS, Z_WAITOK_ZERO);
		if (iff_llink == NULL) {
			return ENOBUFS;
		}
	}
	fakeif = kalloc_type(struct if_fake, Z_WAITOK_ZERO_NOFAIL);
	fakeif->iff_llink = iff_llink;
	fakeif->iff_retain_count = 1;
#define FAKE_ETHER_NAME_LEN (sizeof(FAKE_ETHER_NAME) - 1)
	_CASSERT(FAKE_ETHER_NAME_LEN == 4);
	/* MAC address = "feth" prefix + 16-bit unit number */
	strbufcpy(mac_address, FAKE_ETHER_NAME);
	mac_address[ETHER_ADDR_LEN - 2] = (unit & 0xff00) >> 8;
	mac_address[ETHER_ADDR_LEN - 1] = unit & 0xff;
	if (bsd_mode) {
		fakeif->iff_flags |= IFF_FLAGS_BSD_MODE;
	}
	if (if_fake_hwcsum != 0) {
		fakeif->iff_flags |= IFF_FLAGS_HWCSUM;
	}
	if (if_fake_lro != 0) {
		fakeif->iff_flags |= IFF_FLAGS_LRO;
	}
	if (if_fake_vlan_tagging != 0) {
		/* support VLAN tagging in hardware */
		feth_set_supports_vlan_tagging(fakeif);
	} else {
		/* support VLAN mtu-sized packets */
		feth_set_supports_vlan_mtu(fakeif);
	}
	if (if_fake_separate_frame_header != 0) {
		fakeif->iff_flags |= IFF_FLAGS_SEPARATE_FRAME_HEADER;
	}
	fakeif->iff_max_mtu = get_max_mtu(bsd_mode, if_fake_max_mtu);
	fakeif->iff_fcs = if_fake_fcs;
	fakeif->iff_trailer_length = if_fake_trailer_length;

	/* use the interface name as the unique id for ifp recycle */
	if ((unsigned int)
	    snprintf(fakeif->iff_name, sizeof(fakeif->iff_name), "%s%d",
	    ifc->ifc_name, unit) >= sizeof(fakeif->iff_name)) {
		feth_release(fakeif);
		return EINVAL;
	}
	bzero(&feth_init, sizeof(feth_init));
	feth_init.ver = IFNET_INIT_CURRENT_VERSION;
	feth_init.len = sizeof(feth_init);
	if (feth_in_bsd_mode(fakeif)) {
		/* choose between txstart (start callback) and legacy output */
		if (if_fake_txstart != 0) {
			feth_init.start = feth_start;
		} else {
			feth_init.flags |= IFNET_INIT_LEGACY;
			feth_init.output = feth_output;
		}
		if (tso_support) {
			feth_set_supports_tso(fakeif);
		}
	}
#if SKYWALK
	else {
		feth_init.flags |= IFNET_INIT_SKYWALK_NATIVE;
		/*
		 * Currently we support WMM mode only for Skywalk native
		 * interface.
		 */
		if (if_fake_wmm_mode != 0) {
			fakeif->iff_flags |= IFF_FLAGS_WMM_MODE;
		}

		if (multi_buflet) {
			fakeif->iff_flags |= IFF_FLAGS_MULTIBUFLETS;
		}

		fakeif->iff_pp_mode = pktpool_mode;
		if (tso_support) {
			feth_set_supports_tso(fakeif);
		}

		fakeif->iff_tx_headroom = if_fake_tx_headroom;
		fakeif->iff_adv_interval = if_fake_if_adv_interval;
		if (fakeif->iff_adv_interval > 0) {
			feth_init.flags |= IFNET_INIT_IF_ADV;
		}
		/* test knobs: forced drops, completion mode, expiry policy */
		fakeif->iff_tx_drop_rate = if_fake_tx_drops;
		fakeif->iff_tx_completion_mode = if_tx_completion_mode;
		fakeif->iff_tx_exp_policy = if_fake_tx_exp_policy;
	}
	feth_init.tx_headroom = fakeif->iff_tx_headroom;
#endif /* SKYWALK */
	if (if_fake_nxattach == 0) {
		feth_init.flags |= IFNET_INIT_NX_NOAUTO;
	}
	feth_init.uniqueid_len = (uint32_t)strbuflen(fakeif->iff_name);
	feth_init.uniqueid = fakeif->iff_name;
	feth_init.name = __unsafe_null_terminated_from_indexable(ifc->ifc_name);
	feth_init.unit = unit;
	feth_init.family = IFNET_FAMILY_ETHERNET;
	feth_init.type = IFT_ETHER;
	feth_init.demux = ether_demux;
	feth_init.add_proto = ether_add_proto;
	feth_init.del_proto = ether_del_proto;
	feth_init.check_multi = ether_check_multi;
	feth_init.framer_extended = ether_frameout_extended;
	feth_init.softc = fakeif;
	feth_init.ioctl = feth_ioctl;
	feth_init.set_bpf_tap = NULL;
	feth_init.detach = feth_if_free;
	feth_init.broadcast_addr = etherbroadcastaddr;
	feth_init.broadcast_len = ETHER_ADDR_LEN;
	if (feth_in_bsd_mode(fakeif)) {
		/* BSD path: allocate the ifnet directly */
		error = ifnet_allocate_extended(&feth_init, &ifp);
		if (error) {
			feth_release(fakeif);
			return error;
		}
		feth_ifnet_set_attrs(fakeif, ifp);
		if (feth_supports_tso(fakeif)) {
			feth_set_tso_mtu(ifp, IP_MAXPACKET, IP_MAXPACKET);
		}
	}
#if SKYWALK
	else {
		if (feth_in_wmm_mode(fakeif)) {
			feth_init.output_sched_model =
			    IFNET_SCHED_MODEL_DRIVER_MANAGED;
		}
		/* nexus path: the attach also creates the ifnet */
		error = feth_attach_netif_nexus(fakeif, &feth_init, &ifp);
		if (error != 0) {
			feth_release(fakeif);
			return error;
		}
		/* take an additional reference to ensure that it doesn't go away */
		feth_retain(fakeif);
		fakeif->iff_flags |= IFF_FLAGS_NX_ATTACHED;
		fakeif->iff_ifp = ifp;
	}
#endif /* SKYWALK */
	/* seed the supported media list from the defaults */
	fakeif->iff_media_count = MIN(default_media_words_count, IF_FAKE_MEDIA_LIST_MAX);
	bcopy(default_media_words, fakeif->iff_media_list,
	    fakeif->iff_media_count * sizeof(fakeif->iff_media_list[0]));
	if (feth_in_bsd_mode(fakeif)) {
		error = ifnet_attach(ifp, NULL);
		if (error) {
			ifnet_release(ifp);
			feth_release(fakeif);
			return error;
		}
		fakeif->iff_ifp = ifp;
	}

	ifnet_set_lladdr(ifp, mac_address, sizeof(mac_address));

	/* attach as ethernet */
	bpfattach(ifp, DLT_EN10MB, sizeof(struct ether_header));
	return 0;
}
3684
/*
 * if_clone destroy callback: tear down a feth interface.
 *
 * Marks the interface detaching under the feth lock (idempotent — a
 * second destroy is a no-op), disconnects it from any peer, detaches
 * the netif nexus if one was attached, and finally detaches the ifnet.
 * Always returns 0.
 */
static int
feth_clone_destroy(ifnet_t ifp)
{
	if_fake_ref fakeif;
#if SKYWALK
	boolean_t nx_attached = FALSE;
#endif /* SKYWALK */

	feth_lock();
	fakeif = ifnet_get_if_fake(ifp);
	if (fakeif == NULL || feth_is_detaching(fakeif)) {
		/* already gone or already being destroyed */
		feth_unlock();
		return 0;
	}
	feth_set_detaching(fakeif);
#if SKYWALK
	nx_attached = (fakeif->iff_flags & IFF_FLAGS_NX_ATTACHED) != 0;
#endif /* SKYWALK */
	feth_unlock();
	/* NULL peer disconnects this interface from its crossover peer */
	feth_config(ifp, NULL);
#if SKYWALK
	if (nx_attached) {
		feth_detach_netif_nexus(fakeif);
		/* drop the extra reference taken at nexus attach */
		feth_release(fakeif);
	}
#endif /* SKYWALK */
	ifnet_detach(ifp);
	return 0;
}
3714
3715 static void
feth_enqueue_input(ifnet_t ifp,struct mbuf * m)3716 feth_enqueue_input(ifnet_t ifp, struct mbuf * m)
3717 {
3718 struct ifnet_stat_increment_param stats = {};
3719
3720 stats.packets_in = 1;
3721 stats.bytes_in = (uint32_t)mbuf_pkthdr_len(m) + ETHER_HDR_LEN;
3722 ifnet_input(ifp, m, &stats);
3723 }
3724
3725
3726 static int
feth_add_mbuf_trailer(struct mbuf * m,void * trailer __sized_by (trailer_len),size_t trailer_len)3727 feth_add_mbuf_trailer(struct mbuf *m, void *trailer __sized_by(trailer_len), size_t trailer_len)
3728 {
3729 int ret;
3730 ASSERT(trailer_len <= FETH_TRAILER_LENGTH_MAX);
3731
3732 ret = m_append(m, trailer_len, (caddr_t)trailer);
3733 if (ret == 1) {
3734 FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
3735 "%zuB trailer added", trailer_len);
3736 return 0;
3737 }
3738 FAKE_LOG(LOG_NOTICE, FE_DBGF_OUTPUT, "m_append failed");
3739 return ENOTSUP;
3740 }
3741
/*
 * Compute the ethernet frame check sequence (CRC-32) over the whole
 * mbuf chain and append it as a 4-byte trailer, marking the packet
 * with M_HASFCS.  Returns 0 on success or the error from
 * feth_add_mbuf_trailer().
 */
static int
feth_add_mbuf_fcs(struct mbuf *m)
{
	uint32_t pkt_len, offset = 0;
	uint32_t crc = 0;
	int err = 0;

	/* the appended trailer must be exactly FCS-sized */
	ASSERT(sizeof(crc) == ETHER_CRC_LEN);

	/* fold every fragment of the chain into the CRC */
	pkt_len = m->m_pkthdr.len;
	struct mbuf *iter = m;
	while (iter != NULL && offset < pkt_len) {
		uint32_t frag_len = iter->m_len;
		ASSERT(frag_len <= (pkt_len - offset));
		crc = crc32(crc, mtod(iter, void *), frag_len);
		offset += frag_len;
		iter = iter->m_next;
	}

	err = feth_add_mbuf_trailer(m, &crc, ETHER_CRC_LEN);
	if (err != 0) {
		return err;
	}

	/* tell consumers the frame carries a trailing FCS */
	m->m_flags |= M_HASFCS;

	return 0;
}
3770
/*
 * Common BSD-mode output path: "transmit" mbuf m from ifp by handing
 * it directly to peer's input path.
 *
 * Applies the configured transformations first (fake hardware checksum
 * marking, optional trailer, optional FCS), splits off the frame
 * header — either into a separate allocation (m_copyup) or by just
 * advancing the data pointer — then taps BPF on both sides and
 * enqueues the packet as input on the peer.  Consumes m.
 */
static void
feth_output_common(ifnet_t ifp, struct mbuf * m, ifnet_t peer,
    iff_flags_t flags, bool fcs, void *trailer __sized_by(trailer_len), size_t trailer_len)
{
	void * frame_header;

	if ((flags & IFF_FLAGS_HWCSUM) != 0) {
		/* pretend hardware verified the checksums */
		m->m_pkthdr.csum_data = 0xffff;
		m->m_pkthdr.csum_flags =
		    CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
		    CSUM_IP_CHECKED | CSUM_IP_VALID;
	}

	(void)ifnet_stat_increment_out(ifp, 1, m->m_pkthdr.len, 0);
	if (trailer_len != 0 && trailer != NULL) {
		feth_add_mbuf_trailer(m, trailer, trailer_len);
	}
	if (fcs) {
		feth_add_mbuf_fcs(m);
	}
	if ((flags & IFF_FLAGS_SEPARATE_FRAME_HEADER) != 0) {
		/* pull the ethernet header into its own contiguous region */
		m = m_copyup(m, ETHER_HDR_LEN, 0);
		if (m == NULL) {
			/* m_copyup frees the chain on failure */
			FAKE_LOG(LOG_NOTICE, FE_DBGF_OUTPUT, "m_copyup failed");
			goto done;
		}
		frame_header = mtod(m, void *);
		mbuf_pkthdr_setheader(m, frame_header);
		m_adj(m, ETHER_HDR_LEN);
		FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
		    "%s: frame 0x%llx data 0x%llx len %ld",
		    ifp->if_xname,
		    (uint64_t)VM_KERNEL_ADDRPERM(frame_header),
		    (uint64_t)VM_KERNEL_ADDRPERM(mtod(m, void *)),
		    mbuf_len(m));
	} else {
		/* header stays in place; just advance data past it */
		frame_header = mtod(m, void *);
		mbuf_pkthdr_setheader(m, frame_header);
		_mbuf_adjust_pkthdr_and_data(m, ETHER_HDR_LEN);
	}

	/* tap it out */
	if (ifp->if_bpf != NULL) {
		fake_bpf_tap_out(ifp, m, frame_header);
	}

	/* tap it in */
	if (peer->if_bpf != NULL) {
		fake_bpf_tap_in(peer, m, frame_header);
	}

	(void)mbuf_pkthdr_setrcvif(m, peer);
	feth_enqueue_input(peer, m);
done:
	return;
}
3827
/*
 * ifnet start callback (BSD txstart mode): drain ifp's output queue
 * and deliver each packet to the peer.
 *
 * iff_start_busy serializes dequeuers — if another start is already
 * draining, this call returns immediately and that instance handles
 * the newly enqueued packets.  The peer and config flags are
 * snapshotted under the lock before the (unlocked) dequeue loop.
 */
static void
feth_start(ifnet_t ifp)
{
	if_fake_ref fakeif;
	iff_flags_t flags = 0;
	bool fcs;
	struct mbuf * __single m;
	ifnet_t peer = NULL;
	size_t trailer_len;

	feth_lock();
	fakeif = ifnet_get_if_fake(ifp);
	if (fakeif == NULL) {
		feth_unlock();
		return;
	}

	if (fakeif->iff_start_busy) {
		/* another thread is already draining the queue */
		feth_unlock();
		return;
	}

	/* snapshot config under the lock; loop runs unlocked */
	peer = fakeif->iff_peer;
	flags = fakeif->iff_flags;
	fcs = fakeif->iff_fcs;
	trailer_len = fakeif->iff_trailer_length;

	fakeif->iff_start_busy = TRUE;
	feth_unlock();
	for (;;) {
		if (ifnet_dequeue(ifp, &m) != 0) {
			/* queue is empty (or ifp is stopping) */
			break;
		}
		if (peer == NULL) {
			/* no peer: drop on the floor */
			m_freem(m);
			continue;
		}
		if (m != NULL) {
			feth_output_common(ifp, m, peer, flags, fcs,
			    feth_trailer, trailer_len);
		}
	}
	/* re-fetch: the softc may have been detached during the loop */
	feth_lock();
	fakeif = ifnet_get_if_fake(ifp);
	if (fakeif != NULL) {
		fakeif->iff_start_busy = FALSE;
	}
	feth_unlock();
}
3877
/*
 * ifnet legacy output callback (BSD mode, txstart disabled): deliver
 * one mbuf directly to the peer.  If there is no peer (including when
 * the softc is gone), the packet is dropped and counted as an output
 * error.  Always returns 0.
 */
static int
feth_output(ifnet_t ifp, struct mbuf * m)
{
	if_fake_ref fakeif;
	/*
	 * flags/fcs/trailer_len are only assigned when fakeif != NULL;
	 * that is safe because peer then stays NULL and we return before
	 * they are read.
	 */
	iff_flags_t flags;
	bool fcs;
	size_t trailer_len;
	ifnet_t peer = NULL;

	if (m == NULL) {
		return 0;
	}
	feth_lock();
	/* snapshot config under the lock */
	fakeif = ifnet_get_if_fake(ifp);
	if (fakeif != NULL) {
		peer = fakeif->iff_peer;
		flags = fakeif->iff_flags;
		fcs = fakeif->iff_fcs;
		trailer_len = fakeif->iff_trailer_length;
	}
	feth_unlock();
	if (peer == NULL) {
		/* unconnected: drop and count an output error */
		m_freem(m);
		ifnet_stat_increment_out(ifp, 0, 0, 1);
		return 0;
	}
	feth_output_common(ifp, m, peer, flags, fcs, feth_trailer, trailer_len);
	return 0;
}
3907
/*
 * Connect or disconnect a pair of feth interfaces.
 *
 * With peer != NULL, cross-links ifp and peer (both must be idle and
 * not detaching; EBUSY otherwise, EINVAL for non-feth interfaces or —
 * under SKYWALK — mismatched packet-pool modes).  With peer == NULL,
 * disconnects ifp from its current peer, if any.  Link on/off kernel
 * events are generated outside the lock after a successful state
 * change.  Returns 0 on success or EINVAL/EBUSY.
 */
static int
feth_config(ifnet_t ifp, ifnet_t peer)
{
	int connected = FALSE;
	int disconnected = FALSE;
	int error = 0;
	if_fake_ref fakeif = NULL;

	feth_lock();
	fakeif = ifnet_get_if_fake(ifp);
	if (fakeif == NULL) {
		error = EINVAL;
		goto done;
	}
	if (peer != NULL) {
		/* connect to peer */
		if_fake_ref peer_fakeif;

		peer_fakeif = ifnet_get_if_fake(peer);
		if (peer_fakeif == NULL) {
			error = EINVAL;
			goto done;
		}
		/* both sides must be alive and currently unpaired */
		if (feth_is_detaching(fakeif) ||
		    feth_is_detaching(peer_fakeif) ||
		    peer_fakeif->iff_peer != NULL ||
		    fakeif->iff_peer != NULL) {
			error = EBUSY;
			goto done;
		}
#if SKYWALK
		/* packet-pool modes must match for crossover to work */
		if (fakeif->iff_pp_mode !=
		    peer_fakeif->iff_pp_mode) {
			error = EINVAL;
			goto done;
		}
#endif /* SKYWALK */
		fakeif->iff_peer = peer;
		peer_fakeif->iff_peer = ifp;
		connected = TRUE;
	} else if (fakeif->iff_peer != NULL) {
		/* disconnect from peer */
		if_fake_ref peer_fakeif;

		peer = fakeif->iff_peer;
		peer_fakeif = ifnet_get_if_fake(peer);
		if (peer_fakeif == NULL) {
			/* should not happen */
			error = EINVAL;
			goto done;
		}
		fakeif->iff_peer = NULL;
		peer_fakeif->iff_peer = NULL;
		disconnected = TRUE;
	}

done:
	feth_unlock();

	/* generate link status event if we connect or disconnect */
	if (connected) {
		interface_link_event(ifp, KEV_DL_LINK_ON);
		interface_link_event(peer, KEV_DL_LINK_ON);
	} else if (disconnected) {
		interface_link_event(ifp, KEV_DL_LINK_OFF);
		interface_link_event(peer, KEV_DL_LINK_OFF);
	}
	return error;
}
3977
3978 static int
feth_set_media(ifnet_t ifp,struct if_fake_request * iffr)3979 feth_set_media(ifnet_t ifp, struct if_fake_request * iffr)
3980 {
3981 if_fake_ref fakeif;
3982 int error;
3983
3984 if (iffr->iffr_media.iffm_count > IF_FAKE_MEDIA_LIST_MAX) {
3985 /* list is too long */
3986 return EINVAL;
3987 }
3988 feth_lock();
3989 fakeif = ifnet_get_if_fake(ifp);
3990 if (fakeif == NULL) {
3991 error = EINVAL;
3992 goto done;
3993 }
3994 fakeif->iff_media_count = iffr->iffr_media.iffm_count;
3995 bcopy(iffr->iffr_media.iffm_list, fakeif->iff_media_list,
3996 iffr->iffr_media.iffm_count * sizeof(fakeif->iff_media_list[0]));
3997 #if 0
3998 /* XXX: "auto-negotiate" active with peer? */
3999 /* generate link status event? */
4000 fakeif->iff_media_current = iffr->iffr_media.iffm_current;
4001 #endif
4002 error = 0;
4003 done:
4004 feth_unlock();
4005 return error;
4006 }
4007
4008 static int
if_fake_request_copyin(user_addr_t user_addr,struct if_fake_request * iffr,u_int32_t len)4009 if_fake_request_copyin(user_addr_t user_addr,
4010 struct if_fake_request *iffr, u_int32_t len)
4011 {
4012 int error;
4013
4014 if (user_addr == USER_ADDR_NULL || len < sizeof(*iffr)) {
4015 error = EINVAL;
4016 goto done;
4017 }
4018 error = copyin(user_addr, iffr, sizeof(*iffr));
4019 if (error != 0) {
4020 goto done;
4021 }
4022 if (iffr->iffr_reserved[0] != 0 || iffr->iffr_reserved[1] != 0 ||
4023 iffr->iffr_reserved[2] != 0 || iffr->iffr_reserved[3] != 0) {
4024 error = EINVAL;
4025 goto done;
4026 }
4027 done:
4028 return error;
4029 }
4030
4031 static int
feth_set_drvspec(ifnet_t ifp,uint32_t cmd,u_int32_t len,user_addr_t user_addr)4032 feth_set_drvspec(ifnet_t ifp, uint32_t cmd, u_int32_t len,
4033 user_addr_t user_addr)
4034 {
4035 int error;
4036 struct if_fake_request iffr;
4037 ifnet_t peer;
4038
4039 switch (cmd) {
4040 case IF_FAKE_S_CMD_SET_PEER:
4041 error = if_fake_request_copyin(user_addr, &iffr, len);
4042 if (error != 0) {
4043 break;
4044 }
4045 if (iffr.iffr_peer_name[0] == '\0') {
4046 error = feth_config(ifp, NULL);
4047 break;
4048 }
4049
4050 /* ensure nul termination */
4051 iffr.iffr_peer_name[IFNAMSIZ - 1] = '\0';
4052 peer = ifunit(__unsafe_null_terminated_from_indexable(iffr.iffr_peer_name));
4053 if (peer == NULL) {
4054 error = ENXIO;
4055 break;
4056 }
4057 if (ifnet_type(peer) != IFT_ETHER) {
4058 error = EINVAL;
4059 break;
4060 }
4061 if (strcmp(ifnet_name(peer), FAKE_ETHER_NAME) != 0) {
4062 error = EINVAL;
4063 break;
4064 }
4065 error = feth_config(ifp, peer);
4066 break;
4067 case IF_FAKE_S_CMD_SET_MEDIA:
4068 error = if_fake_request_copyin(user_addr, &iffr, len);
4069 if (error != 0) {
4070 break;
4071 }
4072 error = feth_set_media(ifp, &iffr);
4073 break;
4074 case IF_FAKE_S_CMD_SET_DEQUEUE_STALL:
4075 error = if_fake_request_copyin(user_addr, &iffr, len);
4076 if (error != 0) {
4077 break;
4078 }
4079 error = feth_enable_dequeue_stall(ifp,
4080 iffr.iffr_dequeue_stall);
4081 break;
4082 default:
4083 error = EOPNOTSUPP;
4084 break;
4085 }
4086 return error;
4087 }
4088
4089 static int
feth_get_drvspec(ifnet_t ifp,u_int32_t cmd,u_int32_t len,user_addr_t user_addr)4090 feth_get_drvspec(ifnet_t ifp, u_int32_t cmd, u_int32_t len,
4091 user_addr_t user_addr)
4092 {
4093 int error = EOPNOTSUPP;
4094 if_fake_ref fakeif;
4095 struct if_fake_request iffr;
4096 ifnet_t peer;
4097
4098 switch (cmd) {
4099 case IF_FAKE_G_CMD_GET_PEER:
4100 if (len < sizeof(iffr)) {
4101 error = EINVAL;
4102 break;
4103 }
4104 feth_lock();
4105 fakeif = ifnet_get_if_fake(ifp);
4106 if (fakeif == NULL) {
4107 feth_unlock();
4108 error = EOPNOTSUPP;
4109 break;
4110 }
4111 peer = fakeif->iff_peer;
4112 feth_unlock();
4113 bzero(&iffr, sizeof(iffr));
4114 if (peer != NULL) {
4115 strlcpy(iffr.iffr_peer_name,
4116 if_name(peer),
4117 sizeof(iffr.iffr_peer_name));
4118 }
4119 error = copyout(&iffr, user_addr, sizeof(iffr));
4120 break;
4121 default:
4122 break;
4123 }
4124 return error;
4125 }
4126
/*
 * View of the SIOC[SG]DRVSPEC argument as either the 32-bit or
 * 64-bit variant of struct ifdrv; ifdrvu_p carries the raw pointer.
 */
union ifdrvu {
	struct ifdrv32 *ifdrvu_32;
	struct ifdrv64 *ifdrvu_64;
	void *ifdrvu_p;
};
4132
/*
 * feth_ioctl
 * Interface ioctl handler for feth. Handles address/MTU/flags
 * configuration, media queries, and the driver-private
 * SIOC[SG]DRVSPEC commands (dispatched to feth_set_drvspec /
 * feth_get_drvspec).
 */
static int
feth_ioctl(ifnet_t ifp, u_long cmd, void * data)
{
	unsigned int count;
	struct ifdevmtu * devmtu_p;
	union ifdrvu drv;
	uint32_t drv_cmd;
	uint32_t drv_len;
	boolean_t drv_set_command = FALSE;
	int error = 0;
	struct ifmediareq32 * ifmr;
	struct ifreq * ifr;
	if_fake_ref fakeif;
	int status;
	user_addr_t user_addr;

	ifr = (struct ifreq *)data;
	switch (cmd) {
	case SIOCSIFADDR:
		/* mark the interface up when an address is assigned */
		ifnet_set_flags(ifp, IFF_UP, IFF_UP);
		break;

	case SIOCGIFMEDIA32:
	case SIOCGIFMEDIA64:
		feth_lock();
		fakeif = ifnet_get_if_fake(ifp);
		if (fakeif == NULL) {
			feth_unlock();
			return EOPNOTSUPP;
		}
		/* link is "active" only when connected to a peer */
		status = (fakeif->iff_peer != NULL)
		    ? (IFM_AVALID | IFM_ACTIVE) : IFM_AVALID;
		/*
		 * NOTE(review): the 32-bit struct is used for both
		 * variants; this assumes ifmediareq32/64 share the
		 * layout of the fixed-width fields and differ only
		 * in the user list pointer extracted below.
		 */
		ifmr = (struct ifmediareq32 *)data;
		user_addr = (cmd == SIOCGIFMEDIA64) ?
		    ((struct ifmediareq64 *)data)->ifmu_ulist :
		    CAST_USER_ADDR_T(((struct ifmediareq32 *)data)->ifmu_ulist);
		count = ifmr->ifm_count;
		ifmr->ifm_active = (fakeif->iff_peer != NULL)
		    ? FAKE_DEFAULT_MEDIA : IFM_ETHER;
		ifmr->ifm_current = IFM_ETHER;
		ifmr->ifm_mask = 0;
		ifmr->ifm_status = status;
		if (user_addr == USER_ADDR_NULL) {
			/* caller only wants the count */
			ifmr->ifm_count = fakeif->iff_media_count;
		} else if (count > 0) {
			/* copy out at most what we have */
			if (count > fakeif->iff_media_count) {
				count = fakeif->iff_media_count;
			}
			ifmr->ifm_count = count;
			error = copyout(&fakeif->iff_media_list, user_addr,
			    count * sizeof(int));
		}
		feth_unlock();
		break;

	case SIOCGIFDEVMTU:
		/* report current, max, and min supported MTU */
		devmtu_p = &ifr->ifr_devmtu;
		devmtu_p->ifdm_current = ifnet_mtu(ifp);
		devmtu_p->ifdm_max = feth_max_mtu(ifp);
		devmtu_p->ifdm_min = IF_MINMTU;
		break;

	case SIOCSIFMTU:
		if ((unsigned int)ifr->ifr_mtu > feth_max_mtu(ifp) ||
		    ifr->ifr_mtu < IF_MINMTU) {
			error = EINVAL;
		} else {
			error = ifnet_set_mtu(ifp, ifr->ifr_mtu);
		}
		break;

	case SIOCSDRVSPEC32:
	case SIOCSDRVSPEC64:
		/* set-commands require superuser */
		error = proc_suser(current_proc());
		if (error != 0) {
			break;
		}
		drv_set_command = TRUE;
		OS_FALLTHROUGH;
	case SIOCGDRVSPEC32:
	case SIOCGDRVSPEC64:
		/* unpack the 32- or 64-bit ifdrv request */
		drv.ifdrvu_p = data;
		if (cmd == SIOCGDRVSPEC32 || cmd == SIOCSDRVSPEC32) {
			drv_cmd = drv.ifdrvu_32->ifd_cmd;
			drv_len = drv.ifdrvu_32->ifd_len;
			user_addr = CAST_USER_ADDR_T(drv.ifdrvu_32->ifd_data);
		} else {
			drv_cmd = drv.ifdrvu_64->ifd_cmd;
			drv_len = drv.ifdrvu_64->ifd_len;
			user_addr = drv.ifdrvu_64->ifd_data;
		}
		if (drv_set_command) {
			error = feth_set_drvspec(ifp, drv_cmd, drv_len,
			    user_addr);
		} else {
			error = feth_get_drvspec(ifp, drv_cmd, drv_len,
			    user_addr);
		}
		break;

	case SIOCSIFLLADDR:
		error = ifnet_set_lladdr(ifp, ifr->ifr_addr.sa_data,
		    ifr->ifr_addr.sa_len);
		break;

	case SIOCSIFFLAGS:
		/* keep IFF_RUNNING in sync with IFF_UP */
		if ((ifp->if_flags & IFF_UP) != 0) {
			/* marked up, set running if not already set */
			if ((ifp->if_flags & IFF_RUNNING) == 0) {
				/* set running */
				error = ifnet_set_flags(ifp, IFF_RUNNING,
				    IFF_RUNNING);
			}
		} else if ((ifp->if_flags & IFF_RUNNING) != 0) {
			/* marked down, clear running */
			error = ifnet_set_flags(ifp, 0, IFF_RUNNING);
		}
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		/* multicast filter changes are accepted but ignored */
		error = 0;
		break;
	case SIOCSIFCAP: {
		uint32_t cap;

		/* LRO toggle is only valid when the interface was
		 * created with the LRO flag */
		feth_lock();
		fakeif = ifnet_get_if_fake(ifp);
		if (fakeif == NULL ||
		    (fakeif->iff_flags & IFF_FLAGS_LRO) == 0) {
			feth_unlock();
			return EOPNOTSUPP;
		}
		feth_unlock();
		cap = (ifr->ifr_reqcap & IFCAP_LRO) != 0 ? IFCAP_LRO : 0;
		error = ifnet_set_capabilities_enabled(ifp, cap, IFCAP_LRO);
		break;
	}
	default:
		error = EOPNOTSUPP;
		break;
	}
	return error;
}
4277
4278 static void
feth_if_free(ifnet_t ifp)4279 feth_if_free(ifnet_t ifp)
4280 {
4281 if_fake_ref fakeif;
4282
4283 if (ifp == NULL) {
4284 return;
4285 }
4286 feth_lock();
4287 fakeif = ifnet_get_if_fake(ifp);
4288 if (fakeif == NULL) {
4289 feth_unlock();
4290 return;
4291 }
4292 ifp->if_softc = NULL;
4293 #if SKYWALK
4294 VERIFY(fakeif->iff_doorbell_tcall == NULL);
4295 #endif /* SKYWALK */
4296 feth_unlock();
4297 feth_release(fakeif);
4298 ifnet_release(ifp);
4299 return;
4300 }
4301
4302 __private_extern__ void
if_fake_init(void)4303 if_fake_init(void)
4304 {
4305 int error;
4306
4307 #if SKYWALK
4308 (void)feth_register_nexus_domain_provider();
4309 #endif /* SKYWALK */
4310 error = if_clone_attach(&feth_cloner);
4311 if (error != 0) {
4312 return;
4313 }
4314 return;
4315 }
4316