1 /*
2 * Copyright (c) 2015-2024 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 /*
30 * if_fake.c
31 * - fake network interface used for testing
32 * - "feth" (e.g. "feth0", "feth1") is a virtual ethernet interface that allows
33 * two instances to have their output/input paths "crossed-over" so that
34 * output on one is input on the other
35 */
36
37 /*
38 * Modification History:
39 *
40 * September 9, 2015 Dieter Siegmund ([email protected])
41 * - created
42 */
43
44 #include <sys/param.h>
45 #include <sys/kernel.h>
46 #include <sys/malloc.h>
47 #include <sys/mbuf.h>
48 #include <sys/queue.h>
49 #include <sys/socket.h>
50 #include <sys/sockio.h>
51 #include <sys/sysctl.h>
52 #include <sys/systm.h>
53 #include <sys/kern_event.h>
54 #include <sys/mcache.h>
55 #include <sys/syslog.h>
56
57 #include <net/bpf.h>
58 #include <net/ethernet.h>
59 #include <net/if.h>
60 #include <net/if_vlan_var.h>
61 #include <net/if_fake_var.h>
62 #include <net/if_arp.h>
63 #include <net/if_dl.h>
64 #include <net/if_ether.h>
65 #include <net/if_types.h>
66 #include <libkern/OSAtomic.h>
67
68 #include <net/dlil.h>
69
70 #include <net/kpi_interface.h>
71 #include <net/kpi_protocol.h>
72
73 #include <kern/locks.h>
74 #include <kern/zalloc.h>
75
76 #include <mach/mach_time.h>
77
78 #include <os/log.h>
79
80 #ifdef INET
81 #include <netinet/in.h>
82 #include <netinet/if_ether.h>
83 #endif
84
85 #include <net/if_media.h>
86 #include <net/ether_if_module.h>
87 #if SKYWALK
88 #include <skywalk/os_skywalk_private.h>
89 #include <skywalk/nexus/netif/nx_netif.h>
90 #include <skywalk/channel/channel_var.h>
91 #endif /* SKYWALK */
92
93 /*
94 * if_fake_debug, FE_DBGF_*
95 * - 'if_fake_debug' is a bitmask of FE_DBGF_* flags that can be set
96 * to enable additional logs for the corresponding fake function
97 * - "sysctl net.link.fake.debug" controls the value of
98 * 'if_fake_debug'
99 */
100 static uint32_t if_fake_debug = 0;
101
102 #define FE_DBGF_LIFECYCLE 0x0001
103 #define FE_DBGF_INPUT 0x0002
104 #define FE_DBGF_OUTPUT 0x0004
105 #define FE_DBGF_CONTROL 0x0008
106 #define FE_DBGF_MISC 0x0010
107
108 /*
109 * if_fake_log_level
110 * - 'if_fake_log_level' ensures that by default important logs are
111 * logged regardless of if_fake_debug by comparing the log level
112 * in FAKE_LOG to if_fake_log_level
 *   - "sysctl net.link.fake.log_level" controls the value of
114 * 'if_fake_log_level'
115 * - the default value of 'if_fake_log_level' is LOG_NOTICE; important
116 * logs must use LOG_NOTICE to ensure they appear by default
117 */
118 #define FAKE_DBGF_ENABLED(__flag) ((if_fake_debug & __flag) != 0)
119
120 /*
121 * FAKE_LOG
122 * - macro to generate the specified log conditionally based on
123 * the specified log level and debug flags
124 */
125 #define FAKE_LOG(__level, __dbgf, __string, ...) \
126 do { \
127 if (__level <= if_fake_log_level || \
128 FAKE_DBGF_ENABLED(__dbgf)) { \
129 os_log(OS_LOG_DEFAULT, "%s: " __string, \
130 __func__, ## __VA_ARGS__); \
131 } \
132 } while (0)
133
134 static boolean_t
is_power_of_two(unsigned int val)135 is_power_of_two(unsigned int val)
136 {
137 return (val & (val - 1)) == 0;
138 }
139
140 #define FAKE_ETHER_NAME "feth"
141
142 SYSCTL_DECL(_net_link);
143 SYSCTL_NODE(_net_link, OID_AUTO, fake, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
144 "Fake interface");
145
146 static int if_fake_txstart = 1;
147 SYSCTL_INT(_net_link_fake, OID_AUTO, txstart, CTLFLAG_RW | CTLFLAG_LOCKED,
148 &if_fake_txstart, 0, "Fake interface TXSTART mode");
149
150 static int if_fake_hwcsum = 0;
151 SYSCTL_INT(_net_link_fake, OID_AUTO, hwcsum, CTLFLAG_RW | CTLFLAG_LOCKED,
152 &if_fake_hwcsum, 0, "Fake interface simulate hardware checksum");
153
154 static int if_fake_vlan_tagging = 1;
155 SYSCTL_INT(_net_link_fake, OID_AUTO, vlan_tagging, CTLFLAG_RW | CTLFLAG_LOCKED,
156 &if_fake_vlan_tagging, 0, "Fake interface VLAN tagging");
157
158 static int if_fake_nxattach = 0;
159 SYSCTL_INT(_net_link_fake, OID_AUTO, nxattach, CTLFLAG_RW | CTLFLAG_LOCKED,
160 &if_fake_nxattach, 0, "Fake interface auto-attach nexus");
161
162 static int if_fake_bsd_mode = 1;
163 SYSCTL_INT(_net_link_fake, OID_AUTO, bsd_mode, CTLFLAG_RW | CTLFLAG_LOCKED,
164 &if_fake_bsd_mode, 0, "Fake interface attach as BSD interface");
165
166 static int if_fake_log_level = LOG_NOTICE;
167 SYSCTL_INT(_net_link_fake, OID_AUTO, log_level, CTLFLAG_RW | CTLFLAG_LOCKED,
168 &if_fake_log_level, 0, "Fake interface log level");
169
170 SYSCTL_INT(_net_link_fake, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
171 &if_fake_debug, 0, "Fake interface debug flags");
172
173 static int if_fake_wmm_mode = 0;
174 SYSCTL_INT(_net_link_fake, OID_AUTO, wmm_mode, CTLFLAG_RW | CTLFLAG_LOCKED,
175 &if_fake_wmm_mode, 0, "Fake interface in 802.11 WMM mode");
176
177 static int if_fake_multibuflet = 0;
178 SYSCTL_INT(_net_link_fake, OID_AUTO, multibuflet, CTLFLAG_RW | CTLFLAG_LOCKED,
179 &if_fake_multibuflet, 0, "Fake interface using multi-buflet packets");
180
181 static int if_fake_low_latency = 0;
182 SYSCTL_INT(_net_link_fake, OID_AUTO, low_latency, CTLFLAG_RW | CTLFLAG_LOCKED,
183 &if_fake_low_latency, 0, "Fake interface with a low latency qset");
184
185 static int if_fake_switch_combined_mode = 0;
186 SYSCTL_INT(_net_link_fake, OID_AUTO, switch_combined_mode,
187 CTLFLAG_RW | CTLFLAG_LOCKED, &if_fake_switch_combined_mode, 0,
188 "Switch a qset between combined and separate mode during dequeues");
189
190 static int if_fake_switch_mode_frequency = 10;
191 SYSCTL_INT(_net_link_fake, OID_AUTO, switch_mode_frequency,
192 CTLFLAG_RW | CTLFLAG_LOCKED, &if_fake_switch_mode_frequency, 0,
193 "The number of dequeues before we switch between the combined and separated mode");
194
195 static int if_fake_tso_support = 0;
196 SYSCTL_INT(_net_link_fake, OID_AUTO, tso_support, CTLFLAG_RW | CTLFLAG_LOCKED,
197 &if_fake_tso_support, 0, "Fake interface with support for TSO offload");
198
199 #define DEFAULT_EXPIRATION_THRESHOLD 500 /* usec */
200 static int if_fake_expiration_threshold_us = DEFAULT_EXPIRATION_THRESHOLD;
201 SYSCTL_INT(_net_link_fake, OID_AUTO, expiration_threshold, CTLFLAG_RW | CTLFLAG_LOCKED,
202 &if_fake_expiration_threshold_us, DEFAULT_EXPIRATION_THRESHOLD,
203 "Expiration threshold (usec) for expiration testing");
204
205 static int if_fake_lro = 0;
206 SYSCTL_INT(_net_link_fake, OID_AUTO, lro, CTLFLAG_RW | CTLFLAG_LOCKED,
207 &if_fake_lro, 0, "Fake interface report LRO capability");
208
209 static int if_fake_separate_frame_header = 0;
210 SYSCTL_INT(_net_link_fake, OID_AUTO, separate_frame_header,
211 CTLFLAG_RW | CTLFLAG_LOCKED,
212 &if_fake_separate_frame_header, 0, "Put frame header in separate mbuf");
213
214 typedef enum {
215 IFF_PP_MODE_GLOBAL = 0, /* share a global pool */
216 IFF_PP_MODE_PRIVATE = 1, /* creates its own rx/tx pool */
217 IFF_PP_MODE_PRIVATE_SPLIT = 2, /* creates its own split rx & tx pool */
218 } iff_pktpool_mode_t;
219 static iff_pktpool_mode_t if_fake_pktpool_mode = IFF_PP_MODE_GLOBAL;
220 SYSCTL_INT(_net_link_fake, OID_AUTO, pktpool_mode, CTLFLAG_RW | CTLFLAG_LOCKED,
221 &if_fake_pktpool_mode, IFF_PP_MODE_GLOBAL,
222 "Fake interface packet pool mode (0 global, 1 private, 2 private split");
223
224 #define FETH_LINK_LAYER_AGGRETATION_FACTOR_MAX 512
225 #define FETH_LINK_LAYER_AGGRETATION_FACTOR_DEF 96
226 static int if_fake_link_layer_aggregation_factor =
227 FETH_LINK_LAYER_AGGRETATION_FACTOR_DEF;
228 static int
229 feth_link_layer_aggregation_factor_sysctl SYSCTL_HANDLER_ARGS
230 {
231 #pragma unused(oidp, arg1, arg2)
232 unsigned int new_value;
233 int changed;
234 int error;
235
236 error = sysctl_io_number(req, if_fake_link_layer_aggregation_factor,
237 sizeof(if_fake_link_layer_aggregation_factor), &new_value,
238 &changed);
239 if (error == 0 && changed != 0) {
240 if (new_value <= 0 ||
241 new_value > FETH_LINK_LAYER_AGGRETATION_FACTOR_MAX) {
242 return EINVAL;
243 }
244 if_fake_link_layer_aggregation_factor = new_value;
245 }
246 return error;
247 }
248
249 SYSCTL_PROC(_net_link_fake, OID_AUTO, link_layer_aggregation_factor,
250 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
251 0, 0, feth_link_layer_aggregation_factor_sysctl, "IU",
252 "Fake interface link layer aggregation factor");
253
254 #define FETH_TX_HEADROOM_MAX 32
255 static unsigned int if_fake_tx_headroom = FETH_TX_HEADROOM_MAX;
256 static int
257 feth_tx_headroom_sysctl SYSCTL_HANDLER_ARGS
258 {
259 #pragma unused(oidp, arg1, arg2)
260 unsigned int new_value;
261 int changed;
262 int error;
263
264 error = sysctl_io_number(req, if_fake_tx_headroom,
265 sizeof(if_fake_tx_headroom), &new_value, &changed);
266 if (error == 0 && changed != 0) {
267 if (new_value > FETH_TX_HEADROOM_MAX ||
268 (new_value % 8) != 0) {
269 return EINVAL;
270 }
271 if_fake_tx_headroom = new_value;
272 }
273 return 0;
274 }
275
276 SYSCTL_PROC(_net_link_fake, OID_AUTO, tx_headroom,
277 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
278 0, 0, feth_tx_headroom_sysctl, "IU", "Fake ethernet Tx headroom");
279
280 static int if_fake_fcs = 0;
281 SYSCTL_INT(_net_link_fake, OID_AUTO, fcs, CTLFLAG_RW | CTLFLAG_LOCKED,
282 &if_fake_fcs, 0, "Fake interface using frame check sequence");
283
284 #define FETH_TRAILER_LENGTH_MAX 28
285 char feth_trailer[FETH_TRAILER_LENGTH_MAX + 1] = "trailertrailertrailertrailer";
286 static unsigned int if_fake_trailer_length = 0;
287 static int
288 feth_trailer_length_sysctl SYSCTL_HANDLER_ARGS
289 {
290 #pragma unused(oidp, arg1, arg2)
291 unsigned int new_value;
292 int changed;
293 int error;
294
295 error = sysctl_io_number(req, if_fake_trailer_length,
296 sizeof(if_fake_trailer_length), &new_value, &changed);
297 if (error == 0 && changed != 0) {
298 if (new_value > FETH_TRAILER_LENGTH_MAX) {
299 return EINVAL;
300 }
301 if_fake_trailer_length = new_value;
302 }
303 return 0;
304 }
305
306 SYSCTL_PROC(_net_link_fake, OID_AUTO, trailer_length,
307 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
308 feth_trailer_length_sysctl, "IU", "Fake interface frame trailer length");
309
310 /* sysctl net.link.fake.max_mtu */
311 #define FETH_MAX_MTU_DEFAULT 2048
312 #define FETH_MAX_MTU_MAX ((16 * 1024) - ETHER_HDR_LEN)
313
314 static unsigned int if_fake_max_mtu = FETH_MAX_MTU_DEFAULT;
315
316 /* sysctl net.link.fake.buflet_size */
317 #define FETH_BUFLET_SIZE_MIN 512
318 #define FETH_BUFLET_SIZE_MAX (32 * 1024)
319 #define FETH_TSO_BUFLET_SIZE (16 * 1024)
320
321 static unsigned int if_fake_buflet_size = FETH_BUFLET_SIZE_MIN;
322 static unsigned int if_fake_tso_buffer_size = FETH_TSO_BUFLET_SIZE;
323
324 static int
325 feth_tso_buffer_size_sysctl SYSCTL_HANDLER_ARGS
326 {
327 #pragma unused(oidp, arg1, arg2)
328 unsigned int new_value;
329 int changed;
330 int error;
331
332 error = sysctl_io_number(req, if_fake_tso_buffer_size,
333 sizeof(if_fake_tso_buffer_size), &new_value, &changed);
334 if (error == 0 && changed != 0) {
335 /* must be a power of 2 between min and max */
336 if (new_value > FETH_BUFLET_SIZE_MAX ||
337 new_value < FETH_BUFLET_SIZE_MIN ||
338 !is_power_of_two(new_value)) {
339 return EINVAL;
340 }
341 if_fake_tso_buffer_size = new_value;
342 }
343 return 0;
344 }
345
346 SYSCTL_PROC(_net_link_fake, OID_AUTO, tso_buf_size,
347 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
348 0, 0, feth_tso_buffer_size_sysctl, "IU", "Fake interface TSO buffer size");
349
350 static int
351 feth_max_mtu_sysctl SYSCTL_HANDLER_ARGS
352 {
353 #pragma unused(oidp, arg1, arg2)
354 unsigned int new_value;
355 int changed;
356 int error;
357
358 error = sysctl_io_number(req, if_fake_max_mtu,
359 sizeof(if_fake_max_mtu), &new_value, &changed);
360 if (error == 0 && changed != 0) {
361 if (new_value > FETH_MAX_MTU_MAX ||
362 new_value < ETHERMTU ||
363 new_value <= if_fake_buflet_size) {
364 return EINVAL;
365 }
366 if_fake_max_mtu = new_value;
367 }
368 return 0;
369 }
370
371 SYSCTL_PROC(_net_link_fake, OID_AUTO, max_mtu,
372 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
373 0, 0, feth_max_mtu_sysctl, "IU", "Fake interface maximum MTU");
374
375 static int
376 feth_buflet_size_sysctl SYSCTL_HANDLER_ARGS
377 {
378 #pragma unused(oidp, arg1, arg2)
379 unsigned int new_value;
380 int changed;
381 int error;
382
383 error = sysctl_io_number(req, if_fake_buflet_size,
384 sizeof(if_fake_buflet_size), &new_value, &changed);
385 if (error == 0 && changed != 0) {
386 /* must be a power of 2 between min and max */
387 if (new_value > FETH_BUFLET_SIZE_MAX ||
388 new_value < FETH_BUFLET_SIZE_MIN ||
389 !is_power_of_two(new_value) ||
390 new_value >= if_fake_max_mtu) {
391 return EINVAL;
392 }
393 if_fake_buflet_size = new_value;
394 }
395 return 0;
396 }
397
398 SYSCTL_PROC(_net_link_fake, OID_AUTO, buflet_size,
399 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
400 0, 0, feth_buflet_size_sysctl, "IU", "Fake interface buflet size");
401
402 static unsigned int if_fake_user_access = 0;
403
404 static int
405 feth_user_access_sysctl SYSCTL_HANDLER_ARGS
406 {
407 #pragma unused(oidp, arg1, arg2)
408 unsigned int new_value;
409 int changed;
410 int error;
411
412 error = sysctl_io_number(req, if_fake_user_access,
413 sizeof(if_fake_user_access), &new_value, &changed);
414 if (error == 0 && changed != 0) {
415 if (new_value != 0) {
416 if (new_value != 1) {
417 return EINVAL;
418 }
419 }
420 if_fake_user_access = new_value;
421 }
422 return 0;
423 }
424
425 SYSCTL_PROC(_net_link_fake, OID_AUTO, user_access,
426 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
427 0, 0, feth_user_access_sysctl, "IU", "Fake interface user access");
428
429 /* sysctl net.link.fake.if_adv_intvl (unit: millisecond) */
430 #define FETH_IF_ADV_INTVL_MIN 10
431 #define FETH_IF_ADV_INTVL_MAX INT_MAX
432
433 static int if_fake_if_adv_interval = 0; /* no interface advisory */
434 static int
435 feth_if_adv_interval_sysctl SYSCTL_HANDLER_ARGS
436 {
437 #pragma unused(oidp, arg1, arg2)
438 unsigned int new_value;
439 int changed;
440 int error;
441
442 error = sysctl_io_number(req, if_fake_if_adv_interval,
443 sizeof(if_fake_if_adv_interval), &new_value, &changed);
444 if (error == 0 && changed != 0) {
445 if ((new_value != 0) && (new_value > FETH_IF_ADV_INTVL_MAX ||
446 new_value < FETH_IF_ADV_INTVL_MIN)) {
447 return EINVAL;
448 }
449 if_fake_if_adv_interval = new_value;
450 }
451 return 0;
452 }
453
454 SYSCTL_PROC(_net_link_fake, OID_AUTO, if_adv_intvl,
455 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
456 feth_if_adv_interval_sysctl, "IU",
457 "Fake interface will generate interface advisories reports at the specified interval in ms");
458
459 /* sysctl net.link.fake.tx_drops */
460 /*
461 * Fake ethernet will drop packet on the transmit path at the specified
462 * rate, i.e drop one in every if_fake_tx_drops number of packets.
463 */
464 #define FETH_TX_DROPS_MIN 0
465 #define FETH_TX_DROPS_MAX INT_MAX
466 static int if_fake_tx_drops = 0; /* no packets are dropped */
467 static int
468 feth_fake_tx_drops_sysctl SYSCTL_HANDLER_ARGS
469 {
470 #pragma unused(oidp, arg1, arg2)
471 unsigned int new_value;
472 int changed;
473 int error;
474
475 error = sysctl_io_number(req, if_fake_tx_drops,
476 sizeof(if_fake_tx_drops), &new_value, &changed);
477 if (error == 0 && changed != 0) {
478 if (new_value > FETH_TX_DROPS_MAX ||
479 new_value < FETH_TX_DROPS_MIN) {
480 return EINVAL;
481 }
482 if_fake_tx_drops = new_value;
483 }
484 return 0;
485 }
486
487 SYSCTL_PROC(_net_link_fake, OID_AUTO, tx_drops,
488 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
489 feth_fake_tx_drops_sysctl, "IU",
490 "Fake interface will intermittently drop packets on Tx path");
491
492 /* sysctl.net.link.fake.tx_exp_policy */
493
494 typedef enum {
495 IFF_TX_EXP_POLICY_DISABLED = 0, /* Expiry notification disabled */
496 IFF_TX_EXP_POLICY_DROP_AND_NOTIFY = 1, /* Expiry notification enabled; drop + notify mode */
497 IFF_TX_EXP_POLICY_NOTIFY_ONLY = 2, /* Expiry notification enabled; notify only mode */
498 IFF_TX_EXP_POLICY_METADATA = 3, /* Expiry notification enabled; use packet metadata */
499 } iff_tx_exp_policy_t;
500 static iff_tx_exp_policy_t if_fake_tx_exp_policy = IFF_TX_EXP_POLICY_DISABLED;
501
502 static int
503 feth_fake_tx_exp_policy_sysctl SYSCTL_HANDLER_ARGS
504 {
505 #pragma unused(oidp, arg1, arg2)
506 unsigned int new_value;
507 int changed;
508 int error;
509
510 error = sysctl_io_number(req, if_fake_tx_exp_policy,
511 sizeof(if_fake_tx_exp_policy), &new_value, &changed);
512 FAKE_LOG(LOG_DEBUG, FE_DBGF_CONTROL,
513 "if_fake_tx_exp_policy: %u -> %u (%d)",
514 if_fake_tx_exp_policy, new_value, changed);
515 if (error == 0 && changed != 0) {
516 if (new_value > IFF_TX_EXP_POLICY_METADATA ||
517 new_value < IFF_TX_EXP_POLICY_DISABLED) {
518 return EINVAL;
519 }
520 if_fake_tx_exp_policy = new_value;
521 }
522 return 0;
523 }
524 SYSCTL_PROC(_net_link_fake, OID_AUTO, tx_exp_policy,
525 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
526 feth_fake_tx_exp_policy_sysctl, "IU",
527 "Fake interface handling policy for expired TX attempts "
528 "(0 disabled, 1 drop and notify, 2 notify only, 3 packet metadata)");
529
530 /* sysctl net.link.fake.tx_completion_mode */
531 typedef enum {
532 IFF_TX_COMPL_MODE_SYNC = 0,
533 IFF_TX_COMPL_MODE_ASYNC = 1,
534 } iff_tx_completion_mode_t;
535 static iff_tx_completion_mode_t if_tx_completion_mode = IFF_TX_COMPL_MODE_SYNC;
536 static int
537 feth_fake_tx_completion_mode_sysctl SYSCTL_HANDLER_ARGS
538 {
539 #pragma unused(oidp, arg1, arg2)
540 unsigned int new_value;
541 int changed;
542 int error;
543
544 error = sysctl_io_number(req, if_tx_completion_mode,
545 sizeof(if_tx_completion_mode), &new_value, &changed);
546 if (error == 0 && changed != 0) {
547 if (new_value > IFF_TX_COMPL_MODE_ASYNC ||
548 new_value < IFF_TX_COMPL_MODE_SYNC) {
549 return EINVAL;
550 }
551 if_tx_completion_mode = new_value;
552 }
553 return 0;
554 }
555 SYSCTL_PROC(_net_link_fake, OID_AUTO, tx_completion_mode,
556 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
557 feth_fake_tx_completion_mode_sysctl, "IU",
558 "Fake interface tx completion mode (0 synchronous, 1 asynchronous)");
559
560 /* sysctl net.link.fake.llink_cnt */
561
562 /* The maximum number of logical links (including default link) */
563 #define FETH_MAX_LLINKS 16
564 /*
565 * The default number of logical links (including default link).
566 * Zero means logical link mode is disabled.
567 */
568 #define FETH_DEF_LLINKS 0
569
570 static uint32_t if_fake_llink_cnt = FETH_DEF_LLINKS;
571 static int
572 feth_fake_llink_cnt_sysctl SYSCTL_HANDLER_ARGS
573 {
574 #pragma unused(oidp, arg1, arg2)
575 unsigned int new_value;
576 int changed;
577 int error;
578
579 error = sysctl_io_number(req, if_fake_llink_cnt,
580 sizeof(if_fake_llink_cnt), &new_value, &changed);
581 if (error == 0 && changed != 0) {
582 if (new_value > FETH_MAX_LLINKS) {
583 return EINVAL;
584 }
585 if_fake_llink_cnt = new_value;
586 }
587 return 0;
588 }
589
590 SYSCTL_PROC(_net_link_fake, OID_AUTO, llink_cnt,
591 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
592 feth_fake_llink_cnt_sysctl, "IU",
593 "Fake interface logical link count");
594
595 /* sysctl net.link.fake.qset_cnt */
596
597 /* The maximum number of qsets for each logical link */
598 #define FETH_MAX_QSETS 16
599 /* The default number of qsets for each logical link */
600 #define FETH_DEF_QSETS 4
601
602 static uint32_t if_fake_qset_cnt = FETH_DEF_QSETS;
603 static int
604 feth_fake_qset_cnt_sysctl SYSCTL_HANDLER_ARGS
605 {
606 #pragma unused(oidp, arg1, arg2)
607 unsigned int new_value;
608 int changed;
609 int error;
610
611 error = sysctl_io_number(req, if_fake_qset_cnt,
612 sizeof(if_fake_qset_cnt), &new_value, &changed);
613 if (error == 0 && changed != 0) {
614 if (new_value == 0 ||
615 new_value > FETH_MAX_QSETS) {
616 return EINVAL;
617 }
618 if_fake_qset_cnt = new_value;
619 }
620 return 0;
621 }
622
623 SYSCTL_PROC(_net_link_fake, OID_AUTO, qset_cnt,
624 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
625 feth_fake_qset_cnt_sysctl, "IU",
626 "Fake interface queue set count");
627
628
629 static void
_mbuf_adjust_pkthdr_and_data(mbuf_t m,int len)630 _mbuf_adjust_pkthdr_and_data(mbuf_t m, int len)
631 {
632 mbuf_setdata(m, (char *)mbuf_data(m) + len, mbuf_len(m) - len);
633 mbuf_pkthdr_adjustlen(m, -len);
634 }
635
636 static inline void *
get_bpf_header(mbuf_t m,struct ether_header * eh_p,struct ether_vlan_header * evl_p,size_t * header_len)637 get_bpf_header(mbuf_t m, struct ether_header * eh_p,
638 struct ether_vlan_header * evl_p, size_t * header_len)
639 {
640 void * header;
641
642 /* no VLAN tag, just use the ethernet header */
643 if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
644 header = eh_p;
645 *header_len = sizeof(*eh_p);
646 goto done;
647 }
648
649 /* has VLAN tag, populate the ether VLAN header */
650 bcopy(eh_p, evl_p,
651 offsetof(struct ether_header, ether_type)); /* dst+src ether */
652 evl_p->evl_encap_proto = htons(ETHERTYPE_VLAN); /* VLAN encap */
653 evl_p->evl_tag = htons(m->m_pkthdr.vlan_tag); /* tag */
654 evl_p->evl_proto = eh_p->ether_type; /* proto */
655 *header_len = sizeof(*evl_p);
656 header = evl_p;
657
658 done:
659 return header;
660 }
661
662 typedef void (*_tap_func)(ifnet_t interface, u_int32_t dlt, mbuf_t packet,
663 void *__sized_by(header_len) header, size_t header_len);
664
665 static void
fake_bpf_tap_common(ifnet_t ifp,mbuf_t m,struct ether_header * eh_p,_tap_func func)666 fake_bpf_tap_common(ifnet_t ifp, mbuf_t m, struct ether_header * eh_p,
667 _tap_func func)
668 {
669 struct ether_vlan_header evl;
670 void * header;
671 size_t header_len;
672
673 header = get_bpf_header(m, eh_p, &evl, &header_len);
674 (*func)(ifp, DLT_EN10MB, m, header, header_len);
675 }
676
/*
 * fake_bpf_tap_in
 * - taps an inbound packet to BPF, passing the (possibly VLAN
 *   encapsulated) link-layer header separately from the mbuf
 */
static inline void
fake_bpf_tap_in(ifnet_t ifp, mbuf_t m, struct ether_header * eh_p)
{
	fake_bpf_tap_common(ifp, m, eh_p, bpf_tap_in);
}


/*
 * fake_bpf_tap_out
 * - same as fake_bpf_tap_in, but for outbound packets
 */
static inline void
fake_bpf_tap_out(ifnet_t ifp, mbuf_t m, struct ether_header * eh_p)
{
	fake_bpf_tap_common(ifp, m, eh_p, bpf_tap_out);
}
689
690 /**
691 ** virtual ethernet structures, types
692 **/
693
694 #define IFF_NUM_TX_RINGS_WMM_MODE 4
695 #define IFF_NUM_RX_RINGS_WMM_MODE 1
696 #define IFF_MAX_TX_RINGS IFF_NUM_TX_RINGS_WMM_MODE
697 #define IFF_MAX_RX_RINGS IFF_NUM_RX_RINGS_WMM_MODE
698 #define IFF_NUM_TX_QUEUES_WMM_MODE 4
699 #define IFF_NUM_RX_QUEUES_WMM_MODE 1
700 #define IFF_MAX_TX_QUEUES IFF_NUM_TX_QUEUES_WMM_MODE
701 #define IFF_MAX_RX_QUEUES IFF_NUM_RX_QUEUES_WMM_MODE
702
703 #define IFF_MAX_BATCH_SIZE 32
704
705 typedef uint16_t iff_flags_t;
706 #define IFF_FLAGS_HWCSUM 0x0001
707 #define IFF_FLAGS_BSD_MODE 0x0002
708 #define IFF_FLAGS_DETACHING 0x0004
709 #define IFF_FLAGS_WMM_MODE 0x0008
710 #define IFF_FLAGS_MULTIBUFLETS 0x0010
711 #define IFF_FLAGS_TSO_SUPPORT 0x0020
712 #define IFF_FLAGS_LRO 0x0040
713 #define IFF_FLAGS_VLAN_MTU 0x0080
714 #define IFF_FLAGS_VLAN_TAGGING 0x0100
715 #define IFF_FLAGS_SEPARATE_FRAME_HEADER 0x0200
716
717 #if SKYWALK
718
719 typedef struct {
720 uuid_t fnx_provider;
721 uuid_t fnx_instance;
722 } fake_nx, *fake_nx_t;
723
724 typedef struct {
725 kern_netif_queue_t fq_queue;
726 } fake_queue;
727
728 typedef struct {
729 kern_netif_qset_t fqs_qset; /* provided by xnu */
730 fake_queue fqs_rx_queue[IFF_MAX_RX_QUEUES];
731 fake_queue fqs_tx_queue[IFF_MAX_TX_QUEUES];
732 uint32_t fqs_rx_queue_cnt;
733 uint32_t fqs_tx_queue_cnt;
734 uint32_t fqs_llink_idx;
735 uint32_t fqs_idx;
736 uint32_t fqs_dequeue_cnt;
737 uint64_t fqs_id;
738 boolean_t fqs_combined_mode;
739 } fake_qset;
740
741 typedef struct {
742 uint64_t fl_id;
743 uint32_t fl_idx;
744 uint32_t fl_qset_cnt;
745 fake_qset fl_qset[FETH_MAX_QSETS];
746 } fake_llink;
747
748 static kern_pbufpool_t S_pp;
749
750 #define IFF_TT_OUTPUT 0x01 /* generate trace_tag on output */
751 #define IFF_TT_INPUT 0x02 /* generate trace_tag on input */
752 static int if_fake_trace_tag_flags = 0;
753 SYSCTL_INT(_net_link_fake, OID_AUTO, trace_tag, CTLFLAG_RW | CTLFLAG_LOCKED,
754 &if_fake_trace_tag_flags, 0, "Fake interface generate trace_tag");
755 static packet_trace_tag_t if_fake_trace_tag_current = 1;
756
757 #endif /* SKYWALK */
758
/*
 * struct if_fake
 * - per-interface state for a fake ethernet ("feth") instance
 * - reference counted via iff_retain_count (see feth_release());
 *   freed by feth_free() once the count drops to zero
 */
struct if_fake {
	char iff_name[IFNAMSIZ]; /* our unique id */
	ifnet_t iff_ifp;
	iff_flags_t iff_flags;          /* IFF_FLAGS_* bits */
	uint32_t iff_retain_count;      /* atomic refcount, see feth_release */
	ifnet_t iff_peer;   /* the other end */
	int iff_media_current;
	int iff_media_active;
	uint32_t iff_media_count;
	int iff_media_list[IF_FAKE_MEDIA_LIST_MAX];
	boolean_t iff_start_busy;
	unsigned int iff_max_mtu;       /* reported by feth_max_mtu() */
	uint32_t iff_fcs;
	uint32_t iff_trailer_length;
#if SKYWALK
	fake_nx iff_nx;                 /* netif nexus provider/instance */
	struct netif_stats *iff_nifs;
	uint32_t iff_nifs_ref;
	uint32_t iff_llink_cnt;
	kern_channel_ring_t iff_rx_ring[IFF_MAX_RX_RINGS];
	kern_channel_ring_t iff_tx_ring[IFF_MAX_TX_RINGS];
	fake_llink *iff_llink __counted_by(FETH_MAX_LLINKS);
	thread_call_t iff_doorbell_tcall;
	thread_call_t iff_if_adv_tcall; /* interface advisory timer */
	boolean_t iff_doorbell_tcall_active;
	boolean_t iff_waiting_for_tcall;
	boolean_t iff_channel_connected;
	iff_pktpool_mode_t iff_pp_mode; /* global vs private packet pools */
	kern_pbufpool_t iff_rx_pp;      /* released in feth_free() */
	kern_pbufpool_t iff_tx_pp;      /* released in feth_free() */
	uint32_t iff_tx_headroom;
	unsigned int iff_adv_interval;  /* ms; 0 means advisories disabled */
	uint32_t iff_tx_drop_rate;
	uint32_t iff_tx_pkts_count;
	iff_tx_completion_mode_t iff_tx_completion_mode;
	bool iff_intf_adv_enabled;
	void *iff_intf_adv_kern_ctx;
	kern_nexus_capab_interface_advisory_notify_fn_t iff_intf_adv_notify;
	iff_tx_exp_policy_t iff_tx_exp_policy;
#endif /* SKYWALK */
};

typedef struct if_fake * if_fake_ref;
802
803 static if_fake_ref
804 ifnet_get_if_fake(ifnet_t ifp);
805
/*
 * feth_in_bsd_mode
 * - returns TRUE if the interface was configured as a plain BSD
 *   interface (IFF_FLAGS_BSD_MODE set)
 */
static inline boolean_t
feth_in_bsd_mode(if_fake_ref fakeif)
{
	return (fakeif->iff_flags & IFF_FLAGS_BSD_MODE) != 0;
}

/*
 * feth_set_detaching
 * - marks the interface as detaching; observed via feth_is_detaching()
 */
static inline void
feth_set_detaching(if_fake_ref fakeif)
{
	fakeif->iff_flags |= IFF_FLAGS_DETACHING;
}

/*
 * feth_is_detaching
 * - returns TRUE once feth_set_detaching() has been called
 */
static inline boolean_t
feth_is_detaching(if_fake_ref fakeif)
{
	return (fakeif->iff_flags & IFF_FLAGS_DETACHING) != 0;
}
823
824 static int
feth_enable_dequeue_stall(ifnet_t ifp,uint32_t enable)825 feth_enable_dequeue_stall(ifnet_t ifp, uint32_t enable)
826 {
827 int error;
828
829 if (enable != 0) {
830 error = ifnet_disable_output(ifp);
831 } else {
832 error = ifnet_enable_output(ifp);
833 }
834
835 return error;
836 }
837
838 #if SKYWALK
/*
 * feth_in_wmm_mode
 * - returns TRUE if the interface simulates 802.11 WMM mode
 *   (IFF_FLAGS_WMM_MODE set)
 */
static inline boolean_t
feth_in_wmm_mode(if_fake_ref fakeif)
{
	return (fakeif->iff_flags & IFF_FLAGS_WMM_MODE) != 0;
}

/*
 * feth_using_multibuflets
 * - returns TRUE if the interface uses multi-buflet packets
 *   (IFF_FLAGS_MULTIBUFLETS set)
 */
static inline boolean_t
feth_using_multibuflets(if_fake_ref fakeif)
{
	return (fakeif->iff_flags & IFF_FLAGS_MULTIBUFLETS) != 0;
}
static void feth_detach_netif_nexus(if_fake_ref fakeif);

/*
 * feth_has_intf_advisory_configured
 * - returns TRUE if an interface advisory interval has been set
 *   (see "sysctl net.link.fake.if_adv_intvl")
 */
static inline boolean_t
feth_has_intf_advisory_configured(if_fake_ref fakeif)
{
	return fakeif->iff_adv_interval > 0;
}
857 #endif /* SKYWALK */
858
/*
 * feth_supports_tso
 * - returns true if TSO offload support is enabled on this interface
 */
static inline bool
feth_supports_tso(if_fake_ref fakeif)
{
	return (fakeif->iff_flags & IFF_FLAGS_TSO_SUPPORT) != 0;
}

/*
 * feth_set_supports_tso
 * - enables the TSO support flag on this interface
 */
static inline void
feth_set_supports_tso(if_fake_ref fakeif)
{
	fakeif->iff_flags |= IFF_FLAGS_TSO_SUPPORT;
}

/*
 * feth_supports_vlan_mtu
 * - returns true if the VLAN MTU flag is set on this interface
 */
static inline bool
feth_supports_vlan_mtu(if_fake_ref fakeif)
{
	return (fakeif->iff_flags & IFF_FLAGS_VLAN_MTU) != 0;
}

/*
 * feth_set_supports_vlan_mtu
 * - sets the VLAN MTU flag on this interface
 */
static inline void
feth_set_supports_vlan_mtu(if_fake_ref fakeif)
{
	fakeif->iff_flags |= IFF_FLAGS_VLAN_MTU;
}

/*
 * feth_supports_vlan_tagging
 * - returns true if VLAN tagging is enabled on this interface
 */
static inline bool
feth_supports_vlan_tagging(if_fake_ref fakeif)
{
	return (fakeif->iff_flags & IFF_FLAGS_VLAN_TAGGING) != 0;
}

/*
 * feth_set_supports_vlan_tagging
 * - enables VLAN tagging on this interface
 */
static inline void
feth_set_supports_vlan_tagging(if_fake_ref fakeif)
{
	fakeif->iff_flags |= IFF_FLAGS_VLAN_TAGGING;
}
894
895
896 #define FETH_MAXUNIT IF_MAXUNIT
897 #define FETH_ZONE_MAX_ELEM MIN(IFNETS_MAX, FETH_MAXUNIT)
898
899 static int feth_clone_create(struct if_clone *, u_int32_t, void *);
900 static int feth_clone_destroy(ifnet_t);
901 static int feth_output(ifnet_t ifp, struct mbuf *m);
902 static void feth_start(ifnet_t ifp);
903 static int feth_ioctl(ifnet_t ifp, u_long cmd, void * addr);
904 static int feth_config(ifnet_t ifp, ifnet_t peer);
905 static void feth_if_free(ifnet_t ifp);
906 static void feth_ifnet_set_attrs(if_fake_ref fakeif, ifnet_t ifp);
907 static void feth_free(if_fake_ref fakeif);
908
909 static struct if_clone
910 feth_cloner = IF_CLONE_INITIALIZER(FAKE_ETHER_NAME,
911 feth_clone_create,
912 feth_clone_destroy,
913 0,
914 FETH_MAXUNIT);
915 static void interface_link_event(ifnet_t ifp, u_int32_t event_code);
916
917 /* some media words to pretend to be ethernet */
918 #define FAKE_DEFAULT_MEDIA IFM_MAKEWORD(IFM_ETHER, IFM_10G_T, IFM_FDX, 0)
919 static int default_media_words[] = {
920 IFM_MAKEWORD(IFM_ETHER, 0, 0, 0),
921 FAKE_DEFAULT_MEDIA,
922 IFM_MAKEWORD(IFM_ETHER, IFM_2500_T, IFM_FDX, 0),
923 IFM_MAKEWORD(IFM_ETHER, IFM_5000_T, IFM_FDX, 0),
924
925 IFM_MAKEWORD(IFM_ETHER, IFM_10G_KX4, IFM_FDX, 0),
926 IFM_MAKEWORD(IFM_ETHER, IFM_20G_KR2, IFM_FDX, 0),
927 IFM_MAKEWORD(IFM_ETHER, IFM_2500_SX, IFM_FDX, 0),
928 IFM_MAKEWORD(IFM_ETHER, IFM_25G_KR, IFM_FDX, 0),
929 IFM_MAKEWORD(IFM_ETHER, IFM_40G_SR4, IFM_FDX, 0),
930 IFM_MAKEWORD(IFM_ETHER, IFM_50G_CR2, IFM_FDX, 0),
931 IFM_MAKEWORD(IFM_ETHER, IFM_56G_R4, IFM_FDX, 0),
932 IFM_MAKEWORD(IFM_ETHER, IFM_100G_CR4, IFM_FDX, 0),
933 IFM_MAKEWORD(IFM_ETHER, IFM_400G_AUI8, IFM_FDX, 0),
934 };
935 #define default_media_words_count (sizeof(default_media_words) \
936 / sizeof (default_media_words[0]))
937
938 /**
939 ** veth locks
940 **/
941
942 static LCK_GRP_DECLARE(feth_lck_grp, "fake");
943 static LCK_MTX_DECLARE(feth_lck_mtx, &feth_lck_grp);
944
/*
 * feth_lock
 * - acquires the global feth mutex protecting module-wide state
 *   (e.g. the shared pool S_pp and per-ifnet if_fake lookups)
 */
static inline void
feth_lock(void)
{
	lck_mtx_lock(&feth_lck_mtx);
}

/*
 * feth_unlock
 * - releases the global feth mutex taken by feth_lock()
 */
static inline void
feth_unlock(void)
{
	lck_mtx_unlock(&feth_lck_mtx);
}
956
957 static inline int
get_max_mtu(int bsd_mode,unsigned int max_mtu)958 get_max_mtu(int bsd_mode, unsigned int max_mtu)
959 {
960 unsigned int mtu;
961
962 if (bsd_mode != 0) {
963 mtu = (njcl > 0) ? (M16KCLBYTES - ETHER_HDR_LEN)
964 : MBIGCLBYTES - ETHER_HDR_LEN;
965 if (mtu > max_mtu) {
966 mtu = max_mtu;
967 }
968 } else {
969 mtu = max_mtu;
970 }
971 return mtu;
972 }
973
974 static inline unsigned int
feth_max_mtu(ifnet_t ifp)975 feth_max_mtu(ifnet_t ifp)
976 {
977 if_fake_ref fakeif;
978 unsigned int max_mtu = ETHERMTU;
979
980 feth_lock();
981 fakeif = ifnet_get_if_fake(ifp);
982 if (fakeif != NULL) {
983 max_mtu = fakeif->iff_max_mtu;
984 }
985 feth_unlock();
986 return max_mtu;
987 }
988
/*
 * feth_free
 * - final teardown of an if_fake instance, called when the retain count
 *   drops to zero (see feth_release)
 * - in Skywalk (non-BSD) mode, releases the interface's packet-pool
 *   references; for the global shared pool this also drops the creation
 *   reference when this interface was its last user
 * - frees the llink array and the if_fake structure itself
 */
static void
feth_free(if_fake_ref fakeif)
{
	VERIFY(fakeif->iff_retain_count == 0);
#if SKYWALK
	if (!feth_in_bsd_mode(fakeif)) {
		if (fakeif->iff_pp_mode == IFF_PP_MODE_GLOBAL) {
			/* both directions must reference the shared pool */
			VERIFY(fakeif->iff_rx_pp == S_pp);
			VERIFY(fakeif->iff_tx_pp == S_pp);
			pp_release(fakeif->iff_rx_pp);
			fakeif->iff_rx_pp = NULL;
			pp_release(fakeif->iff_tx_pp);
			fakeif->iff_tx_pp = NULL;
			feth_lock();
			/*
			 * Refcount 1 means only the creation reference is
			 * left: drop it so the shared pool goes away too.
			 */
			if (S_pp != NULL && S_pp->pp_refcnt == 1) {
				pp_release(S_pp);
				S_pp = NULL;
			}
			feth_unlock();
		} else {
			/* private pools: rx/tx may alias (PRIVATE) or be split */
			if (fakeif->iff_rx_pp != NULL) {
				pp_release(fakeif->iff_rx_pp);
				fakeif->iff_rx_pp = NULL;
			}
			if (fakeif->iff_tx_pp != NULL) {
				pp_release(fakeif->iff_tx_pp);
				fakeif->iff_tx_pp = NULL;
			}
		}
	}
#endif /* SKYWALK */

	FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE, "%s", fakeif->iff_name);
	kfree_type(fake_llink, FETH_MAX_LLINKS, fakeif->iff_llink);
	kfree_type(struct if_fake, fakeif);
}
1025
1026 static void
feth_release(if_fake_ref fakeif)1027 feth_release(if_fake_ref fakeif)
1028 {
1029 u_int32_t old_retain_count;
1030
1031 old_retain_count = OSDecrementAtomic(&fakeif->iff_retain_count);
1032 switch (old_retain_count) {
1033 case 0:
1034 VERIFY(old_retain_count != 0);
1035 break;
1036 case 1:
1037 feth_free(fakeif);
1038 break;
1039 default:
1040 break;
1041 }
1042 return;
1043 }
1044
1045 #if SKYWALK
1046
/* take an additional reference on an if_fake instance (pairs with feth_release) */
static void
feth_retain(if_fake_ref fakeif)
{
	OSIncrementAtomic(&fakeif->iff_retain_count);
}
1052
/*
 * feth_packet_pool_init_prepare
 * - fill in a kern_pbufpool_init descriptor for this interface's pools
 * - buflet size comes from if_fake_buflet_size, or the larger TSO buffer
 *   size when TSO is enabled
 * - multi-buflet mode spreads an MTU-sized packet across several
 *   on-demand buflets; otherwise a single buflet large enough for the
 *   MTU is used per packet
 */
static void
feth_packet_pool_init_prepare(if_fake_ref fakeif,
    struct kern_pbufpool_init *pp_init)
{
	uint32_t max_mtu = fakeif->iff_max_mtu;
	uint32_t buflet_size = if_fake_buflet_size;

	bzero(pp_init, sizeof(*pp_init));
	pp_init->kbi_version = KERN_PBUFPOOL_CURRENT_VERSION;
	pp_init->kbi_flags |= KBIF_VIRTUAL_DEVICE;
	pp_init->kbi_packets = 1024; /* TBD configurable */
	if (feth_supports_tso(fakeif)) {
		buflet_size = if_fake_tso_buffer_size;
	}
	if (feth_using_multibuflets(fakeif)) {
		/* enough buflets to cover max_mtu for every packet */
		pp_init->kbi_bufsize = buflet_size;
		pp_init->kbi_max_frags = howmany(max_mtu, buflet_size);
		pp_init->kbi_buflets = pp_init->kbi_packets *
		    pp_init->kbi_max_frags;
		pp_init->kbi_flags |= KBIF_BUFFER_ON_DEMAND;
	} else {
		pp_init->kbi_bufsize = max(max_mtu, buflet_size);
		pp_init->kbi_max_frags = 1;
		pp_init->kbi_buflets = pp_init->kbi_packets;
	}
	pp_init->kbi_buf_seg_size = skmem_usr_buf_seg_size;
	if (if_fake_user_access != 0) {
		pp_init->kbi_flags |= KBIF_USER_ACCESS;
	}
	pp_init->kbi_ctx = NULL;
	pp_init->kbi_ctx_retain = NULL;
	pp_init->kbi_ctx_release = NULL;
}
1086
1087 static errno_t
feth_packet_pool_make(if_fake_ref fakeif)1088 feth_packet_pool_make(if_fake_ref fakeif)
1089 {
1090 struct kern_pbufpool_init pp_init;
1091 errno_t err;
1092
1093 feth_packet_pool_init_prepare(fakeif, &pp_init);
1094
1095 switch (fakeif->iff_pp_mode) {
1096 case IFF_PP_MODE_GLOBAL:
1097 feth_lock();
1098 if (S_pp == NULL) {
1099 (void)snprintf((char *)pp_init.kbi_name,
1100 sizeof(pp_init.kbi_name), "%s", "feth shared pp");
1101 err = kern_pbufpool_create(&pp_init, &S_pp, NULL);
1102 }
1103 pp_retain(S_pp);
1104 feth_unlock();
1105 fakeif->iff_rx_pp = S_pp;
1106 pp_retain(S_pp);
1107 fakeif->iff_tx_pp = S_pp;
1108 break;
1109 case IFF_PP_MODE_PRIVATE:
1110 (void)snprintf((char *)pp_init.kbi_name,
1111 sizeof(pp_init.kbi_name), "%s pp", fakeif->iff_name);
1112 err = kern_pbufpool_create(&pp_init, &fakeif->iff_rx_pp, NULL);
1113 pp_retain(fakeif->iff_rx_pp);
1114 fakeif->iff_tx_pp = fakeif->iff_rx_pp;
1115 break;
1116 case IFF_PP_MODE_PRIVATE_SPLIT:
1117 (void)snprintf((char *)pp_init.kbi_name,
1118 sizeof(pp_init.kbi_name), "%s rx pp", fakeif->iff_name);
1119 pp_init.kbi_flags &= ~(KBIF_IODIR_IN | KBIF_IODIR_OUT |
1120 KBIF_BUFFER_ON_DEMAND | KBIF_KERNEL_READONLY);
1121 pp_init.kbi_flags |= (KBIF_IODIR_IN | KBIF_BUFFER_ON_DEMAND);
1122 pp_init.kbi_packets = 1024;
1123 pp_init.kbi_bufsize = if_fake_link_layer_aggregation_factor * 1024;
1124 err = kern_pbufpool_create(&pp_init, &fakeif->iff_rx_pp, NULL);
1125 if (err != 0) {
1126 FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
1127 "rx pp create failed %d", err);
1128 return err;
1129 }
1130 pp_init.kbi_flags &= ~(KBIF_IODIR_IN | KBIF_IODIR_OUT |
1131 KBIF_BUFFER_ON_DEMAND | KBIF_KERNEL_READONLY);
1132 pp_init.kbi_flags |= KBIF_IODIR_OUT;
1133 pp_init.kbi_packets = 1024; /* TBD configurable */
1134 pp_init.kbi_bufsize = fakeif->iff_max_mtu;
1135 (void)snprintf((char *)pp_init.kbi_name,
1136 sizeof(pp_init.kbi_name), "%s tx pp", fakeif->iff_name);
1137 err = kern_pbufpool_create(&pp_init, &fakeif->iff_tx_pp, NULL);
1138 if (err != 0) {
1139 FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
1140 "tx pp create failed %d", err);
1141 pp_release(fakeif->iff_rx_pp);
1142 return err;
1143 }
1144 break;
1145 default:
1146 VERIFY(0);
1147 __builtin_unreachable();
1148 }
1149
1150 return 0;
1151 }
1152
1153 static void
feth_packet_set_trace_tag(kern_packet_t ph,int flag)1154 feth_packet_set_trace_tag(kern_packet_t ph, int flag)
1155 {
1156 if (if_fake_trace_tag_flags & flag) {
1157 if (++if_fake_trace_tag_current == 0) {
1158 if_fake_trace_tag_current = 1;
1159 }
1160 kern_packet_set_trace_tag(ph, if_fake_trace_tag_current);
1161 }
1162 }
1163
/*
 * feth_clone_packet
 * - produce a packet in dif's RX pool containing the (single-buflet)
 *   payload of `sph', packing consecutive small packets into one backing
 *   buffer: if the previous destination packet `*pdph' still has room in
 *   its buffer past its data limit, the new packet is a lightweight clone
 *   sharing that buffer at the next 16-byte-aligned position; otherwise a
 *   fresh buffer is allocated from the pool
 * - used by the IFF_PP_MODE_PRIVATE_SPLIT RX path (see feth_rx_submit)
 * - returns 0 on success with *pdph set to the new packet; non-zero errno
 *   on allocation/clone failure (*pdph untouched in that case)
 */
static errno_t
feth_clone_packet(if_fake_ref dif, kern_packet_t sph, kern_packet_t *pdph)
{
	errno_t err = 0;
	kern_pbufpool_t pp = dif->iff_rx_pp;
	kern_packet_t dph = 0, dph0 = 0;
	kern_buflet_t sbuf, dbuf0 = NULL, dbuf;
	void *saddr, *daddr;
	uint32_t soff, doff;
	uint32_t slen, dlen;
	uint32_t dlim0, dlim;

	sbuf = kern_packet_get_next_buflet(sph, NULL);
	saddr = kern_buflet_get_data_address(sbuf);
	doff = soff = kern_buflet_get_data_offset(sbuf);
	dlen = slen = kern_buflet_get_data_length(sbuf);

	/* packet clone is only supported for single-buflet */
	ASSERT(kern_packet_get_buflet_count(sph) == 1);
	ASSERT(soff == kern_packet_get_headroom(sph));
	ASSERT(slen == kern_packet_get_data_length(sph));

	dph0 = *pdph;
	if (dph0 == 0) {
		/* no previous packet to share a buffer with */
		dlim0 = 0;
	} else {
		/*
		 * dlim0 = free space left in dph0's backing object past its
		 * current data limit; data limit is kept 16-byte aligned.
		 */
		dbuf0 = kern_packet_get_next_buflet(dph0, NULL);
		ASSERT(kern_buflet_get_object_limit(dbuf0) ==
		    PP_BUF_OBJ_SIZE_DEF(pp));
		ASSERT(kern_buflet_get_data_limit(dbuf0) % 16 == 0);
		dlim0 = ((uintptr_t)kern_buflet_get_object_address(dbuf0) +
		    kern_buflet_get_object_limit(dbuf0)) -
		    ((uintptr_t)kern_buflet_get_data_address(dbuf0) +
		    kern_buflet_get_data_limit(dbuf0));
	}

	if (doff + dlen > dlim0) {
		/* not enough room: allocate a fresh packet/buffer */
		err = kern_pbufpool_alloc_nosleep(pp, 1, &dph);
		if (err != 0) {
			STATS_INC(dif->iff_nifs, NETIF_STATS_DROP);
			STATS_INC(dif->iff_nifs, NETIF_STATS_DROP_NOMEM_PKT);
			return err;
		}
		dbuf = kern_packet_get_next_buflet(dph, NULL);
		ASSERT(kern_buflet_get_data_address(dbuf) ==
		    kern_buflet_get_object_address(dbuf));
		daddr = kern_buflet_get_data_address(dbuf);
		dlim = kern_buflet_get_object_limit(dbuf);
		ASSERT(dlim == PP_BUF_OBJ_SIZE_DEF(pp));
	} else {
		/* share dph0's buffer: clone and point past its data limit */
		err = kern_packet_clone_nosleep(dph0, &dph, KPKT_COPY_LIGHT);
		if (err != 0) {
			FAKE_LOG(LOG_INFO, FE_DBGF_OUTPUT,
			    "packet clone err %d", err);
			return err;
		}
		dbuf = kern_packet_get_next_buflet(dph, NULL);
		ASSERT(kern_buflet_get_object_address(dbuf) ==
		    kern_buflet_get_object_address(dbuf0));
		daddr = (void *)((uintptr_t)kern_buflet_get_data_address(dbuf0) +
		    kern_buflet_get_data_limit(dbuf0));
		dlim = dlim0;
	}

	ASSERT(doff + dlen <= dlim);

	ASSERT((uintptr_t)daddr % 16 == 0);

	bcopy((const void *)((uintptr_t)saddr + soff),
	    (void *)((uintptr_t)daddr + doff), slen);

	/* keep the data limit 16-byte aligned for the next clone */
	dlim = MIN(dlim, P2ROUNDUP(doff + dlen, 16));
	err = kern_buflet_set_data_address(dbuf, daddr);
	VERIFY(err == 0);
	err = kern_buflet_set_data_limit(dbuf, dlim);
	VERIFY(err == 0);
	err = kern_buflet_set_data_length(dbuf, dlen);
	VERIFY(err == 0);
	err = kern_buflet_set_data_offset(dbuf, doff);
	VERIFY(err == 0);
	err = kern_packet_set_headroom(dph, doff);
	VERIFY(err == 0);
	err = kern_packet_set_link_header_length(dph,
	    kern_packet_get_link_header_length(sph));
	VERIFY(err == 0);
	err = kern_packet_set_service_class(dph,
	    kern_packet_get_service_class(sph));
	VERIFY(err == 0);
	err = kern_packet_finalize(dph);
	VERIFY(err == 0);
	*pdph = dph;

	return err;
}
1258
1259 static inline void
feth_copy_buflet(kern_buflet_t sbuf,kern_buflet_t dbuf)1260 feth_copy_buflet(kern_buflet_t sbuf, kern_buflet_t dbuf)
1261 {
1262 errno_t err;
1263 uint32_t off, len;
1264 uint8_t *saddr, *daddr;
1265
1266 saddr = kern_buflet_get_data_address(sbuf);
1267 off = kern_buflet_get_data_offset(sbuf);
1268 len = kern_buflet_get_data_length(sbuf);
1269 daddr = kern_buflet_get_data_address(dbuf);
1270 bcopy((saddr + off), (daddr + off), len);
1271 err = kern_buflet_set_data_offset(dbuf, off);
1272 VERIFY(err == 0);
1273 err = kern_buflet_set_data_length(dbuf, len);
1274 VERIFY(err == 0);
1275 }
1276
1277 static int
feth_add_packet_trailer(kern_packet_t ph,void * trailer,size_t trailer_len)1278 feth_add_packet_trailer(kern_packet_t ph, void *trailer, size_t trailer_len)
1279 {
1280 errno_t err = 0;
1281
1282 ASSERT(trailer_len <= FETH_TRAILER_LENGTH_MAX);
1283
1284 kern_buflet_t buf = NULL, iter = NULL;
1285 while ((iter = kern_packet_get_next_buflet(ph, iter)) != NULL) {
1286 buf = iter;
1287 }
1288 ASSERT(buf != NULL);
1289
1290 uint32_t dlim = kern_buflet_get_data_limit(buf);
1291 uint32_t doff = kern_buflet_get_data_offset(buf);
1292 uint32_t dlen = kern_buflet_get_data_length(buf);
1293
1294 size_t trailer_room = dlim - doff - dlen;
1295
1296 if (trailer_room < trailer_len) {
1297 FAKE_LOG(LOG_INFO, FE_DBGF_OUTPUT, "not enough room");
1298 return ERANGE;
1299 }
1300
1301 void *data = (void *)((uintptr_t)kern_buflet_get_data_address(buf) + doff + dlen);
1302 memcpy(data, trailer, trailer_len);
1303
1304 err = kern_buflet_set_data_length(buf, dlen + trailer_len);
1305 VERIFY(err == 0);
1306
1307 err = kern_packet_finalize(ph);
1308 VERIFY(err == 0);
1309
1310 FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT, "%zuB trailer added", trailer_len);
1311
1312 return 0;
1313 }
1314
1315 static int
feth_add_packet_fcs(kern_packet_t ph)1316 feth_add_packet_fcs(kern_packet_t ph)
1317 {
1318 uint32_t crc = 0;
1319 int err;
1320
1321 ASSERT(sizeof(crc) == ETHER_CRC_LEN);
1322
1323 kern_buflet_t buf = NULL;
1324 while ((buf = kern_packet_get_next_buflet(ph, buf)) != NULL) {
1325 uint32_t doff = kern_buflet_get_data_offset(buf);
1326 uint32_t dlen = kern_buflet_get_data_length(buf);
1327 void *data = (void *)((uintptr_t)kern_buflet_get_data_address(buf) + doff);
1328 crc = crc32(crc, data, dlen);
1329 }
1330
1331 err = feth_add_packet_trailer(ph, &crc, ETHER_CRC_LEN);
1332 if (!err) {
1333 return err;
1334 }
1335
1336 err = kern_packet_set_link_ethfcs(ph);
1337 VERIFY(err == 0);
1338
1339 return 0;
1340 }
1341
/*
 * feth_copy_packet
 * - allocate a packet from dif's RX pool and copy `sph' into it, buflet by
 *   buflet; in multi-buflet mode additional destination buflets are
 *   allocated on demand for each source buflet past the first
 * - on a mid-copy buflet allocation failure the partially built packet is
 *   walked (verifying each buflet) and freed, and the error is returned
 * - on success, source metadata (headroom, link header length, service
 *   class) is mirrored onto the copy, the copy is finalized, and *pdph is
 *   set; *pdph is 0 on failure
 */
static errno_t
feth_copy_packet(if_fake_ref dif, kern_packet_t sph, kern_packet_t *pdph)
{
	errno_t err = 0;
	uint16_t i, bufcnt;
	mach_vm_address_t baddr;
	kern_buflet_t sbuf = NULL, dbuf = NULL;
	kern_pbufpool_t pp = dif->iff_rx_pp;
	kern_packet_t dph;
	boolean_t multi_buflet = feth_using_multibuflets(dif);

	bufcnt = kern_packet_get_buflet_count(sph);
	ASSERT((bufcnt == 1) || multi_buflet);
	*pdph = 0;

	err = kern_pbufpool_alloc_nosleep(pp, 1, &dph);
	if (err != 0) {
		STATS_INC(dif->iff_nifs, NETIF_STATS_DROP);
		STATS_INC(dif->iff_nifs, NETIF_STATS_DROP_NOMEM_PKT);
		return err;
	}

	/* pre-constructed single buflet packet copy */
	sbuf = kern_packet_get_next_buflet(sph, NULL);
	dbuf = kern_packet_get_next_buflet(dph, NULL);
	feth_copy_buflet(sbuf, dbuf);

	if (!multi_buflet) {
		goto done;
	}

	/* un-constructed multi-buflet packet copy */
	for (i = 1; i < bufcnt; i++) {
		kern_buflet_t dbuf_next = NULL;

		sbuf = kern_packet_get_next_buflet(sph, sbuf);
		VERIFY(sbuf != NULL);
		err = kern_pbufpool_alloc_buflet_nosleep(pp, &dbuf_next);
		if (err != 0) {
			STATS_INC(dif->iff_nifs, NETIF_STATS_DROP);
			STATS_INC(dif->iff_nifs, NETIF_STATS_DROP_NOMEM_BUF);
			break;
		}
		ASSERT(dbuf_next != NULL);
		feth_copy_buflet(sbuf, dbuf_next);
		err = kern_packet_add_buflet(dph, dbuf, dbuf_next);
		VERIFY(err == 0);
		dbuf = dbuf_next;
	}
	if (__improbable(err != 0)) {
		/*
		 * Cleanup path: walk the i buflets attached so far, checking
		 * each has a valid backing address, then free the packet.
		 */
		dbuf = NULL;
		while (i-- != 0) {
			dbuf = kern_packet_get_next_buflet(dph, dbuf);
			VERIFY(dbuf != NULL);
			baddr = (mach_vm_address_t)
			    kern_buflet_get_data_address(dbuf);
			VERIFY(baddr != 0);
		}
		kern_pbufpool_free(pp, dph);
		dph = 0;
	}

done:
	if (__probable(err == 0)) {
		err = kern_packet_set_headroom(dph,
		    kern_packet_get_headroom(sph));
		VERIFY(err == 0);
		err = kern_packet_set_link_header_length(dph,
		    kern_packet_get_link_header_length(sph));
		VERIFY(err == 0);
		err = kern_packet_set_service_class(dph,
		    kern_packet_get_service_class(sph));
		VERIFY(err == 0);
		err = kern_packet_finalize(dph);
		VERIFY(err == 0);
		VERIFY(bufcnt == kern_packet_get_buflet_count(dph));
		*pdph = dph;
	}
	return err;
}
1422
1423 static inline void
feth_update_pkt_tso_metadata_for_rx(kern_packet_t ph)1424 feth_update_pkt_tso_metadata_for_rx(kern_packet_t ph)
1425 {
1426 /*
1427 * Nothing to do if not a TSO offloaded packet.
1428 */
1429 uint16_t seg_sz = 0;
1430 seg_sz = kern_packet_get_protocol_segment_size(ph);
1431 if (seg_sz == 0) {
1432 return;
1433 }
1434 /*
1435 * For RX, make the packet appear as a fully validated LRO packet.
1436 */
1437 packet_csum_flags_t csum_flags = PACKET_CSUM_IP_CHECKED |
1438 PACKET_CSUM_IP_VALID | PACKET_CSUM_DATA_VALID |
1439 PACKET_CSUM_PSEUDO_HDR;
1440 (void) kern_packet_set_inet_checksum(ph, csum_flags, 0, 0xFFFF, FALSE);
1441 return;
1442 }
1443
1444 static void
feth_rx_submit(if_fake_ref sif,if_fake_ref dif,kern_packet_t sphs[],uint32_t n_pkts)1445 feth_rx_submit(if_fake_ref sif, if_fake_ref dif, kern_packet_t sphs[],
1446 uint32_t n_pkts)
1447 {
1448 errno_t err = 0;
1449 struct kern_channel_ring_stat_increment stats;
1450 kern_channel_ring_t rx_ring = NULL;
1451 kern_channel_slot_t rx_slot = NULL, last_rx_slot = NULL;
1452 kern_packet_t sph = 0, dph = 0;
1453
1454 memset(&stats, 0, sizeof(stats));
1455
1456 rx_ring = dif->iff_rx_ring[0];
1457 if (rx_ring == NULL) {
1458 return;
1459 }
1460
1461 kr_enter(rx_ring, TRUE);
1462 kern_channel_reclaim(rx_ring);
1463 rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);
1464
1465 for (uint32_t i = 0; i < n_pkts && rx_slot != NULL; i++) {
1466 sph = sphs[i];
1467
1468 switch (dif->iff_pp_mode) {
1469 case IFF_PP_MODE_GLOBAL:
1470 sphs[i] = 0;
1471 dph = sph;
1472 feth_update_pkt_tso_metadata_for_rx(dph);
1473 err = kern_packet_finalize(dph);
1474 VERIFY(err == 0);
1475 break;
1476 case IFF_PP_MODE_PRIVATE:
1477 err = feth_copy_packet(dif, sph, &dph);
1478 break;
1479 case IFF_PP_MODE_PRIVATE_SPLIT:
1480 err = feth_clone_packet(dif, sph, &dph);
1481 break;
1482 default:
1483 VERIFY(0);
1484 __builtin_unreachable();
1485 }
1486 if (__improbable(err != 0)) {
1487 continue;
1488 }
1489
1490 if (sif->iff_trailer_length != 0) {
1491 feth_add_packet_trailer(dph, feth_trailer,
1492 sif->iff_trailer_length);
1493 }
1494 if (sif->iff_fcs != 0) {
1495 feth_add_packet_fcs(dph);
1496 }
1497 feth_packet_set_trace_tag(dph, IFF_TT_INPUT);
1498 bpf_tap_packet_in(dif->iff_ifp, DLT_EN10MB, dph, NULL, 0);
1499 stats.kcrsi_slots_transferred++;
1500 stats.kcrsi_bytes_transferred
1501 += kern_packet_get_data_length(dph);
1502
1503 /* attach the packet to the RX ring */
1504 err = kern_channel_slot_attach_packet(rx_ring, rx_slot, dph);
1505 VERIFY(err == 0);
1506 last_rx_slot = rx_slot;
1507 rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
1508 }
1509
1510 if (last_rx_slot != NULL) {
1511 kern_channel_advance_slot(rx_ring, last_rx_slot);
1512 kern_channel_increment_ring_net_stats(rx_ring, dif->iff_ifp,
1513 &stats);
1514 }
1515
1516 if (rx_ring != NULL) {
1517 kr_exit(rx_ring);
1518 kern_channel_notify(rx_ring, 0);
1519 }
1520 }
1521
/*
 * feth_rx_queue_submit
 * - logical-link variant of feth_rx_submit(): deliver n_pkts packets from
 *   `sif' into the default RX queue of queue set (llink_idx, qset_idx) on
 *   the peer `dif'
 * - out-of-range llink/qset indices or a missing default queue are logged
 *   and cause the whole batch to be dropped
 * - packet hand-over/copy/clone, trailer/FCS, trace tag, and BPF handling
 *   mirror feth_rx_submit(); the last packet requests an enqueue flush
 */
static void
feth_rx_queue_submit(if_fake_ref sif, if_fake_ref dif, uint32_t llink_idx,
    uint32_t qset_idx, kern_packet_t sphs[], uint32_t n_pkts)
{
	errno_t err = 0;
	kern_netif_queue_t queue;
	kern_packet_t sph = 0, dph = 0;
	fake_llink *llink;
	fake_qset *qset;

	if (llink_idx >= dif->iff_llink_cnt) {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_INPUT,
		    "invalid llink_idx idx %d (max %d) on peer %s",
		    llink_idx, dif->iff_llink_cnt, dif->iff_name);
		return;
	}
	llink = &dif->iff_llink[llink_idx];
	if (qset_idx >= llink->fl_qset_cnt) {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_INPUT,
		    "invalid qset_idx %d (max %d) on peer %s",
		    qset_idx, llink->fl_qset_cnt, dif->iff_name);
		return;
	}
	qset = &dif->iff_llink[llink_idx].fl_qset[qset_idx];
	queue = qset->fqs_rx_queue[0].fq_queue;
	if (queue == NULL) {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_INPUT,
		    "NULL default queue (llink_idx %d, qset_idx %d) on peer %s",
		    llink_idx, qset_idx, dif->iff_name);
		return;
	}
	for (uint32_t i = 0; i < n_pkts; i++) {
		uint32_t flags;

		sph = sphs[i];

		switch (dif->iff_pp_mode) {
		case IFF_PP_MODE_GLOBAL:
			/* shared pool: transfer ownership of the packet */
			sphs[i] = 0;
			dph = sph;
			feth_update_pkt_tso_metadata_for_rx(dph);
			break;
		case IFF_PP_MODE_PRIVATE:
			err = feth_copy_packet(dif, sph, &dph);
			break;
		case IFF_PP_MODE_PRIVATE_SPLIT:
			err = feth_clone_packet(dif, sph, &dph);
			break;
		default:
			VERIFY(0);
			__builtin_unreachable();
		}
		if (__improbable(err != 0)) {
			/* copy/clone failed; drop this packet and continue */
			continue;
		}

		/* best-effort: trailer/FCS failures leave the packet as-is */
		if (sif->iff_trailer_length != 0) {
			feth_add_packet_trailer(dph, feth_trailer,
			    sif->iff_trailer_length);
		}
		if (sif->iff_fcs != 0) {
			feth_add_packet_fcs(dph);
		}
		feth_packet_set_trace_tag(dph, IFF_TT_INPUT);
		bpf_tap_packet_in(dif->iff_ifp, DLT_EN10MB, dph, NULL, 0);

		/* flush on the final packet of the batch */
		flags = (i == n_pkts - 1) ?
		    KERN_NETIF_QUEUE_RX_ENQUEUE_FLAG_FLUSH : 0;
		kern_netif_queue_rx_enqueue(queue, dph, 1, flags);
	}
}
1593
1594 static void
feth_tx_complete(if_fake_ref fakeif,kern_packet_t phs[],uint32_t nphs)1595 feth_tx_complete(if_fake_ref fakeif, kern_packet_t phs[], uint32_t nphs)
1596 {
1597 for (uint32_t i = 0; i < nphs; i++) {
1598 kern_packet_t ph = phs[i];
1599 if (ph == 0) {
1600 continue;
1601 }
1602 int err = kern_packet_set_tx_completion_status(ph, 0);
1603 VERIFY(err == 0);
1604 kern_packet_tx_completion(ph, fakeif->iff_ifp);
1605 kern_pbufpool_free(fakeif->iff_tx_pp, phs[i]);
1606 phs[i] = 0;
1607 }
1608 }
1609
1610 #define NSEC_PER_USEC 1000ull
1611 /*
1612 * Calculate the time delta that passed from `since' to `until'.
1613 * If `until' happens before `since', returns negative value.
1614 */
1615 static bool
feth_packet_has_expired(if_fake_ref __unused fakeif,kern_packet_t ph,uint64_t * out_deadline)1616 feth_packet_has_expired(if_fake_ref __unused fakeif, kern_packet_t ph,
1617 uint64_t *out_deadline)
1618 {
1619 uint64_t now;
1620 uint64_t packet_expire_time_mach;
1621 int64_t time_until_expiration;
1622 errno_t err;
1623 bool expired = false;
1624
1625 static mach_timebase_info_data_t clock_timebase = {0, 0};
1626
1627 if (clock_timebase.denom == 0) {
1628 clock_timebase_info(&clock_timebase);
1629 VERIFY(clock_timebase.denom != 0);
1630 }
1631
1632 err = kern_packet_get_expire_time(ph, &packet_expire_time_mach);
1633 if (err) {
1634 goto out;
1635 }
1636
1637 now = mach_absolute_time();
1638 time_until_expiration = packet_expire_time_mach - now;
1639 if (time_until_expiration < 0) {
1640 /* The packet had expired */
1641 expired = true;
1642 goto out;
1643 }
1644
1645 /* Convert the time_delta from mach ticks to nanoseconds */
1646 time_until_expiration *= clock_timebase.numer;
1647 time_until_expiration /= clock_timebase.denom;
1648 /* convert from nanoseconds to microseconds */
1649 time_until_expiration /= 1000ull;
1650
1651 if (if_fake_expiration_threshold_us < time_until_expiration) {
1652 /* packet has some life ahead of it */
1653 FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
1654 "Packet has %llu usec until expiration",
1655 time_until_expiration);
1656 goto out;
1657 }
1658
1659 out:
1660 if (expired && out_deadline) {
1661 *out_deadline = packet_expire_time_mach;
1662 }
1663
1664 return expired;
1665 }
1666
1667 static errno_t
feth_get_packet_notification_details(if_fake_ref fakeif,kern_packet_t ph,packet_id_t * pkt_id,uint32_t * nx_port_id)1668 feth_get_packet_notification_details(if_fake_ref fakeif, kern_packet_t ph,
1669 packet_id_t *pkt_id, uint32_t *nx_port_id)
1670 {
1671 errno_t err = 0;
1672
1673 err = kern_packet_get_packetid(ph, pkt_id);
1674 if (err != 0) {
1675 FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
1676 "%s err=%d getting packetid", fakeif->iff_name, err);
1677 return err;
1678 }
1679
1680 err = kern_packet_get_tx_nexus_port_id(ph, nx_port_id);
1681 if (err != 0) {
1682 FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
1683 "%s err=%d getting nx_port_id", fakeif->iff_name, err);
1684 return err;
1685 }
1686
1687 return 0;
1688 }
1689
1690 static packet_expiry_action_t
feth_get_effective_expn_action(if_fake_ref fakeif,kern_packet_t ph)1691 feth_get_effective_expn_action(if_fake_ref fakeif, kern_packet_t ph)
1692 {
1693 errno_t err;
1694 packet_expiry_action_t expiry_action;
1695
1696 switch (fakeif->iff_tx_exp_policy) {
1697 case IFF_TX_EXP_POLICY_DISABLED:
1698 expiry_action = PACKET_EXPIRY_ACTION_NONE;
1699 break;
1700 case IFF_TX_EXP_POLICY_NOTIFY_ONLY:
1701 expiry_action = PACKET_EXPIRY_ACTION_NOTIFY;
1702 break;
1703 case IFF_TX_EXP_POLICY_DROP_AND_NOTIFY:
1704 expiry_action = PACKET_EXPIRY_ACTION_DROP;
1705 break;
1706 case IFF_TX_EXP_POLICY_METADATA:
1707 err = kern_packet_get_expiry_action(ph, &expiry_action);
1708 if (err != 0) {
1709 if (err != ENOENT) {
1710 FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
1711 "Error %d when getting expiry action",
1712 err);
1713 }
1714 expiry_action = PACKET_EXPIRY_ACTION_NONE;
1715 }
1716 break;
1717 default:
1718 FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
1719 "Unrecognized value %d for \"net.link.fake.tx_exp_policy\"",
1720 fakeif->iff_tx_exp_policy);
1721 expiry_action = PACKET_EXPIRY_ACTION_NONE;
1722 }
1723
1724 return expiry_action;
1725 }
1726
/* returns true if the packet is selected for expiration and should be dropped */
static bool
feth_tx_expired_error(if_fake_ref fakeif, kern_packet_t ph)
{
	int err = 0;
	uint32_t nx_port_id = 0;
	os_channel_event_packet_transmit_expired_t expn = {0};
	packet_expiry_action_t expiry_action = PACKET_EXPIRY_ACTION_NONE;

	FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC, "%s", fakeif->iff_name);

	/* consult the policy only once the packet has actually expired */
	if (feth_packet_has_expired(fakeif, ph, &expn.packet_tx_expiration_deadline)) {
		expiry_action = feth_get_effective_expn_action(fakeif, ph);
	}

	bool drop_packet = (expiry_action == PACKET_EXPIRY_ACTION_DROP);
	if (expiry_action != PACKET_EXPIRY_ACTION_NONE) {
		/* set the expiration status code */
		expn.packet_tx_expiration_status = drop_packet ?
		    CHANNEL_EVENT_PKT_TRANSMIT_EXPIRED_ERR_EXPIRED_DROPPED :
		    CHANNEL_EVENT_PKT_TRANSMIT_EXPIRED_ERR_EXPIRED_NOT_DROPPED;

		/* Mark the expiration timestamp */
		expn.packet_tx_expiration_timestamp = mach_absolute_time();

		err = feth_get_packet_notification_details(fakeif, ph,
		    &expn.packet_id, &nx_port_id);

		/* post the expiration event to the owning channel */
		if (err == 0) {
			err = kern_channel_event_transmit_expired(
				fakeif->iff_ifp, &expn, nx_port_id);
			FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
			    "%s sent expiry notification on nexus port "
			    "%u notif code %u",
			    fakeif->iff_name, nx_port_id,
			    expn.packet_tx_expiration_status);
		}
		if (err != 0) {
			FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
			    "%s err=%d, nx_port_id: 0x%x",
			    fakeif->iff_name, err, nx_port_id);
		}
	}

	return drop_packet;
}
1773
/* returns true if the packet is selected for TX error & dropped */
static bool
feth_tx_complete_error(if_fake_ref fakeif, kern_packet_t ph)
{
	int err;

	/*
	 * Trigger only when the packet counter exactly matches the
	 * configured drop rate (0 disables the simulation).
	 * NOTE(review): iff_tx_pkts_count is presumably maintained by the
	 * TX path — confirm against the caller.
	 */
	if (fakeif->iff_tx_drop_rate == 0 ||
	    fakeif->iff_tx_pkts_count != fakeif->iff_tx_drop_rate) {
		return false;
	}
	/* simulate TX completion error on the packet */
	if (fakeif->iff_tx_completion_mode == IFF_TX_COMPL_MODE_SYNC) {
		/* synchronous mode: report through the completion callback */
		err = kern_packet_set_tx_completion_status(ph,
		    CHANNEL_EVENT_PKT_TRANSMIT_STATUS_ERR_RETRY_FAILED);
		VERIFY(err == 0);
		kern_packet_tx_completion(ph, fakeif->iff_ifp);
	} else {
		/* async mode: post a channel event carrying the packet id */
		uint32_t nx_port_id = 0;
		os_channel_event_packet_transmit_status_t pkt_tx_status = {0};

		pkt_tx_status.packet_status =
		    CHANNEL_EVENT_PKT_TRANSMIT_STATUS_ERR_RETRY_FAILED;
		err = feth_get_packet_notification_details(fakeif, ph,
		    &pkt_tx_status.packet_id, &nx_port_id);
		if (err == 0) {
			err = kern_channel_event_transmit_status(
				fakeif->iff_ifp, &pkt_tx_status, nx_port_id);
		}
		if (err != 0) {
			FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
			    "%s err=%d, nx_port_id: 0x%x",
			    fakeif->iff_name, err, nx_port_id);
		}
	}

	return true;
}
1811
/*
 * feth_if_adv (thread call)
 * - periodic interface-advisory reporter armed by
 *   feth_if_adv_tcall_create(); keeps re-arming itself while the channel
 *   stays connected and the interface is not detaching
 * - builds a synthetic WiFi-style TX capacity report from ifnet_stat()
 *   and hands it to the registered advisory notify callback
 */
static void
feth_if_adv(thread_call_param_t arg0, thread_call_param_t arg1)
{
#pragma unused(arg1)
	errno_t error;
	if_fake_ref fakeif = (if_fake_ref)arg0;
	struct ifnet_interface_advisory if_adv;
	struct ifnet_stats_param if_stat;

	/* bail out early if the interface is going away */
	feth_lock();
	if (feth_is_detaching(fakeif) || !fakeif->iff_channel_connected) {
		feth_unlock();
		return;
	}
	feth_unlock();

	/*
	 * NOTE(review): iff_intf_adv_enabled is read after dropping the
	 * lock — presumably benign for this test interface; confirm.
	 */
	if (!fakeif->iff_intf_adv_enabled) {
		goto done;
	}

	error = ifnet_stat(fakeif->iff_ifp, &if_stat);
	if (error != 0) {
		FAKE_LOG(LOG_NOTICE, 0, "%s: ifnet_stat() failed %d",
		    fakeif->iff_name, error);
		goto done;
	}
	/* fabricate a plausible 1Gbps capacity report */
	if_adv.header.version = IF_INTERFACE_ADVISORY_VERSION_CURRENT;
	if_adv.header.direction = IF_INTERFACE_ADVISORY_DIRECTION_TX;
	if_adv.header.interface_type =
	    IF_INTERFACE_ADVISORY_INTERFACE_TYPE_WIFI;
	if_adv.capacity.timestamp = mach_absolute_time();
	if_adv.capacity.rate_trend_suggestion =
	    IF_INTERFACE_ADVISORY_RATE_SUGGESTION_RAMP_NEUTRAL;
	if_adv.capacity.max_bandwidth = 1000 * 1000 * 1000; /* 1Gbps */
	if_adv.capacity.total_byte_count = if_stat.packets_out;
	if_adv.capacity.average_throughput = 1000 * 1000 * 1000; /* 1Gbps */
	if_adv.capacity.flushable_queue_size = UINT32_MAX;
	if_adv.capacity.non_flushable_queue_size = UINT32_MAX;
	if_adv.capacity.average_delay = 1; /* ms */

	error = fakeif->iff_intf_adv_notify(fakeif->iff_intf_adv_kern_ctx,
	    &if_adv);
	if (error != 0) {
		FAKE_LOG(LOG_NOTICE, 0,
		    "%s: interface advisory report failed %d",
		    fakeif->iff_name, error);
	}

done:
	/* re-arm while still connected and not detaching */
	feth_lock();
	if (!feth_is_detaching(fakeif) && fakeif->iff_channel_connected) {
		uint64_t deadline;
		clock_interval_to_deadline(fakeif->iff_adv_interval,
		    NSEC_PER_MSEC, &deadline);
		thread_call_enter_delayed(fakeif->iff_if_adv_tcall, deadline);
	}
	feth_unlock();
}
1870
1871 static int
feth_if_adv_tcall_create(if_fake_ref fakeif)1872 feth_if_adv_tcall_create(if_fake_ref fakeif)
1873 {
1874 uint64_t deadline;
1875
1876 feth_lock();
1877 ASSERT(fakeif->iff_if_adv_tcall == NULL);
1878 ASSERT(fakeif->iff_adv_interval > 0);
1879 ASSERT(fakeif->iff_channel_connected);
1880 fakeif->iff_if_adv_tcall =
1881 thread_call_allocate_with_options(feth_if_adv,
1882 (thread_call_param_t)fakeif, THREAD_CALL_PRIORITY_KERNEL,
1883 THREAD_CALL_OPTIONS_ONCE);
1884 if (fakeif->iff_if_adv_tcall == NULL) {
1885 FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
1886 "%s if_adv tcall alloc failed",
1887 fakeif->iff_name);
1888 return ENXIO;
1889 }
1890 /* retain for the interface advisory thread call */
1891 feth_retain(fakeif);
1892 clock_interval_to_deadline(fakeif->iff_adv_interval,
1893 NSEC_PER_MSEC, &deadline);
1894 thread_call_enter_delayed(fakeif->iff_if_adv_tcall, deadline);
1895 feth_unlock();
1896 return 0;
1897 }
1898
/*
 * feth_if_adv_tcall_destroy
 * - cancel, wait for, and free the interface-advisory thread call, then
 *   drop the reference taken in feth_if_adv_tcall_create()
 * - thread_call_free() fails if the call re-armed itself between the
 *   cancel and the free; in that case cancel/free are retried once and
 *   the second attempt must succeed
 */
static void
feth_if_adv_tcall_destroy(if_fake_ref fakeif)
{
	thread_call_t tcall;

	feth_lock();
	ASSERT(fakeif->iff_if_adv_tcall != NULL);
	tcall = fakeif->iff_if_adv_tcall;
	feth_unlock();
	(void) thread_call_cancel_wait(tcall);
	if (!thread_call_free(tcall)) {
		boolean_t freed;
		/* the call re-armed; cancel again and insist the free works */
		(void) thread_call_cancel_wait(tcall);
		freed = thread_call_free(tcall);
		VERIFY(freed);
	}
	feth_lock();
	fakeif->iff_if_adv_tcall = NULL;
	feth_unlock();
	/* release for the interface advisory thread call */
	feth_release(fakeif);
}
1921
1922
1923 /**
1924 ** nexus netif domain provider
1925 **/
1926 static errno_t
feth_nxdp_init(kern_nexus_domain_provider_t domprov)1927 feth_nxdp_init(kern_nexus_domain_provider_t domprov)
1928 {
1929 #pragma unused(domprov)
1930 return 0;
1931 }
1932
/* domain provider teardown callback: nothing to release */
static void
feth_nxdp_fini(kern_nexus_domain_provider_t domprov)
{
#pragma unused(domprov)
}
1938
/* UUID returned by kern_nexus_register_domain_provider() */
static uuid_t feth_nx_dom_prov;
1940
1941 static errno_t
feth_register_nexus_domain_provider(void)1942 feth_register_nexus_domain_provider(void)
1943 {
1944 const struct kern_nexus_domain_provider_init dp_init = {
1945 .nxdpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
1946 .nxdpi_flags = 0,
1947 .nxdpi_init = feth_nxdp_init,
1948 .nxdpi_fini = feth_nxdp_fini
1949 };
1950 errno_t err = 0;
1951
1952 /* feth_nxdp_init() is called before this function returns */
1953 err = kern_nexus_register_domain_provider(NEXUS_TYPE_NET_IF,
1954 (const uint8_t *)
1955 "com.apple.feth",
1956 &dp_init, sizeof(dp_init),
1957 &feth_nx_dom_prov);
1958 if (err != 0) {
1959 FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
1960 "failed to register domain provider");
1961 return err;
1962 }
1963 return 0;
1964 }
1965
1966 /**
1967 ** netif nexus routines
1968 **/
1969 static if_fake_ref
feth_nexus_context(kern_nexus_t nexus)1970 feth_nexus_context(kern_nexus_t nexus)
1971 {
1972 if_fake_ref fakeif;
1973
1974 fakeif = (if_fake_ref)kern_nexus_get_context(nexus);
1975 assert(fakeif != NULL);
1976 return fakeif;
1977 }
1978
1979 static uint8_t
feth_find_tx_ring_by_svc(kern_packet_svc_class_t svc_class)1980 feth_find_tx_ring_by_svc(kern_packet_svc_class_t svc_class)
1981 {
1982 switch (svc_class) {
1983 case KPKT_SC_VO:
1984 return 0;
1985 case KPKT_SC_VI:
1986 return 1;
1987 case KPKT_SC_BE:
1988 return 2;
1989 case KPKT_SC_BK:
1990 return 3;
1991 default:
1992 VERIFY(0);
1993 return 0;
1994 }
1995 }
1996
/*
 * Nexus ring init callback: record the newly created ring in the
 * if_fake instance so the doorbell and RX-submit paths can find it.
 * In WMM mode each TX ring slot is chosen by the ring's packet
 * service class; otherwise a single TX ring (index 0) is used.
 * There is exactly one RX ring (index 0).
 */
static errno_t
feth_nx_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_t channel, kern_channel_ring_t ring, boolean_t is_tx_ring,
    void **ring_ctx)
{
	if_fake_ref fakeif;
	int err;
#pragma unused(nxprov, channel, ring_ctx)
	feth_lock();
	fakeif = feth_nexus_context(nexus);
	if (feth_is_detaching(fakeif)) {
		/* interface is tearing down; don't record the ring */
		feth_unlock();
		return 0;
	}
	if (is_tx_ring) {
		if (feth_in_wmm_mode(fakeif)) {
			kern_packet_svc_class_t svc_class;
			uint8_t ring_idx;

			/* slot the TX ring by its service class */
			err = kern_channel_get_service_class(ring, &svc_class);
			VERIFY(err == 0);
			ring_idx = feth_find_tx_ring_by_svc(svc_class);
			VERIFY(ring_idx < IFF_NUM_TX_RINGS_WMM_MODE);
			VERIFY(fakeif->iff_tx_ring[ring_idx] == NULL);
			fakeif->iff_tx_ring[ring_idx] = ring;
		} else {
			VERIFY(fakeif->iff_tx_ring[0] == NULL);
			fakeif->iff_tx_ring[0] = ring;
		}
	} else {
		VERIFY(fakeif->iff_rx_ring[0] == NULL);
		fakeif->iff_rx_ring[0] = ring;
	}
	/* cache a pointer to this nexus's netif stats */
	fakeif->iff_nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
	feth_unlock();
	FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE, "%s: %s ring init",
	    fakeif->iff_name, is_tx_ring ? "TX" : "RX");
	return 0;
}
2036
/*
 * Nexus ring fini callback: forget the ring being torn down and,
 * when the last TX ring goes away, destroy the async doorbell
 * thread call.  If the thread call is mid-execution, wait for it
 * to finish (it wakes us via wakeup() in feth_async_doorbell())
 * before freeing it and dropping the reference taken when the
 * thread call was allocated.
 */
static void
feth_nx_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t ring)
{
#pragma unused(nxprov, ring)
	if_fake_ref fakeif;
	thread_call_t tcall = NULL;

	feth_lock();
	fakeif = feth_nexus_context(nexus);
	if (fakeif->iff_rx_ring[0] == ring) {
		fakeif->iff_rx_ring[0] = NULL;
		FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
		    "%s: RX ring fini", fakeif->iff_name);
	} else if (feth_in_wmm_mode(fakeif)) {
		int i;
		/* clear the slot holding this ring */
		for (i = 0; i < IFF_MAX_TX_RINGS; i++) {
			if (fakeif->iff_tx_ring[i] == ring) {
				fakeif->iff_tx_ring[i] = NULL;
				break;
			}
		}
		/* check whether any TX ring is still registered */
		for (i = 0; i < IFF_MAX_TX_RINGS; i++) {
			if (fakeif->iff_tx_ring[i] != NULL) {
				break;
			}
		}
		if (i == IFF_MAX_TX_RINGS) {
			/* last TX ring gone: take ownership of the tcall */
			tcall = fakeif->iff_doorbell_tcall;
			fakeif->iff_doorbell_tcall = NULL;
		}
		FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
		    "%s: TX ring fini", fakeif->iff_name);
	} else if (fakeif->iff_tx_ring[0] == ring) {
		tcall = fakeif->iff_doorbell_tcall;
		fakeif->iff_doorbell_tcall = NULL;
		fakeif->iff_tx_ring[0] = NULL;
	}
	fakeif->iff_nifs = NULL;
	feth_unlock();
	if (tcall != NULL) {
		boolean_t success;

		success = thread_call_cancel_wait(tcall);
		FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
		    "%s: thread_call_cancel %s", fakeif->iff_name,
		    success ? "SUCCESS" : "FAILURE");
		if (!success) {
			/*
			 * Cancel failed, so the thread call may be
			 * running right now; sleep until it clears
			 * iff_doorbell_tcall_active and wakes us.
			 */
			feth_lock();
			if (fakeif->iff_doorbell_tcall_active) {
				fakeif->iff_waiting_for_tcall = TRUE;
				FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
				    "%s: *waiting for threadcall",
				    fakeif->iff_name);
				do {
					msleep(fakeif, &feth_lck_mtx,
					    PZERO, "feth threadcall", 0);
				} while (fakeif->iff_doorbell_tcall_active);
				FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
				    "%s: ^threadcall done",
				    fakeif->iff_name);
				fakeif->iff_waiting_for_tcall = FALSE;
			}
			feth_unlock();
		}
		success = thread_call_free(tcall);
		FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
		    "%s: thread_call_free %s",
		    fakeif->iff_name,
		    success ? "SUCCESS" : "FAILURE");
		/* drop the reference taken when the tcall was allocated */
		feth_release(fakeif);
		VERIFY(success == TRUE);
	}
}
2111
/*
 * Nexus pre-connect callback.  feth keeps no per-channel context,
 * so this always accepts the connection.
 */
static errno_t
feth_nx_pre_connect(kern_nexus_provider_t nxprov,
    proc_t proc, kern_nexus_t nexus, nexus_port_t port, kern_channel_t channel,
    void **channel_context)
{
#pragma unused(nxprov, proc, nexus, port, channel, channel_context)
	return 0;
}
2120
/*
 * Nexus channel connected callback.  Takes a reference on the
 * if_fake instance (released in feth_nx_disconnected()) and marks
 * the channel connected so the TX/RX paths start moving packets.
 * If interface advisory reporting is configured, also creates the
 * advisory thread call.  Returns EBUSY if the interface is
 * detaching.
 */
static errno_t
feth_nx_connected(kern_nexus_provider_t nxprov,
    kern_nexus_t nexus, kern_channel_t channel)
{
#pragma unused(nxprov, channel)
	int err;
	if_fake_ref fakeif;

	fakeif = feth_nexus_context(nexus);
	feth_lock();
	if (feth_is_detaching(fakeif)) {
		feth_unlock();
		return EBUSY;
	}
	/* paired with the feth_release() in feth_nx_disconnected() */
	feth_retain(fakeif);
	fakeif->iff_channel_connected = TRUE;
	feth_unlock();
	if (feth_has_intf_advisory_configured(fakeif)) {
		err = feth_if_adv_tcall_create(fakeif);
		if (err != 0) {
			/*
			 * NOTE(review): on this error path the retain and
			 * iff_channel_connected remain set; presumably the
			 * disconnect callbacks still run and undo them --
			 * confirm against the nexus framework contract.
			 */
			return err;
		}
	}
	FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE, "%s: connected channel %p",
	    fakeif->iff_name, channel);
	return 0;
}
2148
/*
 * Nexus pre-disconnect callback.  Brings the interface down to
 * quiesce traffic, marks the channel disconnected under the feth
 * lock, then tears down the interface advisory thread call if one
 * was created in feth_nx_connected().
 */
static void
feth_nx_pre_disconnect(kern_nexus_provider_t nxprov,
    kern_nexus_t nexus, kern_channel_t channel)
{
#pragma unused(nxprov, channel)
	if_fake_ref fakeif;

	fakeif = feth_nexus_context(nexus);
	FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
	    "%s: pre-disconnect channel %p",
	    fakeif->iff_name, channel);
	/* Quiesce the interface and flush any pending outbound packets. */
	if_down(fakeif->iff_ifp);
	feth_lock();
	fakeif->iff_channel_connected = FALSE;
	feth_unlock();
	/*
	 * NOTE(review): iff_if_adv_tcall is read without the feth
	 * lock here -- confirm no concurrent creator can exist once
	 * the channel is marked disconnected.
	 */
	if (fakeif->iff_if_adv_tcall != NULL) {
		feth_if_adv_tcall_destroy(fakeif);
	}
}
2169
2170 static void
feth_nx_disconnected(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel)2171 feth_nx_disconnected(kern_nexus_provider_t nxprov,
2172 kern_nexus_t nexus, kern_channel_t channel)
2173 {
2174 #pragma unused(nxprov, channel)
2175 if_fake_ref fakeif;
2176
2177 fakeif = feth_nexus_context(nexus);
2178 FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE, "%s: disconnected channel %p",
2179 fakeif->iff_name, channel);
2180 feth_release(fakeif);
2181 }
2182
/*
 * Nexus slot init callback.  feth attaches no per-slot properties
 * or context, so this is a no-op that reports success.
 */
static errno_t
feth_nx_slot_init(kern_nexus_provider_t nxprov,
    kern_nexus_t nexus, kern_channel_ring_t ring, kern_channel_slot_t slot,
    uint32_t slot_index, struct kern_slot_prop **slot_prop_addr,
    void **slot_context)
{
#pragma unused(nxprov, nexus, ring, slot, slot_index, slot_prop_addr, slot_context)
	return 0;
}
2192
/*
 * Nexus slot fini callback.  Nothing was set up per-slot, so
 * there is nothing to tear down.
 */
static void
feth_nx_slot_fini(kern_nexus_provider_t nxprov,
    kern_nexus_t nexus, kern_channel_ring_t ring, kern_channel_slot_t slot,
    uint32_t slot_index)
{
#pragma unused(nxprov, nexus, ring, slot, slot_index)
}
2200
/*
 * TX sync callback (non-llink mode): drain the TX ring and hand
 * each packet directly to the peer feth interface's RX path.
 * Packets are detached from the ring, bpf-tapped, optionally
 * dropped (expiration/completion-error injection or disconnected
 * peer), then delivered to the peer in batches of up to
 * IFF_MAX_BATCH_SIZE via feth_rx_submit()/feth_tx_complete().
 * The whole transfer runs under the feth lock.
 */
static errno_t
feth_nx_sync_tx(kern_nexus_provider_t nxprov,
    kern_nexus_t nexus, kern_channel_ring_t tx_ring, uint32_t flags)
{
#pragma unused(nxprov)
	if_fake_ref fakeif;
	ifnet_t ifp;
	kern_channel_slot_t last_tx_slot = NULL;
	ifnet_t peer_ifp;
	if_fake_ref peer_fakeif = NULL;
	struct kern_channel_ring_stat_increment stats;
	kern_channel_slot_t tx_slot;
	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
	kern_packet_t pkts[IFF_MAX_BATCH_SIZE];
	uint32_t n_pkts = 0;

	memset(&stats, 0, sizeof(stats));

	STATS_INC(nifs, NETIF_STATS_TX_SYNC);
	fakeif = feth_nexus_context(nexus);
	FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
	    "%s ring %d flags 0x%x", fakeif->iff_name,
	    tx_ring->ckr_ring_id, flags);
	(void)flags;
	feth_lock();
	if (feth_is_detaching(fakeif) || !fakeif->iff_channel_connected) {
		feth_unlock();
		return 0;
	}
	ifp = fakeif->iff_ifp;
	peer_ifp = fakeif->iff_peer;
	/* validate that the peer exists and can accept packets */
	if (peer_ifp != NULL) {
		peer_fakeif = ifnet_get_if_fake(peer_ifp);
		if (peer_fakeif != NULL) {
			if (feth_is_detaching(peer_fakeif)) {
				FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
				    "%s peer fakeif %s is detaching",
				    fakeif->iff_name, peer_fakeif->iff_name);
				goto done;
			}
			if (!peer_fakeif->iff_channel_connected) {
				/*
				 * With TX expiration disabled, a
				 * disconnected peer means nothing to do;
				 * otherwise fall through so expiration
				 * processing still runs on the packets.
				 */
				if (fakeif->iff_tx_exp_policy ==
				    IFF_TX_EXP_POLICY_DISABLED) {
					FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
					    "%s peer fakeif %s channel not connected, expn: %d",
					    fakeif->iff_name, peer_fakeif->iff_name,
					    fakeif->iff_tx_exp_policy);
					goto done;
				}
			}
		} else {
			FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
			    "%s no peer fakeif (peer %p)",
			    fakeif->iff_name, peer_ifp);
			goto done;
		}
	} else {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
		    "%s no peer", fakeif->iff_name);
		goto done;
	}
	tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
	while (tx_slot != NULL) {
		uint16_t off;
		kern_packet_t sph;

		/* detach the packet from the TX ring */
		sph = kern_channel_slot_get_packet(tx_ring, tx_slot);
		VERIFY(sph != 0);
		kern_channel_slot_detach_packet(tx_ring, tx_slot, sph);

		/* bpf tap output */
		off = kern_packet_get_headroom(sph);
		VERIFY(off >= fakeif->iff_tx_headroom);
		kern_packet_set_link_header_length(sph, ETHER_HDR_LEN);
		feth_packet_set_trace_tag(sph, IFF_TT_OUTPUT);
		bpf_tap_packet_out(ifp, DLT_EN10MB, sph, NULL, 0);

		/* drop packets, if requested */
		fakeif->iff_tx_pkts_count++;
		if (feth_tx_expired_error(fakeif, sph) ||
		    feth_tx_complete_error(fakeif, sph) ||
		    !peer_fakeif->iff_channel_connected) {
			fakeif->iff_tx_pkts_count = 0;
			kern_pbufpool_free(fakeif->iff_tx_pp, sph);
			STATS_INC(nifs, NETIF_STATS_DROP);
			goto next_tx_slot;
		}

		ASSERT(sph != 0);
		STATS_INC(nifs, NETIF_STATS_TX_COPY_DIRECT);
		STATS_INC(nifs, NETIF_STATS_TX_PACKETS);

		stats.kcrsi_slots_transferred++;
		stats.kcrsi_bytes_transferred
		        += kern_packet_get_data_length(sph);

		/* prepare batch for receiver */
		pkts[n_pkts++] = sph;
		if (n_pkts == IFF_MAX_BATCH_SIZE) {
			/* batch full: deliver to peer, complete locally */
			feth_rx_submit(fakeif, peer_fakeif, pkts, n_pkts);
			feth_tx_complete(fakeif, pkts, n_pkts);
			n_pkts = 0;
		}

next_tx_slot:
		last_tx_slot = tx_slot;
		tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
	}

	/* catch last batch for receiver */
	if (n_pkts != 0) {
		feth_rx_submit(fakeif, peer_fakeif, pkts, n_pkts);
		feth_tx_complete(fakeif, pkts, n_pkts);
		n_pkts = 0;
	}

	if (last_tx_slot != NULL) {
		/* return the consumed slots and update ring stats */
		kern_channel_advance_slot(tx_ring, last_tx_slot);
		kern_channel_increment_ring_net_stats(tx_ring, ifp, &stats);
	}
done:
	feth_unlock();
	return 0;
}
2326
2327 static errno_t
feth_nx_sync_rx(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring,uint32_t flags)2328 feth_nx_sync_rx(kern_nexus_provider_t nxprov,
2329 kern_nexus_t nexus, kern_channel_ring_t ring, uint32_t flags)
2330 {
2331 #pragma unused(nxprov, ring, flags)
2332 if_fake_ref fakeif;
2333 struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
2334
2335 STATS_INC(nifs, NETIF_STATS_RX_SYNC);
2336 fakeif = feth_nexus_context(nexus);
2337 FAKE_LOG(LOG_DEBUG, FE_DBGF_INPUT, "%s", fakeif->iff_name);
2338 return 0;
2339 }
2340
2341 static errno_t
feth_nx_tx_dequeue_driver_managed(if_fake_ref fakeif,boolean_t doorbell_ctxt)2342 feth_nx_tx_dequeue_driver_managed(if_fake_ref fakeif, boolean_t doorbell_ctxt)
2343 {
2344 int i;
2345 errno_t error = 0;
2346 boolean_t more;
2347
2348 for (i = 0; i < IFF_NUM_TX_RINGS_WMM_MODE; i++) {
2349 kern_channel_ring_t ring = fakeif->iff_tx_ring[i];
2350 if (ring != NULL) {
2351 error = kern_channel_tx_refill(ring, UINT32_MAX,
2352 UINT32_MAX, doorbell_ctxt, &more);
2353 }
2354 if (error != 0) {
2355 FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
2356 "%s: TX refill ring %d (%s) %d",
2357 fakeif->iff_name, ring->ckr_ring_id,
2358 doorbell_ctxt ? "sync" : "async", error);
2359 if (!((error == EAGAIN) || (error == EBUSY))) {
2360 break;
2361 }
2362 } else {
2363 FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
2364 "%s: TX refilled ring %d (%s)",
2365 fakeif->iff_name, ring->ckr_ring_id,
2366 doorbell_ctxt ? "sync" : "async");
2367 }
2368 }
2369 return error;
2370 }
2371
/*
 * Async doorbell thread call: perform the deferred TX refill that
 * feth_nx_tx_doorbell() scheduled.  Marks
 * iff_doorbell_tcall_active while the refill runs so that
 * feth_nx_ring_fini() can wait for it, and wakes any such waiter
 * when done.
 *
 * Locking: the feth lock is held on entry to the "done" label --
 * either still held from the early-exit check, or re-taken after
 * the (unlocked) refill.
 */
static void
feth_async_doorbell(thread_call_param_t arg0, thread_call_param_t arg1)
{
#pragma unused(arg1)
	errno_t error;
	if_fake_ref fakeif = (if_fake_ref)arg0;
	kern_channel_ring_t ring;
	boolean_t more;

	feth_lock();
	ring = fakeif->iff_tx_ring[0];
	if (feth_is_detaching(fakeif) ||
	    !fakeif->iff_channel_connected ||
	    ring == NULL) {
		/* lock is still held at "done" */
		goto done;
	}
	fakeif->iff_doorbell_tcall_active = TRUE;
	feth_unlock();
	/* refill without the lock; the active flag guards teardown */
	if (feth_in_wmm_mode(fakeif)) {
		error = feth_nx_tx_dequeue_driver_managed(fakeif, FALSE);
	} else {
		error = kern_channel_tx_refill(ring, UINT32_MAX,
		    UINT32_MAX, FALSE, &more);
	}
	if (error != 0) {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT, "%s: TX refill failed %d",
		    fakeif->iff_name, error);
	} else {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT, "%s: TX refilled",
		    fakeif->iff_name);
	}

	feth_lock();
done:
	fakeif->iff_doorbell_tcall_active = FALSE;
	if (fakeif->iff_waiting_for_tcall) {
		/* feth_nx_ring_fini() is msleep()ing on fakeif */
		FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
		    "%s: threadcall waking up waiter", fakeif->iff_name);
		wakeup((caddr_t)fakeif);
	}
	feth_unlock();
}
2414
2415 static void
feth_schedule_async_doorbell(if_fake_ref fakeif)2416 feth_schedule_async_doorbell(if_fake_ref fakeif)
2417 {
2418 thread_call_t tcall;
2419
2420 feth_lock();
2421 if (feth_is_detaching(fakeif) || !fakeif->iff_channel_connected) {
2422 feth_unlock();
2423 return;
2424 }
2425 tcall = fakeif->iff_doorbell_tcall;
2426 if (tcall != NULL) {
2427 thread_call_enter(tcall);
2428 } else {
2429 tcall = thread_call_allocate_with_options(feth_async_doorbell,
2430 (thread_call_param_t)fakeif,
2431 THREAD_CALL_PRIORITY_KERNEL,
2432 THREAD_CALL_OPTIONS_ONCE);
2433 if (tcall == NULL) {
2434 FAKE_LOG(LOG_NOTICE, FE_DBGF_OUTPUT,
2435 "%s tcall alloc failed", fakeif->iff_name);
2436 } else {
2437 fakeif->iff_doorbell_tcall = tcall;
2438 feth_retain(fakeif);
2439 thread_call_enter(tcall);
2440 }
2441 }
2442 feth_unlock();
2443 }
2444
2445 static errno_t
feth_nx_tx_doorbell(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring,uint32_t flags)2446 feth_nx_tx_doorbell(kern_nexus_provider_t nxprov,
2447 kern_nexus_t nexus, kern_channel_ring_t ring, uint32_t flags)
2448 {
2449 #pragma unused(nxprov, ring, flags)
2450 errno_t error;
2451 if_fake_ref fakeif;
2452
2453 fakeif = feth_nexus_context(nexus);
2454 FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT, "%s", fakeif->iff_name);
2455
2456 if ((flags & KERN_NEXUS_TXDOORBELLF_ASYNC_REFILL) == 0) {
2457 boolean_t more;
2458 /* synchronous tx refill */
2459 if (feth_in_wmm_mode(fakeif)) {
2460 error = feth_nx_tx_dequeue_driver_managed(fakeif, TRUE);
2461 } else {
2462 error = kern_channel_tx_refill(ring, UINT32_MAX,
2463 UINT32_MAX, TRUE, &more);
2464 }
2465 if (error != 0) {
2466 FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
2467 "%s: TX refill (sync) %d", fakeif->iff_name, error);
2468 } else {
2469 FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
2470 "%s: TX refilled (sync)", fakeif->iff_name);
2471 }
2472 } else {
2473 FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
2474 "%s: schedule async refill", fakeif->iff_name);
2475 feth_schedule_async_doorbell(fakeif);
2476 }
2477 return 0;
2478 }
2479
2480 static errno_t
feth_netif_prepare(kern_nexus_t nexus,ifnet_t ifp)2481 feth_netif_prepare(kern_nexus_t nexus, ifnet_t ifp)
2482 {
2483 if_fake_ref fakeif;
2484
2485 fakeif = (if_fake_ref)kern_nexus_get_context(nexus);
2486 feth_ifnet_set_attrs(fakeif, ifp);
2487 return 0;
2488 }
2489
2490 static errno_t
feth_nx_intf_adv_config(void * prov_ctx,bool enable)2491 feth_nx_intf_adv_config(void *prov_ctx, bool enable)
2492 {
2493 if_fake_ref fakeif = prov_ctx;
2494
2495 feth_lock();
2496 fakeif->iff_intf_adv_enabled = enable;
2497 feth_unlock();
2498 FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
2499 "%s enable %d", fakeif->iff_name, enable);
2500 return 0;
2501 }
2502
2503 static errno_t
fill_capab_interface_advisory(if_fake_ref fakeif,void * contents,uint32_t * len)2504 fill_capab_interface_advisory(if_fake_ref fakeif, void *contents, uint32_t *len)
2505 {
2506 struct kern_nexus_capab_interface_advisory *capab = contents;
2507
2508 if (*len != sizeof(*capab)) {
2509 return EINVAL;
2510 }
2511 if (capab->kncia_version !=
2512 KERN_NEXUS_CAPAB_INTERFACE_ADVISORY_VERSION_1) {
2513 return EINVAL;
2514 }
2515 if (!feth_has_intf_advisory_configured(fakeif)) {
2516 return ENOTSUP;
2517 }
2518 VERIFY(capab->kncia_notify != NULL);
2519 fakeif->iff_intf_adv_kern_ctx = capab->kncia_kern_context;
2520 fakeif->iff_intf_adv_notify = capab->kncia_notify;
2521 capab->kncia_provider_context = fakeif;
2522 capab->kncia_config = feth_nx_intf_adv_config;
2523 return 0;
2524 }
2525
2526 static errno_t
feth_notify_steering_info(void * prov_ctx,void * qset_ctx,struct ifnet_traffic_descriptor_common * td,bool add)2527 feth_notify_steering_info(void *prov_ctx, void *qset_ctx,
2528 struct ifnet_traffic_descriptor_common *td, bool add)
2529 {
2530 #pragma unused(td)
2531 if_fake_ref fakeif = prov_ctx;
2532 fake_qset *qset = qset_ctx;
2533
2534 FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
2535 "%s: notify_steering_info: qset_id 0x%llx, %s",
2536 fakeif->iff_name, qset->fqs_id, add ? "add" : "remove");
2537 return 0;
2538 }
2539
2540 static errno_t
fill_capab_qset_extensions(if_fake_ref fakeif,void * contents,uint32_t * len)2541 fill_capab_qset_extensions(if_fake_ref fakeif, void *contents, uint32_t *len)
2542 {
2543 struct kern_nexus_capab_qset_extensions *capab = contents;
2544
2545 if (*len != sizeof(*capab)) {
2546 return EINVAL;
2547 }
2548 if (capab->cqe_version !=
2549 KERN_NEXUS_CAPAB_QSET_EXTENSIONS_VERSION_1) {
2550 return EINVAL;
2551 }
2552 capab->cqe_prov_ctx = fakeif;
2553 capab->cqe_notify_steering_info = feth_notify_steering_info;
2554 return 0;
2555 }
2556
2557 static errno_t
feth_nx_capab_config(kern_nexus_provider_t nxprov,kern_nexus_t nx,kern_nexus_capab_t capab,void * contents,uint32_t * len)2558 feth_nx_capab_config(kern_nexus_provider_t nxprov, kern_nexus_t nx,
2559 kern_nexus_capab_t capab, void *contents, uint32_t *len)
2560 {
2561 #pragma unused(nxprov)
2562 errno_t error;
2563 if_fake_ref fakeif;
2564
2565 fakeif = feth_nexus_context(nx);
2566 FAKE_LOG(LOG_DEBUG, FE_DBGF_CONTROL, "%s", fakeif->iff_name);
2567
2568 switch (capab) {
2569 case KERN_NEXUS_CAPAB_INTERFACE_ADVISORY:
2570 error = fill_capab_interface_advisory(fakeif, contents, len);
2571 break;
2572 case KERN_NEXUS_CAPAB_QSET_EXTENSIONS:
2573 error = fill_capab_qset_extensions(fakeif, contents, len);
2574 break;
2575 default:
2576 error = ENOTSUP;
2577 break;
2578 }
2579 return error;
2580 }
2581
2582 static int
feth_set_tso_mtu(ifnet_t ifp,uint32_t tso_v4_mtu,uint32_t tso_v6_mtu)2583 feth_set_tso_mtu(ifnet_t ifp, uint32_t tso_v4_mtu, uint32_t tso_v6_mtu)
2584 {
2585 int error;
2586
2587 error = ifnet_set_tso_mtu(ifp, AF_INET, tso_v4_mtu);
2588 if (error != 0) {
2589 FAKE_LOG(LOG_NOTICE, FE_DBGF_CONTROL,
2590 "set TSO MTU IPv4 failed on %s, err %d",
2591 if_name(ifp), error);
2592 return error;
2593 }
2594 error = ifnet_set_tso_mtu(ifp, AF_INET6, tso_v6_mtu);
2595 if (error != 0) {
2596 FAKE_LOG(LOG_NOTICE, FE_DBGF_CONTROL,
2597 "set TSO MTU IPv6 failed on %s, err %d",
2598 if_name(ifp), error);
2599 return error;
2600 }
2601 return 0;
2602 }
2603
2604 static int
feth_set_tso_offload(ifnet_t ifp)2605 feth_set_tso_offload(ifnet_t ifp)
2606 {
2607 ifnet_offload_t offload;
2608 int error;
2609
2610 offload = IFNET_TSO_IPV4 | IFNET_TSO_IPV6;
2611 error = ifnet_set_offload(ifp, offload);
2612 if (error != 0) {
2613 FAKE_LOG(LOG_NOTICE, FE_DBGF_CONTROL,
2614 "set TSO offload failed on %s, err %d",
2615 if_name(ifp), error);
2616 goto done;
2617 }
2618 error = feth_set_tso_mtu(ifp, if_fake_tso_buffer_size,
2619 if_fake_tso_buffer_size);
2620 done:
2621 return error;
2622 }
2623
/*
 * Create the netif nexus provider for this feth interface and
 * allocate a provider instance, which also creates the ifnet.
 *
 * On success *provider and *instance hold the registered UUIDs and
 * *ifp the new interface.  On failure the provider registration is
 * rolled back.  Note the "failed" label is the common exit and is
 * reached on success as well (with err == 0); only the nexus
 * attribute is cleaned up there.
 */
static errno_t
create_netif_provider_and_instance(if_fake_ref fakeif,
    struct ifnet_init_eparams * init_params, ifnet_t *ifp,
    uuid_t * provider, uuid_t * instance)
{
	errno_t err;
	nexus_controller_t controller = kern_nexus_shared_controller();
	struct kern_nexus_net_init net_init;
	nexus_name_t provider_name;
	nexus_attr_t nexus_attr = NULL;
	struct kern_nexus_provider_init prov_init = {
		.nxpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
		.nxpi_flags = NXPIF_VIRTUAL_DEVICE,
		.nxpi_pre_connect = feth_nx_pre_connect,
		.nxpi_connected = feth_nx_connected,
		.nxpi_pre_disconnect = feth_nx_pre_disconnect,
		.nxpi_disconnected = feth_nx_disconnected,
		.nxpi_ring_init = feth_nx_ring_init,
		.nxpi_ring_fini = feth_nx_ring_fini,
		.nxpi_slot_init = feth_nx_slot_init,
		.nxpi_slot_fini = feth_nx_slot_fini,
		.nxpi_sync_tx = feth_nx_sync_tx,
		.nxpi_sync_rx = feth_nx_sync_rx,
		.nxpi_tx_doorbell = feth_nx_tx_doorbell,
		.nxpi_config_capab = feth_nx_capab_config,
	};

	_CASSERT(IFF_MAX_RX_RINGS == 1);
	err = kern_nexus_attr_create(&nexus_attr);
	if (err != 0) {
		FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
		    "nexus attribute creation failed, error %d", err);
		goto failed;
	}
	if (feth_in_wmm_mode(fakeif)) {
		/* four WMM TX rings with the WMM queue mapping */
		err = kern_nexus_attr_set(nexus_attr, NEXUS_ATTR_TX_RINGS,
		    IFF_NUM_TX_RINGS_WMM_MODE);
		VERIFY(err == 0);
		err = kern_nexus_attr_set(nexus_attr, NEXUS_ATTR_RX_RINGS,
		    IFF_NUM_RX_RINGS_WMM_MODE);
		VERIFY(err == 0);
		err = kern_nexus_attr_set(nexus_attr, NEXUS_ATTR_QMAP,
		    NEXUS_QMAP_TYPE_WMM);
		VERIFY(err == 0);
	}

	err = kern_nexus_attr_set(nexus_attr, NEXUS_ATTR_ANONYMOUS, 1);
	VERIFY(err == 0);
	snprintf((char *)provider_name, sizeof(provider_name),
	    "com.apple.netif.%s", fakeif->iff_name);
	err = kern_nexus_controller_register_provider(controller,
	    feth_nx_dom_prov,
	    provider_name,
	    &prov_init,
	    sizeof(prov_init),
	    nexus_attr,
	    provider);
	if (err != 0) {
		FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
		    "register provider failed, error %d", err);
		goto failed;
	}
	bzero(&net_init, sizeof(net_init));
	net_init.nxneti_version = KERN_NEXUS_NET_CURRENT_VERSION;
	net_init.nxneti_flags = 0;
	net_init.nxneti_eparams = init_params;
	net_init.nxneti_lladdr = NULL;
	net_init.nxneti_prepare = feth_netif_prepare;
	net_init.nxneti_rx_pbufpool = fakeif->iff_rx_pp;
	net_init.nxneti_tx_pbufpool = fakeif->iff_tx_pp;
	err = kern_nexus_controller_alloc_net_provider_instance(controller,
	    *provider,
	    fakeif,
	    NULL,
	    instance,
	    &net_init,
	    ifp);
	if (err != 0) {
		/* roll back the provider registration */
		FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
		    "alloc_net_provider_instance failed, %d", err);
		kern_nexus_controller_deregister_provider(controller,
		    *provider);
		uuid_clear(*provider);
		goto failed;
	}
	if (feth_supports_tso(fakeif)) {
		/*
		 * NOTE(review): on TSO setup failure the provider
		 * instance created above is NOT torn down here --
		 * presumably the caller cleans up; confirm.
		 */
		if ((err = feth_set_tso_offload(*ifp)) != 0) {
			goto failed;
		}
	}

failed:
	if (nexus_attr != NULL) {
		kern_nexus_attr_destroy(nexus_attr);
	}
	return err;
}
2721
2722 /*
2723 * The nif_stats need to be referenced because we don't want it set
2724 * to NULL until the last llink is removed.
2725 */
2726 static void
get_nexus_stats(if_fake_ref fakeif,kern_nexus_t nexus)2727 get_nexus_stats(if_fake_ref fakeif, kern_nexus_t nexus)
2728 {
2729 if (++fakeif->iff_nifs_ref == 1) {
2730 ASSERT(fakeif->iff_nifs == NULL);
2731 fakeif->iff_nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
2732 }
2733 }
2734
2735 static void
clear_nexus_stats(if_fake_ref fakeif)2736 clear_nexus_stats(if_fake_ref fakeif)
2737 {
2738 if (--fakeif->iff_nifs_ref == 0) {
2739 ASSERT(fakeif->iff_nifs != NULL);
2740 fakeif->iff_nifs = NULL;
2741 }
2742 }
2743
2744 static errno_t
feth_nx_qset_init(kern_nexus_provider_t nxprov,kern_nexus_t nexus,void * llink_ctx,uint8_t qset_idx,uint64_t qset_id,kern_netif_qset_t qset,void ** qset_ctx)2745 feth_nx_qset_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
2746 void *llink_ctx, uint8_t qset_idx, uint64_t qset_id, kern_netif_qset_t qset,
2747 void **qset_ctx)
2748 {
2749 #pragma unused(nxprov)
2750 if_fake_ref fakeif;
2751 fake_llink *fl = llink_ctx;
2752 fake_qset *fqs;
2753
2754 feth_lock();
2755 fakeif = feth_nexus_context(nexus);
2756 if (feth_is_detaching(fakeif)) {
2757 feth_unlock();
2758 FAKE_LOG(LOG_DEBUG, FE_DBGF_CONTROL,
2759 "%s: detaching", fakeif->iff_name);
2760 return ENXIO;
2761 }
2762 if (qset_idx >= fl->fl_qset_cnt) {
2763 feth_unlock();
2764 FAKE_LOG(LOG_DEBUG, FE_DBGF_CONTROL,
2765 "%s: invalid qset_idx %d", fakeif->iff_name, qset_idx);
2766 return EINVAL;
2767 }
2768 fqs = &fl->fl_qset[qset_idx];
2769 ASSERT(fqs->fqs_qset == NULL);
2770 fqs->fqs_qset = qset;
2771 fqs->fqs_id = qset_id;
2772 *qset_ctx = fqs;
2773
2774 /* XXX This should really be done during registration */
2775 get_nexus_stats(fakeif, nexus);
2776 feth_unlock();
2777 return 0;
2778 }
2779
2780 static void
feth_nx_qset_fini(kern_nexus_provider_t nxprov,kern_nexus_t nexus,void * qset_ctx)2781 feth_nx_qset_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
2782 void *qset_ctx)
2783 {
2784 #pragma unused(nxprov)
2785 if_fake_ref fakeif;
2786 fake_qset *fqs = qset_ctx;
2787
2788 feth_lock();
2789 fakeif = feth_nexus_context(nexus);
2790 clear_nexus_stats(fakeif);
2791 ASSERT(fqs->fqs_qset != NULL);
2792 fqs->fqs_qset = NULL;
2793 fqs->fqs_id = 0;
2794 feth_unlock();
2795 }
2796
2797 static errno_t
feth_nx_queue_init(kern_nexus_provider_t nxprov,kern_nexus_t nexus,void * qset_ctx,uint8_t qidx,bool tx,kern_netif_queue_t queue,void ** queue_ctx)2798 feth_nx_queue_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
2799 void *qset_ctx, uint8_t qidx, bool tx, kern_netif_queue_t queue,
2800 void **queue_ctx)
2801 {
2802 #pragma unused(nxprov)
2803 if_fake_ref fakeif;
2804 fake_qset *fqs = qset_ctx;
2805 fake_queue *fq;
2806
2807 feth_lock();
2808 fakeif = feth_nexus_context(nexus);
2809 if (feth_is_detaching(fakeif)) {
2810 FAKE_LOG(LOG_DEBUG, FE_DBGF_CONTROL,
2811 "%s: detaching", fakeif->iff_name);
2812 feth_unlock();
2813 return ENXIO;
2814 }
2815 if (tx) {
2816 if (qidx >= fqs->fqs_tx_queue_cnt) {
2817 FAKE_LOG(LOG_DEBUG, FE_DBGF_CONTROL,
2818 "%s: invalid tx qidx %d", fakeif->iff_name, qidx);
2819 feth_unlock();
2820 return EINVAL;
2821 }
2822 fq = &fqs->fqs_tx_queue[qidx];
2823 } else {
2824 if (qidx >= fqs->fqs_rx_queue_cnt) {
2825 FAKE_LOG(LOG_DEBUG, FE_DBGF_CONTROL,
2826 "%s: invalid rx qidx %d", fakeif->iff_name, qidx);
2827 feth_unlock();
2828 return EINVAL;
2829 }
2830 fq = &fqs->fqs_rx_queue[qidx];
2831 }
2832 ASSERT(fq->fq_queue == NULL);
2833 fq->fq_queue = queue;
2834 *queue_ctx = fq;
2835 feth_unlock();
2836 return 0;
2837 }
2838
2839 static void
feth_nx_queue_fini(kern_nexus_provider_t nxprov,kern_nexus_t nexus,void * queue_ctx)2840 feth_nx_queue_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
2841 void *queue_ctx)
2842 {
2843 #pragma unused(nxprov, nexus)
2844 fake_queue *fq = queue_ctx;
2845
2846 feth_lock();
2847 ASSERT(fq->fq_queue != NULL);
2848 fq->fq_queue = NULL;
2849 feth_unlock();
2850 }
2851
/*
 * Deliver a chain of dequeued TX packets to the peer feth
 * interface (llink mode).  Each packet is unlinked from the chain,
 * bpf-tapped, optionally dropped (expiration/completion-error
 * injection), then forwarded to the peer's RX queue in batches of
 * up to IFF_MAX_BATCH_SIZE via feth_rx_queue_submit() and
 * completed locally via feth_tx_complete().  Caller holds the
 * feth lock.
 */
static void
feth_nx_tx_queue_deliver_pkt_chain(if_fake_ref fakeif, kern_packet_t sph,
    struct netif_stats *nifs, if_fake_ref peer_fakeif,
    uint32_t llink_idx, uint32_t qset_idx)
{
	kern_packet_t pkts[IFF_MAX_BATCH_SIZE];
	uint32_t n_pkts = 0;

	FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
	    "%s -> %s", fakeif->iff_name, peer_fakeif->iff_name);

	while (sph != 0) {
		uint16_t off;
		kern_packet_t next;

		/* unlink the packet from the chain */
		next = kern_packet_get_next(sph);
		kern_packet_set_next(sph, 0);

		/* bpf tap output */
		off = kern_packet_get_headroom(sph);
		VERIFY(off >= fakeif->iff_tx_headroom);
		kern_packet_set_link_header_length(sph, ETHER_HDR_LEN);
		feth_packet_set_trace_tag(sph, IFF_TT_OUTPUT);
		bpf_tap_packet_out(fakeif->iff_ifp, DLT_EN10MB, sph, NULL, 0);

		/* drop packets, if requested */
		fakeif->iff_tx_pkts_count++;
		if (feth_tx_expired_error(fakeif, sph) ||
		    feth_tx_complete_error(fakeif, sph)) {
			fakeif->iff_tx_pkts_count = 0;
			kern_pbufpool_free(fakeif->iff_tx_pp, sph);
			STATS_INC(nifs, NETIF_STATS_DROP);
			goto next_pkt;
		}
		ASSERT(sph != 0);
		STATS_INC(nifs, NETIF_STATS_TX_COPY_DIRECT);
		STATS_INC(nifs, NETIF_STATS_TX_PACKETS);

		/* prepare batch for receiver */
		pkts[n_pkts++] = sph;
		if (n_pkts == IFF_MAX_BATCH_SIZE) {
			/* batch full: deliver to peer, complete locally */
			feth_rx_queue_submit(fakeif, peer_fakeif, llink_idx,
			    qset_idx, pkts, n_pkts);
			feth_tx_complete(fakeif, pkts, n_pkts);
			n_pkts = 0;
		}
next_pkt:
		sph = next;
	}
	/* catch last batch for receiver */
	if (n_pkts != 0) {
		feth_rx_queue_submit(fakeif, peer_fakeif, llink_idx, qset_idx,
		    pkts, n_pkts);
		feth_tx_complete(fakeif, pkts, n_pkts);
		n_pkts = 0;
	}
}
2909
/*
 * TX notification callback for a queue set (llink mode): dequeue
 * pending packets from each TX queue in the set and deliver them
 * to the peer feth interface.  Optionally flips the qset between
 * combined and separate mode for test coverage when
 * if_fake_switch_combined_mode is enabled.  Runs entirely under
 * the feth lock.
 */
static errno_t
feth_nx_tx_qset_notify(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    void *qset_ctx, uint32_t flags)
{
#pragma unused(nxprov)
	if_fake_ref fakeif;
	ifnet_t ifp;
	ifnet_t peer_ifp;
	if_fake_ref peer_fakeif = NULL;
	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
	fake_qset *qset = qset_ctx;
	boolean_t detaching, connected;
	uint32_t i;
	errno_t err;

	STATS_INC(nifs, NETIF_STATS_TX_SYNC);
	fakeif = feth_nexus_context(nexus);
	FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
	    "%s qset %p, idx %d, flags 0x%x", fakeif->iff_name, qset,
	    qset->fqs_idx, flags);

	feth_lock();
	detaching = feth_is_detaching(fakeif);
	connected = fakeif->iff_channel_connected;
	if (detaching || !connected) {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
		    "%s: detaching %s, channel connected %s",
		    fakeif->iff_name,
		    (detaching ? "true" : "false"),
		    (connected ? "true" : "false"));
		feth_unlock();
		return 0;
	}
	ifp = fakeif->iff_ifp;
	peer_ifp = fakeif->iff_peer;
	/* validate that the peer exists and can accept packets */
	if (peer_ifp != NULL) {
		peer_fakeif = ifnet_get_if_fake(peer_ifp);
		if (peer_fakeif != NULL) {
			detaching = feth_is_detaching(peer_fakeif);
			connected = peer_fakeif->iff_channel_connected;
			if (detaching || !connected) {
				FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
				    "peer %s: detaching %s, "
				    "channel connected %s",
				    peer_fakeif->iff_name,
				    (detaching ? "true" : "false"),
				    (connected ? "true" : "false"));
				goto done;
			}
		} else {
			FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
			    "peer_fakeif is NULL");
			goto done;
		}
	} else {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT, "peer_ifp is NULL");
		goto done;
	}

	/*
	 * Test knob: periodically toggle the qset between combined
	 * and separate mode.  NOTE(review): fqs_dequeue_cnt is only
	 * compared here, not incremented -- presumably it is bumped
	 * elsewhere in the dequeue path; confirm.
	 */
	if (if_fake_switch_combined_mode &&
	    qset->fqs_dequeue_cnt >= if_fake_switch_mode_frequency) {
		if (qset->fqs_combined_mode) {
			kern_netif_set_qset_separate(qset->fqs_qset);
		} else {
			kern_netif_set_qset_combined(qset->fqs_qset);
		}
		qset->fqs_combined_mode = !qset->fqs_combined_mode;
		qset->fqs_dequeue_cnt = 0;
	}

	/* drain each TX queue in the set and deliver to the peer */
	for (i = 0; i < qset->fqs_tx_queue_cnt; i++) {
		kern_packet_t sph = 0;
		kern_netif_queue_t queue = qset->fqs_tx_queue[i].fq_queue;
		boolean_t more = FALSE;

		err = kern_netif_queue_tx_dequeue(queue, UINT32_MAX, UINT32_MAX,
		    &more, &sph);
		if (err != 0 && err != EAGAIN) {
			FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
			    "%s queue %p dequeue failed: err "
			    "%d", fakeif->iff_name, queue, err);
		}
		feth_nx_tx_queue_deliver_pkt_chain(fakeif, sph, nifs,
		    peer_fakeif, qset->fqs_llink_idx, qset->fqs_idx);
	}

done:
	feth_unlock();
	return 0;
}
3000
3001
3002 static errno_t
feth_nx_queue_tx_push(kern_nexus_provider_t nxprov,kern_nexus_t nexus,void * queue_ctx,kern_packet_t * ph,uint32_t * packetCount,uint32_t * byteCount)3003 feth_nx_queue_tx_push(kern_nexus_provider_t nxprov,
3004 kern_nexus_t nexus, void *queue_ctx, kern_packet_t *ph,
3005 uint32_t *packetCount, uint32_t *byteCount)
3006 {
3007 #pragma unused(nxprov)
3008 if_fake_ref fakeif;
3009 ifnet_t ifp;
3010 ifnet_t peer_ifp;
3011 if_fake_ref peer_fakeif = NULL;
3012 struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
3013 fake_queue *fq = queue_ctx;
3014 boolean_t detaching, connected;
3015
3016 STATS_INC(nifs, NETIF_STATS_TX_SYNC);
3017 fakeif = feth_nexus_context(nexus);
3018 FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT, "%s queue %p", fakeif->iff_name, fq);
3019
3020 feth_lock();
3021
3022 detaching = feth_is_detaching(fakeif);
3023 connected = fakeif->iff_channel_connected;
3024 if (detaching || !connected) {
3025 FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
3026 "%s: detaching %s, channel connected %s",
3027 fakeif->iff_name,
3028 (detaching ? "true" : "false"),
3029 (connected ? "true" : "false"));
3030 goto done;
3031 }
3032 ifp = fakeif->iff_ifp;
3033 peer_ifp = fakeif->iff_peer;
3034 if (peer_ifp != NULL) {
3035 peer_fakeif = ifnet_get_if_fake(peer_ifp);
3036 if (peer_fakeif != NULL) {
3037 detaching = feth_is_detaching(peer_fakeif);
3038 connected = peer_fakeif->iff_channel_connected;
3039 if (detaching || !connected) {
3040 FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
3041 "peer %s: detaching %s, "
3042 "channel connected %s",
3043 peer_fakeif->iff_name,
3044 (detaching ? "true" : "false"),
3045 (connected ? "true" : "false"));
3046 goto done;
3047 }
3048 } else {
3049 FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
3050 "peer_fakeif is NULL");
3051 goto done;
3052 }
3053 } else {
3054 FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT, "peer_ifp is NULL");
3055 goto done;
3056 }
3057
3058 *packetCount = *byteCount = 0;
3059
3060 kern_packet_t sph = *ph;
3061 while (sph != 0) {
3062 (*packetCount)++;
3063 *byteCount += kern_packet_get_data_length(sph);
3064 sph = kern_packet_get_next(sph);
3065 }
3066
3067 feth_nx_tx_queue_deliver_pkt_chain(fakeif, *ph, nifs,
3068 peer_fakeif, 0, 0);
3069
3070 *ph = 0;
3071
3072 done:
3073 feth_unlock();
3074 return 0;
3075 }
3076
3077
3078 static void
fill_qset_info_and_params(if_fake_ref fakeif,fake_llink * llink_info,uint32_t qset_idx,struct kern_nexus_netif_llink_qset_init * qset_init,bool is_def,bool is_low_latency)3079 fill_qset_info_and_params(if_fake_ref fakeif, fake_llink *llink_info,
3080 uint32_t qset_idx, struct kern_nexus_netif_llink_qset_init *qset_init,
3081 bool is_def, bool is_low_latency)
3082 {
3083 fake_qset *qset_info = &llink_info->fl_qset[qset_idx];
3084
3085 qset_init->nlqi_flags =
3086 (is_def ? KERN_NEXUS_NET_LLINK_QSET_DEFAULT : 0) |
3087 (is_low_latency ? KERN_NEXUS_NET_LLINK_QSET_LOW_LATENCY : 0) |
3088 KERN_NEXUS_NET_LLINK_QSET_AQM;
3089
3090 if (feth_in_wmm_mode(fakeif)) {
3091 qset_init->nlqi_flags |= KERN_NEXUS_NET_LLINK_QSET_WMM_MODE;
3092 qset_init->nlqi_num_txqs = IFF_NUM_TX_QUEUES_WMM_MODE;
3093 qset_init->nlqi_num_rxqs = IFF_NUM_RX_QUEUES_WMM_MODE;
3094 } else {
3095 qset_init->nlqi_num_txqs = 1;
3096 qset_init->nlqi_num_rxqs = 1;
3097 }
3098 qset_info->fqs_tx_queue_cnt = qset_init->nlqi_num_txqs;
3099 qset_info->fqs_rx_queue_cnt = qset_init->nlqi_num_rxqs;
3100
3101 /* These are needed for locating the peer qset */
3102 qset_info->fqs_llink_idx = llink_info->fl_idx;
3103 qset_info->fqs_idx = qset_idx;
3104 }
3105
3106 static void
fill_llink_info_and_params(if_fake_ref fakeif,uint32_t llink_idx,struct kern_nexus_netif_llink_init * llink_init,uint32_t llink_id,struct kern_nexus_netif_llink_qset_init * qset_init,uint32_t qset_cnt,uint32_t flags)3107 fill_llink_info_and_params(if_fake_ref fakeif, uint32_t llink_idx,
3108 struct kern_nexus_netif_llink_init *llink_init, uint32_t llink_id,
3109 struct kern_nexus_netif_llink_qset_init *qset_init, uint32_t qset_cnt,
3110 uint32_t flags)
3111 {
3112 fake_llink *llink_info = &fakeif->iff_llink[llink_idx];
3113 uint32_t i;
3114 bool create_ll_qset = if_fake_low_latency && (llink_idx != 0);
3115
3116 for (i = 0; i < qset_cnt; i++) {
3117 fill_qset_info_and_params(fakeif, llink_info, i,
3118 &qset_init[i], i == 0, create_ll_qset && i == 1);
3119 }
3120 llink_info->fl_idx = llink_idx;
3121
3122 /* This doesn't have to be the same as llink_idx */
3123 llink_info->fl_id = llink_id;
3124 llink_info->fl_qset_cnt = qset_cnt;
3125
3126 llink_init->nli_link_id = llink_id;
3127 llink_init->nli_num_qsets = qset_cnt;
3128 llink_init->nli_qsets = qset_init;
3129 llink_init->nli_flags = flags;
3130 llink_init->nli_ctx = llink_info;
3131 }
3132
/*
 * create_non_default_llinks
 * - add logical links 1..(if_fake_llink_cnt - 1) to the already-created
 *   nexus instance; the default llink (index 0) is created with the
 *   provider instance itself
 * - on any failure, removes every llink added so far and returns the error
 * - iff_llink_cnt is advanced as each llink is successfully added
 */
static errno_t
create_non_default_llinks(if_fake_ref fakeif)
{
	struct kern_nexus *nx;
	fake_nx_t fnx = &fakeif->iff_nx;
	struct kern_nexus_netif_llink_init llink_init;
	struct kern_nexus_netif_llink_qset_init qset_init[FETH_MAX_QSETS];
	errno_t err;
	uint64_t llink_id;
	uint32_t i;

	/* take a reference on the nexus for the duration of the additions */
	nx = nx_find(fnx->fnx_instance, FALSE);
	if (nx == NULL) {
		FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
		    "%s: nx not found", fakeif->iff_name);
		return ENXIO;
	}
	/* Default llink starts at index 0 */
	for (i = 1; i < if_fake_llink_cnt; i++) {
		llink_id = (uint64_t)i;

		/*
		 * The llink_init and qset_init structures are reused for
		 * each llink creation.
		 */
		fill_llink_info_and_params(fakeif, i, &llink_init,
		    llink_id, qset_init, if_fake_qset_cnt, 0);
		err = kern_nexus_netif_llink_add(nx, &llink_init);
		if (err != 0) {
			FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
			    "%s: llink add failed, error %d",
			    fakeif->iff_name, err);
			goto fail;
		}
		fakeif->iff_llink_cnt++;
	}
	nx_release(nx);
	return 0;

fail:
	/*
	 * NOTE(review): this cleanup starts at index 0, which includes the
	 * default llink, while remove_non_default_llinks() deliberately
	 * skips index 0 ("freed separately").  Confirm whether removing the
	 * default llink here is intentional.
	 */
	for (i = 0; i < fakeif->iff_llink_cnt; i++) {
		int error;
		fake_llink * __single ll = &fakeif->iff_llink[i];

		error = kern_nexus_netif_llink_remove(nx, ll->fl_id);
		if (error != 0) {
			FAKE_LOG(LOG_DEBUG, FE_DBGF_MISC,
			    "%s: llink remove failed, llink_id 0x%llx, "
			    "error %d", fakeif->iff_name,
			    ll->fl_id, error);
		}
		ll->fl_id = 0;
	}
	fakeif->iff_llink_cnt = 0;
	nx_release(nx);
	return err;
}
3190
3191 static errno_t
create_netif_llink_provider_and_instance(if_fake_ref fakeif,struct ifnet_init_eparams * init_params,ifnet_t * ifp,uuid_t * provider,uuid_t * instance)3192 create_netif_llink_provider_and_instance(if_fake_ref fakeif,
3193 struct ifnet_init_eparams * init_params, ifnet_t *ifp,
3194 uuid_t * provider, uuid_t * instance)
3195 {
3196 errno_t err;
3197 nexus_controller_t controller = kern_nexus_shared_controller();
3198 struct kern_nexus_net_init net_init;
3199 struct kern_nexus_netif_llink_init llink_init;
3200 struct kern_nexus_netif_llink_qset_init qsets[FETH_MAX_QSETS];
3201
3202 nexus_name_t provider_name;
3203 nexus_attr_t nexus_attr = NULL;
3204 struct kern_nexus_netif_provider_init prov_init = {
3205 .nxnpi_version = KERN_NEXUS_DOMAIN_PROVIDER_NETIF,
3206 .nxnpi_flags = NXPIF_VIRTUAL_DEVICE,
3207 .nxnpi_pre_connect = feth_nx_pre_connect,
3208 .nxnpi_connected = feth_nx_connected,
3209 .nxnpi_pre_disconnect = feth_nx_pre_disconnect,
3210 .nxnpi_disconnected = feth_nx_disconnected,
3211 .nxnpi_qset_init = feth_nx_qset_init,
3212 .nxnpi_qset_fini = feth_nx_qset_fini,
3213 .nxnpi_queue_init = feth_nx_queue_init,
3214 .nxnpi_queue_fini = feth_nx_queue_fini,
3215 .nxnpi_tx_qset_notify = feth_nx_tx_qset_notify,
3216 .nxnpi_config_capab = feth_nx_capab_config,
3217 .nxnpi_queue_tx_push = feth_nx_queue_tx_push
3218 };
3219
3220 err = kern_nexus_attr_create(&nexus_attr);
3221 if (err != 0) {
3222 FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
3223 "nexus attribute creation failed, error %d", err);
3224 goto failed;
3225 }
3226
3227 err = kern_nexus_attr_set(nexus_attr, NEXUS_ATTR_ANONYMOUS, 1);
3228 VERIFY(err == 0);
3229
3230 snprintf((char *)provider_name, sizeof(provider_name),
3231 "com.apple.netif.%s", fakeif->iff_name);
3232 err = kern_nexus_controller_register_provider(controller,
3233 feth_nx_dom_prov,
3234 provider_name,
3235 (struct kern_nexus_provider_init *)&prov_init,
3236 sizeof(prov_init),
3237 nexus_attr,
3238 provider);
3239 if (err != 0) {
3240 FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
3241 "register provider failed, error %d", err);
3242 goto failed;
3243 }
3244 bzero(&net_init, sizeof(net_init));
3245 net_init.nxneti_version = KERN_NEXUS_NET_CURRENT_VERSION;
3246 net_init.nxneti_flags = 0;
3247 net_init.nxneti_eparams = init_params;
3248 net_init.nxneti_lladdr = NULL;
3249 net_init.nxneti_prepare = feth_netif_prepare;
3250 net_init.nxneti_rx_pbufpool = fakeif->iff_rx_pp;
3251 net_init.nxneti_tx_pbufpool = fakeif->iff_tx_pp;
3252
3253 /*
3254 * Assume llink id is same as the index for if_fake.
3255 * This is not required for other drivers.
3256 */
3257 _CASSERT(NETIF_LLINK_ID_DEFAULT == 0);
3258 fill_llink_info_and_params(fakeif, 0, &llink_init,
3259 NETIF_LLINK_ID_DEFAULT, qsets, if_fake_qset_cnt,
3260 KERN_NEXUS_NET_LLINK_DEFAULT);
3261
3262 net_init.nxneti_llink = &llink_init;
3263
3264 err = kern_nexus_controller_alloc_net_provider_instance(controller,
3265 *provider, fakeif, NULL, instance, &net_init, ifp);
3266 if (err != 0) {
3267 FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
3268 "alloc_net_provider_instance failed, %d", err);
3269 kern_nexus_controller_deregister_provider(controller,
3270 *provider);
3271 uuid_clear(*provider);
3272 goto failed;
3273 }
3274 fakeif->iff_llink_cnt++;
3275
3276 if (if_fake_llink_cnt > 1) {
3277 err = create_non_default_llinks(fakeif);
3278 if (err != 0) {
3279 FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
3280 "create_non_default_llinks failed, %d", err);
3281 feth_detach_netif_nexus(fakeif);
3282 goto failed;
3283 }
3284 }
3285 if (feth_supports_tso(fakeif)) {
3286 if ((err = feth_set_tso_offload(*ifp)) != 0) {
3287 goto failed;
3288 }
3289 }
3290 failed:
3291 if (nexus_attr != NULL) {
3292 kern_nexus_attr_destroy(nexus_attr);
3293 }
3294 return err;
3295 }
3296
3297 static errno_t
feth_attach_netif_nexus(if_fake_ref fakeif,struct ifnet_init_eparams * init_params,ifnet_t * ifp)3298 feth_attach_netif_nexus(if_fake_ref fakeif,
3299 struct ifnet_init_eparams * init_params, ifnet_t *ifp)
3300 {
3301 errno_t error;
3302 fake_nx_t nx = &fakeif->iff_nx;
3303
3304 error = feth_packet_pool_make(fakeif);
3305 if (error != 0) {
3306 return error;
3307 }
3308 if (if_fake_llink_cnt == 0) {
3309 return create_netif_provider_and_instance(fakeif, init_params,
3310 ifp, &nx->fnx_provider, &nx->fnx_instance);
3311 } else {
3312 return create_netif_llink_provider_and_instance(fakeif,
3313 init_params, ifp, &nx->fnx_provider,
3314 &nx->fnx_instance);
3315 }
3316 }
3317
3318 static void
remove_non_default_llinks(if_fake_ref fakeif)3319 remove_non_default_llinks(if_fake_ref fakeif)
3320 {
3321 struct kern_nexus *nx;
3322 fake_nx_t fnx = &fakeif->iff_nx;
3323 uint32_t i;
3324
3325 if (fakeif->iff_llink_cnt <= 1) {
3326 return;
3327 }
3328 nx = nx_find(fnx->fnx_instance, FALSE);
3329 if (nx == NULL) {
3330 FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
3331 "%s: nx not found", fakeif->iff_name);
3332 return;
3333 }
3334 /* Default llink (at index 0) is freed separately */
3335 for (i = 1; i < fakeif->iff_llink_cnt; i++) {
3336 int err;
3337
3338 err = kern_nexus_netif_llink_remove(nx, fakeif->
3339 iff_llink[i].fl_id);
3340 if (err != 0) {
3341 FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
3342 "%s: llink remove failed, llink_id 0x%llx, "
3343 "error %d", fakeif->iff_name,
3344 fakeif->iff_llink[i].fl_id, err);
3345 }
3346 fakeif->iff_llink[i].fl_id = 0;
3347 }
3348 fakeif->iff_llink_cnt = 0;
3349 nx_release(nx);
3350 }
3351
3352 static void
detach_provider_and_instance(uuid_t provider,uuid_t instance)3353 detach_provider_and_instance(uuid_t provider, uuid_t instance)
3354 {
3355 nexus_controller_t controller = kern_nexus_shared_controller();
3356 errno_t err;
3357
3358 if (!uuid_is_null(instance)) {
3359 err = kern_nexus_controller_free_provider_instance(controller,
3360 instance);
3361 if (err != 0) {
3362 FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
3363 "free_provider_instance failed %d", err);
3364 }
3365 uuid_clear(instance);
3366 }
3367 if (!uuid_is_null(provider)) {
3368 err = kern_nexus_controller_deregister_provider(controller,
3369 provider);
3370 if (err != 0) {
3371 FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
3372 "deregister_provider %d", err);
3373 }
3374 uuid_clear(provider);
3375 }
3376 return;
3377 }
3378
3379 static void
feth_detach_netif_nexus(if_fake_ref fakeif)3380 feth_detach_netif_nexus(if_fake_ref fakeif)
3381 {
3382 fake_nx_t fnx = &fakeif->iff_nx;
3383
3384 remove_non_default_llinks(fakeif);
3385 detach_provider_and_instance(fnx->fnx_provider, fnx->fnx_instance);
3386 }
3387
3388 #endif /* SKYWALK */
3389
3390 /**
3391 ** feth interface routines
3392 **/
3393 static void
feth_ifnet_set_attrs(if_fake_ref fakeif,ifnet_t ifp)3394 feth_ifnet_set_attrs(if_fake_ref fakeif, ifnet_t ifp)
3395 {
3396 errno_t error;
3397 ifnet_offload_t offload = 0;
3398
3399 ifnet_set_addrlen(ifp, ETHER_ADDR_LEN);
3400 ifnet_set_baudrate(ifp, 0);
3401 ifnet_set_mtu(ifp, ETHERMTU);
3402 ifnet_set_flags(ifp,
3403 IFF_BROADCAST | IFF_MULTICAST | IFF_SIMPLEX,
3404 0xffff);
3405 ifnet_set_hdrlen(ifp, sizeof(struct ether_header));
3406 if ((fakeif->iff_flags & IFF_FLAGS_LRO) != 0) {
3407 offload |= IFNET_LRO;
3408 }
3409 if ((fakeif->iff_flags & IFF_FLAGS_HWCSUM) != 0) {
3410 offload |= IFNET_CSUM_IP | IFNET_CSUM_TCP | IFNET_CSUM_UDP |
3411 IFNET_CSUM_TCPIPV6 | IFNET_CSUM_UDPIPV6;
3412 }
3413 if (feth_supports_tso(fakeif)) {
3414 offload |= IFNET_TSO_IPV4 | IFNET_TSO_IPV6;
3415 }
3416 if (feth_supports_vlan_tagging(fakeif)) {
3417 offload |= IFNET_VLAN_TAGGING;
3418 } else if (feth_supports_vlan_mtu(fakeif)) {
3419 offload |= IFNET_VLAN_MTU;
3420 }
3421 error = ifnet_set_offload(ifp, offload);
3422 if (error != 0) {
3423 FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
3424 "ifnet_set_offload(%s, 0x%x) failed, %d",
3425 ifp->if_xname, offload, error);
3426 } else {
3427 FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
3428 "ifnet_set_offload(%s, 0x%x) succeeded",
3429 ifp->if_xname, offload);
3430 }
3431 }
3432
/*
 * interface_link_event
 * - post a KEV_DL_SUBCLASS kernel event (e.g. KEV_DL_LINK_ON/OFF) for ifp
 * - the payload carries the interface family, unit number, and name
 */
static void
interface_link_event(ifnet_t ifp, u_int32_t event_code)
{
	/* payload layout for DL-subclass link events */
	struct event {
		u_int32_t ifnet_family;
		u_int32_t unit;
		char if_name[IFNAMSIZ];
	};
	/*
	 * Header and payload live in one buffer; _Alignas keeps the
	 * kern_event_msg header (and thus the payload right after it)
	 * properly aligned.
	 */
	_Alignas(struct kern_event_msg) char message[sizeof(struct kern_event_msg) + sizeof(struct event)] = { 0 };
	struct kern_event_msg *header = (struct kern_event_msg*)message;
	struct event *data = (struct event *)(header + 1);

	header->total_size = sizeof(message);
	header->vendor_code = KEV_VENDOR_APPLE;
	header->kev_class = KEV_NETWORK_CLASS;
	header->kev_subclass = KEV_DL_SUBCLASS;
	header->event_code = event_code;
	data->ifnet_family = ifnet_family(ifp);
	data->unit = (u_int32_t)ifnet_unit(ifp);
	strlcpy(data->if_name, ifnet_name(ifp), IFNAMSIZ);
	ifnet_event(ifp, header);
}
3455
/*
 * ifnet_get_if_fake
 * - return the if_fake softc stored in the ifnet; NULL when the ifnet
 *   has no softc (e.g. already detached)
 */
static if_fake_ref
ifnet_get_if_fake(ifnet_t ifp)
{
	return (if_fake_ref)ifnet_softc(ifp);
}
3461
/*
 * feth_clone_create
 * - if_clone create handler: allocate and attach a new feth<unit> interface
 * - snapshots the global tunables first, validates their consistency,
 *   then builds the ifnet either in BSD mode (classic ifnet) or as a
 *   Skywalk-native netif nexus
 * - returns 0 on success or an errno; on failure all partially-created
 *   state is released via feth_release()
 */
static int
feth_clone_create(struct if_clone *ifc, u_int32_t unit, __unused void *params)
{
	bool bsd_mode;
	int error;
	if_fake_ref fakeif;
	struct ifnet_init_eparams feth_init;
	fake_llink *iff_llink;
	ifnet_t ifp;
	uint8_t mac_address[ETHER_ADDR_LEN];
	bool multi_buflet;
	iff_pktpool_mode_t pktpool_mode;
	bool tso_support;

	/* make local copy of globals needed to make consistency checks below */
	bsd_mode = (if_fake_bsd_mode != 0);
	multi_buflet = (if_fake_multibuflet != 0);
	tso_support = (if_fake_tso_support != 0);
	pktpool_mode = if_fake_pktpool_mode;

	if (!bsd_mode) {
		/* consistency checks */
		if (if_fake_llink_cnt == 0 &&
		    strbufcmp(sk_ll_prefix, FAKE_ETHER_NAME) == 0) {
			FAKE_LOG(LOG_NOTICE, FE_DBGF_LIFECYCLE,
			    "feth used as ifname prefix but logical link "
			    "support in feth is disabled.");
			return EINVAL;
		}
		if (tso_support && pktpool_mode != IFF_PP_MODE_GLOBAL) {
			FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
			    "TSO mode requires global packet pool mode");
			return EINVAL;
		}
		if (multi_buflet && pktpool_mode == IFF_PP_MODE_PRIVATE_SPLIT) {
			FAKE_LOG(LOG_DEBUG, FE_DBGF_LIFECYCLE,
			    "multi-buflet not supported for split rx & tx pool");
			return EINVAL;
		}
	}

	/* allocate the llink array and the softc itself */
	iff_llink = kalloc_type(fake_llink, FETH_MAX_LLINKS, Z_WAITOK_ZERO);
	if (iff_llink == NULL) {
		return ENOBUFS;
	}
	fakeif = kalloc_type(struct if_fake, Z_WAITOK_ZERO_NOFAIL);
	fakeif->iff_llink = iff_llink;
	fakeif->iff_retain_count = 1;
#define FAKE_ETHER_NAME_LEN     (sizeof(FAKE_ETHER_NAME) - 1)
	_CASSERT(FAKE_ETHER_NAME_LEN == 4);
	/* MAC address is "feth" + 16-bit unit number */
	bcopy(FAKE_ETHER_NAME, mac_address, FAKE_ETHER_NAME_LEN);
	mac_address[ETHER_ADDR_LEN - 2] = (unit & 0xff00) >> 8;
	mac_address[ETHER_ADDR_LEN - 1] = unit & 0xff;
	if (bsd_mode) {
		fakeif->iff_flags |= IFF_FLAGS_BSD_MODE;
	}
	if (if_fake_hwcsum != 0) {
		fakeif->iff_flags |= IFF_FLAGS_HWCSUM;
	}
	if (if_fake_lro != 0) {
		fakeif->iff_flags |= IFF_FLAGS_LRO;
	}
	if (if_fake_vlan_tagging != 0) {
		/* support VLAN tagging in hardware */
		feth_set_supports_vlan_tagging(fakeif);
	} else {
		/* support VLAN mtu-sized packets */
		feth_set_supports_vlan_mtu(fakeif);
	}
	if (if_fake_separate_frame_header != 0) {
		fakeif->iff_flags |= IFF_FLAGS_SEPARATE_FRAME_HEADER;
	}
	fakeif->iff_max_mtu = get_max_mtu(bsd_mode, if_fake_max_mtu);
	fakeif->iff_fcs = if_fake_fcs;
	fakeif->iff_trailer_length = if_fake_trailer_length;

	/* use the interface name as the unique id for ifp recycle */
	if ((unsigned int)
	    snprintf(fakeif->iff_name, sizeof(fakeif->iff_name), "%s%d",
	    ifc->ifc_name, unit) >= sizeof(fakeif->iff_name)) {
		feth_release(fakeif);
		return EINVAL;
	}
	bzero(&feth_init, sizeof(feth_init));
	feth_init.ver = IFNET_INIT_CURRENT_VERSION;
	feth_init.len = sizeof(feth_init);
	if (feth_in_bsd_mode(fakeif)) {
		/* BSD mode: either txstart model or legacy output model */
		if (if_fake_txstart != 0) {
			feth_init.start = feth_start;
		} else {
			feth_init.flags |= IFNET_INIT_LEGACY;
			feth_init.output = feth_output;
		}
		if (tso_support) {
			feth_set_supports_tso(fakeif);
		}
	}
#if SKYWALK
	else {
		feth_init.flags |= IFNET_INIT_SKYWALK_NATIVE;
		/*
		 * Currently we support WMM mode only for Skywalk native
		 * interface.
		 */
		if (if_fake_wmm_mode != 0) {
			fakeif->iff_flags |= IFF_FLAGS_WMM_MODE;
		}

		if (multi_buflet) {
			fakeif->iff_flags |= IFF_FLAGS_MULTIBUFLETS;
		}

		fakeif->iff_pp_mode = pktpool_mode;
		if (tso_support) {
			feth_set_supports_tso(fakeif);
		}

		fakeif->iff_tx_headroom = if_fake_tx_headroom;
		fakeif->iff_adv_interval = if_fake_if_adv_interval;
		if (fakeif->iff_adv_interval > 0) {
			feth_init.flags |= IFNET_INIT_IF_ADV;
		}
		fakeif->iff_tx_drop_rate = if_fake_tx_drops;
		fakeif->iff_tx_completion_mode = if_tx_completion_mode;
		fakeif->iff_tx_exp_policy = if_fake_tx_exp_policy;
	}
	feth_init.tx_headroom = fakeif->iff_tx_headroom;
#endif /* SKYWALK */
	if (if_fake_nxattach == 0) {
		feth_init.flags |= IFNET_INIT_NX_NOAUTO;
	}
	feth_init.uniqueid = fakeif->iff_name;
	feth_init.uniqueid_len = strlen(fakeif->iff_name);
	feth_init.name = ifc->ifc_name;
	feth_init.unit = unit;
	feth_init.family = IFNET_FAMILY_ETHERNET;
	feth_init.type = IFT_ETHER;
	feth_init.demux = ether_demux;
	feth_init.add_proto = ether_add_proto;
	feth_init.del_proto = ether_del_proto;
	feth_init.check_multi = ether_check_multi;
	feth_init.framer_extended = ether_frameout_extended;
	feth_init.softc = fakeif;
	feth_init.ioctl = feth_ioctl;
	feth_init.set_bpf_tap = NULL;
	feth_init.detach = feth_if_free;
	feth_init.broadcast_addr = etherbroadcastaddr;
	feth_init.broadcast_len = ETHER_ADDR_LEN;
	if (feth_in_bsd_mode(fakeif)) {
		error = ifnet_allocate_extended(&feth_init, &ifp);
		if (error) {
			feth_release(fakeif);
			return error;
		}
		feth_ifnet_set_attrs(fakeif, ifp);
		if (feth_supports_tso(fakeif)) {
			feth_set_tso_mtu(ifp, IP_MAXPACKET, IP_MAXPACKET);
		}
	}
#if SKYWALK
	else {
		if (feth_in_wmm_mode(fakeif)) {
			feth_init.output_sched_model =
			    IFNET_SCHED_MODEL_DRIVER_MANAGED;
		}
		/* native mode: attaching the nexus also creates the ifnet */
		error = feth_attach_netif_nexus(fakeif, &feth_init, &ifp);
		if (error != 0) {
			feth_release(fakeif);
			return error;
		}
		/* take an additional reference to ensure that it doesn't go away */
		feth_retain(fakeif);
		fakeif->iff_ifp = ifp;
	}
#endif /* SKYWALK */
	/* seed the supported media list from the defaults */
	fakeif->iff_media_count = MIN(default_media_words_count, IF_FAKE_MEDIA_LIST_MAX);
	bcopy(default_media_words, fakeif->iff_media_list,
	    fakeif->iff_media_count * sizeof(fakeif->iff_media_list[0]));
	if (feth_in_bsd_mode(fakeif)) {
		error = ifnet_attach(ifp, NULL);
		if (error) {
			ifnet_release(ifp);
			feth_release(fakeif);
			return error;
		}
		fakeif->iff_ifp = ifp;
	}

	ifnet_set_lladdr(ifp, mac_address, sizeof(mac_address));

	/* attach as ethernet */
	bpfattach(ifp, DLT_EN10MB, sizeof(struct ether_header));
	return 0;
}
3656
/*
 * feth_clone_destroy
 * - if_clone destroy handler: mark the interface as detaching under the
 *   lock, tear down the netif nexus if one is attached, disconnect any
 *   peer, and detach the ifnet
 * - returns 0 even when already detaching (idempotent)
 */
static int
feth_clone_destroy(ifnet_t ifp)
{
	if_fake_ref fakeif;
#if SKYWALK
	boolean_t nx_attached = FALSE;
#endif /* SKYWALK */

	feth_lock();
	fakeif = ifnet_get_if_fake(ifp);
	if (fakeif == NULL || feth_is_detaching(fakeif)) {
		/* nothing to do, or another destroy is already in flight */
		feth_unlock();
		return 0;
	}
	/* claim the teardown before dropping the lock */
	feth_set_detaching(fakeif);
#if SKYWALK
	nx_attached = !feth_in_bsd_mode(fakeif);
#endif /* SKYWALK */
	feth_unlock();

#if SKYWALK
	if (nx_attached) {
		feth_detach_netif_nexus(fakeif);
		/* drop the extra reference taken in feth_clone_create() */
		feth_release(fakeif);
	}
#endif /* SKYWALK */
	/* disconnect from the peer, then detach the ifnet */
	feth_config(ifp, NULL);
	ifnet_detach(ifp);
	return 0;
}
3687
3688 static void
feth_enqueue_input(ifnet_t ifp,struct mbuf * m)3689 feth_enqueue_input(ifnet_t ifp, struct mbuf * m)
3690 {
3691 struct ifnet_stat_increment_param stats = {};
3692
3693 stats.packets_in = 1;
3694 stats.bytes_in = (uint32_t)mbuf_pkthdr_len(m) + ETHER_HDR_LEN;
3695 ifnet_input(ifp, m, &stats);
3696 }
3697
3698
3699 static int
feth_add_mbuf_trailer(struct mbuf * m,void * trailer,size_t trailer_len)3700 feth_add_mbuf_trailer(struct mbuf *m, void *trailer, size_t trailer_len)
3701 {
3702 int ret;
3703 ASSERT(trailer_len <= FETH_TRAILER_LENGTH_MAX);
3704
3705 ret = m_append(m, trailer_len, (caddr_t)trailer);
3706 if (ret == 1) {
3707 FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
3708 "%zuB trailer added", trailer_len);
3709 return 0;
3710 }
3711 FAKE_LOG(LOG_NOTICE, FE_DBGF_OUTPUT, "m_append failed");
3712 return ENOTSUP;
3713 }
3714
3715 static int
feth_add_mbuf_fcs(struct mbuf * m)3716 feth_add_mbuf_fcs(struct mbuf *m)
3717 {
3718 uint32_t pkt_len, offset = 0;
3719 uint32_t crc = 0;
3720 int err = 0;
3721
3722 ASSERT(sizeof(crc) == ETHER_CRC_LEN);
3723
3724 pkt_len = m->m_pkthdr.len;
3725 struct mbuf *iter = m;
3726 while (iter != NULL && offset < pkt_len) {
3727 uint32_t frag_len = iter->m_len;
3728 ASSERT(frag_len <= (pkt_len - offset));
3729 crc = crc32(crc, mtod(iter, void *), frag_len);
3730 offset += frag_len;
3731 iter = iter->m_next;
3732 }
3733
3734 err = feth_add_mbuf_trailer(m, &crc, ETHER_CRC_LEN);
3735 if (err != 0) {
3736 return err;
3737 }
3738
3739 m->m_flags |= M_HASFCS;
3740
3741 return 0;
3742 }
3743
/*
 * feth_output_common
 * - transmit one mbuf from ifp to its peer: fake the HW checksum flags if
 *   enabled, append optional trailer/FCS, split off the frame header, tap
 *   both BPF directions, and enqueue the packet on the peer's input path
 * - consumes the mbuf on every path
 */
static void
feth_output_common(ifnet_t ifp, struct mbuf * m, ifnet_t peer,
    iff_flags_t flags, bool fcs, void *trailer, size_t trailer_len)
{
	void *          frame_header;

	if ((flags & IFF_FLAGS_HWCSUM) != 0) {
		/* pretend hardware verified all checksums */
		m->m_pkthdr.csum_data = 0xffff;
		m->m_pkthdr.csum_flags =
		    CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
		    CSUM_IP_CHECKED | CSUM_IP_VALID;
	}

	(void)ifnet_stat_increment_out(ifp, 1, m->m_pkthdr.len, 0);
	if (trailer_len != 0 && trailer != NULL) {
		feth_add_mbuf_trailer(m, trailer, trailer_len);
	}
	if (fcs) {
		feth_add_mbuf_fcs(m);
	}
	if ((flags & IFF_FLAGS_SEPARATE_FRAME_HEADER) != 0) {
		/*
		 * Pull the ethernet header into contiguous storage, record
		 * it as the frame header, then trim it from the data.
		 * NOTE(review): on failure m_copyup() presumably frees the
		 * original chain — confirm before changing this path.
		 */
		m = m_copyup(m, ETHER_HDR_LEN, 0);
		if (m == NULL) {
			FAKE_LOG(LOG_NOTICE, FE_DBGF_OUTPUT, "m_copyup failed");
			goto done;
		}
		frame_header = mbuf_data(m);
		mbuf_pkthdr_setheader(m, frame_header);
		m_adj(m, ETHER_HDR_LEN);
		FAKE_LOG(LOG_DEBUG, FE_DBGF_OUTPUT,
		    "%s: frame 0x%llx data 0x%llx len %ld",
		    ifp->if_xname,
		    (uint64_t)VM_KERNEL_ADDRPERM(frame_header),
		    (uint64_t)VM_KERNEL_ADDRPERM(mtod(m, void *)),
		    mbuf_len(m));
	} else {
		/* frame header stays in-line; just advance past it */
		frame_header = mbuf_data(m);
		mbuf_pkthdr_setheader(m, frame_header);
		_mbuf_adjust_pkthdr_and_data(m, ETHER_HDR_LEN);
	}

	/* tap it out */
	if (ifp->if_bpf != NULL) {
		fake_bpf_tap_out(ifp, m, frame_header);
	}

	/* tap it in */
	if (peer->if_bpf != NULL) {
		fake_bpf_tap_in(peer, m, frame_header);
	}

	(void)mbuf_pkthdr_setrcvif(m, peer);
	feth_enqueue_input(peer, m);
done:
	return;
}
3800
/*
 * feth_start
 * - ifnet start callback (txstart model): drain the interface send queue
 *   and deliver each packet to the peer
 * - iff_start_busy serializes this routine: a second caller returns
 *   immediately while a drain is already in progress
 * - the peer/flags snapshot is taken under the lock but used after it is
 *   dropped; NOTE(review): relies on the peer ifnet staying valid for the
 *   duration of the drain — confirm against feth_config()/destroy paths
 */
static void
feth_start(ifnet_t ifp)
{
	if_fake_ref fakeif;
	iff_flags_t flags = 0;
	bool fcs;
	struct mbuf *   m;
	ifnet_t peer = NULL;
	size_t trailer_len;

	feth_lock();
	fakeif = ifnet_get_if_fake(ifp);
	if (fakeif == NULL) {
		feth_unlock();
		return;
	}

	if (fakeif->iff_start_busy) {
		/* another thread is already draining the queue */
		feth_unlock();
		return;
	}

	/* snapshot the transmit parameters under the lock */
	peer = fakeif->iff_peer;
	flags = fakeif->iff_flags;
	fcs = fakeif->iff_fcs;
	trailer_len = fakeif->iff_trailer_length;

	fakeif->iff_start_busy = TRUE;
	feth_unlock();
	for (;;) {
		if (ifnet_dequeue(ifp, &m) != 0) {
			break;
		}
		if (peer == NULL) {
			/* no peer: drop */
			m_freem(m);
			continue;
		}
		if (m != NULL) {
			feth_output_common(ifp, m, peer, flags, fcs,
			    feth_trailer, trailer_len);
		}
	}
	/* re-validate the softc before clearing the busy flag */
	feth_lock();
	fakeif = ifnet_get_if_fake(ifp);
	if (fakeif != NULL) {
		fakeif->iff_start_busy = FALSE;
	}
	feth_unlock();
}
3850
3851 static int
feth_output(ifnet_t ifp,struct mbuf * m)3852 feth_output(ifnet_t ifp, struct mbuf * m)
3853 {
3854 if_fake_ref fakeif;
3855 iff_flags_t flags;
3856 bool fcs;
3857 size_t trailer_len;
3858 ifnet_t peer = NULL;
3859
3860 if (m == NULL) {
3861 return 0;
3862 }
3863 feth_lock();
3864 fakeif = ifnet_get_if_fake(ifp);
3865 if (fakeif != NULL) {
3866 peer = fakeif->iff_peer;
3867 flags = fakeif->iff_flags;
3868 fcs = fakeif->iff_fcs;
3869 trailer_len = fakeif->iff_trailer_length;
3870 }
3871 feth_unlock();
3872 if (peer == NULL) {
3873 m_freem(m);
3874 ifnet_stat_increment_out(ifp, 0, 0, 1);
3875 return 0;
3876 }
3877 feth_output_common(ifp, m, peer, flags, fcs, feth_trailer, trailer_len);
3878 return 0;
3879 }
3880
/*
 * feth_config
 * - connect ifp to `peer` (peer != NULL) or disconnect it from its current
 *   peer (peer == NULL); both sides' iff_peer pointers are updated
 *   atomically under the feth lock
 * - returns 0 on success; EINVAL for a missing/invalid softc, EBUSY when
 *   either side is detaching or already has a peer
 * - posts KEV_DL_LINK_ON/OFF to both interfaces after dropping the lock
 */
static int
feth_config(ifnet_t ifp, ifnet_t peer)
{
	int connected = FALSE;
	int disconnected = FALSE;
	int error = 0;
	if_fake_ref fakeif = NULL;

	feth_lock();
	fakeif = ifnet_get_if_fake(ifp);
	if (fakeif == NULL) {
		error = EINVAL;
		goto done;
	}
	if (peer != NULL) {
		/* connect to peer */
		if_fake_ref peer_fakeif;

		peer_fakeif = ifnet_get_if_fake(peer);
		if (peer_fakeif == NULL) {
			error = EINVAL;
			goto done;
		}
		/* both sides must be alive and unpaired */
		if (feth_is_detaching(fakeif) ||
		    feth_is_detaching(peer_fakeif) ||
		    peer_fakeif->iff_peer != NULL ||
		    fakeif->iff_peer != NULL) {
			error = EBUSY;
			goto done;
		}
#if SKYWALK
		/* both endpoints must use the same packet pool mode */
		if (fakeif->iff_pp_mode !=
		    peer_fakeif->iff_pp_mode) {
			error = EINVAL;
			goto done;
		}
#endif /* SKYWALK */
		/* cross-link the two interfaces */
		fakeif->iff_peer = peer;
		peer_fakeif->iff_peer = ifp;
		connected = TRUE;
	} else if (fakeif->iff_peer != NULL) {
		/* disconnect from peer */
		if_fake_ref peer_fakeif;

		peer = fakeif->iff_peer;
		peer_fakeif = ifnet_get_if_fake(peer);
		if (peer_fakeif == NULL) {
			/* should not happen */
			error = EINVAL;
			goto done;
		}
		fakeif->iff_peer = NULL;
		peer_fakeif->iff_peer = NULL;
		disconnected = TRUE;
	}

done:
	feth_unlock();

	/* generate link status event if we connect or disconnect */
	if (connected) {
		interface_link_event(ifp, KEV_DL_LINK_ON);
		interface_link_event(peer, KEV_DL_LINK_ON);
	} else if (disconnected) {
		interface_link_event(ifp, KEV_DL_LINK_OFF);
		interface_link_event(peer, KEV_DL_LINK_OFF);
	}
	return error;
}
3950
3951 static int
feth_set_media(ifnet_t ifp,struct if_fake_request * iffr)3952 feth_set_media(ifnet_t ifp, struct if_fake_request * iffr)
3953 {
3954 if_fake_ref fakeif;
3955 int error;
3956
3957 if (iffr->iffr_media.iffm_count > IF_FAKE_MEDIA_LIST_MAX) {
3958 /* list is too long */
3959 return EINVAL;
3960 }
3961 feth_lock();
3962 fakeif = ifnet_get_if_fake(ifp);
3963 if (fakeif == NULL) {
3964 error = EINVAL;
3965 goto done;
3966 }
3967 fakeif->iff_media_count = iffr->iffr_media.iffm_count;
3968 bcopy(iffr->iffr_media.iffm_list, fakeif->iff_media_list,
3969 iffr->iffr_media.iffm_count * sizeof(fakeif->iff_media_list[0]));
3970 #if 0
3971 /* XXX: "auto-negotiate" active with peer? */
3972 /* generate link status event? */
3973 fakeif->iff_media_current = iffr->iffr_media.iffm_current;
3974 #endif
3975 error = 0;
3976 done:
3977 feth_unlock();
3978 return error;
3979 }
3980
3981 static int
if_fake_request_copyin(user_addr_t user_addr,struct if_fake_request * iffr,u_int32_t len)3982 if_fake_request_copyin(user_addr_t user_addr,
3983 struct if_fake_request *iffr, u_int32_t len)
3984 {
3985 int error;
3986
3987 if (user_addr == USER_ADDR_NULL || len < sizeof(*iffr)) {
3988 error = EINVAL;
3989 goto done;
3990 }
3991 error = copyin(user_addr, iffr, sizeof(*iffr));
3992 if (error != 0) {
3993 goto done;
3994 }
3995 if (iffr->iffr_reserved[0] != 0 || iffr->iffr_reserved[1] != 0 ||
3996 iffr->iffr_reserved[2] != 0 || iffr->iffr_reserved[3] != 0) {
3997 error = EINVAL;
3998 goto done;
3999 }
4000 done:
4001 return error;
4002 }
4003
4004 static int
feth_set_drvspec(ifnet_t ifp,uint32_t cmd,u_int32_t len,user_addr_t user_addr)4005 feth_set_drvspec(ifnet_t ifp, uint32_t cmd, u_int32_t len,
4006 user_addr_t user_addr)
4007 {
4008 int error;
4009 struct if_fake_request iffr;
4010 ifnet_t peer;
4011
4012 switch (cmd) {
4013 case IF_FAKE_S_CMD_SET_PEER:
4014 error = if_fake_request_copyin(user_addr, &iffr, len);
4015 if (error != 0) {
4016 break;
4017 }
4018 if (iffr.iffr_peer_name[0] == '\0') {
4019 error = feth_config(ifp, NULL);
4020 break;
4021 }
4022
4023 /* ensure nul termination */
4024 iffr.iffr_peer_name[IFNAMSIZ - 1] = '\0';
4025 peer = ifunit(iffr.iffr_peer_name);
4026 if (peer == NULL) {
4027 error = ENXIO;
4028 break;
4029 }
4030 if (ifnet_type(peer) != IFT_ETHER) {
4031 error = EINVAL;
4032 break;
4033 }
4034 if (strcmp(ifnet_name(peer), FAKE_ETHER_NAME) != 0) {
4035 error = EINVAL;
4036 break;
4037 }
4038 error = feth_config(ifp, peer);
4039 break;
4040 case IF_FAKE_S_CMD_SET_MEDIA:
4041 error = if_fake_request_copyin(user_addr, &iffr, len);
4042 if (error != 0) {
4043 break;
4044 }
4045 error = feth_set_media(ifp, &iffr);
4046 break;
4047 case IF_FAKE_S_CMD_SET_DEQUEUE_STALL:
4048 error = if_fake_request_copyin(user_addr, &iffr, len);
4049 if (error != 0) {
4050 break;
4051 }
4052 error = feth_enable_dequeue_stall(ifp,
4053 iffr.iffr_dequeue_stall);
4054 break;
4055 default:
4056 error = EOPNOTSUPP;
4057 break;
4058 }
4059 return error;
4060 }
4061
4062 static int
feth_get_drvspec(ifnet_t ifp,u_int32_t cmd,u_int32_t len,user_addr_t user_addr)4063 feth_get_drvspec(ifnet_t ifp, u_int32_t cmd, u_int32_t len,
4064 user_addr_t user_addr)
4065 {
4066 int error = EOPNOTSUPP;
4067 if_fake_ref fakeif;
4068 struct if_fake_request iffr;
4069 ifnet_t peer;
4070
4071 switch (cmd) {
4072 case IF_FAKE_G_CMD_GET_PEER:
4073 if (len < sizeof(iffr)) {
4074 error = EINVAL;
4075 break;
4076 }
4077 feth_lock();
4078 fakeif = ifnet_get_if_fake(ifp);
4079 if (fakeif == NULL) {
4080 feth_unlock();
4081 error = EOPNOTSUPP;
4082 break;
4083 }
4084 peer = fakeif->iff_peer;
4085 feth_unlock();
4086 bzero(&iffr, sizeof(iffr));
4087 if (peer != NULL) {
4088 strlcpy(iffr.iffr_peer_name,
4089 if_name(peer),
4090 sizeof(iffr.iffr_peer_name));
4091 }
4092 error = copyout(&iffr, user_addr, sizeof(iffr));
4093 break;
4094 default:
4095 break;
4096 }
4097 return error;
4098 }
4099
/*
 * Overlay of the 32- and 64-bit ifdrv request layouts; feth_ioctl()
 * stores the raw ioctl data pointer in ifdrvu_p and then reads the
 * variant matching the SIOC[GS]DRVSPEC{32,64} command it received.
 */
union ifdrvu {
	struct ifdrv32 *ifdrvu_32;
	struct ifdrv64 *ifdrvu_64;
	void *ifdrvu_p;
};
4105
/*
 * feth_ioctl
 * - interface ioctl handler for feth
 * - returns 0 on success, an errno value otherwise; unsupported
 *   commands return EOPNOTSUPP
 */
static int
feth_ioctl(ifnet_t ifp, u_long cmd, void * data)
{
	unsigned int count;
	struct ifdevmtu * devmtu_p;
	union ifdrvu drv;
	uint32_t drv_cmd;
	uint32_t drv_len;
	boolean_t drv_set_command = FALSE;
	int error = 0;
	struct ifmediareq32 * ifmr;
	struct ifreq * ifr;
	if_fake_ref fakeif;
	int status;
	user_addr_t user_addr;

	ifr = (struct ifreq *)data;
	switch (cmd) {
	case SIOCSIFADDR:
		/* assigning an address implicitly marks the interface up */
		ifnet_set_flags(ifp, IFF_UP, IFF_UP);
		break;

	case SIOCGIFMEDIA32:
	case SIOCGIFMEDIA64:
		feth_lock();
		fakeif = ifnet_get_if_fake(ifp);
		if (fakeif == NULL) {
			feth_unlock();
			return EOPNOTSUPP;
		}
		/* link reports active only while a peer is attached */
		status = (fakeif->iff_peer != NULL)
		    ? (IFM_AVALID | IFM_ACTIVE) : IFM_AVALID;
		/*
		 * NOTE(review): the 32-bit struct is used to access fields
		 * for both command variants; this assumes ifmediareq32 and
		 * ifmediareq64 agree on the layout of everything up to the
		 * trailing ifmu_ulist pointer -- confirm against if.h.
		 */
		ifmr = (struct ifmediareq32 *)data;
		user_addr = (cmd == SIOCGIFMEDIA64) ?
		    ((struct ifmediareq64 *)data)->ifmu_ulist :
		    CAST_USER_ADDR_T(((struct ifmediareq32 *)data)->ifmu_ulist);
		count = ifmr->ifm_count;
		ifmr->ifm_active = (fakeif->iff_peer != NULL)
		    ? FAKE_DEFAULT_MEDIA : IFM_ETHER;
		ifmr->ifm_current = IFM_ETHER;
		ifmr->ifm_mask = 0;
		ifmr->ifm_status = status;
		if (user_addr == USER_ADDR_NULL) {
			/* caller is sizing its buffer; report the count only */
			ifmr->ifm_count = fakeif->iff_media_count;
		} else if (count > 0) {
			/* clamp to the media list we actually have */
			if (count > fakeif->iff_media_count) {
				count = fakeif->iff_media_count;
			}
			ifmr->ifm_count = count;
			/*
			 * NOTE(review): copyout is performed while holding
			 * feth_lock; verify the lock may be held across a
			 * user-space fault.
			 */
			error = copyout(&fakeif->iff_media_list, user_addr,
			    count * sizeof(int));
		}
		feth_unlock();
		break;

	case SIOCGIFDEVMTU:
		/* report current, max (per feth_max_mtu), and min MTU */
		devmtu_p = &ifr->ifr_devmtu;
		devmtu_p->ifdm_current = ifnet_mtu(ifp);
		devmtu_p->ifdm_max = feth_max_mtu(ifp);
		devmtu_p->ifdm_min = IF_MINMTU;
		break;

	case SIOCSIFMTU:
		/* bound the requested MTU before applying it */
		if ((unsigned int)ifr->ifr_mtu > feth_max_mtu(ifp) ||
		    ifr->ifr_mtu < IF_MINMTU) {
			error = EINVAL;
		} else {
			error = ifnet_set_mtu(ifp, ifr->ifr_mtu);
		}
		break;

	case SIOCSDRVSPEC32:
	case SIOCSDRVSPEC64:
		/* SET variants require super-user privilege */
		error = proc_suser(current_proc());
		if (error != 0) {
			break;
		}
		drv_set_command = TRUE;
		OS_FALLTHROUGH;
	case SIOCGDRVSPEC32:
	case SIOCGDRVSPEC64:
		/* decode the 32- or 64-bit request via the ifdrvu overlay */
		drv.ifdrvu_p = data;
		if (cmd == SIOCGDRVSPEC32 || cmd == SIOCSDRVSPEC32) {
			drv_cmd = drv.ifdrvu_32->ifd_cmd;
			drv_len = drv.ifdrvu_32->ifd_len;
			user_addr = CAST_USER_ADDR_T(drv.ifdrvu_32->ifd_data);
		} else {
			drv_cmd = drv.ifdrvu_64->ifd_cmd;
			drv_len = drv.ifdrvu_64->ifd_len;
			user_addr = drv.ifdrvu_64->ifd_data;
		}
		if (drv_set_command) {
			error = feth_set_drvspec(ifp, drv_cmd, drv_len,
			    user_addr);
		} else {
			error = feth_get_drvspec(ifp, drv_cmd, drv_len,
			    user_addr);
		}
		break;

	case SIOCSIFLLADDR:
		error = ifnet_set_lladdr(ifp, ifr->ifr_addr.sa_data,
		    ifr->ifr_addr.sa_len);
		break;

	case SIOCSIFFLAGS:
		/* keep IFF_RUNNING in sync with IFF_UP */
		if ((ifp->if_flags & IFF_UP) != 0) {
			/* marked up, set running if not already set */
			if ((ifp->if_flags & IFF_RUNNING) == 0) {
				/* set running */
				error = ifnet_set_flags(ifp, IFF_RUNNING,
				    IFF_RUNNING);
			}
		} else if ((ifp->if_flags & IFF_RUNNING) != 0) {
			/* marked down, clear running */
			error = ifnet_set_flags(ifp, 0, IFF_RUNNING);
		}
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		/* multicast membership changes are accepted but ignored */
		error = 0;
		break;
	case SIOCSIFCAP: {
		uint32_t cap;

		/* LRO can only be toggled if the interface was created
		 * with LRO support (IFF_FLAGS_LRO) */
		feth_lock();
		fakeif = ifnet_get_if_fake(ifp);
		if (fakeif == NULL ||
		    (fakeif->iff_flags & IFF_FLAGS_LRO) == 0) {
			feth_unlock();
			return EOPNOTSUPP;
		}
		feth_unlock();
		cap = (ifr->ifr_reqcap & IFCAP_LRO) != 0 ? IFCAP_LRO : 0;
		error = ifnet_set_capabilities_enabled(ifp, cap, IFCAP_LRO);
		break;
	}
	default:
		error = EOPNOTSUPP;
		break;
	}
	return error;
}
4250
4251 static void
feth_if_free(ifnet_t ifp)4252 feth_if_free(ifnet_t ifp)
4253 {
4254 if_fake_ref fakeif;
4255
4256 if (ifp == NULL) {
4257 return;
4258 }
4259 feth_lock();
4260 fakeif = ifnet_get_if_fake(ifp);
4261 if (fakeif == NULL) {
4262 feth_unlock();
4263 return;
4264 }
4265 ifp->if_softc = NULL;
4266 #if SKYWALK
4267 VERIFY(fakeif->iff_doorbell_tcall == NULL);
4268 #endif /* SKYWALK */
4269 feth_unlock();
4270 feth_release(fakeif);
4271 ifnet_release(ifp);
4272 return;
4273 }
4274
4275 __private_extern__ void
if_fake_init(void)4276 if_fake_init(void)
4277 {
4278 int error;
4279
4280 #if SKYWALK
4281 (void)feth_register_nexus_domain_provider();
4282 #endif /* SKYWALK */
4283 error = if_clone_attach(&feth_cloner);
4284 if (error != 0) {
4285 return;
4286 }
4287 return;
4288 }
4289