xref: /xnu-10063.121.3/bsd/net/if_redirect.c (revision 2c2f96dc2b9a4408a43d3150ae9c105355ca3daa)
1 /*
2  * Copyright (c) 2022 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 /*
30  * if_redirect.c
31  * Virtual network interface that redirects traffic to a delegate interface.
32  */
33 
34 #include <sys/sysctl.h>
35 #include <net/dlil.h>
36 #include <net/ethernet.h>
37 #include <net/kpi_interface.h>
38 #include <net/bpf.h>
39 #include <net/if_media.h>
40 #include <net/if_ether.h>
41 #include <net/if_redirect.h>
42 #include <os/log.h>
43 
44 #include <skywalk/os_skywalk_private.h>
45 #include <skywalk/nexus/netif/nx_netif.h>
46 
47 #define RD_NAME                 "rd"
48 #define RD_MAXUNIT              IF_MAXUNIT
49 #define RD_ZONE_MAX_ELEM        MIN(IFNETS_MAX, RD_MAXUNIT)
50 #define RD_MAX_MTU              2048
51 
52 #define RD_MAX_TX_RINGS         1
53 #define RD_MAX_RX_RINGS         1
54 #define RD_POOL_SIZE            1024
55 
56 static uint8_t default_mac[ETHER_ADDR_LEN] = {0x0, 0x1, 0x2, 0x3, 0x4, 0x5};
57 
58 SYSCTL_DECL(_net_link);
59 SYSCTL_NODE(_net_link, OID_AUTO, redirect, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
60     "Redirect interface");
61 
62 static int if_redirect_debug = 0;
63 SYSCTL_INT(_net_link_redirect, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
64     &if_redirect_debug, 0, "Redirect interface debug logs");
65 
66 os_log_t redirect_log_handle = NULL;
67 
68 #define RDLOG(level, format, ...) do {                                        \
69 	if (level == LOG_ERR) {                                               \
70 	        os_log_error(redirect_log_handle, "%s: " format "\n",         \
71 	            __FUNCTION__, ##__VA_ARGS__);                             \
72 	} else {                                                              \
73 	        if (__probable(if_redirect_debug == 0)) {                     \
74 	                break;                                                \
75 	        }                                                             \
76 	        if (level == LOG_DEBUG) {                                     \
77 	                os_log_debug(redirect_log_handle, "%s: " format "\n", \
78 	                    __FUNCTION__, ##__VA_ARGS__);                     \
79 	        } else if (level == LOG_INFO) {                               \
80 	                os_log_info(redirect_log_handle, "%s: " format "\n",  \
81 	                    __FUNCTION__, ##__VA_ARGS__);                     \
82 	        }                                                             \
83 	}                                                                     \
84 } while (0)
85 
86 #define RDLOG_ERR(format, ...) RDLOG(LOG_ERR, format, ##__VA_ARGS__)
87 #define RDLOG_DBG(format, ...) RDLOG(LOG_DEBUG, format, ##__VA_ARGS__)
88 #define RDLOG_INFO(format, ...) RDLOG(LOG_INFO, format, ##__VA_ARGS__)
89 
90 #define RD_MEDIA_LIST_MAX 27
91 
/* Provider/instance UUID pair identifying the netif nexus we create */
typedef struct {
	uuid_t                 rnx_provider;
	uuid_t                 rnx_instance;
} redirect_nx, *redirect_nx_t;

/*
 * Per-interface softc for a redirect interface. Instances are
 * reference-counted via rd_retain_count (redirect_retain/release);
 * rd_lock serializes the mutable state below.
 */
typedef struct {
	char                   rd_name[IFNAMSIZ]; /* our unique id */
	lck_mtx_t              rd_lock;
	uint32_t               rd_ftype;          /* IFRTYPE_FAMILY_* of this interface */
	ifnet_t                rd_ifp;            /* our own ifnet */
	ifnet_t                rd_delegate_ifp;   /* traffic is redirected here */

	/* General state of the interface */
	boolean_t              rd_detaching;
	boolean_t              rd_connected;      /* a channel is connected */

	/* Used for tracking delegate related state info */
	boolean_t              rd_self_ref;
	boolean_t              rd_delegate_parent_set;
	boolean_t              rd_delegate_ref;
	boolean_t              rd_fsw_rx_cb_set;  /* redirect_rx_cb installed */
	boolean_t              rd_delegate_set;
	boolean_t              rd_mac_addr_set;
	boolean_t              rd_detach_notify_set;

	unsigned int           rd_max_mtu;        /* capped by RD_MAX_MTU */
	uint32_t               rd_retain_count;
	kern_pbufpool_t        rd_pp;             /* shared Tx/Rx packet pool */
	kern_channel_ring_t    rd_rx_ring[RD_MAX_RX_RINGS];
	kern_channel_ring_t    rd_tx_ring[RD_MAX_TX_RINGS];
	redirect_nx            rd_nx;             /* netif nexus identifiers */
	struct netif_stats     *rd_nifs;          /* cached stats of our netif */
	void                   *rd_intf_adv_kern_ctx;
	thread_call_t          rd_doorbell_tcall; /* async TX doorbell */
	boolean_t              rd_doorbell_tcall_active;
	boolean_t              rd_waiting_for_tcall;
	bool                   rd_intf_adv_enabled; /* interface advisory on */
	kern_nexus_capab_interface_advisory_notify_fn_t rd_intf_adv_notify;
} if_redirect, *if_redirect_t;
131 
132 static if_redirect_t ifnet_get_if_redirect(ifnet_t);
133 static int redirect_clone_create(struct if_clone *, uint32_t, void *);
134 static int redirect_clone_destroy(ifnet_t);
135 static int redirect_ioctl(ifnet_t, u_long, void *);
136 static void redirect_if_free(ifnet_t);
137 static void redirect_free(if_redirect_t);
138 static errno_t redirect_demux(ifnet_t, mbuf_t, char *, protocol_family_t *);
139 static errno_t redirect_add_proto(ifnet_t, protocol_family_t,
140     const struct ifnet_demux_desc *, uint32_t);
141 static errno_t redirect_del_proto(ifnet_t, protocol_family_t);
142 static void redirect_clear_delegate_locked(if_redirect_t);
143 static void redirect_clear_delegate(if_redirect_t);
144 
145 static struct if_clone
146     redirect_cloner = IF_CLONE_INITIALIZER(RD_NAME,
147     redirect_clone_create,
148     redirect_clone_destroy,
149     0,
150     RD_MAXUNIT);
151 static void interface_link_event(ifnet_t ifp, uint32_t event_code);
152 
153 static LCK_GRP_DECLARE(redirect_lock_group, "redirect");
154 static LCK_ATTR_DECLARE(redirect_lock_attr, 0, 0);
155 
156 #define RD_LOCK_INIT(rd) \
157 	lck_mtx_init(&(rd)->rd_lock, &redirect_lock_group, &redirect_lock_attr)
158 #define RD_LOCK(rd) \
159 	lck_mtx_lock(&(rd)->rd_lock)
160 #define RD_UNLOCK(rd) \
161 	lck_mtx_unlock(&(rd)->rd_lock)
162 #define RD_LOCK_DESTROY(rd) \
163 	lck_mtx_destroy(&(rd)->rd_lock, &redirect_lock_group)
164 
165 static inline boolean_t
redirect_is_usable(if_redirect_t rd)166 redirect_is_usable(if_redirect_t rd)
167 {
168 	return !rd->rd_detaching && rd->rd_connected;
169 }
170 
171 static inline unsigned int
redirect_max_mtu(ifnet_t ifp)172 redirect_max_mtu(ifnet_t ifp)
173 {
174 	if_redirect_t rd;
175 	unsigned int max_mtu = ETHERMTU;
176 
177 	rd = ifnet_get_if_redirect(ifp);
178 	if (rd == NULL) {
179 		RDLOG_ERR("rd is NULL");
180 		goto done;
181 	}
182 	max_mtu = rd->rd_max_mtu;
183 done:
184 	return max_mtu;
185 }
186 
187 static void
redirect_free(if_redirect_t rd)188 redirect_free(if_redirect_t rd)
189 {
190 	VERIFY(rd->rd_retain_count == 0);
191 
192 	if (rd->rd_pp != NULL) {
193 		pp_release(rd->rd_pp);
194 		rd->rd_pp = NULL;
195 	}
196 	RD_LOCK_DESTROY(rd);
197 	RDLOG_DBG("%s", rd->rd_name);
198 	kfree_type(if_redirect, rd);
199 }
200 
201 static void
redirect_release(if_redirect_t rd)202 redirect_release(if_redirect_t rd)
203 {
204 	uint32_t old_retain_count;
205 
206 	old_retain_count = OSDecrementAtomic(&rd->rd_retain_count);
207 	switch (old_retain_count) {
208 	case 0:
209 		VERIFY(old_retain_count != 0);
210 		break;
211 	case 1:
212 		redirect_free(rd);
213 		break;
214 	default:
215 		break;
216 	}
217 	return;
218 }
219 
/*
 * redirect_retain
 * Take a reference on rd; paired with redirect_release().
 */
static void
redirect_retain(if_redirect_t rd)
{
	OSIncrementAtomic(&rd->rd_retain_count);
}
225 
226 static void
redirect_bpf_tap(ifnet_t ifp,kern_packet_t pkt,bool input)227 redirect_bpf_tap(ifnet_t ifp, kern_packet_t pkt, bool input)
228 {
229 	uint32_t dlt;
230 
231 	switch (ifp->if_family) {
232 	case IFNET_FAMILY_ETHERNET:
233 		dlt = DLT_EN10MB;
234 		break;
235 	case IFNET_FAMILY_CELLULAR:
236 	case IFNET_FAMILY_UTUN:
237 	case IFNET_FAMILY_IPSEC:
238 		dlt = DLT_RAW;
239 		break;
240 	default:
241 		DTRACE_SKYWALK1(invalid__family, ifnet_t, ifp);
242 		return;
243 	}
244 
245 	if (input) {
246 		bpf_tap_packet_in(ifp, dlt, pkt, NULL, 0);
247 	} else {
248 		bpf_tap_packet_out(ifp, dlt, pkt, NULL, 0);
249 	}
250 }
251 
/*
 * redirect_packet_pool_init_prepare
 * Fill in pbufpool creation parameters: a virtual-device pool of
 * RD_POOL_SIZE single-buflet packets, buffers sized to the current
 * max MTU, with twice as many buflets to serve both Tx and Rx.
 */
static void
redirect_packet_pool_init_prepare(if_redirect_t rd,
    struct kern_pbufpool_init *pp_init)
{
	uint32_t max_mtu = rd->rd_max_mtu;

	bzero(pp_init, sizeof(*pp_init));
	pp_init->kbi_version = KERN_PBUFPOOL_CURRENT_VERSION;
	pp_init->kbi_flags |= KBIF_VIRTUAL_DEVICE;
	pp_init->kbi_packets = RD_POOL_SIZE;
	pp_init->kbi_bufsize = max_mtu;
	pp_init->kbi_max_frags = 1;  /* single-buflet packets only */
	pp_init->kbi_buflets =  (2 * pp_init->kbi_packets); /* Tx/Rx pool */
	pp_init->kbi_buf_seg_size = skmem_usr_buf_seg_size;
	pp_init->kbi_ctx = NULL;
	pp_init->kbi_ctx_retain = NULL;
	pp_init->kbi_ctx_release = NULL;
}
270 
271 static errno_t
redirect_packet_pool_make(if_redirect_t rd)272 redirect_packet_pool_make(if_redirect_t rd)
273 {
274 	struct kern_pbufpool_init pp_init;
275 	errno_t err;
276 
277 	redirect_packet_pool_init_prepare(rd, &pp_init);
278 	(void)snprintf((char *)pp_init.kbi_name, sizeof(pp_init.kbi_name),
279 	    "%s pp", rd->rd_name);
280 
281 	err = kern_pbufpool_create(&pp_init, &rd->rd_pp, NULL);
282 	return err;
283 }
284 
/*
 * redirect_enqueue_pkt
 * Enqueue a native kern packet onto the delegate interface. When the
 * netif is a logical-link provider and a traffic rule maps this packet
 * to a queue set, enqueue onto that qset's ifclassq; otherwise use the
 * default interface enqueue path. The packet is consumed in either
 * case; on failure *drop reports whether AQM dropped it.
 */
static int
redirect_enqueue_pkt(struct nx_netif *nif, struct __kern_packet *pkt,
    boolean_t flush, boolean_t *drop)
{
	ifnet_t ifp = nif->nif_ifp;
	uint64_t qset_id;
	int err;

	if (NX_LLINK_PROV(nif->nif_nx) &&
	    ifp->if_traffic_rule_count > 0 &&
	    nxctl_inet_traffic_rule_find_qset_id_with_pkt(ifp->if_xname,
	    pkt, &qset_id) == 0) {
		struct netif_qset *qset;

		/*
		 * This always returns a qset because if the qset id is invalid the
		 * default qset is returned.
		 */
		qset = nx_netif_find_qset(nif, qset_id);
		ASSERT(qset != NULL);
		pkt->pkt_qset_idx = qset->nqs_idx;
		err = ifnet_enqueue_ifcq_pkt(ifp, qset->nqs_ifcq, pkt, flush, drop);
		nx_netif_qset_release(&qset);
	} else {
		/* callee consumes packet */
		err = ifnet_enqueue_pkt(ifp, pkt, flush, drop);
	}
	return err;
}
314 
/*
 * redirect_enqueue_mbuf
 * Enqueue an mbuf onto the delegate interface (compat netif path);
 * mirrors redirect_enqueue_pkt() for the mbuf case.
 */
static int
redirect_enqueue_mbuf(struct nx_netif *nif, struct mbuf *m,
    boolean_t flush, boolean_t *drop)
{
	return ifnet_enqueue_mbuf(nif->nif_ifp, m, flush, drop);
}
321 
322 static int
redirect_tx_submit(ifnet_t delegate_ifp,struct pktq * spktq,uint32_t if_flowhash)323 redirect_tx_submit(ifnet_t delegate_ifp, struct pktq *spktq, uint32_t if_flowhash)
324 {
325 	struct __kern_packet *spkt, *pkt;
326 	struct nx_netif *nif;
327 	struct netif_stats *nifs;
328 	struct nexus_netif_adapter *dev_nifna;
329 	struct mbuf *m;
330 	boolean_t drop, native, compat;
331 	errno_t err;
332 	int cnt = 0;
333 
334 	if (!ifnet_datamov_begin(delegate_ifp)) {
335 		RDLOG_ERR("delegate interface is being detached");
336 		DTRACE_SKYWALK1(delegate__detached, ifnet_t, delegate_ifp);
337 		return ENXIO;
338 	}
339 	if (NA(delegate_ifp) == NULL) {
340 		RDLOG_ERR("nexus adapter is not present");
341 		DTRACE_SKYWALK1(no__nexus, ifnet_t, delegate_ifp);
342 		err = ENXIO;
343 		goto done;
344 	}
345 	dev_nifna = NA(delegate_ifp);
346 	nif = dev_nifna->nifna_netif;
347 	nifs = &nif->nif_stats;
348 
349 	native = (dev_nifna->nifna_up.na_type == NA_NETIF_DEV);
350 	compat = (dev_nifna->nifna_up.na_type == NA_NETIF_COMPAT_DEV);
351 
352 	while (KPKTQ_LEN(spktq) > 0) {
353 		KPKTQ_DEQUEUE(spktq, spkt);
354 		ASSERT(spkt != NULL);
355 		drop = FALSE;
356 
357 		if (__probable(native)) {
358 			pkt = nx_netif_pkt_to_pkt(dev_nifna, spkt, NETIF_CONVERT_TX);
359 			if (pkt == NULL) {
360 				continue;
361 			}
362 			pkt->pkt_flowsrc_type = FLOWSRC_IFNET;
363 			pkt->pkt_flow_token = if_flowhash;
364 			pkt->pkt_pflags |= (PKT_F_FLOW_ADV | PKTF_FLOW_ID);
365 
366 			netif_ifp_inc_traffic_class_out_pkt(delegate_ifp,
367 			    pkt->pkt_svc_class, 1, pkt->pkt_length);
368 
369 			err = redirect_enqueue_pkt(nif, pkt, FALSE, &drop);
370 		} else {
371 			ASSERT(compat);
372 			m = nx_netif_pkt_to_mbuf(dev_nifna, spkt, NETIF_CONVERT_TX);
373 			if (m == NULL) {
374 				continue;
375 			}
376 			m->m_pkthdr.pkt_flowsrc = FLOWSRC_IFNET;
377 			m->m_pkthdr.pkt_mpriv_srcid = if_flowhash;
378 			m->m_pkthdr.pkt_flags =
379 			    (PKTF_FLOW_ID | PKTF_FLOW_ADV | PKTF_FLOW_LOCALSRC);
380 
381 			ifp_inc_traffic_class_out(delegate_ifp, m);
382 
383 			err = redirect_enqueue_mbuf(nif, m, FALSE, &drop);
384 		}
385 		if (__probable(err == 0)) {
386 			cnt++;
387 		} else {
388 			RDLOG_ERR("enqueue failed: %d", err);
389 			if (drop) {
390 				STATS_INC(nifs, NETIF_STATS_TX_DROP_ENQ_AQM);
391 				STATS_INC(nifs, NETIF_STATS_DROP);
392 			}
393 			DTRACE_SKYWALK3(enqueue__failed,
394 			    ifnet_t, delegate_ifp, boolean_t, drop, int, err);
395 			break;
396 		}
397 	}
398 done:
399 	if (cnt > 0) {
400 		netif_transmit(delegate_ifp, NETIF_XMIT_FLAG_REDIRECT);
401 	}
402 	ifnet_datamov_end(delegate_ifp);
403 	return err;
404 }
405 
406 /*
407  *  nexus netif domain provider
408  */
/*
 * redirect_nxdp_init
 * Netif domain provider init callback; nothing to set up.
 */
static errno_t
redirect_nxdp_init(kern_nexus_domain_provider_t domprov)
{
#pragma unused(domprov)
	return 0;
}
415 
/*
 * redirect_nxdp_fini
 * Netif domain provider teardown callback; nothing to clean up.
 */
static void
redirect_nxdp_fini(kern_nexus_domain_provider_t domprov)
{
#pragma unused(domprov)
}
421 
422 static uuid_t redirect_nx_dom_prov;
423 
424 static errno_t
redirect_register_nexus_domain_provider(void)425 redirect_register_nexus_domain_provider(void)
426 {
427 	const struct kern_nexus_domain_provider_init dp_init = {
428 		.nxdpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
429 		.nxdpi_flags = 0,
430 		.nxdpi_init = redirect_nxdp_init,
431 		.nxdpi_fini = redirect_nxdp_fini
432 	};
433 
434 	errno_t err = 0;
435 
436 	/* redirect_nxdp_init() is called before this function returns */
437 	err = kern_nexus_register_domain_provider(NEXUS_TYPE_NET_IF,
438 	    (const uint8_t *)
439 	    "com.apple.redirect",
440 	    &dp_init, sizeof(dp_init),
441 	    &redirect_nx_dom_prov);
442 	if (err != 0) {
443 		RDLOG_ERR("failed to register domain provider");
444 		return err;
445 	}
446 	return 0;
447 }
448 
449 /*
450  * netif nexus routines
451  */
452 static if_redirect_t
redirect_nexus_context(kern_nexus_t nexus)453 redirect_nexus_context(kern_nexus_t nexus)
454 {
455 	if_redirect_t rd;
456 
457 	rd = (if_redirect_t)kern_nexus_get_context(nexus);
458 	assert(rd != NULL);
459 	return rd;
460 }
461 
/*
 * redirect_nx_ring_init
 * Channel ring setup callback. Records the single TX or RX ring and
 * caches a pointer to the netif stats block. Fails with ENXIO if the
 * interface is already being torn down.
 */
static errno_t
redirect_nx_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_t channel, kern_channel_ring_t ring, boolean_t is_tx_ring,
    void **ring_ctx)
{
#pragma unused(nxprov, channel, ring_ctx)
	if_redirect_t rd;

	rd = redirect_nexus_context(nexus);
	RD_LOCK(rd);
	if (rd->rd_detaching) {
		DTRACE_SKYWALK1(detaching, if_redirect_t, rd);
		RD_UNLOCK(rd);
		return ENXIO;
	}
	if (is_tx_ring) {
		/* exactly one TX ring is supported */
		_CASSERT(RD_MAX_TX_RINGS == 1);
		VERIFY(rd->rd_tx_ring[0] == NULL);
		rd->rd_tx_ring[0] = ring;
	} else {
		/* exactly one RX ring is supported */
		_CASSERT(RD_MAX_RX_RINGS == 1);
		VERIFY(rd->rd_rx_ring[0] == NULL);
		rd->rd_rx_ring[0] = ring;
	}

	rd->rd_nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
	RD_UNLOCK(rd);
	RDLOG_INFO("%s: %s ring init", rd->rd_name,
	    is_tx_ring ? "TX" : "RX");
	return 0;
}
493 
/*
 * redirect_nx_ring_fini
 * Channel ring teardown callback. Clears the matching ring pointer and
 * the cached stats pointer. For the TX ring it also takes ownership of
 * the async doorbell thread call: cancel it, wait out any in-flight
 * invocation, free it, and drop the reference it held on rd.
 */
static void
redirect_nx_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t ring)
{
#pragma unused(nxprov, ring)
	if_redirect_t rd;
	thread_call_t tcall = NULL;

	rd = redirect_nexus_context(nexus);
	RD_LOCK(rd);
	if (rd->rd_rx_ring[0] == ring) {
		RDLOG_INFO("%s: RX ring fini", rd->rd_name);
		rd->rd_rx_ring[0] = NULL;
	} else if (rd->rd_tx_ring[0] == ring) {
		RDLOG_INFO("%s: TX ring fini", rd->rd_name);
		/* claim the tcall under the lock so nobody re-enters it */
		tcall = rd->rd_doorbell_tcall;
		rd->rd_doorbell_tcall = NULL;
		rd->rd_tx_ring[0] = NULL;
	}
	rd->rd_nifs = NULL;
	RD_UNLOCK(rd);

	if (tcall != NULL) {
		boolean_t success;

		success = thread_call_cancel_wait(tcall);
		RDLOG_INFO("%s: thread_call_cancel %s",
		    rd->rd_name, success ? "SUCCESS" : "FAILURE");
		if (!success) {
			/*
			 * Cancel failed: the call is currently executing.
			 * Sleep until redirect_async_doorbell() clears
			 * rd_doorbell_tcall_active and wakes us.
			 */
			RD_LOCK(rd);
			if (rd->rd_doorbell_tcall_active) {
				rd->rd_waiting_for_tcall = TRUE;
				RDLOG_INFO("%s: *waiting for threadcall",
				    rd->rd_name);
				do {
					msleep(rd, &rd->rd_lock,
					    PZERO, "redirect threadcall", 0);
				} while (rd->rd_doorbell_tcall_active);
				RDLOG_INFO("%s: threadcall done",
				    rd->rd_name);
				rd->rd_waiting_for_tcall = FALSE;
			}
			RD_UNLOCK(rd);
		}
		success = thread_call_free(tcall);
		RDLOG_INFO("%s: thread_call_free %s",
		    rd->rd_name, success ? "SUCCESS" : "FAILURE");
		/* drop the reference taken when the tcall was allocated */
		redirect_release(rd);
		VERIFY(success == TRUE);
	}
}
545 
/*
 * redirect_nx_pre_connect
 * Channel pre-connect callback; nothing to prepare for this provider.
 */
static errno_t
redirect_nx_pre_connect(kern_nexus_provider_t nxprov,
    proc_t proc, kern_nexus_t nexus, nexus_port_t port,
    kern_channel_t channel, void **channel_context)
{
#pragma unused(nxprov, proc, nexus, port, channel, channel_context)
	return 0;
}
554 
555 static errno_t
redirect_nx_connected(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel)556 redirect_nx_connected(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
557     kern_channel_t channel)
558 {
559 #pragma unused(nxprov, channel)
560 	if_redirect_t rd = NULL;
561 
562 	rd = redirect_nexus_context(nexus);
563 	RD_LOCK(rd);
564 	if (rd->rd_detaching) {
565 		DTRACE_SKYWALK1(detaching, if_redirect_t, rd);
566 		RD_UNLOCK(rd);
567 		return EBUSY;
568 	}
569 	redirect_retain(rd);
570 	rd->rd_connected = TRUE;
571 	RD_UNLOCK(rd);
572 
573 	RDLOG_DBG("%s: connected channel %p", rd->rd_name, channel);
574 	return 0;
575 }
576 
577 static void
redirect_nx_pre_disconnect(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel)578 redirect_nx_pre_disconnect(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
579     kern_channel_t channel)
580 {
581 #pragma unused(nxprov, channel)
582 	if_redirect_t rd;
583 
584 	rd = redirect_nexus_context(nexus);
585 	RDLOG_INFO("%s: pre-disconnect channel %p", rd->rd_name, channel);
586 	/* Quiesce the interface and flush any pending outbound packets */
587 	if_down(rd->rd_ifp);
588 	RD_LOCK(rd);
589 	rd->rd_connected = FALSE;
590 	RD_UNLOCK(rd);
591 }
592 
/*
 * redirect_nx_disconnected
 * Channel disconnect callback; drops the reference taken in
 * redirect_nx_connected().
 */
static void
redirect_nx_disconnected(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_t channel)
{
#pragma unused(nxprov, channel)
	if_redirect_t rd;

	rd = redirect_nexus_context(nexus);
	RDLOG_INFO("%s: disconnected channel %p", rd->rd_name, channel);
	redirect_release(rd);
}
604 
/*
 * redirect_nx_slot_init
 * Slot init callback; no per-slot state is needed.
 */
static errno_t
redirect_nx_slot_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t ring, kern_channel_slot_t slot, uint32_t slot_index,
    struct kern_slot_prop **slot_prop_addr, void **slot_context)
{
#pragma unused(nxprov, nexus, ring, slot, slot_index, slot_prop_addr, slot_context)
	return 0;
}
613 
/*
 * redirect_nx_slot_fini
 * Slot teardown callback; nothing to release per slot.
 */
static void
redirect_nx_slot_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t ring, kern_channel_slot_t slot, uint32_t slot_index)
{
#pragma unused(nxprov, nexus, ring, slot, slot_index)
}
620 
/*
 * redirect_nx_sync_tx
 * TX sync callback: drain every packet from the channel's TX ring,
 * tap BPF, account ring stats, and submit the batch to the delegate
 * interface via redirect_tx_submit(). Packets that could not be
 * handed to the delegate (no delegate, or submit failure) are freed
 * and counted as NETIF_STATS_DROP_NO_DELEGATE.
 */
static errno_t
redirect_nx_sync_tx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t tx_ring, uint32_t flags)
{
#pragma unused(nxprov)
	if_redirect_t rd;
	ifnet_t ifp;
	kern_channel_slot_t last_tx_slot = NULL;
	ifnet_t delegate_ifp;
	struct kern_channel_ring_stat_increment stats;
	kern_channel_slot_t tx_slot = NULL;
	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
	struct pktq tx_pktq;
	uint32_t n_pkts = 0;
	int error = 0;

	bzero(&stats, sizeof(stats));
	STATS_INC(nifs, NETIF_STATS_TX_SYNC);
	rd = redirect_nexus_context(nexus);
	RDLOG_INFO("%s ring %d flags 0x%x", rd->rd_name, tx_ring->ckr_ring_id, flags);

	if (__improbable(!redirect_is_usable(rd))) {
		RDLOG_INFO("%s is not usable", rd->rd_name);
		DTRACE_SKYWALK1(unusable, if_redirect_t, rd);
		return ENOENT;
	}
	ifp = rd->rd_ifp;
	delegate_ifp = rd->rd_delegate_ifp;

	/* collect all ring packets into a local queue first */
	KPKTQ_INIT(&tx_pktq);
	while ((tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL)) != NULL) {
		kern_packet_t sph;

		/* detach the packet from the TX ring */
		sph = kern_channel_slot_get_packet(tx_ring, tx_slot);
		VERIFY(sph != 0);
		kern_channel_slot_detach_packet(tx_ring, tx_slot, sph);

		/* bpf tap output */
		redirect_bpf_tap(ifp, sph, false);

		ASSERT(sph != 0);
		STATS_INC(nifs, NETIF_STATS_TX_COPY_DIRECT);
		STATS_INC(nifs, NETIF_STATS_TX_PACKETS);

		stats.kcrsi_slots_transferred++;
		stats.kcrsi_bytes_transferred += kern_packet_get_data_length(sph);

		KPKTQ_ENQUEUE(&tx_pktq, SK_PTR_ADDR_KPKT(sph));
		n_pkts++;

		last_tx_slot = tx_slot;
	}
	if (last_tx_slot != NULL) {
		/* consume the drained slots and publish ring statistics */
		kern_channel_advance_slot(tx_ring, last_tx_slot);
		kern_channel_increment_ring_net_stats(tx_ring, ifp, &stats);
	}
	if (__improbable(delegate_ifp == NULL)) {
		RDLOG_INFO("%s has no delegate", rd->rd_name);
		DTRACE_SKYWALK1(no__delegate, if_redirect_t, rd);
		error = ENXIO;
		goto done;
	}
	if (n_pkts > 0) {
		redirect_tx_submit(delegate_ifp, &tx_pktq, ifp->if_flowhash);
	}
done:
	/*
	 * Packets not enqueued into delegate interface AQM
	 */
	if (KPKTQ_LEN(&tx_pktq) > 0) {
		DTRACE_SKYWALK2(unsent, if_redirect_t, rd, struct pktq *, &tx_pktq);
		STATS_ADD(nifs, NETIF_STATS_DROP_NO_DELEGATE, KPKTQ_LEN(&tx_pktq));
		pp_free_pktq(&tx_pktq);
	}
	return error;
}
698 
/*
 * redirect_rx_cb
 * RX callback invoked with a queue of packets destined for this
 * interface. Copies each packet into our pool, attaches it to the next
 * free RX ring slot, taps BPF, accounts ring stats and finally
 * notifies the channel. Packets that cannot be placed (ring gone, no
 * free slot, copy failure) are freed; the input queue is always fully
 * consumed.
 */
static void
redirect_rx_cb(void *arg, struct pktq *spktq)
{
	if_redirect_t rd = arg;
	struct __kern_packet *spkt, *pkt;
	kern_packet_t ph;
	kern_channel_ring_t rx_ring = NULL;
	kern_channel_slot_t rx_slot = NULL, last_rx_slot = NULL;
	struct kern_channel_ring_stat_increment stats;
	int err;

	/*
	 * The ring cannot disappear before the callback is finished and removed.
	 */
	rx_ring = rd->rd_rx_ring[0];
	if (rx_ring == NULL) {
		DTRACE_SKYWALK2(no__ring__drop, if_redirect_t, rd, struct pktq *, spktq);
		pp_free_pktq(spktq);
		return;
	}
	bzero(&stats, sizeof(stats));
	/* serialize against other ring users, then reclaim completed slots */
	kr_enter(rx_ring, TRUE);
	kern_channel_reclaim(rx_ring);

	while (KPKTQ_LEN(spktq) > 0) {
		KPKTQ_DEQUEUE(spktq, spkt);

		rx_slot = kern_channel_get_next_slot(rx_ring, last_rx_slot, NULL);
		if (rx_slot == NULL) {
			/* ring full: drop this packet but keep draining */
			DTRACE_SKYWALK2(no__slot__drop, if_redirect_t, rd,
			    struct __kern_packet *, spkt);
			pp_free_packet_single(spkt);
			continue;
		}
		/* copy into our pool; spkt is consumed by the converter */
		pkt = nx_netif_pkt_to_pkt(rd->rd_ifp->if_na, spkt, NETIF_CONVERT_RX);
		if (pkt == NULL) {
			DTRACE_SKYWALK1(copy__drop, if_redirect_t, rd);
			continue;
		}
		ph = SK_PKT2PH(pkt);
		stats.kcrsi_slots_transferred++;
		stats.kcrsi_bytes_transferred += kern_packet_get_data_length(ph);

		redirect_bpf_tap(rd->rd_ifp, ph, true);

		err = kern_channel_slot_attach_packet(rx_ring, rx_slot, ph);
		VERIFY(err == 0);
		last_rx_slot = rx_slot;
	}
	ASSERT(KPKTQ_EMPTY(spktq));
	if (last_rx_slot != NULL) {
		kern_channel_advance_slot(rx_ring, last_rx_slot);
		kern_channel_increment_ring_net_stats(rx_ring, rd->rd_ifp, &stats);
	}
	kr_exit(rx_ring);
	if (last_rx_slot != NULL) {
		/* wake the channel only if we actually delivered something */
		kern_channel_notify(rx_ring, 0);
	}
}
758 
/*
 * redirect_nx_sync_rx
 * RX sync callback; a no-op because RX slots are populated directly
 * by redirect_rx_cb().
 */
static errno_t
redirect_nx_sync_rx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t ring, uint32_t flags)
{
#pragma unused(nxprov, nexus, ring, flags)
	return 0;
}
766 
/*
 * redirect_async_doorbell
 * Thread-call body for the asynchronous TX doorbell: refill the TX
 * ring outside the doorbell context. On exit it clears
 * rd_doorbell_tcall_active and wakes redirect_nx_ring_fini() if it is
 * waiting for this call to drain. Note the 'done' label is reached
 * with rd_lock held on both paths (early goto and normal flow).
 */
static void
redirect_async_doorbell(thread_call_param_t arg0, thread_call_param_t arg1)
{
#pragma unused(arg1)
	errno_t error;
	if_redirect_t rd = (if_redirect_t)arg0;
	kern_channel_ring_t ring;
	boolean_t more;

	RD_LOCK(rd);
	ring = rd->rd_tx_ring[0];
	if (__improbable(!redirect_is_usable(rd) || ring == NULL)) {
		/* falls through to 'done' with the lock still held */
		DTRACE_SKYWALK2(unusable, if_redirect_t, rd, kern_channel_ring_t, ring);
		goto done;
	}
	rd->rd_doorbell_tcall_active = TRUE;
	RD_UNLOCK(rd);

	/* refill without holding rd_lock */
	error = kern_channel_tx_refill(ring, UINT32_MAX, UINT32_MAX, FALSE,
	    &more);
	if (error != 0 && error != EAGAIN) {
		RDLOG_ERR("%s: Tx refill failed %d", rd->rd_name, error);
	} else {
		RDLOG_DBG("%s: Tx refilled", rd->rd_name);
	}

	RD_LOCK(rd);
done:
	rd->rd_doorbell_tcall_active = FALSE;
	if (rd->rd_waiting_for_tcall) {
		/* redirect_nx_ring_fini() is msleep()ing on rd */
		RDLOG_INFO("%s: threadcall waking up waiter", rd->rd_name);
		wakeup((caddr_t)rd);
	}
	RD_UNLOCK(rd);
}
802 
/*
 * redirect_schedule_async_doorbell
 * Schedule the asynchronous TX doorbell thread call, allocating it
 * lazily on first use. The allocation takes a reference on rd that is
 * dropped in redirect_nx_ring_fini() when the tcall is freed. No-op
 * while the interface is not usable.
 */
static void
redirect_schedule_async_doorbell(if_redirect_t rd)
{
	thread_call_t tcall;

	RD_LOCK(rd);
	if (__improbable(!redirect_is_usable(rd))) {
		DTRACE_SKYWALK1(unusable, if_redirect_t, rd);
		RD_UNLOCK(rd);
		return;
	}
	tcall = rd->rd_doorbell_tcall;
	if (tcall != NULL) {
		thread_call_enter(tcall);
	} else {
		/* first doorbell: allocate a one-shot thread call */
		tcall = thread_call_allocate_with_options(redirect_async_doorbell,
		    (thread_call_param_t)rd,
		    THREAD_CALL_PRIORITY_KERNEL,
		    THREAD_CALL_OPTIONS_ONCE);
		if (tcall == NULL) {
			RDLOG_ERR("%s: tcall alloc failed", rd->rd_name);
		} else {
			rd->rd_doorbell_tcall = tcall;
			/* ref held by the tcall; dropped in ring_fini */
			redirect_retain(rd);
			thread_call_enter(tcall);
		}
	}
	RD_UNLOCK(rd);
}
832 
/*
 * redirect_nx_tx_doorbell
 * TX doorbell callback: refill the TX ring synchronously unless the
 * caller requested an async refill, in which case the work is pushed
 * to the doorbell thread call. Always reports success; refill errors
 * are only logged.
 */
static errno_t
redirect_nx_tx_doorbell(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t ring, uint32_t flags)
{
#pragma unused(nxprov, ring, flags)
	errno_t error;
	if_redirect_t rd;

	rd = redirect_nexus_context(nexus);
	RDLOG_DBG("%s", rd->rd_name);

	if ((flags & KERN_NEXUS_TXDOORBELLF_ASYNC_REFILL) == 0) {
		boolean_t more;
		/* synchronous tx refill */
		error = kern_channel_tx_refill(ring, UINT32_MAX, UINT32_MAX,
		    TRUE, &more);
		if (error != 0 && error != EAGAIN) {
			RDLOG_ERR("%s: Tx refill (sync) %d", rd->rd_name, error);
		} else {
			RDLOG_DBG("%s: Tx refilled (sync)", rd->rd_name);
		}
	} else {
		RDLOG_DBG("%s: schedule async refill", rd->rd_name);
		redirect_schedule_async_doorbell(rd);
	}
	return 0;
}
860 
/*
 * redirect_netif_prepare
 * Nexus callback to configure the ifnet before attach: clear
 * capabilities and offloads, set a default MTU of ETHERMTU, and apply
 * family-appropriate interface flags and header/address lengths.
 */
static errno_t
redirect_netif_prepare(kern_nexus_t nexus, ifnet_t ifp)
{
	if_redirect_t rd;

	rd = (if_redirect_t)kern_nexus_get_context(nexus);

	(void)ifnet_set_capabilities_enabled(ifp, 0, -1);
	ifnet_set_baudrate(ifp, 0);
	ifnet_set_mtu(ifp, ETHERMTU);
	ifnet_set_offload(ifp, 0);

	if (rd->rd_ftype == IFRTYPE_FAMILY_ETHERNET) {
		/* ethernet-like: broadcast-capable with ethernet framing */
		ifnet_set_flags(ifp,
		    IFF_BROADCAST | IFF_MULTICAST | IFF_SIMPLEX, 0xffff);
		ifnet_set_addrlen(ifp, ETHER_ADDR_LEN);
		ifnet_set_hdrlen(ifp, sizeof(struct ether_header));
	} else {
		/* point-to-point: no link-layer framing */
		ifnet_set_flags(ifp, IFF_MULTICAST | IFF_POINTOPOINT, 0xffff);
	}
	return 0;
}
883 
/*
 * redirect_delegate_adv_config
 * Enable or disable interface advisory generation on the delegate's
 * netif nexus. Only skywalk-native delegates are supported; others are
 * ignored with a log/DTrace probe.
 */
static void
redirect_delegate_adv_config(ifnet_t delegate_ifp, bool enable)
{
	struct nx_netif *delegate_nif;

	ASSERT(delegate_ifp != NULL);
	if (!SKYWALK_NATIVE(delegate_ifp)) {
		RDLOG_ERR("%s is not skywalk native", if_name(delegate_ifp));
		DTRACE_SKYWALK1(not__native, ifnet_t, delegate_ifp);
		return;
	}
	delegate_nif = NA(delegate_ifp)->nifna_netif;
	nx_netif_config_interface_advisory(delegate_nif->nif_nx, enable);
}
898 
/*
 * redirect_nx_intf_adv_config
 * Capability config entry point: toggle interface advisory reporting.
 * Requires a usable interface with a delegate set, and rejects
 * redundant transitions (already in the requested state).
 */
static errno_t
redirect_nx_intf_adv_config(void *prov_ctx, bool enable)
{
	if_redirect_t rd = (if_redirect_t)prov_ctx;

	RD_LOCK(rd);
	if (!redirect_is_usable(rd)) {
		RDLOG_ERR("cannot %s advisory on %s because it is not usable",
		    enable ? "enable" : "disable", if_name(rd->rd_ifp));
		DTRACE_SKYWALK1(unusable, if_redirect_t, rd);
		RD_UNLOCK(rd);
		return ENXIO;
	}
	if (rd->rd_intf_adv_enabled == enable) {
		RDLOG_ERR("advisory is already %s on %s",
		    enable ? "enable" : "disable", if_name(rd->rd_ifp));
		DTRACE_SKYWALK1(advisory__already__set, if_redirect_t, rd);
		RD_UNLOCK(rd);
		return ENXIO;
	}
	if (!rd->rd_delegate_set) {
		RDLOG_ERR("delegate is not set on %s", if_name(rd->rd_ifp));
		DTRACE_SKYWALK1(no__delegate, if_redirect_t, rd);
		RD_UNLOCK(rd);
		return ENXIO;
	}
	/* forward the setting to the delegate's netif nexus */
	redirect_delegate_adv_config(rd->rd_delegate_ifp, enable);
	rd->rd_intf_adv_enabled = enable;
	RD_UNLOCK(rd);
	return 0;
}
930 
931 static errno_t
fill_capab_interface_advisory(if_redirect_t rd,void * contents,uint32_t * len)932 fill_capab_interface_advisory(if_redirect_t rd, void *contents,
933     uint32_t *len)
934 {
935 	struct kern_nexus_capab_interface_advisory *capab = contents;
936 
937 	if (*len != sizeof(*capab)) {
938 		DTRACE_SKYWALK2(invalid__len, uint32_t, *len, size_t, sizeof(*capab));
939 		return EINVAL;
940 	}
941 	if (capab->kncia_version !=
942 	    KERN_NEXUS_CAPAB_INTERFACE_ADVISORY_VERSION_1) {
943 		DTRACE_SKYWALK2(invalid__ver, uint32_t, capab->kncia_version,
944 		    uint32_t, KERN_NEXUS_CAPAB_INTERFACE_ADVISORY_VERSION_1);
945 		return EINVAL;
946 	}
947 	VERIFY(capab->kncia_notify != NULL);
948 	rd->rd_intf_adv_kern_ctx = capab->kncia_kern_context;
949 	rd->rd_intf_adv_notify = capab->kncia_notify;
950 	capab->kncia_provider_context = rd;
951 	capab->kncia_config = redirect_nx_intf_adv_config;
952 	return 0;
953 }
954 
955 static errno_t
redirect_nx_capab_config(kern_nexus_provider_t nxprov,kern_nexus_t nx,kern_nexus_capab_t capab,void * contents,uint32_t * len)956 redirect_nx_capab_config(kern_nexus_provider_t nxprov, kern_nexus_t nx,
957     kern_nexus_capab_t capab, void *contents, uint32_t *len)
958 {
959 #pragma unused(nxprov)
960 	errno_t error;
961 	if_redirect_t rd;
962 
963 	rd = redirect_nexus_context(nx);
964 
965 	switch (capab) {
966 	case KERN_NEXUS_CAPAB_INTERFACE_ADVISORY:
967 		error = fill_capab_interface_advisory(rd, contents, len);
968 		break;
969 	default:
970 		error = ENOTSUP;
971 		break;
972 	}
973 	return error;
974 }
975 
/*
 * Register a netif nexus provider named "com.apple.netif.<ifname>" and
 * allocate a provider instance (which also creates the ifnet).  On
 * success *provider, *instance and *ifp are populated.  On failure all
 * partially-created state is rolled back; in particular, if instance
 * allocation fails the provider registered just above is deregistered.
 */
static errno_t
create_netif_provider_and_instance(if_redirect_t rd,
    struct ifnet_init_eparams *init_params, ifnet_t *ifp,
    uuid_t *provider, uuid_t *instance)
{
	errno_t err = 0;
	nexus_controller_t controller = kern_nexus_shared_controller();
	struct kern_nexus_net_init net_init = {};
	nexus_name_t provider_name = {};
	nexus_attr_t nexus_attr = NULL;

	/* callbacks the netif nexus invokes on this provider */
	struct kern_nexus_provider_init prov_init = {
		.nxpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
		.nxpi_flags = NXPIF_VIRTUAL_DEVICE,
		.nxpi_pre_connect = redirect_nx_pre_connect,
		.nxpi_connected = redirect_nx_connected,
		.nxpi_pre_disconnect = redirect_nx_pre_disconnect,
		.nxpi_disconnected = redirect_nx_disconnected,
		.nxpi_ring_init = redirect_nx_ring_init,
		.nxpi_ring_fini = redirect_nx_ring_fini,
		.nxpi_slot_init = redirect_nx_slot_init,
		.nxpi_slot_fini = redirect_nx_slot_fini,
		.nxpi_sync_tx = redirect_nx_sync_tx,
		.nxpi_sync_rx = redirect_nx_sync_rx,
		.nxpi_tx_doorbell = redirect_nx_tx_doorbell,
		.nxpi_config_capab = redirect_nx_capab_config,
	};

	err = kern_nexus_attr_create(&nexus_attr);
	if (err != 0) {
		RDLOG_ERR("%s nexus attribution creation failed, error: %d",
		    rd->rd_name, err);
		DTRACE_SKYWALK2(attr__create__failed, if_redirect_t, rd, int, err);
		goto failed;
	}

	snprintf((char *)provider_name, sizeof(provider_name),
	    "com.apple.netif.%s", rd->rd_name);
	err = kern_nexus_controller_register_provider(controller,
	    redirect_nx_dom_prov,
	    provider_name,
	    &prov_init,
	    sizeof(prov_init),
	    nexus_attr,
	    provider);
	if (err != 0) {
		RDLOG_ERR("%s register provider failed, error %d", rd->rd_name, err);
		DTRACE_SKYWALK2(register__failed, if_redirect_t, rd, int, err);
		goto failed;
	}

	net_init.nxneti_version = KERN_NEXUS_NET_CURRENT_VERSION;
	net_init.nxneti_flags = 0;
	net_init.nxneti_eparams = init_params;
	net_init.nxneti_lladdr = NULL;
	net_init.nxneti_prepare = redirect_netif_prepare;
	/* rx and tx share the single packet pool created for rd */
	net_init.nxneti_rx_pbufpool = rd->rd_pp;
	net_init.nxneti_tx_pbufpool = rd->rd_pp;
	err = kern_nexus_controller_alloc_net_provider_instance(controller,
	    *provider, rd, NULL, instance, &net_init, ifp);
	if (err != 0) {
		RDLOG_ERR("%s alloc net provider instance failed %d", rd->rd_name, err);
		DTRACE_SKYWALK2(alloc__provider__instance__failed, if_redirect_t, rd, int, err);
		/* undo the provider registration made above */
		kern_nexus_controller_deregister_provider(controller, *provider);
		uuid_clear(*provider);
		goto failed;
	}
failed:
	/* reached on success too: the attr is only needed for registration */
	if (nexus_attr != NULL) {
		kern_nexus_attr_destroy(nexus_attr);
	}
	return err;
}
1049 
1050 static errno_t
redirect_attach_netif_nexus(if_redirect_t rd,struct ifnet_init_eparams * init_params,ifnet_t * ifp)1051 redirect_attach_netif_nexus(if_redirect_t rd,
1052     struct ifnet_init_eparams *init_params, ifnet_t *ifp)
1053 {
1054 	errno_t error = 0;
1055 	redirect_nx_t nx = &rd->rd_nx;
1056 
1057 	error = redirect_packet_pool_make(rd);
1058 	if (error != 0) {
1059 		RDLOG_ERR("%s packet pool make failed: %d", rd->rd_name, error);
1060 		DTRACE_SKYWALK2(pool__make__failed, if_redirect_t, rd, int, error);
1061 		return error;
1062 	}
1063 
1064 	return create_netif_provider_and_instance(rd, init_params, ifp,
1065 	           &nx->rnx_provider, &nx->rnx_instance);
1066 }
1067 
1068 static void
detach_provider_and_instance(uuid_t provider,uuid_t instance)1069 detach_provider_and_instance(uuid_t provider, uuid_t instance)
1070 {
1071 	nexus_controller_t controller = kern_nexus_shared_controller();
1072 	errno_t err;
1073 
1074 	if (!uuid_is_null(instance)) {
1075 		err = kern_nexus_controller_free_provider_instance(controller,
1076 		    instance);
1077 		if (err != 0) {
1078 			RDLOG_ERR("free_provider_instance failed %d", err);
1079 		}
1080 		uuid_clear(instance);
1081 	}
1082 	if (!uuid_is_null(provider)) {
1083 		err = kern_nexus_controller_deregister_provider(controller,
1084 		    provider);
1085 		if (err != 0) {
1086 			RDLOG_ERR("deregister_provider failed %d", err);
1087 		}
1088 		uuid_clear(provider);
1089 	}
1090 	return;
1091 }
1092 
1093 static void
redirect_detach_netif_nexus(if_redirect_t rd)1094 redirect_detach_netif_nexus(if_redirect_t rd)
1095 {
1096 	redirect_nx_t rnx = &rd->rd_nx;
1097 	detach_provider_and_instance(rnx->rnx_provider, rnx->rnx_instance);
1098 }
1099 
1100 static void
interface_link_event(ifnet_t ifp,uint32_t event_code)1101 interface_link_event(ifnet_t ifp, uint32_t event_code)
1102 {
1103 	struct event {
1104 		uint32_t ifnet_family;
1105 		uint32_t unit;
1106 		char if_name[IFNAMSIZ];
1107 	};
1108 	_Alignas(struct kern_event_msg) char message[sizeof(struct kern_event_msg) + sizeof(struct event)] = { 0 };
1109 	struct kern_event_msg *header = (struct kern_event_msg *)message;
1110 	struct event *data = (struct event *)(header + 1);
1111 
1112 	header->total_size = sizeof(message);
1113 	header->vendor_code = KEV_VENDOR_APPLE;
1114 	header->kev_class = KEV_NETWORK_CLASS;
1115 	header->kev_subclass = KEV_DL_SUBCLASS;
1116 	header->event_code = event_code;
1117 	data->ifnet_family = ifnet_family(ifp);
1118 	data->unit = (uint32_t)ifnet_unit(ifp);
1119 	strlcpy(data->if_name, ifnet_name(ifp), IFNAMSIZ);
1120 	ifnet_event(ifp, header);
1121 }
1122 
1123 static if_redirect_t
ifnet_get_if_redirect(ifnet_t ifp)1124 ifnet_get_if_redirect(ifnet_t ifp)
1125 {
1126 	return (if_redirect_t)ifnet_softc(ifp);
1127 }
1128 
1129 static int
redirect_clone_create(struct if_clone * ifc,uint32_t unit,void * param)1130 redirect_clone_create(struct if_clone *ifc, uint32_t unit, void *param)
1131 {
1132 	int error;
1133 	if_redirect_t rd;
1134 	struct ifnet_init_eparams rd_init;
1135 	struct if_redirect_create_params params;
1136 	user_addr_t param_addr = (user_addr_t)param;
1137 	ifnet_t ifp;
1138 
1139 	if (param_addr == USER_ADDR_NULL) {
1140 		RDLOG_ERR("create params not specified");
1141 		DTRACE_SKYWALK2(no__param, struct if_clone *, ifc, uint32_t, unit);
1142 		return EINVAL;
1143 	}
1144 	error = copyin(param_addr, &params, sizeof(params));
1145 	if (error != 0) {
1146 		RDLOG_ERR("copyin failed: error %d", error);
1147 		DTRACE_SKYWALK1(copyin__failed, int, error);
1148 		return error;
1149 	}
1150 	if ((params.ircp_type != RD_CREATE_PARAMS_TYPE &&
1151 	    params.ircp_type != RD_CREATE_PARAMS_TYPE_NOATTACH) ||
1152 	    params.ircp_len != sizeof(params)) {
1153 		RDLOG_ERR("invalid type(0x%x) or len(0x%d)", params.ircp_type,
1154 		    params.ircp_len);
1155 		DTRACE_SKYWALK2(invalid__params, uint16_t, params.ircp_type,
1156 		    uint16_t, params.ircp_len);
1157 		return EINVAL;
1158 	}
1159 	if (params.ircp_ftype != IFRTYPE_FAMILY_ETHERNET &&
1160 	    params.ircp_ftype != IFRTYPE_FAMILY_CELLULAR) {
1161 		RDLOG_ERR("functional type(0x%x) not supported", params.ircp_ftype);
1162 		DTRACE_SKYWALK1(invalid__ftype, uint32_t, params.ircp_ftype);
1163 		return ENOTSUP;
1164 	}
1165 
1166 	rd = kalloc_type(if_redirect, Z_WAITOK | Z_ZERO | Z_NOFAIL);
1167 	RD_LOCK_INIT(rd);
1168 	rd->rd_ftype = params.ircp_ftype;
1169 	rd->rd_retain_count = 1;
1170 	rd->rd_max_mtu = RD_MAX_MTU;
1171 
1172 	/* use the interface name as the unique id for ifp recycle */
1173 	if ((unsigned int)
1174 	    snprintf(rd->rd_name, sizeof(rd->rd_name), "%s%d",
1175 	    ifc->ifc_name, unit) >= sizeof(rd->rd_name)) {
1176 		redirect_release(rd);
1177 		RDLOG_ERR("invalid ifc_name(%s) or unit(%d)", ifc->ifc_name, unit);
1178 		DTRACE_SKYWALK2(invalid__name__or__unit, char *, ifc->ifc_name,
1179 		    uint32_t, unit);
1180 		return EINVAL;
1181 	}
1182 
1183 	bzero(&rd_init, sizeof(rd_init));
1184 	rd_init.ver = IFNET_INIT_CURRENT_VERSION;
1185 	rd_init.len = sizeof(rd_init);
1186 	rd_init.flags |= (IFNET_INIT_SKYWALK_NATIVE | IFNET_INIT_IF_ADV);
1187 	if (params.ircp_type == RD_CREATE_PARAMS_TYPE_NOATTACH) {
1188 		rd_init.flags |= IFNET_INIT_NX_NOAUTO;
1189 	}
1190 	rd_init.uniqueid = rd->rd_name;
1191 	rd_init.uniqueid_len = (uint32_t)strlen(rd->rd_name);
1192 	rd_init.name = ifc->ifc_name;
1193 	rd_init.unit = unit;
1194 	rd_init.softc = rd;
1195 	rd_init.ioctl = redirect_ioctl;
1196 	rd_init.detach = redirect_if_free;
1197 	rd_init.subfamily = IFNET_SUBFAMILY_REDIRECT;
1198 
1199 	if (rd->rd_ftype == IFRTYPE_FAMILY_ETHERNET) {
1200 		rd_init.family = IFNET_FAMILY_ETHERNET;
1201 		rd_init.type = IFT_ETHER;
1202 		rd_init.demux = ether_demux;
1203 		rd_init.add_proto = ether_add_proto;
1204 		rd_init.del_proto = ether_del_proto;
1205 		rd_init.check_multi = ether_check_multi;
1206 		rd_init.framer_extended = ether_frameout_extended;
1207 		rd_init.broadcast_addr = etherbroadcastaddr;
1208 		rd_init.broadcast_len = ETHER_ADDR_LEN;
1209 	} else {
1210 		rd_init.family = IFNET_FAMILY_CELLULAR;
1211 		rd_init.type = IFT_CELLULAR;
1212 		rd_init.demux = redirect_demux;
1213 		rd_init.add_proto = redirect_add_proto;
1214 		rd_init.del_proto = redirect_del_proto;
1215 	}
1216 	error = redirect_attach_netif_nexus(rd, &rd_init, &ifp);
1217 	if (error != 0) {
1218 		redirect_release(rd);
1219 		RDLOG_ERR("attach netif nexus failed: error %d", error);
1220 		DTRACE_SKYWALK1(attach__nexus__failed, int, error);
1221 		return error;
1222 	}
1223 
1224 	/* take an additional reference for nexus controller */
1225 	redirect_retain(rd);
1226 	rd->rd_ifp = ifp;
1227 
1228 	if (rd->rd_ftype == IFRTYPE_FAMILY_ETHERNET) {
1229 		/* mac address will be set after delegate is configured */
1230 		(void) ifnet_set_lladdr(ifp, default_mac, ETHER_ADDR_LEN);
1231 		bpfattach(ifp, DLT_EN10MB, sizeof(struct ether_header));
1232 	} else {
1233 		bpfattach(ifp, DLT_RAW, 0);
1234 	}
1235 	return 0;
1236 }
1237 
1238 /*
1239  * This function is meant for cleaning up everything, not just delegate
1240  * related info.
1241  */
/*
 * Full cleanup of rd's runtime state (see comment above): drop the
 * delegate and reset the advisory-enabled flag so a future delegate
 * does not inherit it.
 */
static void
redirect_cleanup(if_redirect_t rd)
{
	/* clear delegate first; advisory state references the delegate */
	redirect_clear_delegate(rd);
	rd->rd_intf_adv_enabled = false;
}
1248 
/*
 * if_clone destroy handler.  Marks rd as detaching (idempotent: a
 * second concurrent destroy returns success immediately), then tears
 * down the delegate, the netif nexus, and finally the ifnet itself.
 */
static int
redirect_clone_destroy(ifnet_t ifp)
{
	if_redirect_t rd;

	rd = ifnet_get_if_redirect(ifp);
	if (rd == NULL) {
		RDLOG_ERR("rd is NULL");
		DTRACE_SKYWALK1(null__rd, ifnet_t, ifp);
		return ENXIO;
	}
	RD_LOCK(rd);
	if (rd->rd_detaching) {
		/* another destroy already in progress; nothing to do */
		RDLOG_ERR("%s is detaching", rd->rd_name);
		DTRACE_SKYWALK1(detaching, if_redirect_t, rd);
		RD_UNLOCK(rd);
		return 0;
	}
	rd->rd_detaching = TRUE;
	RD_UNLOCK(rd);

	redirect_cleanup(rd);
	redirect_detach_netif_nexus(rd);
	/*
	 * Releasing reference held for nexus controller
	 */
	redirect_release(rd);
	interface_link_event(ifp, KEV_DL_LINK_OFF);
	ifnet_detach(ifp);
	return 0;
}
1280 
1281 static int
if_redirect_request_copyin(user_addr_t user_addr,struct if_redirect_request * ifrr,uint64_t len)1282 if_redirect_request_copyin(user_addr_t user_addr,
1283     struct if_redirect_request *ifrr, uint64_t len)
1284 {
1285 	int error;
1286 
1287 	if (user_addr == USER_ADDR_NULL || len < sizeof(*ifrr)) {
1288 		RDLOG_ERR("user_addr(0x%llx) or len(%llu) < %lu",
1289 		    user_addr, len, sizeof(*ifrr));
1290 		error = EINVAL;
1291 		goto done;
1292 	}
1293 	error = copyin(user_addr, ifrr, sizeof(*ifrr));
1294 	if (error != 0) {
1295 		RDLOG_ERR("copyin failed: %d", error);
1296 		goto done;
1297 	}
1298 	if (ifrr->ifrr_reserved[0] != 0 || ifrr->ifrr_reserved[1] != 0 ||
1299 	    ifrr->ifrr_reserved[2] != 0 || ifrr->ifrr_reserved[3] != 0) {
1300 		RDLOG_ERR("reserved[0]=0x%llu, reserved[1]=0x%llu"
1301 		    "reserved[2]=0x%llu, reserved[3]=0x%llu", ifrr->ifrr_reserved[0],
1302 		    ifrr->ifrr_reserved[1], ifrr->ifrr_reserved[2],
1303 		    ifrr->ifrr_reserved[3]);
1304 		error = EINVAL;
1305 		goto done;
1306 	}
1307 done:
1308 	return error;
1309 }
1310 
1311 static void
redirect_detach_notify(void * arg)1312 redirect_detach_notify(void *arg)
1313 {
1314 	if_redirect_t rd = arg;
1315 
1316 	redirect_clear_delegate(rd);
1317 }
1318 
/*
 * Bind delegate_ifp as the delegate of rd's interface.  Acquires, in
 * order: a self io-ref, the delegate-parent linkage, a delegate io-ref,
 * the flowswitch rx callback, the ifnet delegate binding, and (for
 * ethernet) the delegate's MAC address; finally re-arms advisory and
 * installs the detach-notify hook.  Each step sets a matching rd_* flag
 * so that any failure unwinds exactly the completed steps via
 * redirect_clear_delegate_locked().
 */
static int
redirect_set_delegate(if_redirect_t rd, ifnet_t delegate_ifp)
{
	ifnet_t ifp = rd->rd_ifp;
	int error;

	RD_LOCK(rd);
	if (rd->rd_detaching) {
		RDLOG_ERR("%s is detaching", rd->rd_name);
		DTRACE_SKYWALK2(detaching, if_redirect_t, rd, ifnet_t, delegate_ifp);
		RD_UNLOCK(rd);
		return ENXIO;
	}
	if (rd->rd_delegate_ifp != NULL) {
		if (rd->rd_delegate_ifp == delegate_ifp) {
			RDLOG_ERR("cannot configure the same delegate");
			DTRACE_SKYWALK2(same__ifp, if_redirect_t, rd,
			    ifnet_t, delegate_ifp);
			RD_UNLOCK(rd);
			return EALREADY;
		} else {
			/* switching delegates: tear down the old one first */
			redirect_clear_delegate_locked(rd);
		}
	}
	ASSERT(rd->rd_delegate_ifp == NULL);

	/* hold an io-ref on our own ifp for the delegate linkage */
	if (!ifnet_is_attached(ifp, 1)) {
		RDLOG_ERR("failed to get self reference");
		DTRACE_SKYWALK2(ifp__detaching, if_redirect_t, rd, ifnet_t, ifp);
		error = ENXIO;
		goto fail;
	}
	ASSERT(!rd->rd_self_ref);
	rd->rd_self_ref = TRUE;

	/* This saves the reference taken above */
	error = ifnet_set_delegate_parent(delegate_ifp, ifp);
	if (error != 0) {
		RDLOG_ERR("failed to set delegate parent");
		DTRACE_SKYWALK4(set__delegate__parent__failed, if_redirect_t, rd,
		    ifnet_t, delegate_ifp, ifnet_t, ifp, int, error);
		goto fail;
	}
	ASSERT(!rd->rd_delegate_parent_set);
	rd->rd_delegate_parent_set = TRUE;

	/* hold an io-ref on the delegate while we point at it */
	if (!ifnet_is_attached(delegate_ifp, 1)) {
		RDLOG_ERR("failed to get delegate reference");
		DTRACE_SKYWALK2(delegate__detaching, if_redirect_t, rd,
		    ifnet_t, delegate_ifp);
		error = ENXIO;
		goto fail;
	}
	ASSERT(rd->rd_delegate_ifp == NULL);
	rd->rd_delegate_ifp = delegate_ifp;
	ASSERT(!rd->rd_delegate_ref);
	rd->rd_delegate_ref = TRUE;

	/* receive traffic from the delegate's flowswitch */
	error = ifnet_set_flowswitch_rx_callback(delegate_ifp, redirect_rx_cb, rd);
	if (error != 0) {
		RDLOG_ERR("failed to set fsw rx callback: %d", error);
		DTRACE_SKYWALK3(set__fsw__rx__cb__fail, if_redirect_t, rd, ifnet_t,
		    delegate_ifp, int, error);
		goto fail;
	}
	ASSERT(!rd->rd_fsw_rx_cb_set);
	rd->rd_fsw_rx_cb_set = TRUE;

	error = ifnet_set_delegate(ifp, delegate_ifp);
	if (error != 0) {
		RDLOG_ERR("failed to set delegate ifp: %d", error);
		DTRACE_SKYWALK4(set__delegate__fail, if_redirect_t, rd, ifnet_t, ifp,
		    ifnet_t, delegate_ifp, int, error);
		goto fail;
	}
	ASSERT(!rd->rd_delegate_set);
	rd->rd_delegate_set = TRUE;

	/* ethernet interfaces adopt the delegate's MAC address */
	if (rd->rd_ftype == IFRTYPE_FAMILY_ETHERNET) {
		uint8_t mac_addr[ETHER_ADDR_LEN];

		error = ifnet_lladdr_copy_bytes(delegate_ifp, mac_addr,
		    ETHER_ADDR_LEN);
		if (error != 0) {
			RDLOG_ERR("failed to get mac addr from %s, error %d",
			    if_name(delegate_ifp), error);
			DTRACE_SKYWALK3(lladdr__copy__fail, if_redirect_t, rd,
			    ifnet_t, delegate_ifp, int, error);
			goto fail;
		}
		error = ifnet_set_lladdr(ifp, mac_addr, ETHER_ADDR_LEN);
		if (error != 0) {
			RDLOG_ERR("failed to set mac addr for %s, error %d",
			    if_name(ifp), error);
			DTRACE_SKYWALK3(set__lladdr__fail, if_redirect_t, rd,
			    ifnet_t, ifp, int, error);
			goto fail;
		}
		ASSERT(!rd->rd_mac_addr_set);
		rd->rd_mac_addr_set = TRUE;
	}
	/*
	 * This is enabled out-of-band from redirect_set_delegate() but we should do
	 * this here in case we move to a different delegate.
	 */
	if (rd->rd_intf_adv_enabled) {
		redirect_delegate_adv_config(delegate_ifp, true);
	}
	ifnet_set_detach_notify(delegate_ifp, redirect_detach_notify, rd);
	rd->rd_detach_notify_set = TRUE;

	/*
	 * Check that the delegate is still attached. If not, the detach notify above
	 * could've been missed and we would have to cleanup everything here.
	 */
	if (!ifnet_is_attached(delegate_ifp, 0)) {
		RDLOG_ERR("delegate %s detached during setup", if_name(delegate_ifp));
		DTRACE_SKYWALK2(delegate__detached, if_redirect_t, rd,
		    ifnet_t, delegate_ifp);
		error = ENXIO;
		goto fail;
	}
	RD_UNLOCK(rd);
	return 0;

fail:
	/* unwinds only the steps whose rd_* flags were set above */
	redirect_clear_delegate_locked(rd);
	RD_UNLOCK(rd);
	return error;
}
1449 
/*
 * Undo everything redirect_set_delegate() established, in reverse
 * order of acquisition.  Each rd_* flag records whether the matching
 * step was completed, so this is safe to call on partial state.
 * Caller holds RD_LOCK.
 */
static void
redirect_clear_delegate_locked(if_redirect_t rd)
{
	ifnet_t ifp = rd->rd_ifp;
	ifnet_t delegate_ifp = rd->rd_delegate_ifp;
	int error;

	if (rd->rd_detach_notify_set) {
		ASSERT(delegate_ifp != NULL);
		ifnet_set_detach_notify(delegate_ifp, NULL, NULL);
		rd->rd_detach_notify_set = FALSE;
	}
	if (rd->rd_intf_adv_enabled && delegate_ifp != NULL) {
		redirect_delegate_adv_config(delegate_ifp, false);
		/*
		 * We don't clear rd_intf_adv_enabled because we want to reenable
		 * advisory after moving to a different delegate.
		 */
	}
	/* restore the placeholder MAC taken from the delegate */
	if (rd->rd_ftype == IFRTYPE_FAMILY_ETHERNET && rd->rd_mac_addr_set) {
		ASSERT(delegate_ifp != NULL);
		error = ifnet_set_lladdr(ifp, default_mac, ETHER_ADDR_LEN);
		if (error != 0) {
			RDLOG_ERR("failed to set mac addr for %s, error %d",
			    if_name(ifp), error);
			DTRACE_SKYWALK3(set__lladdr__fail, if_redirect_t, rd,
			    ifnet_t, ifp, int, error);
		}
		rd->rd_mac_addr_set = FALSE;
	}
	if (rd->rd_delegate_set) {
		ASSERT(delegate_ifp != NULL);
		(void) ifnet_set_delegate(ifp, NULL);
		rd->rd_delegate_set = FALSE;
	}
	if (rd->rd_fsw_rx_cb_set) {
		ASSERT(delegate_ifp != NULL);
		(void) ifnet_set_flowswitch_rx_callback(delegate_ifp, NULL, NULL);
		rd->rd_fsw_rx_cb_set = FALSE;
	}
	/* drop the io-ref held on the delegate */
	if (rd->rd_delegate_ref) {
		ASSERT(delegate_ifp != NULL);
		rd->rd_delegate_ifp = NULL;
		ifnet_decr_iorefcnt(delegate_ifp);
		rd->rd_delegate_ref = FALSE;
	}
	if (rd->rd_delegate_parent_set) {
		ASSERT(delegate_ifp != NULL);
		ifnet_set_delegate_parent(delegate_ifp, NULL);
		rd->rd_delegate_parent_set = FALSE;
	}
	/* drop the io-ref held on our own ifp */
	if (rd->rd_self_ref) {
		ifnet_decr_iorefcnt(ifp);
		rd->rd_self_ref = FALSE;
	}
}
1506 
/* Locked wrapper around redirect_clear_delegate_locked(). */
static void
redirect_clear_delegate(if_redirect_t rd)
{
	RD_LOCK(rd);
	redirect_clear_delegate_locked(rd);
	RD_UNLOCK(rd);
}
1514 
/*
 * RD_S_CMD_SET_DELEGATE handler.  Copies in the request, looks up the
 * named delegate (taking an io-ref via ifunit_ref that is dropped at
 * "done"), checks family compatibility against rd's functional type,
 * rejects low-latency and redirect delegates, then hands off to
 * redirect_set_delegate() which takes its own references.
 */
static int
redirect_ioctl_set_delegate(ifnet_t ifp, user_addr_t user_addr, uint64_t len)
{
	if_redirect_t rd = NULL;
	struct if_redirect_request ifrr;
	ifnet_t delegate_ifp = NULL;
	int error;

	error = if_redirect_request_copyin(user_addr, &ifrr, len);
	if (error != 0) {
		RDLOG_ERR("if_redirect_request_copyin failed: error %d", error);
		DTRACE_SKYWALK4(copyin__failed, ifnet_t, ifp, user_addr_t, user_addr,
		    uint64_t, len, int, error);
		goto done;
	}
	if (ifrr.ifrr_delegate_name[0] == '\0') {
		RDLOG_ERR("NULL delegate name");
		DTRACE_SKYWALK1(null__delegate, ifnet_t, ifp);
		error = EINVAL;
		goto done;
	}
	/* ensure null termination */
	ifrr.ifrr_delegate_name[IFNAMSIZ - 1] = '\0';
	/* takes an io-ref on success; released at "done" */
	delegate_ifp = ifunit_ref(ifrr.ifrr_delegate_name);
	if (delegate_ifp == NULL) {
		RDLOG_ERR("delegate %s not found", ifrr.ifrr_delegate_name);
		DTRACE_SKYWALK2(invalid__name, ifnet_t, ifp, char *,
		    ifrr.ifrr_delegate_name);
		error = ENOENT;
		goto done;
	}
	rd = ifnet_get_if_redirect(ifp);
	if (rd == NULL) {
		RDLOG_ERR("rd is NULL");
		DTRACE_SKYWALK1(null__rd, ifnet_t, ifp);
		error = ENOENT;
		goto done;
	}
	/* Verify that the delegate type is supported */
	if (rd->rd_ftype == IFRTYPE_FAMILY_ETHERNET) {
		/* ethernet-type rd: delegate must be ethernet, not low-latency */
		if (delegate_ifp->if_family != IFNET_FAMILY_ETHERNET) {
			RDLOG_ERR("%s's family %d not compatible "
			    "with ethernet functional type", if_name(delegate_ifp),
			    delegate_ifp->if_family);
			DTRACE_SKYWALK2(delegate__incompatible__ether, if_redirect_t, rd,
			    ifnet_t, delegate_ifp);
			error = EINVAL;
			goto done;
		}
		if (ifnet_is_low_latency(delegate_ifp)) {
			RDLOG_ERR("low latency %s cannot be a delegate",
			    if_name(delegate_ifp));
			DTRACE_SKYWALK2(delegate__is__ll, if_redirect_t, rd,
			    ifnet_t, delegate_ifp);
			error = EINVAL;
			goto done;
		}
	} else {
		/* cellular-type rd: cellular, utun, or ipsec delegates allowed */
		ASSERT(rd->rd_ftype == IFRTYPE_FAMILY_CELLULAR);
		if (delegate_ifp->if_family != IFNET_FAMILY_CELLULAR &&
		    delegate_ifp->if_family != IFNET_FAMILY_UTUN &&
		    delegate_ifp->if_family != IFNET_FAMILY_IPSEC) {
			RDLOG_ERR("%s's family %d not compatible "
			    "with cellular functional type", if_name(delegate_ifp),
			    delegate_ifp->if_family);
			DTRACE_SKYWALK2(delegate__incompatible__cell, if_redirect_t, rd,
			    ifnet_t, delegate_ifp);
			error = EINVAL;
			goto done;
		}
	}
	/* disallow chaining redirect interfaces */
	if (delegate_ifp->if_subfamily == IFNET_SUBFAMILY_REDIRECT) {
		RDLOG_ERR("delegate %s cannot be redirect", if_name(delegate_ifp));
		DTRACE_SKYWALK2(delegate__is__redirect, if_redirect_t, rd,
		    ifnet_t, delegate_ifp);
		error = EINVAL;
		goto done;
	}
	error = redirect_set_delegate(rd, delegate_ifp);
done:
	if (delegate_ifp != NULL) {
		ifnet_decr_iorefcnt(delegate_ifp);
	}
	return error;
}
1600 
1601 static int
redirect_set_drvspec(ifnet_t ifp,uint64_t cmd,uint64_t len,user_addr_t user_addr)1602 redirect_set_drvspec(ifnet_t ifp, uint64_t cmd, uint64_t len,
1603     user_addr_t user_addr)
1604 {
1605 	int error;
1606 
1607 	switch (cmd) {
1608 	case RD_S_CMD_SET_DELEGATE:
1609 		error = redirect_ioctl_set_delegate(ifp, user_addr, len);
1610 		break;
1611 	default:
1612 		error = EOPNOTSUPP;
1613 		break;
1614 	}
1615 	return error;
1616 }
1617 
/* SIOCGDRVSPEC handler: no get sub-commands are defined yet. */
static int
redirect_get_drvspec(ifnet_t ifp, uint64_t cmd, uint64_t len,
    user_addr_t user_addr)
{
#pragma unused(ifp, cmd, len, user_addr)
	return 0;
}
1625 
/* Views of a SIOC[SG]DRVSPEC argument as its 32- or 64-bit variant. */
union ifdrvu {
	struct ifdrv32  *ifdrvu_32;
	struct ifdrv64  *ifdrvu_64;
	void            *ifdrvu_p;
};
1631 
/*
 * ioctl handler for redirect interfaces.  Rejects all requests once
 * the interface is detaching; otherwise handles address/flags/MTU,
 * fakes an always-active ethernet media report, and routes the
 * DRVSPEC get/set commands to the handlers above.
 */
static errno_t
redirect_ioctl(ifnet_t ifp, u_long cmd, void *data)
{
	if_redirect_t rd = NULL;
	struct ifreq *ifr = NULL;
	union ifdrvu drv;
	uint64_t drv_cmd;
	uint64_t drv_len;
	boolean_t drv_set_command = FALSE;
	user_addr_t user_addr;
	int error = 0;

	rd = ifnet_get_if_redirect(ifp);
	if (rd == NULL) {
		RDLOG_ERR("rd is NULL");
		DTRACE_SKYWALK1(null__rd, ifnet_t, ifp);
		return ENXIO;
	}
	RD_LOCK(rd);
	if (rd->rd_detaching) {
		RDLOG_ERR("%s is detaching", rd->rd_name);
		DTRACE_SKYWALK1(detaching, if_redirect_t, rd);
		RD_UNLOCK(rd);
		return ENXIO;
	}
	RD_UNLOCK(rd);

	ifr = (struct ifreq *)data;

	switch (cmd) {
	case SIOCSIFADDR:
		ifnet_set_flags(ifp, IFF_UP, IFF_UP);
		break;
	case SIOCGIFMEDIA32:
	case SIOCGIFMEDIA64: {
		struct ifmediareq *ifmr;

		/* media reporting only makes sense for ethernet type */
		RD_LOCK(rd);
		if (rd->rd_ftype != IFRTYPE_FAMILY_ETHERNET) {
			DTRACE_SKYWALK1(not__ether, if_redirect_t, rd);
			RD_UNLOCK(rd);
			return EOPNOTSUPP;
		}
		/* report a single, always-active ethernet medium */
		ifmr = (struct ifmediareq *)data;
		ifmr->ifm_current = IFM_ETHER;
		ifmr->ifm_mask = 0;
		ifmr->ifm_status = (IFM_AVALID | IFM_ACTIVE);
		ifmr->ifm_active = IFM_ETHER;
		ifmr->ifm_count = 1;

		user_addr = (cmd == SIOCGIFMEDIA64) ?
		    ((struct ifmediareq64 *)ifmr)->ifmu_ulist :
		    CAST_USER_ADDR_T(((struct ifmediareq32 *)ifmr)->ifmu_ulist);
		if (user_addr != USER_ADDR_NULL) {
			error = copyout(&ifmr->ifm_current, user_addr, sizeof(int));
		}
		RD_UNLOCK(rd);
		break;
	}
	case SIOCGIFDEVMTU: {
		struct ifdevmtu *devmtu_p;

		devmtu_p = &ifr->ifr_devmtu;
		devmtu_p->ifdm_current = ifnet_mtu(ifp);
		devmtu_p->ifdm_max = redirect_max_mtu(ifp);
		devmtu_p->ifdm_min = IF_MINMTU;
		break;
	}
	case SIOCSIFMTU:
		if ((unsigned int)ifr->ifr_mtu > redirect_max_mtu(ifp) ||
		    ifr->ifr_mtu < IF_MINMTU) {
			error = EINVAL;
		} else {
			error = ifnet_set_mtu(ifp, ifr->ifr_mtu);
		}
		break;
	case SIOCSIFFLAGS:
		if ((ifp->if_flags & IFF_UP) != 0) {
			/* marked up, set running if not already set */
			if ((ifp->if_flags & IFF_RUNNING) == 0) {
				/* set running */
				error = ifnet_set_flags(ifp, IFF_RUNNING,
				    IFF_RUNNING);
			}
		} else if ((ifp->if_flags & IFF_RUNNING) != 0) {
			/* marked down, clear running */
			error = ifnet_set_flags(ifp, 0, IFF_RUNNING);
		}
		break;
	case SIOCSDRVSPEC32:
	case SIOCSDRVSPEC64:
		/* set-commands are privileged */
		error = proc_suser(current_proc());
		if (error != 0) {
			break;
		}
		drv_set_command = TRUE;
		OS_FALLTHROUGH;
	case SIOCGDRVSPEC32:
	case SIOCGDRVSPEC64:
		drv.ifdrvu_p = data;
		if (cmd == SIOCGDRVSPEC32 || cmd == SIOCSDRVSPEC32) {
			drv_cmd = drv.ifdrvu_32->ifd_cmd;
			drv_len = drv.ifdrvu_32->ifd_len;
			user_addr = CAST_USER_ADDR_T(drv.ifdrvu_32->ifd_data);
		} else {
			drv_cmd = drv.ifdrvu_64->ifd_cmd;
			drv_len = drv.ifdrvu_64->ifd_len;
			user_addr = drv.ifdrvu_64->ifd_data;
		}
		if (drv_set_command) {
			error = redirect_set_drvspec(ifp, drv_cmd, drv_len,
			    user_addr);
		} else {
			error = redirect_get_drvspec(ifp, drv_cmd, drv_len,
			    user_addr);
		}
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		error = 0;
		break;

	default:
		error = EOPNOTSUPP;
		break;
	}

	return error;
}
1761 
1762 static void
redirect_if_free(ifnet_t ifp)1763 redirect_if_free(ifnet_t ifp)
1764 {
1765 	if_redirect_t rd = NULL;
1766 
1767 	if (ifp == NULL) {
1768 		RDLOG_ERR("ifp is NULL");
1769 		DTRACE_SKYWALK(null__ifp);
1770 		return;
1771 	}
1772 	rd = ifnet_get_if_redirect(ifp);
1773 	if (rd == NULL) {
1774 		RDLOG_ERR("rd is NULL");
1775 		DTRACE_SKYWALK1(null__rd, ifnet_t, ifp);
1776 		return;
1777 	}
1778 	RD_LOCK(rd);
1779 	ifp->if_softc = NULL;
1780 	VERIFY(rd->rd_doorbell_tcall == NULL);
1781 	RD_UNLOCK(rd);
1782 	redirect_release(rd);
1783 	ifnet_release(ifp);
1784 	return;
1785 }
1786 
1787 /*
1788  * Network interface functions
1789  */
1790 static errno_t
redirect_demux(__unused ifnet_t ifp,mbuf_t data,__unused char * frame_header,protocol_family_t * protocol)1791 redirect_demux(__unused ifnet_t ifp, mbuf_t data, __unused char *frame_header,
1792     protocol_family_t *protocol)
1793 {
1794 	struct ip *ip;
1795 	u_int ip_version;
1796 
1797 	while (data != NULL && mbuf_len(data) < 1) {
1798 		data = mbuf_next(data);
1799 	}
1800 
1801 	if (data == NULL) {
1802 		RDLOG_DBG("data is NULL");
1803 		DTRACE_SKYWALK(null__data);
1804 		return ENOENT;
1805 	}
1806 
1807 	ip = mtod(data, struct ip *);
1808 	ip_version = ip->ip_v;
1809 
1810 	switch (ip_version) {
1811 	case 4:
1812 		*protocol = PF_INET;
1813 		return 0;
1814 	case 6:
1815 		*protocol = PF_INET6;
1816 		return 0;
1817 	default:
1818 		*protocol = PF_UNSPEC;
1819 		break;
1820 	}
1821 
1822 	return 0;
1823 }
1824 
1825 static errno_t
redirect_add_proto(__unused ifnet_t interface,protocol_family_t protocol,__unused const struct ifnet_demux_desc * demux_array,__unused uint32_t demux_count)1826 redirect_add_proto(__unused ifnet_t interface, protocol_family_t protocol,
1827     __unused const struct ifnet_demux_desc *demux_array,
1828     __unused uint32_t demux_count)
1829 {
1830 	switch (protocol) {
1831 	case PF_INET:
1832 		return 0;
1833 	case PF_INET6:
1834 		return 0;
1835 	default:
1836 		break;
1837 	}
1838 
1839 	return ENOPROTOOPT;
1840 }
1841 
1842 static errno_t
redirect_del_proto(__unused ifnet_t interface,__unused protocol_family_t protocol)1843 redirect_del_proto(__unused ifnet_t interface,
1844     __unused protocol_family_t protocol)
1845 {
1846 	return 0;
1847 }
1848 
1849 __private_extern__ void
if_redirect_init(void)1850 if_redirect_init(void)
1851 {
1852 	int error;
1853 
1854 	redirect_log_handle = os_log_create("com.apple.xnu.net.redirect", "redirect");
1855 	(void)redirect_register_nexus_domain_provider();
1856 	error = if_clone_attach(&redirect_cloner);
1857 	if (error != 0) {
1858 		return;
1859 	}
1860 	return;
1861 }
1862