xref: /xnu-12377.81.4/bsd/net/if_redirect.c (revision 043036a2b3718f7f0be807e2870f8f47d3fa0796)
1 /*
2  * Copyright (c) 2022 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 /*
30  * if_redirect.c
31  * Virtual network interface that redirects traffic to a delegate interface.
32  */
33 
34 #include <sys/sysctl.h>
35 #include <net/dlil.h>
36 #include <net/ethernet.h>
37 #include <net/kpi_interface.h>
38 #include <net/bpf.h>
39 #include <net/if_media.h>
40 #include <net/if_ether.h>
41 #include <net/if_redirect.h>
42 #include <netinet/icmp6.h>
43 #include <os/log.h>
44 
45 #include <skywalk/os_skywalk_private.h>
46 #include <skywalk/nexus/netif/nx_netif.h>
47 
48 #define RD_NAME                 "rd"
49 #define RD_MAXUNIT              IF_MAXUNIT
50 #define RD_ZONE_MAX_ELEM        MIN(IFNETS_MAX, RD_MAXUNIT)
51 #define RD_MAX_MTU              2048
52 
53 #define RD_MAX_TX_RINGS         1
54 #define RD_MAX_RX_RINGS         1
55 #define RD_POOL_SIZE            1024
56 
57 static uint8_t default_mac[ETHER_ADDR_LEN] = {0x0, 0x1, 0x2, 0x3, 0x4, 0x5};
58 
59 SYSCTL_DECL(_net_link);
60 SYSCTL_NODE(_net_link, OID_AUTO, redirect, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
61     "Redirect interface");
62 
63 static int if_redirect_debug = 0;
64 SYSCTL_INT(_net_link_redirect, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
65     &if_redirect_debug, 0, "Redirect interface debug logs");
66 
67 os_log_t redirect_log_handle = NULL;
68 
/*
 * Unified logging macro: LOG_ERR messages are always emitted;
 * LOG_DEBUG/LOG_INFO messages are emitted only when the
 * net.link.redirect.debug sysctl is non-zero.
 */
#define RDLOG(level, format, ...) do {                                        \
	if (level == LOG_ERR) {                                               \
	        os_log_error(redirect_log_handle, "%s: " format "\n",         \
	            __FUNCTION__, ##__VA_ARGS__);                             \
	} else {                                                              \
	        if (__probable(if_redirect_debug == 0)) {                     \
	                break;                                                \
	        }                                                             \
	        if (level == LOG_DEBUG) {                                     \
	                os_log_debug(redirect_log_handle, "%s: " format "\n", \
	                    __FUNCTION__, ##__VA_ARGS__);                     \
	        } else if (level == LOG_INFO) {                               \
	                os_log_info(redirect_log_handle, "%s: " format "\n",  \
	                    __FUNCTION__, ##__VA_ARGS__);                     \
	        }                                                             \
	}                                                                     \
} while (0)

/* Convenience wrappers for the common log levels */
#define RDLOG_ERR(format, ...) RDLOG(LOG_ERR, format, ##__VA_ARGS__)
#define RDLOG_DBG(format, ...) RDLOG(LOG_DEBUG, format, ##__VA_ARGS__)
#define RDLOG_INFO(format, ...) RDLOG(LOG_INFO, format, ##__VA_ARGS__)
90 
91 #define RD_MEDIA_LIST_MAX 27
92 
/*
 * Identity of the netif nexus backing a redirect interface:
 * the registered provider UUID and the instance created from it.
 */
typedef struct {
	uuid_t                 rnx_provider;
	uuid_t                 rnx_instance;
} redirect_nx, *redirect_nx_t;
97 
/*
 * Per-interface state of a redirect interface.  Mutable state is
 * protected by rd_lock; rd_retain_count is manipulated atomically
 * via redirect_retain()/redirect_release().
 */
typedef struct {
	char                   rd_name[IFNAMSIZ]; /* our unique id */
	lck_mtx_t              rd_lock;
	uint32_t               rd_ftype;          /* interface family type */
	ifnet_t                rd_ifp;            /* our own ifnet */
	ifnet_t                rd_delegate_ifp;   /* traffic is redirected here */

	/* General state of the interface */
	boolean_t              rd_detaching;
	boolean_t              rd_connected;      /* a channel is connected */

	/* Used for tracking delegate related state info */
	boolean_t              rd_self_ref;
	boolean_t              rd_delegate_parent_set;
	boolean_t              rd_delegate_ref;
	boolean_t              rd_fsw_rx_cb_set;
	boolean_t              rd_delegate_set;
	boolean_t              rd_mac_addr_set;
	boolean_t              rd_detach_notify_set;

	unsigned int           rd_max_mtu;
	uint32_t               rd_retain_count;   /* atomic refcount */
	kern_pbufpool_t        rd_pp;             /* shared Tx/Rx packet pool */
	kern_channel_ring_t    rd_rx_ring[RD_MAX_RX_RINGS];
	kern_channel_ring_t    rd_tx_ring[RD_MAX_TX_RINGS];
	redirect_nx            rd_nx;             /* backing nexus identity */
	struct netif_stats     *rd_nifs;          /* stats of the backing netif */
	void                   *rd_intf_adv_kern_ctx;
	thread_call_t          rd_doorbell_tcall; /* async Tx doorbell */
	boolean_t              rd_doorbell_tcall_active;
	boolean_t              rd_waiting_for_tcall;
	bool                   rd_intf_adv_enabled;
	kern_nexus_capab_interface_advisory_notify_fn_t rd_intf_adv_notify;
} if_redirect, *if_redirect_t;
132 
133 static if_redirect_t ifnet_get_if_redirect(ifnet_t);
134 static int redirect_clone_create(struct if_clone *, uint32_t, void *);
135 static int redirect_clone_destroy(ifnet_t);
136 static int redirect_ioctl(ifnet_t, u_long, void *);
137 static void redirect_if_free(ifnet_t);
138 static void redirect_free(if_redirect_t);
139 static errno_t redirect_demux(ifnet_t, mbuf_t, char *, protocol_family_t *);
140 static errno_t redirect_add_proto(ifnet_t, protocol_family_t,
141     const struct ifnet_demux_desc *, uint32_t);
142 static errno_t redirect_del_proto(ifnet_t, protocol_family_t);
143 static void redirect_clear_delegate_locked(if_redirect_t);
144 static void redirect_clear_delegate(if_redirect_t);
145 
/* Cloner for "rd%d" interfaces; supports up to RD_MAXUNIT units */
static struct if_clone
    redirect_cloner = IF_CLONE_INITIALIZER(RD_NAME,
    redirect_clone_create,
    redirect_clone_destroy,
    0,
    RD_MAXUNIT);
152 static void interface_link_event(ifnet_t ifp, uint32_t event_code);
153 
154 static LCK_GRP_DECLARE(redirect_lock_group, "redirect");
155 static LCK_ATTR_DECLARE(redirect_lock_attr, 0, 0);
156 
157 #define RD_LOCK_INIT(rd) \
158 	lck_mtx_init(&(rd)->rd_lock, &redirect_lock_group, &redirect_lock_attr)
159 #define RD_LOCK(rd) \
160 	lck_mtx_lock(&(rd)->rd_lock)
161 #define RD_UNLOCK(rd) \
162 	lck_mtx_unlock(&(rd)->rd_lock)
163 #define RD_LOCK_DESTROY(rd) \
164 	lck_mtx_destroy(&(rd)->rd_lock, &redirect_lock_group)
165 
166 static inline boolean_t
redirect_is_usable(if_redirect_t rd)167 redirect_is_usable(if_redirect_t rd)
168 {
169 	return !rd->rd_detaching && rd->rd_connected;
170 }
171 
/*
 * Return the configured maximum MTU of the redirect interface, or
 * fall back to ETHERMTU if the ifnet has no if_redirect context.
 */
static inline unsigned int
redirect_max_mtu(ifnet_t ifp)
{
	if_redirect_t rd;
	unsigned int max_mtu = ETHERMTU; /* fallback when context is missing */

	rd = ifnet_get_if_redirect(ifp);
	if (rd == NULL) {
		RDLOG_ERR("rd is NULL");
		goto done;
	}
	max_mtu = rd->rd_max_mtu;
done:
	return max_mtu;
}
187 
/*
 * Final teardown of an if_redirect instance.  Called only from
 * redirect_release() once the retain count has dropped to zero:
 * releases the packet pool and destroys the mutex before freeing.
 */
static void
redirect_free(if_redirect_t rd)
{
	VERIFY(rd->rd_retain_count == 0);

	if (rd->rd_pp != NULL) {
		pp_release(rd->rd_pp);
		rd->rd_pp = NULL;
	}
	RD_LOCK_DESTROY(rd);
	RDLOG_DBG("%s", rd->rd_name);
	kfree_type(if_redirect, rd);
}
201 
/*
 * Drop one reference on rd; frees it when the last reference goes
 * away.  Releasing with a zero count is a refcount underflow and
 * panics via VERIFY.
 */
static void
redirect_release(if_redirect_t rd)
{
	uint32_t old_retain_count;

	/* OSDecrementAtomic returns the value *before* the decrement */
	old_retain_count = OSDecrementAtomic(&rd->rd_retain_count);
	switch (old_retain_count) {
	case 0:
		/* underflow: the count was already zero */
		VERIFY(old_retain_count != 0);
		break;
	case 1:
		/* this was the last reference */
		redirect_free(rd);
		break;
	default:
		break;
	}
	return;
}
220 
/*
 * Take an additional reference on rd; paired with redirect_release().
 */
static void
redirect_retain(if_redirect_t rd)
{
	OSIncrementAtomic(&rd->rd_retain_count);
}
226 
227 static void
redirect_bpf_tap(ifnet_t ifp,kern_packet_t pkt,bool input)228 redirect_bpf_tap(ifnet_t ifp, kern_packet_t pkt, bool input)
229 {
230 	uint32_t dlt;
231 
232 	switch (ifp->if_family) {
233 	case IFNET_FAMILY_ETHERNET:
234 		dlt = DLT_EN10MB;
235 		break;
236 	case IFNET_FAMILY_CELLULAR:
237 	case IFNET_FAMILY_UTUN:
238 	case IFNET_FAMILY_IPSEC:
239 		dlt = DLT_RAW;
240 		break;
241 	default:
242 		DTRACE_SKYWALK1(invalid__family, ifnet_t, ifp);
243 		return;
244 	}
245 
246 	if (input) {
247 		bpf_tap_packet_in(ifp, dlt, pkt, NULL, 0);
248 	} else {
249 		bpf_tap_packet_out(ifp, dlt, pkt, NULL, 0);
250 	}
251 }
252 
253 static void
redirect_packet_pool_init_prepare(if_redirect_t rd,struct kern_pbufpool_init * pp_init)254 redirect_packet_pool_init_prepare(if_redirect_t rd,
255     struct kern_pbufpool_init *pp_init)
256 {
257 	uint32_t max_mtu = rd->rd_max_mtu;
258 
259 	bzero(pp_init, sizeof(*pp_init));
260 	pp_init->kbi_version = KERN_PBUFPOOL_CURRENT_VERSION;
261 	pp_init->kbi_flags |= KBIF_VIRTUAL_DEVICE;
262 	pp_init->kbi_packets = RD_POOL_SIZE;
263 	pp_init->kbi_bufsize = max_mtu;
264 	pp_init->kbi_max_frags = 1;
265 	pp_init->kbi_buflets =  (2 * pp_init->kbi_packets); /* Tx/Rx pool */
266 	pp_init->kbi_buf_seg_size = skmem_usr_buf_seg_size;
267 	pp_init->kbi_ctx = NULL;
268 	pp_init->kbi_ctx_retain = NULL;
269 	pp_init->kbi_ctx_release = NULL;
270 }
271 
/*
 * Create the packet buffer pool (named "<ifname> pp") used by this
 * interface's rings, storing it in rd->rd_pp.
 */
static errno_t
redirect_packet_pool_make(if_redirect_t rd)
{
	struct kern_pbufpool_init pp_init;
	errno_t err;

	redirect_packet_pool_init_prepare(rd, &pp_init);
	(void)snprintf((char *)pp_init.kbi_name, sizeof(pp_init.kbi_name),
	    "%s pp", rd->rd_name);

	err = kern_pbufpool_create(&pp_init, &rd->rd_pp, NULL);
	return err;
}
285 
/*
 * Enqueue a native packet on the delegate interface's output queue.
 * If the delegate uses logical links and an inet traffic rule maps
 * this packet to a queue set, enqueue on that qset's ifclassq;
 * otherwise use the default send queue.  The callee consumes the
 * packet either way; *drop reports whether a failed enqueue dropped it.
 */
static int
redirect_enqueue_pkt(struct nx_netif *nif, struct __kern_packet *pkt,
    boolean_t flush, boolean_t *drop)
{
	ifnet_t ifp = nif->nif_ifp;
	uint64_t qset_id;
	int err;

	if (NX_LLINK_PROV(nif->nif_nx) &&
	    ifp->if_inet_traffic_rule_count > 0 &&
	    nxctl_inet_traffic_rule_find_qset_id_with_pkt(ifp->if_xname,
	    pkt, &qset_id) == 0) {
		struct netif_qset * __single qset;

		/*
		 * This always returns a qset because if the qset id is invalid the
		 * default qset is returned.
		 */
		qset = nx_netif_find_qset(nif, qset_id);
		ASSERT(qset != NULL);
		pkt->pkt_qset_idx = qset->nqs_idx;
		err = ifnet_enqueue_pkt(ifp, qset->nqs_ifcq, pkt, flush, drop);
		nx_netif_qset_release(&qset);
	} else {
		/* callee consumes packet */
		err = ifnet_enqueue_pkt(ifp, ifp->if_snd, pkt, flush, drop);
	}
	return err;
}
315 
/*
 * Enqueue an mbuf on the delegate interface's default output queue
 * (compat driver path).  The callee consumes the mbuf; *drop reports
 * whether a failed enqueue dropped it.
 */
static int
redirect_enqueue_mbuf(struct nx_netif *nif, struct mbuf *m,
    boolean_t flush, boolean_t *drop)
{
	return ifnet_enqueue_mbuf(nif->nif_ifp, m, flush, drop);
}
322 
323 static int
redirect_tx_submit(ifnet_t delegate_ifp,struct pktq * spktq)324 redirect_tx_submit(ifnet_t delegate_ifp, struct pktq *spktq)
325 {
326 	struct __kern_packet *spkt, *pkt;
327 	struct nx_netif *nif;
328 	struct netif_stats *nifs;
329 	struct nexus_netif_adapter *dev_nifna;
330 	struct mbuf *m;
331 	boolean_t drop, native, compat;
332 	errno_t err;
333 	int cnt = 0;
334 
335 	if (!ifnet_datamov_begin(delegate_ifp)) {
336 		RDLOG_ERR("delegate interface is being detached");
337 		DTRACE_SKYWALK1(delegate__detached, ifnet_t, delegate_ifp);
338 		return ENXIO;
339 	}
340 	if (NA(delegate_ifp) == NULL) {
341 		RDLOG_ERR("nexus adapter is not present");
342 		DTRACE_SKYWALK1(no__nexus, ifnet_t, delegate_ifp);
343 		err = ENXIO;
344 		goto done;
345 	}
346 	dev_nifna = NA(delegate_ifp);
347 	nif = dev_nifna->nifna_netif;
348 	nifs = &nif->nif_stats;
349 
350 	native = (dev_nifna->nifna_up.na_type == NA_NETIF_DEV);
351 	compat = (dev_nifna->nifna_up.na_type == NA_NETIF_COMPAT_DEV);
352 
353 	while (KPKTQ_LEN(spktq) > 0) {
354 		KPKTQ_DEQUEUE(spktq, spkt);
355 		ASSERT(spkt != NULL);
356 		drop = FALSE;
357 
358 		if (__probable(native)) {
359 			pkt = nx_netif_pkt_to_pkt(dev_nifna, spkt, NETIF_CONVERT_TX);
360 			if (pkt == NULL) {
361 				continue;
362 			}
363 
364 			pkt->pkt_pflags |= PKT_F_FLOW_ID;
365 			pkt->pkt_pflags &= ~PKT_F_FLOW_ADV;
366 
367 			netif_ifp_inc_traffic_class_out_pkt(delegate_ifp,
368 			    pkt->pkt_svc_class, 1, pkt->pkt_length);
369 
370 			err = redirect_enqueue_pkt(nif, pkt, FALSE, &drop);
371 		} else {
372 			ASSERT(compat);
373 			m = nx_netif_pkt_to_mbuf(dev_nifna, spkt, NETIF_CONVERT_TX);
374 			if (m == NULL) {
375 				continue;
376 			}
377 
378 			m->m_pkthdr.pkt_flags = PKTF_FLOW_ID;
379 			m->m_pkthdr.pkt_flags &= ~PKTF_FLOW_ADV;
380 
381 			ifp_inc_traffic_class_out(delegate_ifp, m);
382 
383 			err = redirect_enqueue_mbuf(nif, m, FALSE, &drop);
384 		}
385 		if (__probable(err == 0)) {
386 			cnt++;
387 		} else {
388 			RDLOG_ERR("enqueue failed: %d", err);
389 			if (drop) {
390 				STATS_INC(nifs, NETIF_STATS_TX_DROP_ENQ_AQM);
391 				STATS_INC(nifs, NETIF_STATS_DROP);
392 			}
393 			DTRACE_SKYWALK3(enqueue__failed,
394 			    ifnet_t, delegate_ifp, boolean_t, drop, int, err);
395 			break;
396 		}
397 	}
398 done:
399 	if (cnt > 0) {
400 		netif_transmit(delegate_ifp, NETIF_XMIT_FLAG_REDIRECT);
401 	}
402 	ifnet_datamov_end(delegate_ifp);
403 	return err;
404 }
405 
406 /*
407  *  nexus netif domain provider
408  */
/*
 * Domain provider init callback; nothing to set up.
 */
static errno_t
redirect_nxdp_init(kern_nexus_domain_provider_t domprov)
{
#pragma unused(domprov)
	return 0;
}
415 
/*
 * Domain provider fini callback; nothing to tear down.
 */
static void
redirect_nxdp_fini(kern_nexus_domain_provider_t domprov)
{
#pragma unused(domprov)
}
421 
422 static uuid_t redirect_nx_dom_prov;
423 
424 static errno_t
redirect_register_nexus_domain_provider(void)425 redirect_register_nexus_domain_provider(void)
426 {
427 	const struct kern_nexus_domain_provider_init dp_init = {
428 		.nxdpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
429 		.nxdpi_flags = 0,
430 		.nxdpi_init = redirect_nxdp_init,
431 		.nxdpi_fini = redirect_nxdp_fini
432 	};
433 	nexus_domain_provider_name_t domain_provider_name = "com.apple.redirect";
434 	errno_t err = 0;
435 
436 	/* redirect_nxdp_init() is called before this function returns */
437 	err = kern_nexus_register_domain_provider(NEXUS_TYPE_NET_IF,
438 	    domain_provider_name,
439 	    &dp_init, sizeof(dp_init),
440 	    &redirect_nx_dom_prov);
441 	if (err != 0) {
442 		RDLOG_ERR("failed to register domain provider");
443 		return err;
444 	}
445 	return 0;
446 }
447 
448 /*
449  * netif nexus routines
450  */
/*
 * Retrieve the if_redirect instance stored as the nexus context.
 */
static if_redirect_t
redirect_nexus_context(kern_nexus_t nexus)
{
	if_redirect_t rd;

	rd = (if_redirect_t)kern_nexus_get_context(nexus);
	assert(rd != NULL);
	return rd;
}
460 
461 static errno_t
redirect_nx_ring_init(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel,kern_channel_ring_t ring,boolean_t is_tx_ring,void ** ring_ctx)462 redirect_nx_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
463     kern_channel_t channel, kern_channel_ring_t ring, boolean_t is_tx_ring,
464     void **ring_ctx)
465 {
466 #pragma unused(nxprov, channel, ring_ctx)
467 	if_redirect_t rd;
468 
469 	rd = redirect_nexus_context(nexus);
470 	RD_LOCK(rd);
471 	if (rd->rd_detaching) {
472 		DTRACE_SKYWALK1(detaching, if_redirect_t, rd);
473 		RD_UNLOCK(rd);
474 		return ENXIO;
475 	}
476 	if (is_tx_ring) {
477 		static_assert(RD_MAX_TX_RINGS == 1);
478 		VERIFY(rd->rd_tx_ring[0] == NULL);
479 		rd->rd_tx_ring[0] = ring;
480 	} else {
481 		static_assert(RD_MAX_RX_RINGS == 1);
482 		VERIFY(rd->rd_rx_ring[0] == NULL);
483 		rd->rd_rx_ring[0] = ring;
484 	}
485 
486 	rd->rd_nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
487 	RD_UNLOCK(rd);
488 	RDLOG_INFO("%s: %s ring init", rd->rd_name,
489 	    is_tx_ring ? "TX" : "RX");
490 	return 0;
491 }
492 
493 static void
redirect_nx_ring_fini(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring)494 redirect_nx_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
495     kern_channel_ring_t ring)
496 {
497 #pragma unused(nxprov, ring)
498 	if_redirect_t rd;
499 	thread_call_t __single tcall = NULL;
500 
501 	rd = redirect_nexus_context(nexus);
502 	RD_LOCK(rd);
503 	if (rd->rd_rx_ring[0] == ring) {
504 		RDLOG_INFO("%s: RX ring fini", rd->rd_name);
505 		rd->rd_rx_ring[0] = NULL;
506 	} else if (rd->rd_tx_ring[0] == ring) {
507 		RDLOG_INFO("%s: TX ring fini", rd->rd_name);
508 		tcall = rd->rd_doorbell_tcall;
509 		rd->rd_doorbell_tcall = NULL;
510 		rd->rd_tx_ring[0] = NULL;
511 	}
512 	rd->rd_nifs = NULL;
513 	RD_UNLOCK(rd);
514 
515 	if (tcall != NULL) {
516 		boolean_t success;
517 
518 		success = thread_call_cancel_wait(tcall);
519 		RDLOG_INFO("%s: thread_call_cancel %s",
520 		    rd->rd_name, success ? "SUCCESS" : "FAILURE");
521 		if (!success) {
522 			RD_LOCK(rd);
523 			if (rd->rd_doorbell_tcall_active) {
524 				rd->rd_waiting_for_tcall = TRUE;
525 				RDLOG_INFO("%s: *waiting for threadcall",
526 				    rd->rd_name);
527 				do {
528 					msleep(rd, &rd->rd_lock,
529 					    PZERO, "redirect threadcall", 0);
530 				} while (rd->rd_doorbell_tcall_active);
531 				RDLOG_INFO("%s: threadcall done",
532 				    rd->rd_name);
533 				rd->rd_waiting_for_tcall = FALSE;
534 			}
535 			RD_UNLOCK(rd);
536 		}
537 		success = thread_call_free(tcall);
538 		RDLOG_INFO("%s: thread_call_free %s",
539 		    rd->rd_name, success ? "SUCCESS" : "FAILURE");
540 		redirect_release(rd);
541 		VERIFY(success == TRUE);
542 	}
543 }
544 
545 static errno_t
redirect_nx_pre_connect(kern_nexus_provider_t nxprov,proc_t proc,kern_nexus_t nexus,nexus_port_t port,kern_channel_t channel,void ** channel_context)546 redirect_nx_pre_connect(kern_nexus_provider_t nxprov,
547     proc_t proc, kern_nexus_t nexus, nexus_port_t port,
548     kern_channel_t channel, void **channel_context)
549 {
550 #pragma unused(nxprov, proc, nexus, port, channel, channel_context)
551 	return 0;
552 }
553 
554 static errno_t
redirect_nx_connected(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel)555 redirect_nx_connected(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
556     kern_channel_t channel)
557 {
558 #pragma unused(nxprov, channel)
559 	if_redirect_t rd = NULL;
560 
561 	rd = redirect_nexus_context(nexus);
562 	RD_LOCK(rd);
563 	if (rd->rd_detaching) {
564 		DTRACE_SKYWALK1(detaching, if_redirect_t, rd);
565 		RD_UNLOCK(rd);
566 		return EBUSY;
567 	}
568 	redirect_retain(rd);
569 	rd->rd_connected = TRUE;
570 	RD_UNLOCK(rd);
571 
572 	RDLOG_DBG("%s: connected channel %p", rd->rd_name, channel);
573 	return 0;
574 }
575 
576 static void
redirect_nx_pre_disconnect(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel)577 redirect_nx_pre_disconnect(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
578     kern_channel_t channel)
579 {
580 #pragma unused(nxprov, channel)
581 	if_redirect_t rd;
582 
583 	rd = redirect_nexus_context(nexus);
584 	RDLOG_INFO("%s: pre-disconnect channel %p", rd->rd_name, channel);
585 	/* Quiesce the interface and flush any pending outbound packets */
586 	if_down(rd->rd_ifp);
587 	RD_LOCK(rd);
588 	rd->rd_connected = FALSE;
589 	RD_UNLOCK(rd);
590 }
591 
592 static void
redirect_nx_disconnected(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel)593 redirect_nx_disconnected(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
594     kern_channel_t channel)
595 {
596 #pragma unused(nxprov, channel)
597 	if_redirect_t rd;
598 
599 	rd = redirect_nexus_context(nexus);
600 	RDLOG_INFO("%s: disconnected channel %p", rd->rd_name, channel);
601 	redirect_release(rd);
602 }
603 
604 static errno_t
redirect_nx_slot_init(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring,kern_channel_slot_t slot,uint32_t slot_index,struct kern_slot_prop ** slot_prop_addr,void ** slot_context)605 redirect_nx_slot_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
606     kern_channel_ring_t ring, kern_channel_slot_t slot, uint32_t slot_index,
607     struct kern_slot_prop **slot_prop_addr, void **slot_context)
608 {
609 #pragma unused(nxprov, nexus, ring, slot, slot_index, slot_prop_addr, slot_context)
610 	return 0;
611 }
612 
613 static void
redirect_nx_slot_fini(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring,kern_channel_slot_t slot,uint32_t slot_index)614 redirect_nx_slot_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
615     kern_channel_ring_t ring, kern_channel_slot_t slot, uint32_t slot_index)
616 {
617 #pragma unused(nxprov, nexus, ring, slot, slot_index)
618 }
619 
620 static errno_t
redirect_nx_sync_tx(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t tx_ring,uint32_t flags)621 redirect_nx_sync_tx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
622     kern_channel_ring_t tx_ring, uint32_t flags)
623 {
624 #pragma unused(nxprov)
625 	if_redirect_t rd;
626 	ifnet_t ifp;
627 	kern_channel_slot_t last_tx_slot = NULL;
628 	ifnet_t delegate_ifp;
629 	struct kern_channel_ring_stat_increment stats;
630 	kern_channel_slot_t tx_slot = NULL;
631 	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
632 	struct pktq tx_pktq;
633 	uint32_t n_pkts = 0;
634 	int error = 0;
635 
636 	bzero(&stats, sizeof(stats));
637 	STATS_INC(nifs, NETIF_STATS_TX_SYNC);
638 	rd = redirect_nexus_context(nexus);
639 	RDLOG_INFO("%s ring %d flags 0x%x", rd->rd_name, tx_ring->ckr_ring_id, flags);
640 
641 	if (__improbable(!redirect_is_usable(rd))) {
642 		RDLOG_INFO("%s is not usable", rd->rd_name);
643 		DTRACE_SKYWALK1(unusable, if_redirect_t, rd);
644 		return ENOENT;
645 	}
646 	ifp = rd->rd_ifp;
647 	delegate_ifp = rd->rd_delegate_ifp;
648 
649 	KPKTQ_INIT(&tx_pktq);
650 	while ((tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL)) != NULL) {
651 		kern_packet_t sph;
652 
653 		/* detach the packet from the TX ring */
654 		sph = kern_channel_slot_get_packet(tx_ring, tx_slot);
655 		VERIFY(sph != 0);
656 		kern_channel_slot_detach_packet(tx_ring, tx_slot, sph);
657 
658 		/* bpf tap output */
659 		redirect_bpf_tap(ifp, sph, false);
660 
661 		ASSERT(sph != 0);
662 		STATS_INC(nifs, NETIF_STATS_TX_COPY_DIRECT);
663 		STATS_INC(nifs, NETIF_STATS_TX_PACKETS);
664 
665 		stats.kcrsi_slots_transferred++;
666 		stats.kcrsi_bytes_transferred += kern_packet_get_data_length(sph);
667 
668 		KPKTQ_ENQUEUE(&tx_pktq, SK_PTR_ADDR_KPKT(sph));
669 		n_pkts++;
670 
671 		last_tx_slot = tx_slot;
672 	}
673 	if (last_tx_slot != NULL) {
674 		kern_channel_advance_slot(tx_ring, last_tx_slot);
675 		kern_channel_increment_ring_net_stats(tx_ring, ifp, &stats);
676 	}
677 	if (__improbable(delegate_ifp == NULL)) {
678 		RDLOG_INFO("%s has no delegate", rd->rd_name);
679 		DTRACE_SKYWALK1(no__delegate, if_redirect_t, rd);
680 		error = ENXIO;
681 		goto done;
682 	}
683 	if (n_pkts > 0) {
684 		redirect_tx_submit(delegate_ifp, &tx_pktq);
685 	}
686 done:
687 	/*
688 	 * Packets not enqueued into delegate interface AQM
689 	 */
690 	if (KPKTQ_LEN(&tx_pktq) > 0) {
691 		DTRACE_SKYWALK2(unsent, if_redirect_t, rd, struct pktq *, &tx_pktq);
692 		STATS_ADD(nifs, NETIF_STATS_DROP_NO_DELEGATE, KPKTQ_LEN(&tx_pktq));
693 		pp_free_pktq(&tx_pktq);
694 	}
695 	return error;
696 }
697 
698 static boolean_t
pkt_is_for_delegate(if_redirect_t rd,struct __kern_packet * pkt)699 pkt_is_for_delegate(if_redirect_t rd, struct __kern_packet *pkt)
700 {
701 #if !(DEVELOPMENT || DEBUG)
702 #pragma unused(rd)
703 #endif
704 	uint8_t proto;
705 	uint8_t *hdr;
706 	uint32_t l4len;
707 
708 	if ((pkt->pkt_qum_qflags & QUM_F_FLOW_CLASSIFIED) == 0) {
709 		DTRACE_SKYWALK2(not__classified, if_redirect_t, rd,
710 		    struct __kern_packet *, pkt);
711 		return FALSE;
712 	}
713 	if (pkt->pkt_flow_ip_hdr == 0 || pkt->pkt_flow_ip_hlen == 0) {
714 		RDLOG_ERR("%s: classifier info missing", rd->rd_name);
715 		DTRACE_SKYWALK2(classifier__info__missing, if_redirect_t, rd,
716 		    struct __kern_packet *, pkt);
717 		return FALSE;
718 	}
719 	proto = pkt->pkt_flow_ip_proto;
720 	l4len = pkt->pkt_length - pkt->pkt_l2_len - pkt->pkt_flow_ip_hlen;
721 	hdr = __unsafe_forge_bidi_indexable(uint8_t *, pkt->pkt_flow_ip_hdr + pkt->pkt_flow_ip_hlen,
722 	    l4len);
723 	if (proto == IPPROTO_ICMPV6) {
724 		struct icmp6_hdr *icmp6;
725 
726 		if (l4len < sizeof(*icmp6)) {
727 			RDLOG_ERR("%s: l4len(%u) < icmp6len(%lu)", rd->rd_name,
728 			    l4len, sizeof(*icmp6));
729 			DTRACE_SKYWALK3(too__small__v6, if_redirect_t, rd,
730 			    struct __kern_packet *, pkt, uint32_t, l4len);
731 			return FALSE;
732 		}
733 
734 		icmp6 = (struct icmp6_hdr *)(void *)hdr;
735 		if (icmp6->icmp6_type == ND_ROUTER_ADVERT) {
736 			DTRACE_SKYWALK3(icmp6__ra, if_redirect_t, rd,
737 			    struct __kern_packet *, pkt, struct icmp6 *, icmp6);
738 			return TRUE;
739 		}
740 	}
741 	return FALSE;
742 }
743 
744 static void
redirect_rx_cb(void * arg,struct pktq * spktq)745 redirect_rx_cb(void *arg, struct pktq *spktq)
746 {
747 	if_redirect_t __single rd = arg;
748 	struct __kern_packet *spkt, *pkt;
749 	struct pktq rpktq;
750 	kern_packet_t ph;
751 	kern_channel_ring_t rx_ring = NULL;
752 	kern_channel_slot_t rx_slot = NULL, last_rx_slot = NULL;
753 	struct kern_channel_ring_stat_increment stats;
754 	int err;
755 
756 	/*
757 	 * The ring cannot disappear before the callback is finished and removed.
758 	 */
759 	rx_ring = rd->rd_rx_ring[0];
760 	if (rx_ring == NULL) {
761 		DTRACE_SKYWALK2(no__ring__drop, if_redirect_t, rd, struct pktq *, spktq);
762 		pp_free_pktq(spktq);
763 		return;
764 	}
765 	KPKTQ_INIT(&rpktq);
766 	bzero(&stats, sizeof(stats));
767 	kr_enter(rx_ring, TRUE);
768 	kern_channel_reclaim(rx_ring);
769 
770 	while (KPKTQ_LEN(spktq) > 0) {
771 		KPKTQ_DEQUEUE(spktq, spkt);
772 		if (pkt_is_for_delegate(rd, spkt)) {
773 			KPKTQ_ENQUEUE(&rpktq, spkt);
774 			continue;
775 		}
776 		rx_slot = kern_channel_get_next_slot(rx_ring, last_rx_slot, NULL);
777 		if (rx_slot == NULL) {
778 			DTRACE_SKYWALK2(no__slot__drop, if_redirect_t, rd,
779 			    struct __kern_packet *, spkt);
780 			pp_free_packet_single(spkt);
781 			continue;
782 		}
783 		pkt = nx_netif_pkt_to_pkt(rd->rd_ifp->if_na, spkt, NETIF_CONVERT_RX);
784 		if (pkt == NULL) {
785 			DTRACE_SKYWALK1(copy__drop, if_redirect_t, rd);
786 			continue;
787 		}
788 		ph = SK_PKT2PH(pkt);
789 		stats.kcrsi_slots_transferred++;
790 		stats.kcrsi_bytes_transferred += kern_packet_get_data_length(ph);
791 
792 		redirect_bpf_tap(rd->rd_ifp, ph, true);
793 
794 		err = kern_channel_slot_attach_packet(rx_ring, rx_slot, ph);
795 		VERIFY(err == 0);
796 		last_rx_slot = rx_slot;
797 	}
798 	ASSERT(KPKTQ_EMPTY(spktq));
799 	KPKTQ_CONCAT(spktq, &rpktq);
800 	if (last_rx_slot != NULL) {
801 		kern_channel_advance_slot(rx_ring, last_rx_slot);
802 		kern_channel_increment_ring_net_stats(rx_ring, rd->rd_ifp, &stats);
803 	}
804 	kr_exit(rx_ring);
805 	if (last_rx_slot != NULL) {
806 		kern_channel_notify(rx_ring, 0);
807 	}
808 }
809 
810 static errno_t
redirect_nx_sync_rx(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring,uint32_t flags)811 redirect_nx_sync_rx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
812     kern_channel_ring_t ring, uint32_t flags)
813 {
814 #pragma unused(nxprov, nexus, ring, flags)
815 	return 0;
816 }
817 
818 static void
redirect_async_doorbell(thread_call_param_t arg0,thread_call_param_t arg1)819 redirect_async_doorbell(thread_call_param_t arg0, thread_call_param_t arg1)
820 {
821 #pragma unused(arg1)
822 	errno_t error;
823 	if_redirect_t rd = (if_redirect_t)arg0;
824 	kern_channel_ring_t ring;
825 	boolean_t more;
826 
827 	RD_LOCK(rd);
828 	ring = rd->rd_tx_ring[0];
829 	if (__improbable(!redirect_is_usable(rd) || ring == NULL)) {
830 		DTRACE_SKYWALK2(unusable, if_redirect_t, rd, kern_channel_ring_t, ring);
831 		goto done;
832 	}
833 	rd->rd_doorbell_tcall_active = TRUE;
834 	RD_UNLOCK(rd);
835 
836 	error = kern_channel_tx_refill(ring, UINT32_MAX, UINT32_MAX, FALSE,
837 	    &more);
838 	if (error != 0 && error != EAGAIN) {
839 		RDLOG_ERR("%s: Tx refill failed %d", rd->rd_name, error);
840 	} else {
841 		RDLOG_DBG("%s: Tx refilled", rd->rd_name);
842 	}
843 
844 	RD_LOCK(rd);
845 done:
846 	rd->rd_doorbell_tcall_active = FALSE;
847 	if (rd->rd_waiting_for_tcall) {
848 		RDLOG_INFO("%s: threadcall waking up waiter", rd->rd_name);
849 		wakeup((caddr_t)rd);
850 	}
851 	RD_UNLOCK(rd);
852 }
853 
854 static void
redirect_schedule_async_doorbell(if_redirect_t rd)855 redirect_schedule_async_doorbell(if_redirect_t rd)
856 {
857 	thread_call_t __single tcall;
858 
859 	RD_LOCK(rd);
860 	if (__improbable(!redirect_is_usable(rd))) {
861 		DTRACE_SKYWALK1(unusable, if_redirect_t, rd);
862 		RD_UNLOCK(rd);
863 		return;
864 	}
865 	tcall = rd->rd_doorbell_tcall;
866 	if (tcall != NULL) {
867 		thread_call_enter(tcall);
868 	} else {
869 		tcall = thread_call_allocate_with_options(redirect_async_doorbell,
870 		    (thread_call_param_t)rd,
871 		    THREAD_CALL_PRIORITY_KERNEL,
872 		    THREAD_CALL_OPTIONS_ONCE);
873 		if (tcall == NULL) {
874 			RDLOG_ERR("%s: tcall alloc failed", rd->rd_name);
875 		} else {
876 			rd->rd_doorbell_tcall = tcall;
877 			redirect_retain(rd);
878 			thread_call_enter(tcall);
879 		}
880 	}
881 	RD_UNLOCK(rd);
882 }
883 
884 static errno_t
redirect_nx_tx_doorbell(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring,uint32_t flags)885 redirect_nx_tx_doorbell(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
886     kern_channel_ring_t ring, uint32_t flags)
887 {
888 #pragma unused(nxprov, ring, flags)
889 	errno_t error;
890 	if_redirect_t rd;
891 
892 	rd = redirect_nexus_context(nexus);
893 	RDLOG_DBG("%s", rd->rd_name);
894 
895 	if ((flags & KERN_NEXUS_TXDOORBELLF_ASYNC_REFILL) == 0) {
896 		boolean_t more;
897 		/* synchronous tx refill */
898 		error = kern_channel_tx_refill(ring, UINT32_MAX, UINT32_MAX,
899 		    TRUE, &more);
900 		if (error != 0 && error != EAGAIN) {
901 			RDLOG_ERR("%s: Tx refill (sync) %d", rd->rd_name, error);
902 		} else {
903 			RDLOG_DBG("%s: Tx refilled (sync)", rd->rd_name);
904 		}
905 	} else {
906 		RDLOG_DBG("%s: schedule async refill", rd->rd_name);
907 		redirect_schedule_async_doorbell(rd);
908 	}
909 	return 0;
910 }
911 
912 static errno_t
redirect_netif_prepare(kern_nexus_t nexus,ifnet_t ifp)913 redirect_netif_prepare(kern_nexus_t nexus, ifnet_t ifp)
914 {
915 	if_redirect_t rd;
916 
917 	rd = (if_redirect_t)kern_nexus_get_context(nexus);
918 
919 	(void)ifnet_set_capabilities_enabled(ifp, 0, -1);
920 	ifnet_set_baudrate(ifp, 0);
921 	ifnet_set_mtu(ifp, ETHERMTU);
922 	ifnet_set_offload(ifp, 0);
923 
924 	if (rd->rd_ftype == IFRTYPE_FAMILY_ETHERNET) {
925 		ifnet_set_flags(ifp,
926 		    IFF_BROADCAST | IFF_MULTICAST | IFF_SIMPLEX, 0xffff);
927 		ifnet_set_addrlen(ifp, ETHER_ADDR_LEN);
928 		ifnet_set_hdrlen(ifp, sizeof(struct ether_header));
929 	} else {
930 		ifnet_set_flags(ifp, IFF_MULTICAST | IFF_POINTOPOINT, 0xffff);
931 	}
932 	return 0;
933 }
934 
935 static void
redirect_delegate_adv_config(ifnet_t delegate_ifp,bool enable)936 redirect_delegate_adv_config(ifnet_t delegate_ifp, bool enable)
937 {
938 	struct nx_netif *delegate_nif;
939 
940 	ASSERT(delegate_ifp != NULL);
941 	if (!SKYWALK_NATIVE(delegate_ifp)) {
942 		RDLOG_ERR("%s is not skywalk native", if_name(delegate_ifp));
943 		DTRACE_SKYWALK1(not__native, ifnet_t, delegate_ifp);
944 		return;
945 	}
946 	delegate_nif = NA(delegate_ifp)->nifna_netif;
947 	nx_netif_config_interface_advisory(delegate_nif->nif_nx, enable);
948 }
949 
950 static errno_t
redirect_nx_intf_adv_config(void * prov_ctx,bool enable)951 redirect_nx_intf_adv_config(void *prov_ctx, bool enable)
952 {
953 	if_redirect_t rd = (if_redirect_t)prov_ctx;
954 
955 	RD_LOCK(rd);
956 	if (!redirect_is_usable(rd)) {
957 		RDLOG_ERR("cannot %s advisory on %s because it is not usable",
958 		    enable ? "enable" : "disable", if_name(rd->rd_ifp));
959 		DTRACE_SKYWALK1(unusable, if_redirect_t, rd);
960 		RD_UNLOCK(rd);
961 		return ENXIO;
962 	}
963 	if (rd->rd_intf_adv_enabled == enable) {
964 		RDLOG_ERR("advisory is already %s on %s",
965 		    enable ? "enable" : "disable", if_name(rd->rd_ifp));
966 		DTRACE_SKYWALK1(advisory__already__set, if_redirect_t, rd);
967 		RD_UNLOCK(rd);
968 		return ENXIO;
969 	}
970 	if (!rd->rd_delegate_set) {
971 		RDLOG_ERR("delegate is not set on %s", if_name(rd->rd_ifp));
972 		DTRACE_SKYWALK1(no__delegate, if_redirect_t, rd);
973 		RD_UNLOCK(rd);
974 		return ENXIO;
975 	}
976 	redirect_delegate_adv_config(rd->rd_delegate_ifp, enable);
977 	rd->rd_intf_adv_enabled = enable;
978 	RD_UNLOCK(rd);
979 	return 0;
980 }
981 
/*
 * Fills in the interface-advisory capability structure handed down by
 * the nexus. Validates size and version, records the kernel's notify
 * callback/context in 'rd', and publishes our provider context and
 * config callback back to the caller.
 *
 * Returns EINVAL on a size or version mismatch, 0 on success.
 */
static errno_t
fill_capab_interface_advisory(if_redirect_t rd, void *contents,
    uint32_t *len)
{
	struct kern_nexus_capab_interface_advisory * __single capab = contents;

	if (*len != sizeof(*capab)) {
		DTRACE_SKYWALK2(invalid__len, uint32_t, *len, size_t, sizeof(*capab));
		return EINVAL;
	}
	if (capab->kncia_version !=
	    KERN_NEXUS_CAPAB_INTERFACE_ADVISORY_VERSION_1) {
		DTRACE_SKYWALK2(invalid__ver, uint32_t, capab->kncia_version,
		    uint32_t, KERN_NEXUS_CAPAB_INTERFACE_ADVISORY_VERSION_1);
		return EINVAL;
	}
	VERIFY(capab->kncia_notify != NULL);
	/* Save kernel-side notify hook; used when advisories are reported. */
	rd->rd_intf_adv_kern_ctx = capab->kncia_kern_context;
	rd->rd_intf_adv_notify = capab->kncia_notify;
	/* Export our side of the capability contract. */
	capab->kncia_provider_context = rd;
	capab->kncia_config = redirect_nx_intf_adv_config;
	return 0;
}
1005 
1006 static errno_t
redirect_nx_capab_config(kern_nexus_provider_t nxprov,kern_nexus_t nx,kern_nexus_capab_t capab,void * contents,uint32_t * len)1007 redirect_nx_capab_config(kern_nexus_provider_t nxprov, kern_nexus_t nx,
1008     kern_nexus_capab_t capab, void *contents, uint32_t *len)
1009 {
1010 #pragma unused(nxprov)
1011 	errno_t error;
1012 	if_redirect_t rd;
1013 
1014 	rd = redirect_nexus_context(nx);
1015 
1016 	switch (capab) {
1017 	case KERN_NEXUS_CAPAB_INTERFACE_ADVISORY:
1018 		error = fill_capab_interface_advisory(rd, contents, len);
1019 		break;
1020 	default:
1021 		error = ENOTSUP;
1022 		break;
1023 	}
1024 	return error;
1025 }
1026 
/*
 * Registers a netif nexus provider named "com.apple.netif.<ifname>" and
 * allocates a provider instance for it, creating the ifnet in the
 * process. On success, *provider and *instance hold the new UUIDs and
 * *ifp the attached interface. On failure, any partially-registered
 * provider is deregistered and the UUID cleared; the nexus attribute
 * object is destroyed on all paths.
 */
static errno_t
create_netif_provider_and_instance(if_redirect_t rd,
    struct ifnet_init_eparams *init_params, ifnet_t *ifp,
    uuid_t *provider, uuid_t *instance)
{
	errno_t err = 0;
	nexus_controller_t controller = kern_nexus_shared_controller();
	struct kern_nexus_net_init net_init = {};
	nexus_name_t provider_name = {};
	nexus_attr_t __single nexus_attr = NULL;

	/* Callback table for this virtual-device netif provider. */
	struct kern_nexus_provider_init prov_init = {
		.nxpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
		.nxpi_flags = NXPIF_VIRTUAL_DEVICE,
		.nxpi_pre_connect = redirect_nx_pre_connect,
		.nxpi_connected = redirect_nx_connected,
		.nxpi_pre_disconnect = redirect_nx_pre_disconnect,
		.nxpi_disconnected = redirect_nx_disconnected,
		.nxpi_ring_init = redirect_nx_ring_init,
		.nxpi_ring_fini = redirect_nx_ring_fini,
		.nxpi_slot_init = redirect_nx_slot_init,
		.nxpi_slot_fini = redirect_nx_slot_fini,
		.nxpi_sync_tx = redirect_nx_sync_tx,
		.nxpi_sync_rx = redirect_nx_sync_rx,
		.nxpi_tx_doorbell = redirect_nx_tx_doorbell,
		.nxpi_config_capab = redirect_nx_capab_config,
	};

	err = kern_nexus_attr_create(&nexus_attr);
	if (err != 0) {
		RDLOG_ERR("%s nexus attribution creation failed, error: %d",
		    rd->rd_name, err);
		DTRACE_SKYWALK2(attr__create__failed, if_redirect_t, rd, int, err);
		goto failed;
	}

	snprintf((char *)provider_name, sizeof(provider_name),
	    "com.apple.netif.%s", rd->rd_name);
	err = kern_nexus_controller_register_provider(controller,
	    redirect_nx_dom_prov,
	    provider_name,
	    &prov_init,
	    sizeof(prov_init),
	    nexus_attr,
	    provider);
	if (err != 0) {
		RDLOG_ERR("%s register provider failed, error %d", rd->rd_name, err);
		DTRACE_SKYWALK2(register__failed, if_redirect_t, rd, int, err);
		goto failed;
	}

	/* Both rx and tx share the interface's single packet pool. */
	net_init.nxneti_version = KERN_NEXUS_NET_CURRENT_VERSION;
	net_init.nxneti_flags = 0;
	net_init.nxneti_eparams = init_params;
	net_init.nxneti_lladdr = NULL;
	net_init.nxneti_prepare = redirect_netif_prepare;
	net_init.nxneti_rx_pbufpool = rd->rd_pp;
	net_init.nxneti_tx_pbufpool = rd->rd_pp;
	err = kern_nexus_controller_alloc_net_provider_instance(controller,
	    *provider, rd, NULL, instance, &net_init, ifp);
	if (err != 0) {
		RDLOG_ERR("%s alloc net provider instance failed %d", rd->rd_name, err);
		DTRACE_SKYWALK2(alloc__provider__instance__failed, if_redirect_t, rd, int, err);
		/* Undo the registration above so we leave no stale provider. */
		kern_nexus_controller_deregister_provider(controller, *provider);
		uuid_clear(*provider);
		goto failed;
	}
failed:
	if (nexus_attr != NULL) {
		kern_nexus_attr_destroy(nexus_attr);
	}
	return err;
}
1100 
1101 static errno_t
redirect_attach_netif_nexus(if_redirect_t rd,struct ifnet_init_eparams * init_params,ifnet_t * ifp)1102 redirect_attach_netif_nexus(if_redirect_t rd,
1103     struct ifnet_init_eparams *init_params, ifnet_t *ifp)
1104 {
1105 	errno_t error = 0;
1106 	redirect_nx_t nx = &rd->rd_nx;
1107 
1108 	error = redirect_packet_pool_make(rd);
1109 	if (error != 0) {
1110 		RDLOG_ERR("%s packet pool make failed: %d", rd->rd_name, error);
1111 		DTRACE_SKYWALK2(pool__make__failed, if_redirect_t, rd, int, error);
1112 		return error;
1113 	}
1114 
1115 	return create_netif_provider_and_instance(rd, init_params, ifp,
1116 	           &nx->rnx_provider, &nx->rnx_instance);
1117 }
1118 
1119 static void
detach_provider_and_instance(uuid_t provider,uuid_t instance)1120 detach_provider_and_instance(uuid_t provider, uuid_t instance)
1121 {
1122 	nexus_controller_t controller = kern_nexus_shared_controller();
1123 	errno_t err;
1124 
1125 	if (!uuid_is_null(instance)) {
1126 		err = kern_nexus_controller_free_provider_instance(controller,
1127 		    instance);
1128 		if (err != 0) {
1129 			RDLOG_ERR("free_provider_instance failed %d", err);
1130 		}
1131 		uuid_clear(instance);
1132 	}
1133 	if (!uuid_is_null(provider)) {
1134 		err = kern_nexus_controller_deregister_provider(controller,
1135 		    provider);
1136 		if (err != 0) {
1137 			RDLOG_ERR("deregister_provider failed %d", err);
1138 		}
1139 		uuid_clear(provider);
1140 	}
1141 	return;
1142 }
1143 
1144 static void
redirect_detach_netif_nexus(if_redirect_t rd)1145 redirect_detach_netif_nexus(if_redirect_t rd)
1146 {
1147 	redirect_nx_t rnx = &rd->rd_nx;
1148 	detach_provider_and_instance(rnx->rnx_provider, rnx->rnx_instance);
1149 }
1150 
/*
 * Posts a KEV_DL_SUBCLASS kernel event (e.g. KEV_DL_LINK_OFF) for the
 * given interface. The message is built in a stack buffer laid out as
 * a kern_event_msg header immediately followed by the event payload
 * identifying the interface by family, unit, and name.
 */
static void
interface_link_event(ifnet_t ifp, uint32_t event_code)
{
	/* Payload placed after the kern_event_msg header. */
	struct event {
		uint32_t ifnet_family;
		uint32_t unit;
		char if_name[IFNAMSIZ];
	};
	_Alignas(struct kern_event_msg) char message[sizeof(struct kern_event_msg) + sizeof(struct event)] = { 0 };
	struct kern_event_msg *header = (struct kern_event_msg *)message;
	struct event *data = (struct event *)(message + KEV_MSG_HEADER_SIZE);

	header->total_size = sizeof(message);
	header->vendor_code = KEV_VENDOR_APPLE;
	header->kev_class = KEV_NETWORK_CLASS;
	header->kev_subclass = KEV_DL_SUBCLASS;
	header->event_code = event_code;
	data->ifnet_family = ifnet_family(ifp);
	data->unit = (uint32_t)ifnet_unit(ifp);
	strlcpy(data->if_name, ifnet_name(ifp), IFNAMSIZ);
	ifnet_event(ifp, header);
}
1173 
1174 static if_redirect_t
ifnet_get_if_redirect(ifnet_t ifp)1175 ifnet_get_if_redirect(ifnet_t ifp)
1176 {
1177 	return (if_redirect_t)ifnet_softc(ifp);
1178 }
1179 
1180 static int
redirect_clone_create(struct if_clone * ifc,uint32_t unit,void * param)1181 redirect_clone_create(struct if_clone *ifc, uint32_t unit, void *param)
1182 {
1183 	int error;
1184 	if_redirect_t rd;
1185 	struct ifnet_init_eparams rd_init;
1186 	struct if_redirect_create_params params;
1187 	user_addr_t param_addr = (user_addr_t)param;
1188 	ifnet_t __single ifp;
1189 
1190 	if (param_addr == USER_ADDR_NULL) {
1191 		RDLOG_ERR("create params not specified");
1192 		DTRACE_SKYWALK2(no__param, struct if_clone *, ifc, uint32_t, unit);
1193 		return EINVAL;
1194 	}
1195 	error = copyin(param_addr, &params, sizeof(params));
1196 	if (error != 0) {
1197 		RDLOG_ERR("copyin failed: error %d", error);
1198 		DTRACE_SKYWALK1(copyin__failed, int, error);
1199 		return error;
1200 	}
1201 	if ((params.ircp_type != RD_CREATE_PARAMS_TYPE &&
1202 	    params.ircp_type != RD_CREATE_PARAMS_TYPE_NOATTACH) ||
1203 	    params.ircp_len != sizeof(params)) {
1204 		RDLOG_ERR("invalid type(0x%x) or len(0x%d)", params.ircp_type,
1205 		    params.ircp_len);
1206 		DTRACE_SKYWALK2(invalid__params, uint16_t, params.ircp_type,
1207 		    uint16_t, params.ircp_len);
1208 		return EINVAL;
1209 	}
1210 	if (params.ircp_ftype != IFRTYPE_FAMILY_ETHERNET &&
1211 	    params.ircp_ftype != IFRTYPE_FAMILY_CELLULAR) {
1212 		RDLOG_ERR("functional type(0x%x) not supported", params.ircp_ftype);
1213 		DTRACE_SKYWALK1(invalid__ftype, uint32_t, params.ircp_ftype);
1214 		return ENOTSUP;
1215 	}
1216 
1217 	rd = kalloc_type(if_redirect, Z_WAITOK | Z_ZERO | Z_NOFAIL);
1218 	RD_LOCK_INIT(rd);
1219 	rd->rd_ftype = params.ircp_ftype;
1220 	rd->rd_retain_count = 1;
1221 	rd->rd_max_mtu = RD_MAX_MTU;
1222 
1223 	/* use the interface name as the unique id for ifp recycle */
1224 	if ((unsigned int)
1225 	    snprintf(rd->rd_name, sizeof(rd->rd_name), "%s%d",
1226 	    ifc->ifc_name, unit) >= sizeof(rd->rd_name)) {
1227 		redirect_release(rd);
1228 		RDLOG_ERR("invalid ifc_name(%s) or unit(%d)", ifc->ifc_name, unit);
1229 		DTRACE_SKYWALK2(invalid__name__or__unit, char *, ifc->ifc_name,
1230 		    uint32_t, unit);
1231 		return EINVAL;
1232 	}
1233 
1234 	bzero(&rd_init, sizeof(rd_init));
1235 	rd_init.ver = IFNET_INIT_CURRENT_VERSION;
1236 	rd_init.len = sizeof(rd_init);
1237 	rd_init.flags |= (IFNET_INIT_SKYWALK_NATIVE | IFNET_INIT_IF_ADV);
1238 	if (params.ircp_type == RD_CREATE_PARAMS_TYPE_NOATTACH) {
1239 		rd_init.flags |= IFNET_INIT_NX_NOAUTO;
1240 	}
1241 	rd_init.uniqueid_len = (uint32_t)strbuflen(rd->rd_name);
1242 	rd_init.uniqueid = rd->rd_name;
1243 	rd_init.name = __unsafe_null_terminated_from_indexable(ifc->ifc_name);
1244 	rd_init.unit = unit;
1245 	rd_init.softc = rd;
1246 	rd_init.ioctl = redirect_ioctl;
1247 	rd_init.detach = redirect_if_free;
1248 	rd_init.subfamily = IFNET_SUBFAMILY_REDIRECT;
1249 
1250 	if (rd->rd_ftype == IFRTYPE_FAMILY_ETHERNET) {
1251 		rd_init.family = IFNET_FAMILY_ETHERNET;
1252 		rd_init.type = IFT_ETHER;
1253 		rd_init.demux = ether_demux;
1254 		rd_init.add_proto = ether_add_proto;
1255 		rd_init.del_proto = ether_del_proto;
1256 		rd_init.check_multi = ether_check_multi;
1257 		rd_init.framer_extended = ether_frameout_extended;
1258 		rd_init.broadcast_addr = etherbroadcastaddr;
1259 		rd_init.broadcast_len = ETHER_ADDR_LEN;
1260 	} else {
1261 		rd_init.family = IFNET_FAMILY_CELLULAR;
1262 		rd_init.type = IFT_CELLULAR;
1263 		rd_init.demux = redirect_demux;
1264 		rd_init.add_proto = redirect_add_proto;
1265 		rd_init.del_proto = redirect_del_proto;
1266 	}
1267 	error = redirect_attach_netif_nexus(rd, &rd_init, &ifp);
1268 	if (error != 0) {
1269 		redirect_release(rd);
1270 		RDLOG_ERR("attach netif nexus failed: error %d", error);
1271 		DTRACE_SKYWALK1(attach__nexus__failed, int, error);
1272 		return error;
1273 	}
1274 
1275 	/* take an additional reference for nexus controller */
1276 	redirect_retain(rd);
1277 	rd->rd_ifp = ifp;
1278 
1279 	if (rd->rd_ftype == IFRTYPE_FAMILY_ETHERNET) {
1280 		/* mac address will be set after delegate is configured */
1281 		(void) ifnet_set_lladdr(ifp, default_mac, ETHER_ADDR_LEN);
1282 		bpfattach(ifp, DLT_EN10MB, sizeof(struct ether_header));
1283 	} else {
1284 		bpfattach(ifp, DLT_RAW, 0);
1285 	}
1286 	return 0;
1287 }
1288 
1289 /*
1290  * This function is meant for cleaning up everything, not just delegate
1291  * related info.
1292  */
static void
redirect_cleanup(if_redirect_t rd)
{
	/* Drop the delegate (refs, callbacks, lladdr) ... */
	redirect_clear_delegate(rd);
	/*
	 * ... and forget the advisory preference; unlike a delegate switch,
	 * a full cleanup must not re-enable advisories later.
	 */
	rd->rd_intf_adv_enabled = false;
}
1299 
/*
 * if_clone destroy callback. Marks the interface as detaching (only
 * the first caller proceeds), clears delegate state, tears down the
 * netif nexus, drops the nexus-controller reference, posts a link-off
 * event, and detaches the ifnet. The final release happens later in
 * redirect_if_free() when the detach completes.
 */
static int
redirect_clone_destroy(ifnet_t ifp)
{
	if_redirect_t rd;

	rd = ifnet_get_if_redirect(ifp);
	if (rd == NULL) {
		RDLOG_ERR("rd is NULL");
		DTRACE_SKYWALK1(null__rd, ifnet_t, ifp);
		return ENXIO;
	}
	RD_LOCK(rd);
	/* A concurrent/earlier destroy already owns the teardown. */
	if (rd->rd_detaching) {
		RDLOG_ERR("%s is detaching", rd->rd_name);
		DTRACE_SKYWALK1(detaching, if_redirect_t, rd);
		RD_UNLOCK(rd);
		return 0;
	}
	rd->rd_detaching = TRUE;
	RD_UNLOCK(rd);

	redirect_cleanup(rd);
	redirect_detach_netif_nexus(rd);
	/*
	 * Releasing reference held for nexus controller
	 */
	redirect_release(rd);
	interface_link_event(ifp, KEV_DL_LINK_OFF);
	ifnet_detach(ifp);
	return 0;
}
1331 
1332 static int
if_redirect_request_copyin(user_addr_t user_addr,struct if_redirect_request * ifrr,uint64_t len)1333 if_redirect_request_copyin(user_addr_t user_addr,
1334     struct if_redirect_request *ifrr, uint64_t len)
1335 {
1336 	int error;
1337 
1338 	if (user_addr == USER_ADDR_NULL || len < sizeof(*ifrr)) {
1339 		RDLOG_ERR("user_addr(0x%llx) or len(%llu) < %lu",
1340 		    user_addr, len, sizeof(*ifrr));
1341 		error = EINVAL;
1342 		goto done;
1343 	}
1344 	error = copyin(user_addr, ifrr, sizeof(*ifrr));
1345 	if (error != 0) {
1346 		RDLOG_ERR("copyin failed: %d", error);
1347 		goto done;
1348 	}
1349 	if (ifrr->ifrr_reserved[0] != 0 || ifrr->ifrr_reserved[1] != 0 ||
1350 	    ifrr->ifrr_reserved[2] != 0 || ifrr->ifrr_reserved[3] != 0) {
1351 		RDLOG_ERR("reserved[0]=0x%llu, reserved[1]=0x%llu"
1352 		    "reserved[2]=0x%llu, reserved[3]=0x%llu", ifrr->ifrr_reserved[0],
1353 		    ifrr->ifrr_reserved[1], ifrr->ifrr_reserved[2],
1354 		    ifrr->ifrr_reserved[3]);
1355 		error = EINVAL;
1356 		goto done;
1357 	}
1358 done:
1359 	return error;
1360 }
1361 
/*
 * Detach-notify callback installed on the delegate interface: when the
 * delegate detaches, drop all delegate state from this redirect
 * interface.
 */
static void
redirect_detach_notify(void *arg)
{
	if_redirect_t __single rd = arg;

	redirect_clear_delegate(rd);
}
1369 
/*
 * Configures 'delegate_ifp' as the delegate of this redirect interface.
 * Acquires, in order: an io ref on our own ifnet (handed to the
 * delegate as its parent), an io ref on the delegate, the flowswitch
 * rx callback, the ifnet delegate linkage, and (for ethernet) the
 * delegate's MAC address. Each step sets a corresponding rd_* flag so
 * redirect_clear_delegate_locked() can unwind exactly what succeeded.
 *
 * Returns 0 on success; EALREADY if the same delegate is already set;
 * ENXIO/other errno on failure (with all partial state unwound).
 */
static int
redirect_set_delegate(if_redirect_t rd, ifnet_t delegate_ifp)
{
	ifnet_t ifp = rd->rd_ifp;
	int error;

	RD_LOCK(rd);
	if (rd->rd_detaching) {
		RDLOG_ERR("%s is detaching", rd->rd_name);
		DTRACE_SKYWALK2(detaching, if_redirect_t, rd, ifnet_t, delegate_ifp);
		RD_UNLOCK(rd);
		return ENXIO;
	}
	if (rd->rd_delegate_ifp != NULL) {
		if (rd->rd_delegate_ifp == delegate_ifp) {
			RDLOG_ERR("cannot configure the same delegate");
			DTRACE_SKYWALK2(same__ifp, if_redirect_t, rd,
			    ifnet_t, delegate_ifp);
			RD_UNLOCK(rd);
			return EALREADY;
		} else {
			/* Switching delegates: tear down the old one first. */
			redirect_clear_delegate_locked(rd);
		}
	}
	ASSERT(rd->rd_delegate_ifp == NULL);

	if (!ifnet_get_ioref(ifp)) {
		RDLOG_ERR("failed to get self reference");
		DTRACE_SKYWALK2(ifp__detaching, if_redirect_t, rd, ifnet_t, ifp);
		error = ENXIO;
		goto fail;
	}
	ASSERT(!rd->rd_self_ref);
	rd->rd_self_ref = TRUE;

	/* This saves the reference taken above */
	error = ifnet_set_delegate_parent(delegate_ifp, ifp);
	if (error != 0) {
		RDLOG_ERR("failed to set delegate parent");
		DTRACE_SKYWALK4(set__delegate__parent__failed, if_redirect_t, rd,
		    ifnet_t, delegate_ifp, ifnet_t, ifp, int, error);
		goto fail;
	}
	ASSERT(!rd->rd_delegate_parent_set);
	rd->rd_delegate_parent_set = TRUE;

	if (!ifnet_get_ioref(delegate_ifp)) {
		RDLOG_ERR("failed to get delegate reference");
		DTRACE_SKYWALK2(delegate__detaching, if_redirect_t, rd,
		    ifnet_t, delegate_ifp);
		error = ENXIO;
		goto fail;
	}
	ASSERT(rd->rd_delegate_ifp == NULL);
	rd->rd_delegate_ifp = delegate_ifp;
	ASSERT(!rd->rd_delegate_ref);
	rd->rd_delegate_ref = TRUE;

	/* Redirect the delegate's flowswitch rx path into us. */
	error = ifnet_set_flowswitch_rx_callback(delegate_ifp, redirect_rx_cb, rd);
	if (error != 0) {
		RDLOG_ERR("failed to set fsw rx callback: %d", error);
		DTRACE_SKYWALK3(set__fsw__rx__cb__fail, if_redirect_t, rd, ifnet_t,
		    delegate_ifp, int, error);
		goto fail;
	}
	ASSERT(!rd->rd_fsw_rx_cb_set);
	rd->rd_fsw_rx_cb_set = TRUE;

	error = ifnet_set_delegate(ifp, delegate_ifp);
	if (error != 0) {
		RDLOG_ERR("failed to set delegate ifp: %d", error);
		DTRACE_SKYWALK4(set__delegate__fail, if_redirect_t, rd, ifnet_t, ifp,
		    ifnet_t, delegate_ifp, int, error);
		goto fail;
	}
	ASSERT(!rd->rd_delegate_set);
	rd->rd_delegate_set = TRUE;
	RDLOG_INFO("%s set delegate to %s", if_name(ifp), if_name(delegate_ifp));

	if (rd->rd_ftype == IFRTYPE_FAMILY_ETHERNET) {
		uint8_t mac_addr[ETHER_ADDR_LEN];

		/* Clone the delegate's MAC address onto our interface. */
		error = ifnet_lladdr_copy_bytes(delegate_ifp, mac_addr,
		    ETHER_ADDR_LEN);
		if (error != 0) {
			RDLOG_ERR("failed to get mac addr from %s, error %d",
			    if_name(delegate_ifp), error);
			DTRACE_SKYWALK3(lladdr__copy__fail, if_redirect_t, rd,
			    ifnet_t, delegate_ifp, int, error);
			goto fail;
		}
		error = ifnet_set_lladdr(ifp, mac_addr, ETHER_ADDR_LEN);
		if (error != 0) {
			RDLOG_ERR("failed to set mac addr for %s, error %d",
			    if_name(ifp), error);
			DTRACE_SKYWALK3(set__lladdr__fail, if_redirect_t, rd,
			    ifnet_t, ifp, int, error);
			goto fail;
		}
		ASSERT(!rd->rd_mac_addr_set);
		rd->rd_mac_addr_set = TRUE;
	}
	/*
	 * This is enabled out-of-band from redirect_set_delegate() but we should do
	 * this here in case we move to a different delegate.
	 */
	if (rd->rd_intf_adv_enabled) {
		redirect_delegate_adv_config(delegate_ifp, true);
	}
	ifnet_set_detach_notify(delegate_ifp, redirect_detach_notify, rd);
	rd->rd_detach_notify_set = TRUE;

	/*
	 * Check that the delegate is still attached. If not, the detach notify above
	 * could've been missed and we would have to cleanup everything here.
	 */
	if (!ifnet_is_fully_attached(delegate_ifp)) {
		RDLOG_ERR("delegate %s detached during setup", if_name(delegate_ifp));
		DTRACE_SKYWALK2(delegate__detached, if_redirect_t, rd,
		    ifnet_t, delegate_ifp);
		error = ENXIO;
		goto fail;
	}
	RD_UNLOCK(rd);
	return 0;

fail:
	redirect_clear_delegate_locked(rd);
	RD_UNLOCK(rd);
	return error;
}
1501 
/*
 * Unwinds delegate state set up by redirect_set_delegate(), guarded by
 * the per-step rd_* flags so it is safe to call after a partial setup.
 * Must be called with the rd lock held. Teardown runs in roughly the
 * reverse order of setup: detach notify, advisory, MAC address,
 * delegate linkage, rx callback, delegate io ref, delegate parent,
 * self io ref.
 */
static void
redirect_clear_delegate_locked(if_redirect_t rd)
{
	ifnet_t ifp = rd->rd_ifp;
	ifnet_t delegate_ifp = rd->rd_delegate_ifp;
	int error;

	if (rd->rd_detach_notify_set) {
		ASSERT(delegate_ifp != NULL);
		ifnet_set_detach_notify(delegate_ifp, NULL, NULL);
		rd->rd_detach_notify_set = FALSE;
	}
	if (rd->rd_intf_adv_enabled && delegate_ifp != NULL) {
		redirect_delegate_adv_config(delegate_ifp, false);
		/*
		 * We don't clear rd_intf_adv_enabled because we want to reenable
		 * advisory after moving to a different delegate.
		 */
	}
	if (rd->rd_ftype == IFRTYPE_FAMILY_ETHERNET && rd->rd_mac_addr_set) {
		ASSERT(delegate_ifp != NULL);
		/* Revert to the placeholder MAC; failure is only logged. */
		error = ifnet_set_lladdr(ifp, default_mac, ETHER_ADDR_LEN);
		if (error != 0) {
			RDLOG_ERR("failed to set mac addr for %s, error %d",
			    if_name(ifp), error);
			DTRACE_SKYWALK3(set__lladdr__fail, if_redirect_t, rd,
			    ifnet_t, ifp, int, error);
		}
		rd->rd_mac_addr_set = FALSE;
	}
	if (rd->rd_delegate_set) {
		ASSERT(delegate_ifp != NULL);
		(void) ifnet_set_delegate(ifp, NULL);
		rd->rd_delegate_set = FALSE;
	}
	if (rd->rd_fsw_rx_cb_set) {
		ASSERT(delegate_ifp != NULL);
		(void) ifnet_set_flowswitch_rx_callback(delegate_ifp, NULL, NULL);
		rd->rd_fsw_rx_cb_set = FALSE;
	}
	if (rd->rd_delegate_ref) {
		ASSERT(delegate_ifp != NULL);
		rd->rd_delegate_ifp = NULL;
		ifnet_decr_iorefcnt(delegate_ifp);
		rd->rd_delegate_ref = FALSE;
	}
	if (rd->rd_delegate_parent_set) {
		ASSERT(delegate_ifp != NULL);
		ifnet_set_delegate_parent(delegate_ifp, NULL);
		rd->rd_delegate_parent_set = FALSE;
	}
	if (rd->rd_self_ref) {
		ifnet_decr_iorefcnt(ifp);
		rd->rd_self_ref = FALSE;
	}
}
1558 
/*
 * Locked wrapper around redirect_clear_delegate_locked().
 */
static void
redirect_clear_delegate(if_redirect_t rd)
{
	RD_LOCK(rd);
	redirect_clear_delegate_locked(rd);
	RD_UNLOCK(rd);
}
1566 
/*
 * Handles the RD_S_CMD_SET_DELEGATE drvspec command: copies in the
 * request, resolves the named delegate interface, validates that its
 * family is compatible with this redirect interface's functional type
 * (and that it is not low-latency or itself a redirect), then installs
 * it via redirect_set_delegate(). The io ref taken by ifunit_ref() is
 * always dropped on the way out.
 */
static int
redirect_ioctl_set_delegate(ifnet_t ifp, user_addr_t user_addr, uint64_t len)
{
	if_redirect_t rd = NULL;
	struct if_redirect_request ifrr;
	ifnet_t delegate_ifp = NULL;
	int error;

	error = if_redirect_request_copyin(user_addr, &ifrr, len);
	if (error != 0) {
		RDLOG_ERR("if_redirect_request_copyin failed: error %d", error);
		DTRACE_SKYWALK4(copyin__failed, ifnet_t, ifp, user_addr_t, user_addr,
		    uint64_t, len, int, error);
		goto done;
	}
	if (ifrr.ifrr_delegate_name[0] == '\0') {
		RDLOG_ERR("NULL delegate name");
		DTRACE_SKYWALK1(null__delegate, ifnet_t, ifp);
		error = EINVAL;
		goto done;
	}
	/* ensure null termination */
	ifrr.ifrr_delegate_name[IFNAMSIZ - 1] = '\0';
	/* takes an io ref on the returned ifnet; dropped at 'done' */
	delegate_ifp = ifunit_ref(__unsafe_null_terminated_from_indexable(ifrr.ifrr_delegate_name));
	if (delegate_ifp == NULL) {
		RDLOG_ERR("delegate %s not found", ifrr.ifrr_delegate_name);
		DTRACE_SKYWALK2(invalid__name, ifnet_t, ifp, char *,
		    ifrr.ifrr_delegate_name);
		error = ENOENT;
		goto done;
	}
	rd = ifnet_get_if_redirect(ifp);
	if (rd == NULL) {
		RDLOG_ERR("rd is NULL");
		DTRACE_SKYWALK1(null__rd, ifnet_t, ifp);
		error = ENOENT;
		goto done;
	}
	/* Verify that the delegate type is supported */
	if (rd->rd_ftype == IFRTYPE_FAMILY_ETHERNET) {
		if (delegate_ifp->if_family != IFNET_FAMILY_ETHERNET) {
			RDLOG_ERR("%s's family %d not compatible "
			    "with ethernet functional type", if_name(delegate_ifp),
			    delegate_ifp->if_family);
			DTRACE_SKYWALK2(delegate__incompatible__ether, if_redirect_t, rd,
			    ifnet_t, delegate_ifp);
			error = EINVAL;
			goto done;
		}
		if (ifnet_is_low_latency(delegate_ifp)) {
			RDLOG_ERR("low latency %s cannot be a delegate",
			    if_name(delegate_ifp));
			DTRACE_SKYWALK2(delegate__is__ll, if_redirect_t, rd,
			    ifnet_t, delegate_ifp);
			error = EINVAL;
			goto done;
		}
	} else {
		/* cellular functional type also accepts utun/ipsec delegates */
		ASSERT(rd->rd_ftype == IFRTYPE_FAMILY_CELLULAR);
		if (delegate_ifp->if_family != IFNET_FAMILY_CELLULAR &&
		    delegate_ifp->if_family != IFNET_FAMILY_UTUN &&
		    delegate_ifp->if_family != IFNET_FAMILY_IPSEC) {
			RDLOG_ERR("%s's family %d not compatible "
			    "with cellular functional type", if_name(delegate_ifp),
			    delegate_ifp->if_family);
			DTRACE_SKYWALK2(delegate__incompatible__cell, if_redirect_t, rd,
			    ifnet_t, delegate_ifp);
			error = EINVAL;
			goto done;
		}
	}
	/* disallow chaining redirect interfaces */
	if (delegate_ifp->if_subfamily == IFNET_SUBFAMILY_REDIRECT) {
		RDLOG_ERR("delegate %s cannot be redirect", if_name(delegate_ifp));
		DTRACE_SKYWALK2(delegate__is__redirect, if_redirect_t, rd,
		    ifnet_t, delegate_ifp);
		error = EINVAL;
		goto done;
	}
	error = redirect_set_delegate(rd, delegate_ifp);
done:
	if (delegate_ifp != NULL) {
		ifnet_decr_iorefcnt(delegate_ifp);
	}
	return error;
}
1652 
1653 static int
redirect_set_drvspec(ifnet_t ifp,uint64_t cmd,uint64_t len,user_addr_t user_addr)1654 redirect_set_drvspec(ifnet_t ifp, uint64_t cmd, uint64_t len,
1655     user_addr_t user_addr)
1656 {
1657 	int error;
1658 
1659 	switch (cmd) {
1660 	case RD_S_CMD_SET_DELEGATE:
1661 		error = redirect_ioctl_set_delegate(ifp, user_addr, len);
1662 		break;
1663 	default:
1664 		error = EOPNOTSUPP;
1665 		break;
1666 	}
1667 	return error;
1668 }
1669 
/*
 * SIOCGDRVSPEC handler: no get sub-commands are defined yet, so this
 * accepts everything and returns success without touching user memory.
 */
static int
redirect_get_drvspec(ifnet_t ifp, uint64_t cmd, uint64_t len,
    user_addr_t user_addr)
{
#pragma unused(ifp, cmd, len, user_addr)
	return 0;
}
1677 
/* Convenience view of an ifdrv request as its 32- or 64-bit layout. */
union ifdrvu {
	struct ifdrv32  *ifdrvu_32;
	struct ifdrv64  *ifdrvu_64;
	void            *ifdrvu_p;
};
1683 
/*
 * ioctl handler for redirect interfaces. Supports address/flags/MTU
 * management, media queries (ethernet type only), and the drvspec
 * get/set commands that carry redirect-specific requests. Returns
 * ENXIO once the interface is detaching, EOPNOTSUPP for anything
 * unrecognized.
 */
static errno_t
redirect_ioctl(ifnet_t ifp, u_long cmd, void *data)
{
	if_redirect_t rd = NULL;
	struct ifreq *ifr = NULL;
	union ifdrvu drv;
	uint64_t drv_cmd;
	uint64_t drv_len;
	boolean_t drv_set_command = FALSE;
	user_addr_t user_addr;
	int error = 0;

	rd = ifnet_get_if_redirect(ifp);
	if (rd == NULL) {
		RDLOG_ERR("rd is NULL");
		DTRACE_SKYWALK1(null__rd, ifnet_t, ifp);
		return ENXIO;
	}
	RD_LOCK(rd);
	if (rd->rd_detaching) {
		RDLOG_ERR("%s is detaching", rd->rd_name);
		DTRACE_SKYWALK1(detaching, if_redirect_t, rd);
		RD_UNLOCK(rd);
		return ENXIO;
	}
	RD_UNLOCK(rd);

	ifr = (struct ifreq *)data;

	switch (cmd) {
	case SIOCSIFADDR:
		ifnet_set_flags(ifp, IFF_UP, IFF_UP);
		break;
	case SIOCGIFMEDIA32:
	case SIOCGIFMEDIA64: {
		/*
		 * NOTE(review): both sizes are handled through the 32-bit
		 * struct; this relies on ifmediareq32/ifmediareq64 sharing a
		 * common prefix up through ifm_count — confirm against if.h.
		 */
		struct ifmediareq32 *ifmr;

		RD_LOCK(rd);
		if (rd->rd_ftype != IFRTYPE_FAMILY_ETHERNET) {
			DTRACE_SKYWALK1(not__ether, if_redirect_t, rd);
			RD_UNLOCK(rd);
			return EOPNOTSUPP;
		}
		/* Report a single active ethernet medium. */
		ifmr = (struct ifmediareq32 *)data;
		ifmr->ifm_current = IFM_ETHER;
		ifmr->ifm_mask = 0;
		ifmr->ifm_status = (IFM_AVALID | IFM_ACTIVE);
		ifmr->ifm_active = IFM_ETHER;
		ifmr->ifm_count = 1;

		/* Only the user-pointer field differs between the layouts. */
		user_addr = (cmd == SIOCGIFMEDIA64) ?
		    ((struct ifmediareq64 *)data)->ifmu_ulist :
		    CAST_USER_ADDR_T(((struct ifmediareq32 *)data)->ifmu_ulist);
		if (user_addr != USER_ADDR_NULL) {
			error = copyout(&ifmr->ifm_current, user_addr, sizeof(int));
		}
		RD_UNLOCK(rd);
		break;
	}
	case SIOCGIFDEVMTU: {
		struct ifdevmtu *devmtu_p;

		devmtu_p = &ifr->ifr_devmtu;
		devmtu_p->ifdm_current = ifnet_mtu(ifp);
		devmtu_p->ifdm_max = redirect_max_mtu(ifp);
		devmtu_p->ifdm_min = IF_MINMTU;
		break;
	}
	case SIOCSIFMTU:
		if ((unsigned int)ifr->ifr_mtu > redirect_max_mtu(ifp) ||
		    ifr->ifr_mtu < IF_MINMTU) {
			error = EINVAL;
		} else {
			error = ifnet_set_mtu(ifp, ifr->ifr_mtu);
		}
		break;
	case SIOCSIFFLAGS:
		/* Keep IFF_RUNNING in sync with IFF_UP. */
		if ((ifp->if_flags & IFF_UP) != 0) {
			/* marked up, set running if not already set */
			if ((ifp->if_flags & IFF_RUNNING) == 0) {
				/* set running */
				error = ifnet_set_flags(ifp, IFF_RUNNING,
				    IFF_RUNNING);
			}
		} else if ((ifp->if_flags & IFF_RUNNING) != 0) {
			/* marked down, clear running */
			error = ifnet_set_flags(ifp, 0, IFF_RUNNING);
		}
		break;
	case SIOCSDRVSPEC32:
	case SIOCSDRVSPEC64:
		/* set commands require superuser */
		error = proc_suser(current_proc());
		if (error != 0) {
			break;
		}
		drv_set_command = TRUE;
		OS_FALLTHROUGH;
	case SIOCGDRVSPEC32:
	case SIOCGDRVSPEC64:
		/* Normalize the 32/64-bit drvspec request, then dispatch. */
		drv.ifdrvu_p = data;
		if (cmd == SIOCGDRVSPEC32 || cmd == SIOCSDRVSPEC32) {
			drv_cmd = drv.ifdrvu_32->ifd_cmd;
			drv_len = drv.ifdrvu_32->ifd_len;
			user_addr = CAST_USER_ADDR_T(drv.ifdrvu_32->ifd_data);
		} else {
			drv_cmd = drv.ifdrvu_64->ifd_cmd;
			drv_len = drv.ifdrvu_64->ifd_len;
			user_addr = drv.ifdrvu_64->ifd_data;
		}
		if (drv_set_command) {
			error = redirect_set_drvspec(ifp, drv_cmd, drv_len,
			    user_addr);
		} else {
			error = redirect_get_drvspec(ifp, drv_cmd, drv_len,
			    user_addr);
		}
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		/* multicast membership changes are accepted silently */
		error = 0;
		break;

	default:
		error = EOPNOTSUPP;
		break;
	}

	return error;
}
1813 
/*
 * ifnet detach-completion callback. Severs the softc linkage, drops
 * the softc's creation reference, and releases the ifnet itself.
 * By this point the doorbell thread call must already be gone
 * (verified under the lock).
 */
static void
redirect_if_free(ifnet_t ifp)
{
	if_redirect_t rd = NULL;

	if (ifp == NULL) {
		RDLOG_ERR("ifp is NULL");
		DTRACE_SKYWALK(null__ifp);
		return;
	}
	rd = ifnet_get_if_redirect(ifp);
	if (rd == NULL) {
		RDLOG_ERR("rd is NULL");
		DTRACE_SKYWALK1(null__rd, ifnet_t, ifp);
		return;
	}
	RD_LOCK(rd);
	/* Disconnect softc so later lookups return NULL. */
	ifp->if_softc = NULL;
	VERIFY(rd->rd_doorbell_tcall == NULL);
	RD_UNLOCK(rd);
	redirect_release(rd);
	ifnet_release(ifp);
	return;
}
1838 
1839 /*
1840  * Network interface functions
1841  */
1842 static errno_t
redirect_demux(__unused ifnet_t ifp,mbuf_t data,__unused char * frame_header,protocol_family_t * protocol)1843 redirect_demux(__unused ifnet_t ifp, mbuf_t data, __unused char *frame_header,
1844     protocol_family_t *protocol)
1845 {
1846 	struct ip *ip;
1847 	u_int ip_version;
1848 
1849 	while (data != NULL && mbuf_len(data) < 1) {
1850 		data = mbuf_next(data);
1851 	}
1852 
1853 	if (data == NULL) {
1854 		RDLOG_DBG("data is NULL");
1855 		DTRACE_SKYWALK(null__data);
1856 		return ENOENT;
1857 	}
1858 
1859 	ip = mtod(data, struct ip *);
1860 	ip_version = ip->ip_v;
1861 
1862 	switch (ip_version) {
1863 	case 4:
1864 		*protocol = PF_INET;
1865 		return 0;
1866 	case 6:
1867 		*protocol = PF_INET6;
1868 		return 0;
1869 	default:
1870 		*protocol = PF_UNSPEC;
1871 		break;
1872 	}
1873 
1874 	return 0;
1875 }
1876 
1877 static errno_t
redirect_add_proto(__unused ifnet_t interface,protocol_family_t protocol,__unused const struct ifnet_demux_desc * demux_array,__unused uint32_t demux_count)1878 redirect_add_proto(__unused ifnet_t interface, protocol_family_t protocol,
1879     __unused const struct ifnet_demux_desc *demux_array,
1880     __unused uint32_t demux_count)
1881 {
1882 	switch (protocol) {
1883 	case PF_INET:
1884 		return 0;
1885 	case PF_INET6:
1886 		return 0;
1887 	default:
1888 		break;
1889 	}
1890 
1891 	return ENOPROTOOPT;
1892 }
1893 
/*
 * del_proto handler: nothing to clean up per-protocol, so always
 * succeeds.
 */
static errno_t
redirect_del_proto(__unused ifnet_t interface,
    __unused protocol_family_t protocol)
{
	return 0;
}
1900 
1901 __private_extern__ void
if_redirect_init(void)1902 if_redirect_init(void)
1903 {
1904 	int error;
1905 
1906 	redirect_log_handle = os_log_create("com.apple.xnu.net.redirect", "redirect");
1907 	(void)redirect_register_nexus_domain_provider();
1908 	error = if_clone_attach(&redirect_cloner);
1909 	if (error != 0) {
1910 		return;
1911 	}
1912 	return;
1913 }
1914