xref: /xnu-8792.81.2/bsd/net/if_ipsec.c (revision 19c3b8c28c31cb8130e034cfb5df6bf9ba342d90)
1 /*
2  * Copyright (c) 2012-2022 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 
30 #include <sys/systm.h>
31 #include <sys/kern_control.h>
32 #include <net/kpi_protocol.h>
33 #include <net/kpi_interface.h>
34 #include <sys/socket.h>
35 #include <sys/socketvar.h>
36 #include <net/if.h>
37 #include <net/if_types.h>
38 #include <net/bpf.h>
39 #include <net/if_ipsec.h>
40 #include <sys/mbuf.h>
41 #include <sys/sockio.h>
42 #include <netinet/in.h>
43 #include <netinet/ip6.h>
44 #include <netinet6/in6_var.h>
45 #include <netinet6/ip6_var.h>
46 #include <sys/kauth.h>
47 #include <netinet6/ipsec.h>
48 #include <netinet6/ipsec6.h>
49 #include <netinet6/esp.h>
50 #include <netinet6/esp6.h>
51 #include <netinet/ip.h>
52 #include <net/flowadv.h>
53 #include <net/necp.h>
54 #include <netkey/key.h>
55 #include <net/pktap.h>
56 #include <kern/zalloc.h>
57 #include <os/log.h>
58 
59 #if SKYWALK
60 #include <skywalk/os_skywalk_private.h>
61 #include <skywalk/nexus/flowswitch/nx_flowswitch.h>
62 #include <skywalk/nexus/netif/nx_netif.h>
63 #define IPSEC_NEXUS 1
64 #else // SKYWALK
65 #define IPSEC_NEXUS 0
66 #endif // SKYWALK
67 
68 extern int net_qos_policy_restricted;
69 extern int net_qos_policy_restrict_avapps;
70 
71 /* Kernel Control functions */
72 static errno_t  ipsec_ctl_setup(u_int32_t *unit, void **unitinfo);
73 static errno_t  ipsec_ctl_bind(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
74     void **unitinfo);
75 static errno_t  ipsec_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
76     void **unitinfo);
77 static errno_t  ipsec_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t unit,
78     void *unitinfo);
79 static errno_t  ipsec_ctl_send(kern_ctl_ref kctlref, u_int32_t unit,
80     void *unitinfo, mbuf_t m, int flags);
81 static errno_t  ipsec_ctl_getopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo,
82     int opt, void *data, size_t *len);
83 static errno_t  ipsec_ctl_setopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo,
84     int opt, void *data, size_t len);
85 
86 /* Network Interface functions */
87 static void     ipsec_start(ifnet_t     interface);
88 static errno_t  ipsec_output(ifnet_t interface, mbuf_t data);
89 static errno_t  ipsec_demux(ifnet_t interface, mbuf_t data, char *frame_header,
90     protocol_family_t *protocol);
91 static errno_t  ipsec_add_proto(ifnet_t interface, protocol_family_t protocol,
92     const struct ifnet_demux_desc *demux_array,
93     u_int32_t demux_count);
94 static errno_t  ipsec_del_proto(ifnet_t interface, protocol_family_t protocol);
95 static errno_t  ipsec_ioctl(ifnet_t interface, u_long cmd, void *data);
96 static void             ipsec_detached(ifnet_t interface);
97 
98 /* Protocol handlers */
99 static errno_t  ipsec_attach_proto(ifnet_t interface, protocol_family_t proto);
100 static errno_t  ipsec_proto_input(ifnet_t interface, protocol_family_t protocol,
101     mbuf_t m, char *frame_header);
102 static errno_t ipsec_proto_pre_output(ifnet_t interface, protocol_family_t protocol,
103     mbuf_t *packet, const struct sockaddr *dest, void *route,
104     char *frame_type, char *link_layer_dest);
105 
106 static kern_ctl_ref     ipsec_kctlref;
107 static LCK_ATTR_DECLARE(ipsec_lck_attr, 0, 0);
108 static LCK_GRP_DECLARE(ipsec_lck_grp, "ipsec");
109 static LCK_MTX_DECLARE_ATTR(ipsec_lock, &ipsec_lck_grp, &ipsec_lck_attr);
110 
111 #if IPSEC_NEXUS
112 
113 SYSCTL_DECL(_net_ipsec);
114 SYSCTL_NODE(_net, OID_AUTO, ipsec, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "IPsec");
115 static int if_ipsec_verify_interface_creation = 0;
116 SYSCTL_INT(_net_ipsec, OID_AUTO, verify_interface_creation, CTLFLAG_RW | CTLFLAG_LOCKED, &if_ipsec_verify_interface_creation, 0, "");
117 
118 #define IPSEC_IF_VERIFY(_e)             if (__improbable(if_ipsec_verify_interface_creation)) { VERIFY(_e); }
119 
120 #define IPSEC_IF_DEFAULT_SLOT_SIZE 2048
121 #define IPSEC_IF_DEFAULT_RING_SIZE 64
122 #define IPSEC_IF_DEFAULT_TX_FSW_RING_SIZE 64
123 #define IPSEC_IF_DEFAULT_RX_FSW_RING_SIZE 128
124 #define IPSEC_IF_DEFAULT_BUF_SEG_SIZE   skmem_usr_buf_seg_size
125 
126 #define IPSEC_IF_WMM_RING_COUNT NEXUS_NUM_WMM_QUEUES
127 #define IPSEC_IF_MAX_RING_COUNT IPSEC_IF_WMM_RING_COUNT
128 #define IPSEC_NETIF_WMM_TX_RING_COUNT IPSEC_IF_WMM_RING_COUNT
129 #define IPSEC_NETIF_WMM_RX_RING_COUNT 1
130 #define IPSEC_NETIF_MAX_TX_RING_COUNT IPSEC_NETIF_WMM_TX_RING_COUNT
131 #define IPSEC_NETIF_MAX_RX_RING_COUNT IPSEC_NETIF_WMM_RX_RING_COUNT
132 
133 #define IPSEC_IF_MIN_RING_SIZE 8
134 #define IPSEC_IF_MAX_RING_SIZE 1024
135 
136 #define IPSEC_IF_MIN_SLOT_SIZE 1024
137 #define IPSEC_IF_MAX_SLOT_SIZE 4096
138 
139 #define IPSEC_DEFAULT_MAX_PENDING_INPUT_COUNT 512
140 
141 #define IPSEC_KPIPE_FLAG_WAKE_PKT 0x01
142 
143 static int if_ipsec_max_pending_input = IPSEC_DEFAULT_MAX_PENDING_INPUT_COUNT;
144 
145 static int sysctl_if_ipsec_ring_size SYSCTL_HANDLER_ARGS;
146 static int sysctl_if_ipsec_tx_fsw_ring_size SYSCTL_HANDLER_ARGS;
147 static int sysctl_if_ipsec_rx_fsw_ring_size SYSCTL_HANDLER_ARGS;
148 
149 static int if_ipsec_ring_size = IPSEC_IF_DEFAULT_RING_SIZE;
150 static int if_ipsec_tx_fsw_ring_size = IPSEC_IF_DEFAULT_TX_FSW_RING_SIZE;
151 static int if_ipsec_rx_fsw_ring_size = IPSEC_IF_DEFAULT_RX_FSW_RING_SIZE;
152 
153 SYSCTL_INT(_net_ipsec, OID_AUTO, max_pending_input, CTLFLAG_LOCKED | CTLFLAG_RW, &if_ipsec_max_pending_input, 0, "");
154 SYSCTL_PROC(_net_ipsec, OID_AUTO, ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
155     &if_ipsec_ring_size, IPSEC_IF_DEFAULT_RING_SIZE, &sysctl_if_ipsec_ring_size, "I", "");
156 SYSCTL_PROC(_net_ipsec, OID_AUTO, tx_fsw_ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
157     &if_ipsec_tx_fsw_ring_size, IPSEC_IF_DEFAULT_TX_FSW_RING_SIZE, &sysctl_if_ipsec_tx_fsw_ring_size, "I", "");
158 SYSCTL_PROC(_net_ipsec, OID_AUTO, rx_fsw_ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
159     &if_ipsec_rx_fsw_ring_size, IPSEC_IF_DEFAULT_RX_FSW_RING_SIZE, &sysctl_if_ipsec_rx_fsw_ring_size, "I", "");
160 
161 static int if_ipsec_debug = 0;
162 SYSCTL_INT(_net_ipsec, OID_AUTO, debug, CTLFLAG_LOCKED | CTLFLAG_RW, &if_ipsec_debug, 0, "");
163 
164 static errno_t
165 ipsec_register_nexus(void);
166 
167 typedef struct ipsec_nx {
168 	uuid_t if_provider;
169 	uuid_t if_instance;
170 	uuid_t fsw_provider;
171 	uuid_t fsw_instance;
172 	uuid_t fsw_device;
173 	uuid_t fsw_agent;
174 } *ipsec_nx_t;
175 
176 static nexus_controller_t ipsec_ncd;
177 static int ipsec_ncd_refcount;
178 static uuid_t ipsec_kpipe_uuid;
179 
180 #endif // IPSEC_NEXUS
181 
182 /* Control block allocated for each kernel control connection */
/*
 * Per-connection control block, allocated from ipsec_pcb_zone for each
 * kernel control connection (one per ipsec interface).  Most state is
 * protected by ipsec_pcb_lock; see individual field comments for the
 * exceptions.
 */
struct ipsec_pcb {
	TAILQ_ENTRY(ipsec_pcb)  ipsec_chain;            // linkage on the global ipsec_head list
	kern_ctl_ref            ipsec_ctlref;           // owning kernel control reference
	ifnet_t                 ipsec_ifp;              // attached network interface
	u_int32_t               ipsec_unit;             // kctl unit; reset to 0 when the ctl disconnects
	u_int32_t               ipsec_unique_id;
	// These external flags can be set with IPSEC_OPT_FLAGS
	u_int32_t               ipsec_external_flags;
	// These internal flags are only used within this driver
	u_int32_t               ipsec_internal_flags;   // e.g. IPSEC_FLAGS_KPIPE_ALLOCATED
	u_int32_t               ipsec_input_frag_size;  // meaningful only when ipsec_frag_size_set
	bool                    ipsec_frag_size_set;
	int                     ipsec_ext_ifdata_stats;
	mbuf_svc_class_t        ipsec_output_service_class;
	char                    ipsec_if_xname[IFXNAMSIZ];
	char                    ipsec_unique_name[IFXNAMSIZ];
	// PCB lock protects state fields, like ipsec_kpipe_count
	decl_lck_rw_data(, ipsec_pcb_lock);
	// lock to protect ipsec_pcb_data_move & ipsec_pcb_drainers
	decl_lck_mtx_data(, ipsec_pcb_data_move_lock);
	u_int32_t               ipsec_pcb_data_move; /* number of data moving contexts */
	u_int32_t               ipsec_pcb_drainers; /* number of threads waiting to drain */
	u_int32_t               ipsec_pcb_data_path_state; /* internal state of interface data path (IPSEC_PCB_DATA_PATH_READY) */
	ipsec_dscp_mapping_t    ipsec_output_dscp_mapping;

#if IPSEC_NEXUS
	lck_mtx_t               ipsec_input_chain_lock;
	lck_mtx_t               ipsec_kpipe_encrypt_lock;   // serializes ipsec_encrypt_mbuf() on the kpipe path
	lck_mtx_t               ipsec_kpipe_decrypt_lock;
	struct mbuf *           ipsec_input_chain;
	struct mbuf *           ipsec_input_chain_last;
	u_int32_t               ipsec_input_chain_count;
	// Input chain lock protects the list of input mbufs
	// The input chain lock must be taken AFTER the PCB lock if both are held
	struct ipsec_nx         ipsec_nx;               // nexus/flowswitch provider & instance UUIDs
	u_int32_t               ipsec_kpipe_count;      // number of kernel pipes; equals IPSEC_IF_WMM_RING_COUNT in WMM mode
	pid_t                   ipsec_kpipe_pid;
	uuid_t                  ipsec_kpipe_proc_uuid;
	uuid_t                  ipsec_kpipe_uuid[IPSEC_IF_MAX_RING_COUNT];
	void *                  ipsec_kpipe_rxring[IPSEC_IF_MAX_RING_COUNT];    // set/cleared in kpipe ring init/fini
	void *                  ipsec_kpipe_txring[IPSEC_IF_MAX_RING_COUNT];    // set/cleared in kpipe ring init/fini
	kern_pbufpool_t         ipsec_kpipe_pp;
	u_int32_t               ipsec_kpipe_tx_ring_size;
	u_int32_t               ipsec_kpipe_rx_ring_size;

	kern_nexus_t            ipsec_netif_nexus;      // set in ipsec_netif_prepare(), cleared on nexus disconnect
	kern_pbufpool_t         ipsec_netif_pp;
	void *                  ipsec_netif_rxring[IPSEC_NETIF_MAX_RX_RING_COUNT];
	void *                  ipsec_netif_txring[IPSEC_NETIF_MAX_TX_RING_COUNT];
	uint64_t                ipsec_netif_txring_size;

	u_int32_t               ipsec_slot_size;        // caps the per-packet copy length in the kpipe sync paths
	u_int32_t               ipsec_netif_ring_size;
	u_int32_t               ipsec_tx_fsw_ring_size;
	u_int32_t               ipsec_rx_fsw_ring_size;
	bool                    ipsec_use_netif;
	bool                    ipsec_needs_netagent;   // reported by ipsec_interface_needs_netagent()
#endif // IPSEC_NEXUS
};
242 
243 /* These are internal flags not exposed outside this file */
244 #define IPSEC_FLAGS_KPIPE_ALLOCATED 1
245 
246 /* data movement refcounting functions */
247 static boolean_t ipsec_data_move_begin(struct ipsec_pcb *pcb);
248 static void ipsec_data_move_end(struct ipsec_pcb *pcb);
249 static void ipsec_wait_data_move_drain(struct ipsec_pcb *pcb);
250 
251 /* Data path states */
252 #define IPSEC_PCB_DATA_PATH_READY    0x1
253 
254 /* Macros to set/clear/test data path states */
255 #define IPSEC_SET_DATA_PATH_READY(_pcb) ((_pcb)->ipsec_pcb_data_path_state |= IPSEC_PCB_DATA_PATH_READY)
256 #define IPSEC_CLR_DATA_PATH_READY(_pcb) ((_pcb)->ipsec_pcb_data_path_state &= ~IPSEC_PCB_DATA_PATH_READY)
257 #define IPSEC_IS_DATA_PATH_READY(_pcb) (((_pcb)->ipsec_pcb_data_path_state & IPSEC_PCB_DATA_PATH_READY) != 0)
258 
259 #if IPSEC_NEXUS
260 /* Macros to clear/set/test flags. */
261 static inline void
ipsec_flag_set(struct ipsec_pcb * pcb,uint32_t flag)262 ipsec_flag_set(struct ipsec_pcb *pcb, uint32_t flag)
263 {
264 	pcb->ipsec_internal_flags |= flag;
265 }
266 static inline void
ipsec_flag_clr(struct ipsec_pcb * pcb,uint32_t flag)267 ipsec_flag_clr(struct ipsec_pcb *pcb, uint32_t flag)
268 {
269 	pcb->ipsec_internal_flags &= ~flag;
270 }
271 
272 static inline bool
ipsec_flag_isset(struct ipsec_pcb * pcb,uint32_t flag)273 ipsec_flag_isset(struct ipsec_pcb *pcb, uint32_t flag)
274 {
275 	return !!(pcb->ipsec_internal_flags & flag);
276 }
277 #endif // IPSEC_NEXUS
278 
279 TAILQ_HEAD(ipsec_list, ipsec_pcb) ipsec_head;
280 
281 static ZONE_DEFINE(ipsec_pcb_zone, "net.if_ipsec",
282     sizeof(struct ipsec_pcb), ZC_ZFREE_CLEARMEM);
283 
284 #define IPSECQ_MAXLEN 256
285 
286 #if IPSEC_NEXUS
287 static int
288 sysctl_if_ipsec_ring_size SYSCTL_HANDLER_ARGS
289 {
290 #pragma unused(arg1, arg2)
291 	int value = if_ipsec_ring_size;
292 
293 	int error = sysctl_handle_int(oidp, &value, 0, req);
294 	if (error || !req->newptr) {
295 		return error;
296 	}
297 
298 	if (value < IPSEC_IF_MIN_RING_SIZE ||
299 	    value > IPSEC_IF_MAX_RING_SIZE) {
300 		return EINVAL;
301 	}
302 
303 	if_ipsec_ring_size = value;
304 
305 	return 0;
306 }
307 
308 static int
309 sysctl_if_ipsec_tx_fsw_ring_size SYSCTL_HANDLER_ARGS
310 {
311 #pragma unused(arg1, arg2)
312 	int value = if_ipsec_tx_fsw_ring_size;
313 
314 	int error = sysctl_handle_int(oidp, &value, 0, req);
315 	if (error || !req->newptr) {
316 		return error;
317 	}
318 
319 	if (value < IPSEC_IF_MIN_RING_SIZE ||
320 	    value > IPSEC_IF_MAX_RING_SIZE) {
321 		return EINVAL;
322 	}
323 
324 	if_ipsec_tx_fsw_ring_size = value;
325 
326 	return 0;
327 }
328 
329 static int
330 sysctl_if_ipsec_rx_fsw_ring_size SYSCTL_HANDLER_ARGS
331 {
332 #pragma unused(arg1, arg2)
333 	int value = if_ipsec_rx_fsw_ring_size;
334 
335 	int error = sysctl_handle_int(oidp, &value, 0, req);
336 	if (error || !req->newptr) {
337 		return error;
338 	}
339 
340 	if (value < IPSEC_IF_MIN_RING_SIZE ||
341 	    value > IPSEC_IF_MAX_RING_SIZE) {
342 		return EINVAL;
343 	}
344 
345 	if_ipsec_rx_fsw_ring_size = value;
346 
347 	return 0;
348 }
349 
350 
351 static inline bool
ipsec_in_wmm_mode(struct ipsec_pcb * pcb)352 ipsec_in_wmm_mode(struct ipsec_pcb *pcb)
353 {
354 	return pcb->ipsec_kpipe_count == IPSEC_IF_WMM_RING_COUNT;
355 }
356 
357 #endif // IPSEC_NEXUS
358 
359 errno_t
ipsec_register_control(void)360 ipsec_register_control(void)
361 {
362 	struct kern_ctl_reg     kern_ctl;
363 	errno_t                 result = 0;
364 
365 #if IPSEC_NEXUS
366 	ipsec_register_nexus();
367 #endif // IPSEC_NEXUS
368 
369 	TAILQ_INIT(&ipsec_head);
370 
371 	bzero(&kern_ctl, sizeof(kern_ctl));
372 	strlcpy(kern_ctl.ctl_name, IPSEC_CONTROL_NAME, sizeof(kern_ctl.ctl_name));
373 	kern_ctl.ctl_name[sizeof(kern_ctl.ctl_name) - 1] = 0;
374 	kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED | CTL_FLAG_REG_SETUP; /* Require root */
375 	kern_ctl.ctl_sendsize = 64 * 1024;
376 	kern_ctl.ctl_recvsize = 64 * 1024;
377 	kern_ctl.ctl_setup = ipsec_ctl_setup;
378 	kern_ctl.ctl_bind = ipsec_ctl_bind;
379 	kern_ctl.ctl_connect = ipsec_ctl_connect;
380 	kern_ctl.ctl_disconnect = ipsec_ctl_disconnect;
381 	kern_ctl.ctl_send = ipsec_ctl_send;
382 	kern_ctl.ctl_setopt = ipsec_ctl_setopt;
383 	kern_ctl.ctl_getopt = ipsec_ctl_getopt;
384 
385 	result = ctl_register(&kern_ctl, &ipsec_kctlref);
386 	if (result != 0) {
387 		os_log_error(OS_LOG_DEFAULT, "ipsec_register_control - ctl_register failed: %d\n", result);
388 		return result;
389 	}
390 
391 	/* Register the protocol plumbers */
392 	if ((result = proto_register_plumber(PF_INET, IFNET_FAMILY_IPSEC,
393 	    ipsec_attach_proto, NULL)) != 0) {
394 		os_log_error(OS_LOG_DEFAULT, "ipsec_register_control - proto_register_plumber(PF_INET, IFNET_FAMILY_IPSEC) failed: %d\n",
395 		    result);
396 		ctl_deregister(ipsec_kctlref);
397 		return result;
398 	}
399 
400 	/* Register the protocol plumbers */
401 	if ((result = proto_register_plumber(PF_INET6, IFNET_FAMILY_IPSEC,
402 	    ipsec_attach_proto, NULL)) != 0) {
403 		proto_unregister_plumber(PF_INET, IFNET_FAMILY_IPSEC);
404 		ctl_deregister(ipsec_kctlref);
405 		os_log_error(OS_LOG_DEFAULT, "ipsec_register_control - proto_register_plumber(PF_INET6, IFNET_FAMILY_IPSEC) failed: %d\n",
406 		    result);
407 		return result;
408 	}
409 
410 	return 0;
411 }
412 
413 /* Helpers */
414 int
ipsec_interface_isvalid(ifnet_t interface)415 ipsec_interface_isvalid(ifnet_t interface)
416 {
417 	struct ipsec_pcb *pcb = NULL;
418 
419 	if (interface == NULL) {
420 		return 0;
421 	}
422 
423 	pcb = ifnet_softc(interface);
424 
425 	if (pcb == NULL) {
426 		return 0;
427 	}
428 
429 	/* When ctl disconnects, ipsec_unit is set to 0 */
430 	if (pcb->ipsec_unit == 0) {
431 		return 0;
432 	}
433 
434 	return 1;
435 }
436 
437 #if IPSEC_NEXUS
438 boolean_t
ipsec_interface_needs_netagent(ifnet_t interface)439 ipsec_interface_needs_netagent(ifnet_t interface)
440 {
441 	struct ipsec_pcb *pcb = NULL;
442 
443 	if (interface == NULL) {
444 		return FALSE;
445 	}
446 
447 	pcb = ifnet_softc(interface);
448 
449 	if (pcb == NULL) {
450 		return FALSE;
451 	}
452 
453 	return pcb->ipsec_needs_netagent == true;
454 }
455 #endif // IPSEC_NEXUS
456 
457 static errno_t
ipsec_ifnet_set_attrs(ifnet_t ifp)458 ipsec_ifnet_set_attrs(ifnet_t ifp)
459 {
460 	/* Set flags and additional information. */
461 	ifnet_set_mtu(ifp, 1500);
462 	ifnet_set_flags(ifp, IFF_UP | IFF_MULTICAST | IFF_POINTOPOINT, 0xffff);
463 
464 	/* The interface must generate its own IPv6 LinkLocal address,
465 	 * if possible following the recommendation of RFC2472 to the 64bit interface ID
466 	 */
467 	ifnet_set_eflags(ifp, IFEF_NOAUTOIPV6LL, IFEF_NOAUTOIPV6LL);
468 
469 #if !IPSEC_NEXUS
470 	/* Reset the stats in case as the interface may have been recycled */
471 	struct ifnet_stats_param stats;
472 	bzero(&stats, sizeof(struct ifnet_stats_param));
473 	ifnet_set_stat(ifp, &stats);
474 #endif // !IPSEC_NEXUS
475 
476 	return 0;
477 }
478 
479 #if IPSEC_NEXUS
480 
481 static uuid_t ipsec_nx_dom_prov;
482 
/* Nexus domain-provider init callback; nothing to set up. */
static errno_t
ipsec_nxdp_init(__unused kern_nexus_domain_provider_t domprov)
{
	return 0;
}
488 
/* Nexus domain-provider teardown callback; nothing to tear down. */
static void
ipsec_nxdp_fini(__unused kern_nexus_domain_provider_t domprov)
{
	// Ignore
}
494 
495 static errno_t
ipsec_register_nexus(void)496 ipsec_register_nexus(void)
497 {
498 	const struct kern_nexus_domain_provider_init dp_init = {
499 		.nxdpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
500 		.nxdpi_flags = 0,
501 		.nxdpi_init = ipsec_nxdp_init,
502 		.nxdpi_fini = ipsec_nxdp_fini
503 	};
504 	errno_t err = 0;
505 
506 	/* ipsec_nxdp_init() is called before this function returns */
507 	err = kern_nexus_register_domain_provider(NEXUS_TYPE_NET_IF,
508 	    (const uint8_t *) "com.apple.ipsec",
509 	    &dp_init, sizeof(dp_init),
510 	    &ipsec_nx_dom_prov);
511 	if (err != 0) {
512 		os_log_error(OS_LOG_DEFAULT, "%s: failed to register domain provider\n", __func__);
513 		return err;
514 	}
515 	return 0;
516 }
517 
518 static errno_t
ipsec_netif_prepare(kern_nexus_t nexus,ifnet_t ifp)519 ipsec_netif_prepare(kern_nexus_t nexus, ifnet_t ifp)
520 {
521 	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
522 	pcb->ipsec_netif_nexus = nexus;
523 	return ipsec_ifnet_set_attrs(ifp);
524 }
525 
/* Channel pre-connect callback: no per-channel context is needed. */
static errno_t
ipsec_nexus_pre_connect(kern_nexus_provider_t nxprov,
    proc_t p, kern_nexus_t nexus,
    nexus_port_t nexus_port, kern_channel_t channel, void **ch_ctx)
{
#pragma unused(nxprov, p)
#pragma unused(nexus, nexus_port, channel, ch_ctx)
	return 0;
}
535 
536 static errno_t
ipsec_nexus_connected(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel)537 ipsec_nexus_connected(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
538     kern_channel_t channel)
539 {
540 #pragma unused(nxprov, channel)
541 	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
542 	boolean_t ok = ifnet_is_attached(pcb->ipsec_ifp, 1);
543 	/* Mark the data path as ready */
544 	if (ok) {
545 		lck_mtx_lock(&pcb->ipsec_pcb_data_move_lock);
546 		IPSEC_SET_DATA_PATH_READY(pcb);
547 		lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
548 	}
549 	return ok ? 0 : ENXIO;
550 }
551 
/*
 * kpipe channel pre-disconnect callback: block until every thread inside
 * the data path has drained before the channel is torn down.
 */
static void
ipsec_nexus_pre_disconnect(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_t channel)
{
#pragma unused(nxprov, channel)
	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);

	/* A kpipe channel can only disconnect if kpipes were allocated */
	VERIFY(pcb->ipsec_kpipe_count != 0);

	/* Wait until all threads in the data paths are done. */
	ipsec_wait_data_move_drain(pcb);
}
564 
/*
 * netif channel pre-disconnect callback: quiesce the data path before
 * the channel is torn down.
 */
static void
ipsec_netif_pre_disconnect(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_t channel)
{
#pragma unused(nxprov, channel)
	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);

	/* Wait until all threads in the data paths are done. */
	ipsec_wait_data_move_drain(pcb);
}
575 
/*
 * Channel disconnected callback: forget the netif nexus pointer (when
 * this is the netif nexus) and drop the ifnet io refcount taken in
 * ipsec_nexus_connected().
 */
static void
ipsec_nexus_disconnected(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_t channel)
{
#pragma unused(nxprov, channel)
	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
	if (pcb->ipsec_netif_nexus == nexus) {
		pcb->ipsec_netif_nexus = NULL;
	}
	ifnet_decr_iorefcnt(pcb->ipsec_ifp);
}
587 
588 static errno_t
ipsec_kpipe_ring_init(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel,kern_channel_ring_t ring,boolean_t is_tx_ring,void ** ring_ctx)589 ipsec_kpipe_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
590     kern_channel_t channel, kern_channel_ring_t ring, boolean_t is_tx_ring,
591     void **ring_ctx)
592 {
593 #pragma unused(nxprov)
594 #pragma unused(channel)
595 	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
596 	uint8_t ring_idx;
597 
598 	for (ring_idx = 0; ring_idx < pcb->ipsec_kpipe_count; ring_idx++) {
599 		if (!uuid_compare(channel->ch_info->cinfo_nx_uuid, pcb->ipsec_kpipe_uuid[ring_idx])) {
600 			break;
601 		}
602 	}
603 
604 	if (ring_idx == pcb->ipsec_kpipe_count) {
605 		uuid_string_t uuidstr;
606 		uuid_unparse(channel->ch_info->cinfo_nx_uuid, uuidstr);
607 		os_log_error(OS_LOG_DEFAULT, "%s: %s cannot find channel %s\n", __func__, pcb->ipsec_if_xname, uuidstr);
608 		return ENOENT;
609 	}
610 
611 	*ring_ctx = (void *)(uintptr_t)ring_idx;
612 
613 	if (!is_tx_ring) {
614 		VERIFY(pcb->ipsec_kpipe_rxring[ring_idx] == NULL);
615 		pcb->ipsec_kpipe_rxring[ring_idx] = ring;
616 	} else {
617 		VERIFY(pcb->ipsec_kpipe_txring[ring_idx] == NULL);
618 		pcb->ipsec_kpipe_txring[ring_idx] = ring;
619 	}
620 	return 0;
621 }
622 
623 static void
ipsec_kpipe_ring_fini(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring)624 ipsec_kpipe_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
625     kern_channel_ring_t ring)
626 {
627 #pragma unused(nxprov)
628 	bool found = false;
629 	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
630 
631 	for (unsigned int i = 0; i < pcb->ipsec_kpipe_count; i++) {
632 		if (pcb->ipsec_kpipe_rxring[i] == ring) {
633 			pcb->ipsec_kpipe_rxring[i] = NULL;
634 			found = true;
635 		} else if (pcb->ipsec_kpipe_txring[i] == ring) {
636 			pcb->ipsec_kpipe_txring[i] = NULL;
637 			found = true;
638 		}
639 	}
640 	VERIFY(found);
641 }
642 
643 static errno_t
ipsec_kpipe_sync_tx(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t tx_ring,uint32_t flags)644 ipsec_kpipe_sync_tx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
645     kern_channel_ring_t tx_ring, uint32_t flags)
646 {
647 #pragma unused(nxprov)
648 #pragma unused(flags)
649 	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
650 
651 	if (!ipsec_data_move_begin(pcb)) {
652 		os_log_info(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
653 		return 0;
654 	}
655 
656 	lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
657 
658 	if (!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
659 		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
660 		ipsec_data_move_end(pcb);
661 		return 0;
662 	}
663 
664 	VERIFY(pcb->ipsec_kpipe_count);
665 
666 	kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
667 	if (tx_slot == NULL) {
668 		// Nothing to write, bail
669 		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
670 		ipsec_data_move_end(pcb);
671 		return 0;
672 	}
673 
674 	// Signal the netif ring to read
675 	kern_channel_ring_t rx_ring = pcb->ipsec_netif_rxring[0];
676 	lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
677 
678 	if (rx_ring != NULL) {
679 		kern_channel_notify(rx_ring, 0);
680 	}
681 
682 	ipsec_data_move_end(pcb);
683 	return 0;
684 }
685 
/*
 * Encrypt an outbound mbuf through the IPsec output path for `interface'.
 *
 * `data' must begin with the IP header; its version field selects the
 * IPv4 or IPv6 path.  Returns the encrypted mbuf (chain), or NULL when
 * the packet was consumed or dropped.  On every error path the mbuf is
 * freed here — the caller must not touch `data' after calling.
 *
 * NOTE(review): when the packet gets tunneled into the other address
 * family (tunneled == 6 on the v4 path, == 4 on the v6 path), the mbuf
 * is currently lost, as the TODO comments below record.
 */
static mbuf_t
ipsec_encrypt_mbuf(ifnet_t interface,
    mbuf_t data)
{
	struct ipsec_output_state ipsec_state;
	int error = 0;
	uint32_t af;  // NOTE(review): assigned per address family below but never read in this function

	// Make sure this packet isn't looping through the interface
	if (necp_get_last_interface_index_from_packet(data) == interface->if_index) {
		error = -1;
		goto ipsec_output_err;
	}

	// Mark the interface so NECP can evaluate tunnel policy
	necp_mark_packet_from_interface(data, interface);

	struct ip *ip = mtod(data, struct ip *);
	u_int ip_version = ip->ip_v;

	switch (ip_version) {
	case 4: {
		af = AF_INET;

		// Hand the packet to the IPv4 IPsec output state machine
		memset(&ipsec_state, 0, sizeof(ipsec_state));
		ipsec_state.m = data;
		ipsec_state.dst = (struct sockaddr *)&ip->ip_dst;
		memset(&ipsec_state.ro, 0, sizeof(ipsec_state.ro));

		error = ipsec4_interface_output(&ipsec_state, interface);
		if (error == 0 && ipsec_state.tunneled == 6) {
			// Tunneled in IPv6 - packet is gone
			// TODO: Don't lose mbuf
			data = NULL;
			goto done;
		}

		// ipsec4_interface_output may have replaced the mbuf chain
		data = ipsec_state.m;
		if (error || data == NULL) {
			if (error) {
				os_log_error(OS_LOG_DEFAULT, "ipsec_encrypt_mbuf: ipsec4_output error %d\n", error);
			}
			goto ipsec_output_err;
		}
		goto done;
	}
	case 6: {
		af = AF_INET6;

		// Split the IPv6 header into its own mbuf before ESP processing
		data = ipsec6_splithdr(data);
		if (data == NULL) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_encrypt_mbuf: ipsec6_splithdr returned NULL\n");
			goto ipsec_output_err;
		}

		struct ip6_hdr *ip6 = mtod(data, struct ip6_hdr *);

		memset(&ipsec_state, 0, sizeof(ipsec_state));
		ipsec_state.m = data;
		ipsec_state.dst = (struct sockaddr *)&ip6->ip6_dst;
		memset(&ipsec_state.ro, 0, sizeof(ipsec_state.ro));

		error = ipsec6_interface_output(&ipsec_state, interface, &ip6->ip6_nxt, ipsec_state.m);
		if (error == 0 && ipsec_state.tunneled == 4) {
			// Tunneled in IPv4 - packet is gone
			// TODO: Don't lose mbuf
			data = NULL;
			goto done;
		}
		// ipsec6_interface_output may have replaced the mbuf chain
		data = ipsec_state.m;
		if (error || data == NULL) {
			if (error) {
				os_log_error(OS_LOG_DEFAULT, "ipsec_encrypt_mbuf: ipsec6_output error %d\n", error);
			}
			goto ipsec_output_err;
		}
		goto done;
	}
	default: {
		os_log_error(OS_LOG_DEFAULT, "ipsec_encrypt_mbuf: Received unknown packet version %d\n", ip_version);
		error = -1;
		goto ipsec_output_err;
	}
	}

done:
	return data;

ipsec_output_err:
	if (data) {
		mbuf_freem(data);
	}
	return NULL;
}
780 
781 static errno_t
ipsec_kpipe_sync_rx(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t rx_ring,uint32_t flags)782 ipsec_kpipe_sync_rx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
783     kern_channel_ring_t rx_ring, uint32_t flags)
784 {
785 #pragma unused(nxprov)
786 #pragma unused(flags)
787 	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
788 	struct kern_channel_ring_stat_increment rx_ring_stats;
789 	uint8_t ring_idx = (uint8_t)(uintptr_t)kern_channel_ring_get_context(rx_ring);
790 
791 	if (!ipsec_data_move_begin(pcb)) {
792 		os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
793 		return 0;
794 	}
795 
796 	lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
797 
798 	if (!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
799 		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
800 		ipsec_data_move_end(pcb);
801 		return 0;
802 	}
803 
804 	VERIFY(pcb->ipsec_kpipe_count);
805 	VERIFY(ring_idx <= pcb->ipsec_kpipe_count);
806 
807 	// Reclaim user-released slots
808 	(void) kern_channel_reclaim(rx_ring);
809 
810 	uint32_t avail = kern_channel_available_slot_count(rx_ring);
811 	if (avail == 0) {
812 		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
813 		os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d no room in rx_ring\n", __func__,
814 		    pcb->ipsec_if_xname, rx_ring->ckr_name, ring_idx);
815 		ipsec_data_move_end(pcb);
816 		return 0;
817 	}
818 
819 	kern_channel_ring_t tx_ring = pcb->ipsec_netif_txring[ring_idx];
820 	if (tx_ring == NULL) {
821 		// Net-If TX ring not set up yet, nothing to read
822 		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
823 		os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d bad netif_txring 1\n", __func__,
824 		    pcb->ipsec_if_xname, rx_ring->ckr_name, ring_idx);
825 		ipsec_data_move_end(pcb);
826 		return 0;
827 	}
828 
829 	struct netif_stats *nifs = &NX_NETIF_PRIVATE(pcb->ipsec_netif_nexus)->nif_stats;
830 
831 	// Unlock ipsec before entering ring
832 	lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
833 
834 	(void)kr_enter(tx_ring, TRUE);
835 
836 	// Lock again after entering and validate
837 	lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
838 	if (tx_ring != pcb->ipsec_netif_txring[ring_idx]) {
839 		// Ring no longer valid
840 		// Unlock first, then exit ring
841 		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
842 		kr_exit(tx_ring);
843 		os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d bad netif_txring 2\n", __func__,
844 		    pcb->ipsec_if_xname, rx_ring->ckr_name, ring_idx);
845 		ipsec_data_move_end(pcb);
846 		return 0;
847 	}
848 
849 	struct kern_channel_ring_stat_increment tx_ring_stats;
850 	bzero(&tx_ring_stats, sizeof(tx_ring_stats));
851 	kern_channel_slot_t tx_pslot = NULL;
852 	kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
853 	if (tx_slot == NULL) {
854 		// Nothing to read, don't bother signalling
855 		// Unlock first, then exit ring
856 		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
857 		kr_exit(tx_ring);
858 		ipsec_data_move_end(pcb);
859 		return 0;
860 	}
861 
862 	struct kern_pbufpool *rx_pp = rx_ring->ckr_pp;
863 	VERIFY(rx_pp != NULL);
864 	struct kern_pbufpool *tx_pp = tx_ring->ckr_pp;
865 	VERIFY(tx_pp != NULL);
866 	bzero(&rx_ring_stats, sizeof(rx_ring_stats));
867 	kern_channel_slot_t rx_pslot = NULL;
868 	kern_channel_slot_t rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);
869 	kern_packet_t tx_chain_ph = 0;
870 
871 	while (rx_slot != NULL && tx_slot != NULL) {
872 		size_t length = 0;
873 		mbuf_t data = NULL;
874 		errno_t error = 0;
875 
876 		// Allocate rx packet
877 		kern_packet_t rx_ph = 0;
878 		error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
879 		if (__improbable(error != 0)) {
880 			os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s: failed to allocate packet\n",
881 			    pcb->ipsec_ifp->if_xname);
882 			break;
883 		}
884 
885 		kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);
886 
887 		if (tx_ph == 0) {
888 			// Advance TX ring
889 			tx_pslot = tx_slot;
890 			tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
891 			kern_pbufpool_free(rx_pp, rx_ph);
892 			continue;
893 		}
894 		(void) kern_channel_slot_detach_packet(tx_ring, tx_slot, tx_ph);
895 		if (tx_chain_ph != 0) {
896 			kern_packet_append(tx_ph, tx_chain_ph);
897 		}
898 		tx_chain_ph = tx_ph;
899 
900 		// Advance TX ring
901 		tx_pslot = tx_slot;
902 		tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
903 
904 		kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
905 		VERIFY(tx_buf != NULL);
906 		uint8_t *tx_baddr = kern_buflet_get_data_address(tx_buf);
907 		VERIFY(tx_baddr != NULL);
908 		tx_baddr += kern_buflet_get_data_offset(tx_buf);
909 
910 		bpf_tap_packet_out(pcb->ipsec_ifp, DLT_RAW, tx_ph, NULL, 0);
911 
912 		length = MIN(kern_packet_get_data_length(tx_ph),
913 		    pcb->ipsec_slot_size);
914 
915 		// Increment TX stats
916 		tx_ring_stats.kcrsi_slots_transferred++;
917 		tx_ring_stats.kcrsi_bytes_transferred += length;
918 
919 		if (length > 0) {
920 			error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, &data);
921 			if (error == 0) {
922 				error = mbuf_copyback(data, 0, length, tx_baddr, MBUF_DONTWAIT);
923 				if (error == 0) {
924 					// Encrypt and send packet
925 					lck_mtx_lock(&pcb->ipsec_kpipe_encrypt_lock);
926 					data = ipsec_encrypt_mbuf(pcb->ipsec_ifp, data);
927 					lck_mtx_unlock(&pcb->ipsec_kpipe_encrypt_lock);
928 				} else {
929 					os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s - mbuf_copyback(%zu) error %d\n", pcb->ipsec_ifp->if_xname, length, error);
930 					STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
931 					STATS_INC(nifs, NETIF_STATS_DROP);
932 					mbuf_freem(data);
933 					data = NULL;
934 				}
935 			} else {
936 				os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s - mbuf_gethdr error %d\n", pcb->ipsec_ifp->if_xname, error);
937 				STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
938 				STATS_INC(nifs, NETIF_STATS_DROP);
939 			}
940 		} else {
941 			os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s - 0 length packet\n", pcb->ipsec_ifp->if_xname);
942 			STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
943 			STATS_INC(nifs, NETIF_STATS_DROP);
944 		}
945 
946 		if (data == NULL) {
947 			os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s: no encrypted packet to send\n", pcb->ipsec_ifp->if_xname);
948 			kern_pbufpool_free(rx_pp, rx_ph);
949 			break;
950 		}
951 
952 		length = mbuf_pkthdr_len(data);
953 		if (length > PP_BUF_SIZE_DEF(rx_pp)) {
954 			// Flush data
955 			mbuf_freem(data);
956 			kern_pbufpool_free(rx_pp, rx_ph);
957 			os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s: encrypted packet length %zu > %u\n",
958 			    pcb->ipsec_ifp->if_xname, length, PP_BUF_SIZE_DEF(rx_pp));
959 			continue;
960 		}
961 
962 		// Fillout rx packet
963 		kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
964 		VERIFY(rx_buf != NULL);
965 		void *rx_baddr = kern_buflet_get_data_address(rx_buf);
966 		VERIFY(rx_baddr != NULL);
967 
968 		// Copy-in data from mbuf to buflet
969 		mbuf_copydata(data, 0, length, (void *)rx_baddr);
970 		kern_packet_clear_flow_uuid(rx_ph);     // Zero flow id
971 
972 		// Finalize and attach the packet
973 		error = kern_buflet_set_data_offset(rx_buf, 0);
974 		VERIFY(error == 0);
975 		error = kern_buflet_set_data_length(rx_buf, (uint16_t)length);
976 		VERIFY(error == 0);
977 		error = kern_packet_finalize(rx_ph);
978 		VERIFY(error == 0);
979 		error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
980 		VERIFY(error == 0);
981 
982 		STATS_INC(nifs, NETIF_STATS_TX_PACKETS);
983 		STATS_INC(nifs, NETIF_STATS_TX_COPY_DIRECT);
984 
985 		rx_ring_stats.kcrsi_slots_transferred++;
986 		rx_ring_stats.kcrsi_bytes_transferred += length;
987 
988 		if (!pcb->ipsec_ext_ifdata_stats) {
989 			ifnet_stat_increment_out(pcb->ipsec_ifp, 1, (uint16_t)length, 0);
990 		}
991 
992 		mbuf_freem(data);
993 
994 		rx_pslot = rx_slot;
995 		rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
996 	}
997 
998 	if (rx_pslot) {
999 		kern_channel_advance_slot(rx_ring, rx_pslot);
1000 		kern_channel_increment_ring_net_stats(rx_ring, pcb->ipsec_ifp, &rx_ring_stats);
1001 	}
1002 
1003 	if (tx_chain_ph != 0) {
1004 		kern_pbufpool_free_chain(tx_pp, tx_chain_ph);
1005 	}
1006 
1007 	if (tx_pslot) {
1008 		kern_channel_advance_slot(tx_ring, tx_pslot);
1009 		kern_channel_increment_ring_net_stats(tx_ring, pcb->ipsec_ifp, &tx_ring_stats);
1010 		(void)kern_channel_reclaim(tx_ring);
1011 	}
1012 
1013 	/* always reenable output */
1014 	errno_t error = ifnet_enable_output(pcb->ipsec_ifp);
1015 	if (error != 0) {
1016 		os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx: ifnet_enable_output returned error %d\n", error);
1017 	}
1018 
1019 	// Unlock first, then exit ring
1020 	lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
1021 
1022 	if (tx_pslot != NULL) {
1023 		kern_channel_notify(tx_ring, 0);
1024 	}
1025 	kr_exit(tx_ring);
1026 
1027 	ipsec_data_move_end(pcb);
1028 	return 0;
1029 }
1030 
1031 static uint8_t
ipsec_find_tx_ring_by_svc(kern_packet_svc_class_t svc_class)1032 ipsec_find_tx_ring_by_svc(kern_packet_svc_class_t svc_class)
1033 {
1034 	switch (svc_class) {
1035 	case KPKT_SC_VO: {
1036 		return 0;
1037 	}
1038 	case KPKT_SC_VI: {
1039 		return 1;
1040 	}
1041 	case KPKT_SC_BE: {
1042 		return 2;
1043 	}
1044 	case KPKT_SC_BK: {
1045 		return 3;
1046 	}
1047 	default: {
1048 		VERIFY(0);
1049 		return 0;
1050 	}
1051 	}
1052 }
1053 
1054 static errno_t
ipsec_netif_ring_init(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel,kern_channel_ring_t ring,boolean_t is_tx_ring,void ** ring_ctx)1055 ipsec_netif_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
1056     kern_channel_t channel, kern_channel_ring_t ring, boolean_t is_tx_ring,
1057     void **ring_ctx)
1058 {
1059 #pragma unused(nxprov)
1060 #pragma unused(channel)
1061 	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
1062 
1063 	if (!is_tx_ring) {
1064 		VERIFY(pcb->ipsec_netif_rxring[0] == NULL);
1065 		pcb->ipsec_netif_rxring[0] = ring;
1066 	} else {
1067 		uint8_t ring_idx = 0;
1068 		if (ipsec_in_wmm_mode(pcb)) {
1069 			int err;
1070 			kern_packet_svc_class_t svc_class;
1071 			err = kern_channel_get_service_class(ring, &svc_class);
1072 			VERIFY(err == 0);
1073 			ring_idx = ipsec_find_tx_ring_by_svc(svc_class);
1074 			VERIFY(ring_idx < IPSEC_IF_WMM_RING_COUNT);
1075 		}
1076 
1077 		*ring_ctx = (void *)(uintptr_t)ring_idx;
1078 
1079 		VERIFY(pcb->ipsec_netif_txring[ring_idx] == NULL);
1080 		pcb->ipsec_netif_txring[ring_idx] = ring;
1081 	}
1082 	return 0;
1083 }
1084 
1085 static void
ipsec_netif_ring_fini(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring)1086 ipsec_netif_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
1087     kern_channel_ring_t ring)
1088 {
1089 #pragma unused(nxprov)
1090 	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
1091 	bool found = false;
1092 
1093 	for (int i = 0; i < IPSEC_NETIF_MAX_RX_RING_COUNT; i++) {
1094 		if (pcb->ipsec_netif_rxring[i] == ring) {
1095 			pcb->ipsec_netif_rxring[i] = NULL;
1096 			VERIFY(!found);
1097 			found = true;
1098 		}
1099 	}
1100 	for (int i = 0; i < IPSEC_NETIF_MAX_TX_RING_COUNT; i++) {
1101 		if (pcb->ipsec_netif_txring[i] == ring) {
1102 			pcb->ipsec_netif_txring[i] = NULL;
1103 			VERIFY(!found);
1104 			found = true;
1105 		}
1106 	}
1107 	VERIFY(found);
1108 }
1109 
1110 static bool
ipsec_netif_check_policy(ifnet_t interface,mbuf_t data)1111 ipsec_netif_check_policy(ifnet_t interface, mbuf_t data)
1112 {
1113 	necp_kernel_policy_result necp_result = 0;
1114 	necp_kernel_policy_result_parameter necp_result_parameter = {};
1115 	uint32_t necp_matched_policy_id = 0;
1116 	struct ip_out_args args4 = { };
1117 	struct ip6_out_args args6 = { };
1118 
1119 	// This packet has been marked with IP level policy, do not mark again.
1120 	if (data && data->m_pkthdr.necp_mtag.necp_policy_id >= NECP_KERNEL_POLICY_ID_FIRST_VALID_IP) {
1121 		return true;
1122 	}
1123 
1124 	size_t length = mbuf_pkthdr_len(data);
1125 	if (length < sizeof(struct ip)) {
1126 		return false;
1127 	}
1128 
1129 	struct ip *ip = mtod(data, struct ip *);
1130 	u_int ip_version = ip->ip_v;
1131 	switch (ip_version) {
1132 	case 4: {
1133 		if (interface != NULL) {
1134 			args4.ipoa_flags |= IPOAF_BOUND_IF;
1135 			args4.ipoa_boundif = interface->if_index;
1136 		}
1137 		necp_matched_policy_id = necp_ip_output_find_policy_match(data, IP_OUTARGS, &args4, NULL,
1138 		    &necp_result, &necp_result_parameter);
1139 		break;
1140 	}
1141 	case 6: {
1142 		if (interface != NULL) {
1143 			args6.ip6oa_flags |= IP6OAF_BOUND_IF;
1144 			args6.ip6oa_boundif = interface->if_index;
1145 		}
1146 		necp_matched_policy_id = necp_ip6_output_find_policy_match(data, IPV6_OUTARGS, &args6, NULL,
1147 		    &necp_result, &necp_result_parameter);
1148 		break;
1149 	}
1150 	default: {
1151 		return false;
1152 	}
1153 	}
1154 
1155 	if (necp_result == NECP_KERNEL_POLICY_RESULT_DROP ||
1156 	    necp_result == NECP_KERNEL_POLICY_RESULT_SOCKET_DIVERT) {
1157 		/* Drop and flow divert packets should be blocked at the IP layer */
1158 		return false;
1159 	}
1160 
1161 	necp_mark_packet_from_ip(data, necp_matched_policy_id);
1162 	return true;
1163 }
1164 
/*
 * TX sync callback for the ipsec netif nexus.
 *
 * Drains the netif TX ring.  If a kernel pipe is attached
 * (IPSEC_FLAGS_KPIPE_ALLOCATED with a nonzero kpipe count), the packets
 * are left in place for the kpipe path and the matching kpipe RX ring is
 * simply notified.  Otherwise each TX packet is copied into a freshly
 * allocated mbuf, checked against NECP policy, and handed to
 * ipsec_output() for injection into the BSD stack.  Detached TX packets
 * are accumulated on tx_chain_ph so they can be returned to the pool in
 * a single free-chain call at the end.
 *
 * Always returns 0; per-packet failures are logged and counted in nifs.
 */
static errno_t
ipsec_netif_sync_tx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t tx_ring, uint32_t flags)
{
#pragma unused(nxprov)
#pragma unused(flags)
	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);

	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;

	// Refuse to run while the data path is being torn down.
	if (!ipsec_data_move_begin(pcb)) {
		os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
		return 0;
	}

	lck_rw_lock_shared(&pcb->ipsec_pcb_lock);

	struct kern_channel_ring_stat_increment tx_ring_stats;
	bzero(&tx_ring_stats, sizeof(tx_ring_stats));
	kern_channel_slot_t tx_pslot = NULL;
	kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
	kern_packet_t tx_chain_ph = 0;

	STATS_INC(nifs, NETIF_STATS_TX_SYNC);

	if (tx_slot == NULL) {
		// Nothing to write, don't bother signalling
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		ipsec_data_move_end(pcb);
		return 0;
	}

	if (pcb->ipsec_kpipe_count &&
	    ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
		// Select the corresponding kpipe rx ring
		uint8_t ring_idx = (uint8_t)(uintptr_t)kern_channel_ring_get_context(tx_ring);
		VERIFY(ring_idx < IPSEC_IF_MAX_RING_COUNT);
		kern_channel_ring_t rx_ring = pcb->ipsec_kpipe_rxring[ring_idx];

		// Unlock while calling notify
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);

		// Signal the kernel pipe ring to read
		if (rx_ring != NULL) {
			kern_channel_notify(rx_ring, 0);
		}

		ipsec_data_move_end(pcb);
		return 0;
	}

	// If we're here, we're injecting into the BSD stack
	while (tx_slot != NULL) {
		size_t length = 0;
		mbuf_t data = NULL;

		kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);

		if (tx_ph == 0) {
			// Advance TX ring
			tx_pslot = tx_slot;
			tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
			continue;
		}
		// Detach the packet from its slot and chain it for a single
		// bulk free after the loop.
		(void) kern_channel_slot_detach_packet(tx_ring, tx_slot, tx_ph);
		if (tx_chain_ph != 0) {
			kern_packet_append(tx_ph, tx_chain_ph);
		}
		tx_chain_ph = tx_ph;

		// Advance TX ring
		tx_pslot = tx_slot;
		tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);

		kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
		VERIFY(tx_buf != NULL);
		uint8_t *tx_baddr = kern_buflet_get_data_address(tx_buf);
		VERIFY(tx_baddr != 0);
		tx_baddr += kern_buflet_get_data_offset(tx_buf);

		bpf_tap_packet_out(pcb->ipsec_ifp, DLT_RAW, tx_ph, NULL, 0);

		// Clamp the copy length to the configured slot size.
		length = MIN(kern_packet_get_data_length(tx_ph),
		    pcb->ipsec_slot_size);

		if (length > 0) {
			errno_t error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, &data);
			if (error == 0) {
				error = mbuf_copyback(data, 0, length, tx_baddr, MBUF_DONTWAIT);
				if (error == 0) {
					// Mark packet from policy
					uint32_t policy_id = kern_packet_get_policy_id(tx_ph);
					necp_mark_packet_from_ip(data, policy_id);

					// Check policy with NECP
					if (!ipsec_netif_check_policy(pcb->ipsec_ifp, data)) {
						os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - failed policy check\n", pcb->ipsec_ifp->if_xname);
						STATS_INC(nifs, NETIF_STATS_DROP);
						mbuf_freem(data);
						data = NULL;
					} else {
						// Send through encryption
						// NOTE(review): data is not freed here after the
						// call, so ipsec_output() appears to consume the
						// mbuf on both success and failure — confirm.
						error = ipsec_output(pcb->ipsec_ifp, data);
						if (error != 0) {
							os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - ipsec_output error %d\n", pcb->ipsec_ifp->if_xname, error);
						}
					}
				} else {
					os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - mbuf_copyback(%zu) error %d\n", pcb->ipsec_ifp->if_xname, length, error);
					STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
					STATS_INC(nifs, NETIF_STATS_DROP);
					mbuf_freem(data);
					data = NULL;
				}
			} else {
				os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - mbuf_gethdr error %d\n", pcb->ipsec_ifp->if_xname, error);
				STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
				STATS_INC(nifs, NETIF_STATS_DROP);
			}
		} else {
			os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - 0 length packet\n", pcb->ipsec_ifp->if_xname);
			STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
			STATS_INC(nifs, NETIF_STATS_DROP);
		}

		// mbuf allocation/copy failed or the packet was dropped: stop
		// draining the ring for this sync pass.
		if (data == NULL) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s: no encrypted packet to send\n", pcb->ipsec_ifp->if_xname);
			break;
		}

		STATS_INC(nifs, NETIF_STATS_TX_PACKETS);
		STATS_INC(nifs, NETIF_STATS_TX_COPY_MBUF);

		tx_ring_stats.kcrsi_slots_transferred++;
		tx_ring_stats.kcrsi_bytes_transferred += length;
	}

	// Return all detached TX packets to the pool in one chain.
	if (tx_chain_ph != 0) {
		kern_pbufpool_free_chain(tx_ring->ckr_pp, tx_chain_ph);
	}

	if (tx_pslot) {
		kern_channel_advance_slot(tx_ring, tx_pslot);
		kern_channel_increment_ring_net_stats(tx_ring, pcb->ipsec_ifp, &tx_ring_stats);
		(void)kern_channel_reclaim(tx_ring);
	}

	lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
	ipsec_data_move_end(pcb);

	return 0;
}
1317 
1318 static errno_t
ipsec_netif_tx_doorbell_one(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring,uint32_t flags,uint8_t ring_idx)1319 ipsec_netif_tx_doorbell_one(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
1320     kern_channel_ring_t ring, uint32_t flags, uint8_t ring_idx)
1321 {
1322 #pragma unused(nxprov)
1323 	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
1324 	boolean_t more = false;
1325 	errno_t rc = 0;
1326 
1327 	VERIFY((flags & KERN_NEXUS_TXDOORBELLF_ASYNC_REFILL) == 0);
1328 
1329 	/*
1330 	 * Refill and sync the ring; we may be racing against another thread doing
1331 	 * an RX sync that also wants to do kr_enter(), and so use the blocking
1332 	 * variant here.
1333 	 */
1334 	rc = kern_channel_tx_refill_canblock(ring, UINT32_MAX, UINT32_MAX, true, &more);
1335 	if (rc != 0 && rc != EAGAIN && rc != EBUSY) {
1336 		os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s tx refill failed %d\n", __func__,
1337 		    pcb->ipsec_if_xname, ring->ckr_name, rc);
1338 	}
1339 
1340 	(void) kr_enter(ring, TRUE);
1341 	lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
1342 	if (ring != pcb->ipsec_netif_txring[ring_idx]) {
1343 		// ring no longer valid
1344 		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
1345 		kr_exit(ring);
1346 		os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d bad netif_txring 3\n", __func__,
1347 		    pcb->ipsec_if_xname, ring->ckr_name, ring_idx);
1348 		return ENXIO;
1349 	}
1350 
1351 	if (pcb->ipsec_kpipe_count) {
1352 		uint32_t tx_available = kern_channel_available_slot_count(ring);
1353 		if (pcb->ipsec_netif_txring_size > 0 &&
1354 		    tx_available >= pcb->ipsec_netif_txring_size - 1) {
1355 			// No room left in tx ring, disable output for now
1356 			errno_t error = ifnet_disable_output(pcb->ipsec_ifp);
1357 			if (error != 0) {
1358 				os_log_error(OS_LOG_DEFAULT, "ipsec_netif_tx_doorbell: ifnet_disable_output returned error %d\n", error);
1359 			}
1360 		}
1361 	}
1362 
1363 	if (pcb->ipsec_kpipe_count) {
1364 		kern_channel_ring_t rx_ring = pcb->ipsec_kpipe_rxring[ring_idx];
1365 
1366 		// Unlock while calling notify
1367 		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
1368 		// Signal the kernel pipe ring to read
1369 		if (rx_ring != NULL) {
1370 			kern_channel_notify(rx_ring, 0);
1371 		}
1372 	} else {
1373 		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
1374 	}
1375 
1376 	kr_exit(ring);
1377 
1378 	return 0;
1379 }
1380 
1381 static errno_t
ipsec_netif_tx_doorbell(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring,__unused uint32_t flags)1382 ipsec_netif_tx_doorbell(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
1383     kern_channel_ring_t ring, __unused uint32_t flags)
1384 {
1385 	errno_t ret = 0;
1386 	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
1387 
1388 	if (!ipsec_data_move_begin(pcb)) {
1389 		os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
1390 		return 0;
1391 	}
1392 
1393 	if (ipsec_in_wmm_mode(pcb)) {
1394 		for (uint8_t i = 0; i < IPSEC_IF_WMM_RING_COUNT; i++) {
1395 			kern_channel_ring_t nring = pcb->ipsec_netif_txring[i];
1396 			ret = ipsec_netif_tx_doorbell_one(nxprov, nexus, nring, flags, i);
1397 			if (ret) {
1398 				break;
1399 			}
1400 		}
1401 	} else {
1402 		ret = ipsec_netif_tx_doorbell_one(nxprov, nexus, ring, flags, 0);
1403 	}
1404 
1405 	ipsec_data_move_end(pcb);
1406 	return ret;
1407 }
1408 
1409 static errno_t
ipsec_netif_sync_rx(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t rx_ring,uint32_t flags)1410 ipsec_netif_sync_rx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
1411     kern_channel_ring_t rx_ring, uint32_t flags)
1412 {
1413 #pragma unused(nxprov)
1414 #pragma unused(flags)
1415 	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
1416 	struct kern_channel_ring_stat_increment rx_ring_stats;
1417 
1418 	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
1419 
1420 	if (!ipsec_data_move_begin(pcb)) {
1421 		os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
1422 		return 0;
1423 	}
1424 
1425 	lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
1426 
1427 	// Reclaim user-released slots
1428 	(void) kern_channel_reclaim(rx_ring);
1429 
1430 	STATS_INC(nifs, NETIF_STATS_RX_SYNC);
1431 
1432 	uint32_t avail = kern_channel_available_slot_count(rx_ring);
1433 	if (avail == 0) {
1434 		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
1435 		ipsec_data_move_end(pcb);
1436 		return 0;
1437 	}
1438 
1439 	struct kern_pbufpool *rx_pp = rx_ring->ckr_pp;
1440 	VERIFY(rx_pp != NULL);
1441 	bzero(&rx_ring_stats, sizeof(rx_ring_stats));
1442 	kern_channel_slot_t rx_pslot = NULL;
1443 	kern_channel_slot_t rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);
1444 
1445 	while (rx_slot != NULL) {
1446 		// Check for a waiting packet
1447 		lck_mtx_lock(&pcb->ipsec_input_chain_lock);
1448 		mbuf_t data = pcb->ipsec_input_chain;
1449 		if (data == NULL) {
1450 			lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
1451 			break;
1452 		}
1453 
1454 		// Allocate rx packet
1455 		kern_packet_t rx_ph = 0;
1456 		errno_t error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
1457 		if (__improbable(error != 0)) {
1458 			STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_PKT);
1459 			STATS_INC(nifs, NETIF_STATS_DROP);
1460 			lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
1461 			break;
1462 		}
1463 
1464 		// Advance waiting packets
1465 		if (pcb->ipsec_input_chain_count > 0) {
1466 			pcb->ipsec_input_chain_count--;
1467 		}
1468 		pcb->ipsec_input_chain = data->m_nextpkt;
1469 		data->m_nextpkt = NULL;
1470 		if (pcb->ipsec_input_chain == NULL) {
1471 			pcb->ipsec_input_chain_last = NULL;
1472 		}
1473 		lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
1474 
1475 		size_t length = mbuf_pkthdr_len(data);
1476 
1477 		if (length < sizeof(struct ip)) {
1478 			// Flush data
1479 			mbuf_freem(data);
1480 			kern_pbufpool_free(rx_pp, rx_ph);
1481 			STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
1482 			STATS_INC(nifs, NETIF_STATS_DROP);
1483 			os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: legacy decrypted packet length cannot hold IP %zu < %zu\n",
1484 			    pcb->ipsec_ifp->if_xname, length, sizeof(struct ip));
1485 			continue;
1486 		}
1487 
1488 		uint32_t af = 0;
1489 		struct ip *ip = mtod(data, struct ip *);
1490 		u_int ip_version = ip->ip_v;
1491 		switch (ip_version) {
1492 		case 4: {
1493 			af = AF_INET;
1494 			break;
1495 		}
1496 		case 6: {
1497 			af = AF_INET6;
1498 			break;
1499 		}
1500 		default: {
1501 			os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: legacy unknown ip version %u\n",
1502 			    pcb->ipsec_ifp->if_xname, ip_version);
1503 			break;
1504 		}
1505 		}
1506 
1507 		if (length > PP_BUF_SIZE_DEF(rx_pp) ||
1508 		    (pcb->ipsec_frag_size_set && length > pcb->ipsec_input_frag_size)) {
1509 			// We need to fragment to send up into the netif
1510 
1511 			u_int32_t fragment_mtu = PP_BUF_SIZE_DEF(rx_pp);
1512 			if (pcb->ipsec_frag_size_set &&
1513 			    pcb->ipsec_input_frag_size < PP_BUF_SIZE_DEF(rx_pp)) {
1514 				fragment_mtu = pcb->ipsec_input_frag_size;
1515 			}
1516 
1517 			mbuf_t fragment_chain = NULL;
1518 			switch (af) {
1519 			case AF_INET: {
1520 				// ip_fragment expects the length in host order
1521 				ip->ip_len = ntohs(ip->ip_len);
1522 
1523 				// ip_fragment will modify the original data, don't free
1524 				int fragment_error = ip_fragment(data, pcb->ipsec_ifp, fragment_mtu, TRUE);
1525 				if (fragment_error == 0 && data != NULL) {
1526 					fragment_chain = data;
1527 				} else {
1528 					STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
1529 					STATS_INC(nifs, NETIF_STATS_DROP);
1530 					os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: failed to fragment IPv4 packet of length %zu (%d)\n",
1531 					    pcb->ipsec_ifp->if_xname, length, fragment_error);
1532 				}
1533 				break;
1534 			}
1535 			case AF_INET6: {
1536 				if (length < sizeof(struct ip6_hdr)) {
1537 					mbuf_freem(data);
1538 					STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
1539 					STATS_INC(nifs, NETIF_STATS_DROP);
1540 					os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: failed to fragment IPv6 packet of length %zu < %zu\n",
1541 					    pcb->ipsec_ifp->if_xname, length, sizeof(struct ip6_hdr));
1542 				} else {
1543 					// ip6_do_fragmentation will free the original data on success only
1544 					struct ip6_hdr *ip6 = mtod(data, struct ip6_hdr *);
1545 
1546 					int fragment_error = ip6_do_fragmentation(&data, 0, pcb->ipsec_ifp, sizeof(struct ip6_hdr),
1547 					    ip6, NULL, fragment_mtu, ip6->ip6_nxt, htonl(ip6_randomid()));
1548 					if (fragment_error == 0 && data != NULL) {
1549 						fragment_chain = data;
1550 					} else {
1551 						mbuf_freem(data);
1552 						STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
1553 						STATS_INC(nifs, NETIF_STATS_DROP);
1554 						os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: failed to fragment IPv6 packet of length %zu (%d)\n",
1555 						    pcb->ipsec_ifp->if_xname, length, fragment_error);
1556 					}
1557 				}
1558 				break;
1559 			}
1560 			default: {
1561 				// Cannot fragment unknown families
1562 				mbuf_freem(data);
1563 				STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
1564 				STATS_INC(nifs, NETIF_STATS_DROP);
1565 				os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: uknown legacy decrypted packet length %zu > %u\n",
1566 				    pcb->ipsec_ifp->if_xname, length, PP_BUF_SIZE_DEF(rx_pp));
1567 				break;
1568 			}
1569 			}
1570 
1571 			if (fragment_chain != NULL) {
1572 				// Add fragments to chain before continuing
1573 				lck_mtx_lock(&pcb->ipsec_input_chain_lock);
1574 				if (pcb->ipsec_input_chain != NULL) {
1575 					pcb->ipsec_input_chain_last->m_nextpkt = fragment_chain;
1576 				} else {
1577 					pcb->ipsec_input_chain = fragment_chain;
1578 				}
1579 				pcb->ipsec_input_chain_count++;
1580 				while (fragment_chain->m_nextpkt) {
1581 					VERIFY(fragment_chain != fragment_chain->m_nextpkt);
1582 					fragment_chain = fragment_chain->m_nextpkt;
1583 					pcb->ipsec_input_chain_count++;
1584 				}
1585 				pcb->ipsec_input_chain_last = fragment_chain;
1586 				lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
1587 			}
1588 
1589 			// Make sure to free unused rx packet
1590 			kern_pbufpool_free(rx_pp, rx_ph);
1591 
1592 			continue;
1593 		}
1594 
1595 		mbuf_pkthdr_setrcvif(data, pcb->ipsec_ifp);
1596 
1597 		// Fillout rx packet
1598 		kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
1599 		VERIFY(rx_buf != NULL);
1600 		void *rx_baddr = kern_buflet_get_data_address(rx_buf);
1601 		VERIFY(rx_baddr != NULL);
1602 
1603 		// Copy-in data from mbuf to buflet
1604 		mbuf_copydata(data, 0, length, (void *)rx_baddr);
1605 		kern_packet_clear_flow_uuid(rx_ph);     // Zero flow id
1606 
1607 		// Finalize and attach the packet
1608 		error = kern_buflet_set_data_offset(rx_buf, 0);
1609 		VERIFY(error == 0);
1610 		error = kern_buflet_set_data_length(rx_buf, (uint16_t)length);
1611 		VERIFY(error == 0);
1612 		error = kern_packet_set_headroom(rx_ph, 0);
1613 		VERIFY(error == 0);
1614 		error = kern_packet_finalize(rx_ph);
1615 		VERIFY(error == 0);
1616 		error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
1617 		VERIFY(error == 0);
1618 
1619 		STATS_INC(nifs, NETIF_STATS_RX_PACKETS);
1620 		STATS_INC(nifs, NETIF_STATS_RX_COPY_MBUF);
1621 		bpf_tap_packet_in(pcb->ipsec_ifp, DLT_RAW, rx_ph, NULL, 0);
1622 
1623 		rx_ring_stats.kcrsi_slots_transferred++;
1624 		rx_ring_stats.kcrsi_bytes_transferred += length;
1625 
1626 		if (!pcb->ipsec_ext_ifdata_stats) {
1627 			ifnet_stat_increment_in(pcb->ipsec_ifp, 1, (uint16_t)length, 0);
1628 		}
1629 
1630 		mbuf_freem(data);
1631 
1632 		// Advance ring
1633 		rx_pslot = rx_slot;
1634 		rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
1635 	}
1636 
1637 	for (uint8_t ring_idx = 0; ring_idx < pcb->ipsec_kpipe_count; ring_idx++) {
1638 		struct kern_channel_ring_stat_increment tx_ring_stats;
1639 		bzero(&tx_ring_stats, sizeof(tx_ring_stats));
1640 		kern_channel_ring_t tx_ring = pcb->ipsec_kpipe_txring[ring_idx];
1641 		kern_channel_slot_t tx_pslot = NULL;
1642 		kern_channel_slot_t tx_slot = NULL;
1643 		if (tx_ring == NULL) {
1644 			// Net-If TX ring not set up yet, nothing to read
1645 			goto done;
1646 		}
1647 		// Unlock ipsec before entering ring
1648 		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
1649 
1650 		(void)kr_enter(tx_ring, TRUE);
1651 
1652 		// Lock again after entering and validate
1653 		lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
1654 
1655 		if (tx_ring != pcb->ipsec_kpipe_txring[ring_idx]) {
1656 			goto done;
1657 		}
1658 
1659 		tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
1660 		if (tx_slot == NULL) {
1661 			// Nothing to read, don't bother signalling
1662 			goto done;
1663 		}
1664 
1665 		while (rx_slot != NULL && tx_slot != NULL) {
1666 			size_t length = 0;
1667 			mbuf_t data = NULL;
1668 			errno_t error = 0;
1669 			uint32_t af;
1670 
1671 			// Allocate rx packet
1672 			kern_packet_t rx_ph = 0;
1673 			error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
1674 			if (__improbable(error != 0)) {
1675 				STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_PKT);
1676 				STATS_INC(nifs, NETIF_STATS_DROP);
1677 				break;
1678 			}
1679 
1680 			kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);
1681 
1682 			// Advance TX ring
1683 			tx_pslot = tx_slot;
1684 			tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
1685 
1686 			if (tx_ph == 0) {
1687 				kern_pbufpool_free(rx_pp, rx_ph);
1688 				continue;
1689 			}
1690 
1691 			kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
1692 			VERIFY(tx_buf != NULL);
1693 			uint8_t *tx_baddr = kern_buflet_get_data_address(tx_buf);
1694 			VERIFY(tx_baddr != 0);
1695 			tx_baddr += kern_buflet_get_data_offset(tx_buf);
1696 
1697 			length = MIN(kern_packet_get_data_length(tx_ph),
1698 			    pcb->ipsec_slot_size);
1699 
1700 			// Increment TX stats
1701 			tx_ring_stats.kcrsi_slots_transferred++;
1702 			tx_ring_stats.kcrsi_bytes_transferred += length;
1703 
1704 			if (length >= sizeof(struct ip)) {
1705 				error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, &data);
1706 				if (error == 0) {
1707 					error = mbuf_copyback(data, 0, length, tx_baddr, MBUF_DONTWAIT);
1708 					if (error == 0) {
1709 						// Check for wake packet flag
1710 						uuid_t flow_uuid;
1711 						kern_packet_get_flow_uuid(tx_ph, &flow_uuid);
1712 						u_int8_t *id_8 = (u_int8_t *)(uintptr_t)flow_uuid;
1713 						if ((id_8[0] & IPSEC_KPIPE_FLAG_WAKE_PKT) == IPSEC_KPIPE_FLAG_WAKE_PKT) {
1714 							os_log_info(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: wake packet flag is set\n",
1715 							    pcb->ipsec_ifp->if_xname);
1716 							data->m_pkthdr.pkt_flags |= PKTF_WAKE_PKT;
1717 						}
1718 
1719 						lck_mtx_lock(&pcb->ipsec_kpipe_decrypt_lock);
1720 						struct ip *ip = mtod(data, struct ip *);
1721 						u_int ip_version = ip->ip_v;
1722 						switch (ip_version) {
1723 						case 4: {
1724 							af = AF_INET;
1725 							ip->ip_len = ntohs(ip->ip_len) - sizeof(struct ip);
1726 							ip->ip_off = ntohs(ip->ip_off);
1727 
1728 							if (length < ip->ip_len) {
1729 								os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: IPv4 packet length too short (%zu < %u)\n",
1730 								    pcb->ipsec_ifp->if_xname, length, ip->ip_len);
1731 								STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
1732 								STATS_INC(nifs, NETIF_STATS_DROP);
1733 								mbuf_freem(data);
1734 								data = NULL;
1735 							} else {
1736 								data = esp4_input_extended(data, sizeof(struct ip), pcb->ipsec_ifp);
1737 							}
1738 							break;
1739 						}
1740 						case 6: {
1741 							if (length < sizeof(struct ip6_hdr)) {
1742 								os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: IPv6 packet length too short for header %zu\n",
1743 								    pcb->ipsec_ifp->if_xname, length);
1744 								STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
1745 								STATS_INC(nifs, NETIF_STATS_DROP);
1746 								mbuf_freem(data);
1747 								data = NULL;
1748 							} else {
1749 								af = AF_INET6;
1750 								struct ip6_hdr *ip6 = mtod(data, struct ip6_hdr *);
1751 								const size_t ip6_len = sizeof(*ip6) + ntohs(ip6->ip6_plen);
1752 								if (length < ip6_len) {
1753 									os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: IPv6 packet length too short (%zu < %zu)\n",
1754 									    pcb->ipsec_ifp->if_xname, length, ip6_len);
1755 									STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
1756 									STATS_INC(nifs, NETIF_STATS_DROP);
1757 									mbuf_freem(data);
1758 									data = NULL;
1759 								} else {
1760 									int offset = sizeof(struct ip6_hdr);
1761 									esp6_input_extended(&data, &offset, ip6->ip6_nxt, pcb->ipsec_ifp);
1762 								}
1763 							}
1764 							break;
1765 						}
1766 						default: {
1767 							os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: unknown ip version %u\n",
1768 							    pcb->ipsec_ifp->if_xname, ip_version);
1769 							STATS_INC(nifs, NETIF_STATS_DROP);
1770 							mbuf_freem(data);
1771 							data = NULL;
1772 							break;
1773 						}
1774 						}
1775 						lck_mtx_unlock(&pcb->ipsec_kpipe_decrypt_lock);
1776 					} else {
1777 						os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s - mbuf_copyback(%zu) error %d\n", pcb->ipsec_ifp->if_xname, length, error);
1778 						STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
1779 						STATS_INC(nifs, NETIF_STATS_DROP);
1780 						mbuf_freem(data);
1781 						data = NULL;
1782 					}
1783 				} else {
1784 					os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s - mbuf_gethdr error %d\n", pcb->ipsec_ifp->if_xname, error);
1785 					STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
1786 					STATS_INC(nifs, NETIF_STATS_DROP);
1787 				}
1788 			} else {
1789 				os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s - bad packet length %zu\n", pcb->ipsec_ifp->if_xname, length);
1790 				STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
1791 				STATS_INC(nifs, NETIF_STATS_DROP);
1792 			}
1793 
1794 			if (data == NULL) {
1795 				// Failed to get decrypted data data
1796 				kern_pbufpool_free(rx_pp, rx_ph);
1797 				continue;
1798 			}
1799 
1800 			length = mbuf_pkthdr_len(data);
1801 			if (length > PP_BUF_SIZE_DEF(rx_pp)) {
1802 				// Flush data
1803 				mbuf_freem(data);
1804 				kern_pbufpool_free(rx_pp, rx_ph);
1805 				STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
1806 				STATS_INC(nifs, NETIF_STATS_DROP);
1807 				os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: decrypted packet length %zu > %u\n",
1808 				    pcb->ipsec_ifp->if_xname, length, PP_BUF_SIZE_DEF(rx_pp));
1809 				continue;
1810 			}
1811 
1812 			mbuf_pkthdr_setrcvif(data, pcb->ipsec_ifp);
1813 
1814 			// Fillout rx packet
1815 			kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
1816 			VERIFY(rx_buf != NULL);
1817 			void *rx_baddr = kern_buflet_get_data_address(rx_buf);
1818 			VERIFY(rx_baddr != NULL);
1819 
1820 			// Copy-in data from mbuf to buflet
1821 			mbuf_copydata(data, 0, length, (void *)rx_baddr);
1822 			kern_packet_clear_flow_uuid(rx_ph);     // Zero flow id
1823 
1824 			// Finalize and attach the packet
1825 			error = kern_buflet_set_data_offset(rx_buf, 0);
1826 			VERIFY(error == 0);
1827 			error = kern_buflet_set_data_length(rx_buf, (uint16_t)length);
1828 			VERIFY(error == 0);
1829 			error = kern_packet_set_link_header_offset(rx_ph, 0);
1830 			VERIFY(error == 0);
1831 			error = kern_packet_set_network_header_offset(rx_ph, 0);
1832 			VERIFY(error == 0);
1833 			error = kern_packet_finalize(rx_ph);
1834 			VERIFY(error == 0);
1835 			error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
1836 			VERIFY(error == 0);
1837 
1838 			STATS_INC(nifs, NETIF_STATS_RX_PACKETS);
1839 			STATS_INC(nifs, NETIF_STATS_RX_COPY_DIRECT);
1840 			bpf_tap_packet_in(pcb->ipsec_ifp, DLT_RAW, rx_ph, NULL, 0);
1841 
1842 			rx_ring_stats.kcrsi_slots_transferred++;
1843 			rx_ring_stats.kcrsi_bytes_transferred += length;
1844 
1845 			if (!pcb->ipsec_ext_ifdata_stats) {
1846 				ifnet_stat_increment_in(pcb->ipsec_ifp, 1, (uint16_t)length, 0);
1847 			}
1848 
1849 			mbuf_freem(data);
1850 
1851 			rx_pslot = rx_slot;
1852 			rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
1853 		}
1854 
1855 done:
1856 		if (tx_pslot) {
1857 			kern_channel_advance_slot(tx_ring, tx_pslot);
1858 			kern_channel_increment_ring_net_stats(tx_ring, pcb->ipsec_ifp, &tx_ring_stats);
1859 			(void)kern_channel_reclaim(tx_ring);
1860 		}
1861 
1862 		// Unlock first, then exit ring
1863 		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
1864 		if (tx_ring != NULL) {
1865 			if (tx_pslot != NULL) {
1866 				kern_channel_notify(tx_ring, 0);
1867 			}
1868 			kr_exit(tx_ring);
1869 		}
1870 
1871 		lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
1872 	}
1873 
1874 	if (rx_pslot) {
1875 		kern_channel_advance_slot(rx_ring, rx_pslot);
1876 		kern_channel_increment_ring_net_stats(rx_ring, pcb->ipsec_ifp, &rx_ring_stats);
1877 	}
1878 
1879 
1880 	lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
1881 
1882 	ipsec_data_move_end(pcb);
1883 	return 0;
1884 }
1885 
1886 static errno_t
ipsec_nexus_ifattach(struct ipsec_pcb * pcb,struct ifnet_init_eparams * init_params,struct ifnet ** ifp)1887 ipsec_nexus_ifattach(struct ipsec_pcb *pcb,
1888     struct ifnet_init_eparams *init_params,
1889     struct ifnet **ifp)
1890 {
1891 	errno_t err;
1892 	nexus_controller_t controller = kern_nexus_shared_controller();
1893 	struct kern_nexus_net_init net_init;
1894 	struct kern_pbufpool_init pp_init;
1895 
1896 	nexus_name_t provider_name;
1897 	snprintf((char *)provider_name, sizeof(provider_name),
1898 	    "com.apple.netif.%s", pcb->ipsec_if_xname);
1899 
1900 	struct kern_nexus_provider_init prov_init = {
1901 		.nxpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
1902 		.nxpi_flags = NXPIF_VIRTUAL_DEVICE,
1903 		.nxpi_pre_connect = ipsec_nexus_pre_connect,
1904 		.nxpi_connected = ipsec_nexus_connected,
1905 		.nxpi_pre_disconnect = ipsec_netif_pre_disconnect,
1906 		.nxpi_disconnected = ipsec_nexus_disconnected,
1907 		.nxpi_ring_init = ipsec_netif_ring_init,
1908 		.nxpi_ring_fini = ipsec_netif_ring_fini,
1909 		.nxpi_slot_init = NULL,
1910 		.nxpi_slot_fini = NULL,
1911 		.nxpi_sync_tx = ipsec_netif_sync_tx,
1912 		.nxpi_sync_rx = ipsec_netif_sync_rx,
1913 		.nxpi_tx_doorbell = ipsec_netif_tx_doorbell,
1914 	};
1915 
1916 	nexus_attr_t nxa = NULL;
1917 	err = kern_nexus_attr_create(&nxa);
1918 	IPSEC_IF_VERIFY(err == 0);
1919 	if (err != 0) {
1920 		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_attr_create failed: %d\n",
1921 		    __func__, err);
1922 		goto failed;
1923 	}
1924 
1925 	uint64_t slot_buffer_size = pcb->ipsec_slot_size;
1926 	err = kern_nexus_attr_set(nxa, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
1927 	VERIFY(err == 0);
1928 
1929 	// Reset ring size for netif nexus to limit memory usage
1930 	uint64_t ring_size = pcb->ipsec_netif_ring_size;
1931 	err = kern_nexus_attr_set(nxa, NEXUS_ATTR_TX_SLOTS, ring_size);
1932 	VERIFY(err == 0);
1933 	err = kern_nexus_attr_set(nxa, NEXUS_ATTR_RX_SLOTS, ring_size);
1934 	VERIFY(err == 0);
1935 
1936 	assert(err == 0);
1937 
1938 	if (ipsec_in_wmm_mode(pcb)) {
1939 		os_log(OS_LOG_DEFAULT, "%s: %s enabling wmm mode\n",
1940 		    __func__, pcb->ipsec_if_xname);
1941 
1942 		init_params->output_sched_model = IFNET_SCHED_MODEL_DRIVER_MANAGED;
1943 
1944 		err = kern_nexus_attr_set(nxa, NEXUS_ATTR_TX_RINGS,
1945 		    IPSEC_NETIF_WMM_TX_RING_COUNT);
1946 		VERIFY(err == 0);
1947 		err = kern_nexus_attr_set(nxa, NEXUS_ATTR_RX_RINGS,
1948 		    IPSEC_NETIF_WMM_RX_RING_COUNT);
1949 		VERIFY(err == 0);
1950 
1951 		err = kern_nexus_attr_set(nxa, NEXUS_ATTR_QMAP, NEXUS_QMAP_TYPE_WMM);
1952 		VERIFY(err == 0);
1953 	}
1954 
1955 	pcb->ipsec_netif_txring_size = ring_size;
1956 
1957 	bzero(&pp_init, sizeof(pp_init));
1958 	pp_init.kbi_version = KERN_PBUFPOOL_CURRENT_VERSION;
1959 	pp_init.kbi_flags |= KBIF_VIRTUAL_DEVICE;
1960 	// Note: we need more packets than can be held in the tx and rx rings because
1961 	// packets can also be in the AQM queue(s)
1962 	pp_init.kbi_packets = pcb->ipsec_netif_ring_size * (2 * pcb->ipsec_kpipe_count + 1);
1963 	pp_init.kbi_bufsize = pcb->ipsec_slot_size;
1964 	pp_init.kbi_buf_seg_size = IPSEC_IF_DEFAULT_BUF_SEG_SIZE;
1965 	pp_init.kbi_max_frags = 1;
1966 	(void) snprintf((char *)pp_init.kbi_name, sizeof(pp_init.kbi_name),
1967 	    "%s", provider_name);
1968 	pp_init.kbi_ctx = NULL;
1969 	pp_init.kbi_ctx_retain = NULL;
1970 	pp_init.kbi_ctx_release = NULL;
1971 
1972 	err = kern_pbufpool_create(&pp_init, &pcb->ipsec_netif_pp, NULL);
1973 	if (err != 0) {
1974 		os_log_error(OS_LOG_DEFAULT, "%s pbufbool create failed, error %d\n", __func__, err);
1975 		goto failed;
1976 	}
1977 
1978 	err = kern_nexus_controller_register_provider(controller,
1979 	    ipsec_nx_dom_prov,
1980 	    provider_name,
1981 	    &prov_init,
1982 	    sizeof(prov_init),
1983 	    nxa,
1984 	    &pcb->ipsec_nx.if_provider);
1985 	IPSEC_IF_VERIFY(err == 0);
1986 	if (err != 0) {
1987 		os_log_error(OS_LOG_DEFAULT, "%s register provider failed, error %d\n",
1988 		    __func__, err);
1989 		goto failed;
1990 	}
1991 
1992 	bzero(&net_init, sizeof(net_init));
1993 	net_init.nxneti_version = KERN_NEXUS_NET_CURRENT_VERSION;
1994 	net_init.nxneti_flags = 0;
1995 	net_init.nxneti_eparams = init_params;
1996 	net_init.nxneti_lladdr = NULL;
1997 	net_init.nxneti_prepare = ipsec_netif_prepare;
1998 	net_init.nxneti_rx_pbufpool = pcb->ipsec_netif_pp;
1999 	net_init.nxneti_tx_pbufpool = pcb->ipsec_netif_pp;
2000 	err = kern_nexus_controller_alloc_net_provider_instance(controller,
2001 	    pcb->ipsec_nx.if_provider,
2002 	    pcb,
2003 	    NULL,
2004 	    &pcb->ipsec_nx.if_instance,
2005 	    &net_init,
2006 	    ifp);
2007 	IPSEC_IF_VERIFY(err == 0);
2008 	if (err != 0) {
2009 		os_log_error(OS_LOG_DEFAULT, "%s alloc_net_provider_instance failed, %d\n",
2010 		    __func__, err);
2011 		kern_nexus_controller_deregister_provider(controller,
2012 		    pcb->ipsec_nx.if_provider);
2013 		uuid_clear(pcb->ipsec_nx.if_provider);
2014 		goto failed;
2015 	}
2016 
2017 failed:
2018 	if (nxa) {
2019 		kern_nexus_attr_destroy(nxa);
2020 	}
2021 	if (err && pcb->ipsec_netif_pp != NULL) {
2022 		kern_pbufpool_destroy(pcb->ipsec_netif_pp);
2023 		pcb->ipsec_netif_pp = NULL;
2024 	}
2025 	return err;
2026 }
2027 
2028 static void
ipsec_detach_provider_and_instance(uuid_t provider,uuid_t instance)2029 ipsec_detach_provider_and_instance(uuid_t provider, uuid_t instance)
2030 {
2031 	nexus_controller_t controller = kern_nexus_shared_controller();
2032 	errno_t err;
2033 
2034 	if (!uuid_is_null(instance)) {
2035 		err = kern_nexus_controller_free_provider_instance(controller,
2036 		    instance);
2037 		if (err != 0) {
2038 			os_log_error(OS_LOG_DEFAULT, "%s free_provider_instance failed %d\n",
2039 			    __func__, err);
2040 		}
2041 		uuid_clear(instance);
2042 	}
2043 	if (!uuid_is_null(provider)) {
2044 		err = kern_nexus_controller_deregister_provider(controller,
2045 		    provider);
2046 		if (err != 0) {
2047 			os_log_error(OS_LOG_DEFAULT, "%s deregister_provider %d\n", __func__, err);
2048 		}
2049 		uuid_clear(provider);
2050 	}
2051 	return;
2052 }
2053 
2054 static void
ipsec_nexus_detach(struct ipsec_pcb * pcb)2055 ipsec_nexus_detach(struct ipsec_pcb *pcb)
2056 {
2057 	ipsec_nx_t nx = &pcb->ipsec_nx;
2058 	nexus_controller_t controller = kern_nexus_shared_controller();
2059 	errno_t err;
2060 
2061 	if (!uuid_is_null(nx->fsw_device)) {
2062 		err = kern_nexus_ifdetach(controller,
2063 		    nx->fsw_instance,
2064 		    nx->fsw_device);
2065 		if (err != 0) {
2066 			os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_ifdetach ms device failed %d\n",
2067 			    __func__, err);
2068 		}
2069 	}
2070 
2071 	ipsec_detach_provider_and_instance(nx->fsw_provider,
2072 	    nx->fsw_instance);
2073 	ipsec_detach_provider_and_instance(nx->if_provider,
2074 	    nx->if_instance);
2075 
2076 	if (pcb->ipsec_netif_pp != NULL) {
2077 		kern_pbufpool_destroy(pcb->ipsec_netif_pp);
2078 		pcb->ipsec_netif_pp = NULL;
2079 	}
2080 	memset(nx, 0, sizeof(*nx));
2081 }
2082 
/*
 * Create and register a flowswitch nexus provider named
 * "com.apple.<type_name>.<ifname>" and allocate one instance of it.
 * The resulting provider/instance UUIDs are returned through 'provider'
 * and 'instance'. Slot-buffer and ring sizes come from the pcb; the
 * multi-buflet (super-packet) attribute is enabled when TCP RX
 * aggregation is configured.
 *
 * Returns 0 on success; on failure everything created so far is rolled
 * back and the error is returned.
 */
static errno_t
ipsec_create_fs_provider_and_instance(struct ipsec_pcb *pcb,
    const char *type_name,
    const char *ifname,
    uuid_t *provider, uuid_t *instance)
{
	nexus_attr_t attr = NULL;
	nexus_controller_t controller = kern_nexus_shared_controller();
	uuid_t dom_prov;
	errno_t err;
	struct kern_nexus_init init;
	nexus_name_t    provider_name;

	err = kern_nexus_get_default_domain_provider(NEXUS_TYPE_FLOW_SWITCH,
	    &dom_prov);
	IPSEC_IF_VERIFY(err == 0);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s can't get %s provider, error %d\n",
		    __func__, type_name, err);
		goto failed;
	}

	err = kern_nexus_attr_create(&attr);
	IPSEC_IF_VERIFY(err == 0);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_attr_create failed: %d\n",
		    __func__, err);
		goto failed;
	}

	uint64_t slot_buffer_size = pcb->ipsec_slot_size;
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
	VERIFY(err == 0);

	// Reset ring size for flowswitch nexus to limit memory usage. Larger RX than netif.
	uint64_t tx_ring_size = pcb->ipsec_tx_fsw_ring_size;
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_TX_SLOTS, tx_ring_size);
	VERIFY(err == 0);
	uint64_t rx_ring_size = pcb->ipsec_rx_fsw_ring_size;
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_RX_SLOTS, rx_ring_size);
	VERIFY(err == 0);
	/*
	 * Configure flowswitch to use super-packet (multi-buflet).
	 * This allows flowswitch to perform intra-stack packet aggregation.
	 */
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_MAX_FRAGS,
	    NX_FSW_TCP_RX_AGG_ENABLED() ? NX_PBUF_FRAGS_MAX : 1);
	VERIFY(err == 0);

	snprintf((char *)provider_name, sizeof(provider_name),
	    "com.apple.%s.%s", type_name, ifname);
	err = kern_nexus_controller_register_provider(controller,
	    dom_prov,
	    provider_name,
	    NULL,
	    0,
	    attr,
	    provider);
	/*
	 * The attributes are not needed past registration (successful or
	 * not); destroying them here, before the error check, means the
	 * 'failed' path never has to clean them up.
	 */
	kern_nexus_attr_destroy(attr);
	attr = NULL;
	IPSEC_IF_VERIFY(err == 0);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s register %s provider failed, error %d\n",
		    __func__, type_name, err);
		goto failed;
	}
	bzero(&init, sizeof(init));
	init.nxi_version = KERN_NEXUS_CURRENT_VERSION;
	err = kern_nexus_controller_alloc_provider_instance(controller,
	    *provider,
	    NULL, NULL,
	    instance, &init);
	IPSEC_IF_VERIFY(err == 0);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s alloc_provider_instance %s failed, %d\n",
		    __func__, type_name, err);
		// Roll back the provider registration when the instance fails
		kern_nexus_controller_deregister_provider(controller,
		    *provider);
		uuid_clear(*provider);
	}
failed:
	return err;
}
2166 
/*
 * Attach a flowswitch nexus on top of the interface's netif nexus:
 * create the flowswitch provider/instance, attach the flowswitch to the
 * netif device port, and cache the flowswitch agent UUID in the pcb.
 *
 * The agent-UUID lookup is best-effort: failures there are only logged
 * and do not fail the attach. On any real failure, all nexus state is
 * torn down and the ifnet is detached; a failing ifnet_detach() at that
 * point panics, since the interface would be left half-constructed.
 */
static errno_t
ipsec_flowswitch_attach(struct ipsec_pcb *pcb)
{
	nexus_controller_t controller = kern_nexus_shared_controller();
	errno_t err = 0;
	ipsec_nx_t nx = &pcb->ipsec_nx;

	// Allocate flowswitch
	err = ipsec_create_fs_provider_and_instance(pcb,
	    "flowswitch",
	    pcb->ipsec_ifp->if_xname,
	    &nx->fsw_provider,
	    &nx->fsw_instance);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s: failed to create bridge provider and instance\n",
		    __func__);
		goto failed;
	}

	// Attach flowswitch to device port
	err = kern_nexus_ifattach(controller, nx->fsw_instance,
	    NULL, nx->if_instance,
	    FALSE, &nx->fsw_device);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s kern_nexus_ifattach ms device %d\n", __func__, err);
		goto failed;
	}

	// Extract the agent UUID and save for later
	struct kern_nexus *flowswitch_nx = nx_find(nx->fsw_instance, false);
	if (flowswitch_nx != NULL) {
		struct nx_flowswitch *flowswitch = NX_FSW_PRIVATE(flowswitch_nx);
		if (flowswitch != NULL) {
			// Copy the agent UUID under the flowswitch read lock
			FSW_RLOCK(flowswitch);
			uuid_copy(nx->fsw_agent, flowswitch->fsw_agent_uuid);
			FSW_UNLOCK(flowswitch);
		} else {
			os_log_error(OS_LOG_DEFAULT, "ipsec_flowswitch_attach - flowswitch is NULL\n");
		}
		// Drop the reference taken by nx_find()
		nx_release(flowswitch_nx);
	} else {
		os_log_error(OS_LOG_DEFAULT, "ipsec_flowswitch_attach - unable to find flowswitch nexus\n");
	}

	return 0;

failed:
	ipsec_nexus_detach(pcb);

	errno_t detach_error = 0;
	if ((detach_error = ifnet_detach(pcb->ipsec_ifp)) != 0) {
		panic("ipsec_flowswitch_attach - ifnet_detach failed: %d", detach_error);
		/* NOT REACHED */
	}

	return err;
}
2224 
2225 #pragma mark Kernel Pipe Nexus
2226 
/*
 * Take a reference on the global kernel-pipe nexus controller, creating
 * it on first use. The first caller also registers the
 * "com.apple.nexus.ipsec.kpipe" provider, sizing its TX/RX rings from
 * the pcb: the explicit kpipe ring sizes if set, else the netif ring
 * size, else the global if_ipsec_ring_size default.
 *
 * Returns 0 on success (including the fast path where the controller
 * already exists). On failure, the controller is destroyed and the
 * refcount reset to 0 so a later caller can retry from scratch. All
 * global state is manipulated under ipsec_lock.
 */
static errno_t
ipsec_register_kernel_pipe_nexus(struct ipsec_pcb *pcb)
{
	nexus_attr_t nxa = NULL;
	errno_t result;

	lck_mtx_lock(&ipsec_lock);
	// Already created by a previous caller: just take a reference.
	if (ipsec_ncd_refcount++) {
		lck_mtx_unlock(&ipsec_lock);
		return 0;
	}

	result = kern_nexus_controller_create(&ipsec_ncd);
	if (result) {
		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_controller_create failed: %d\n",
		    __FUNCTION__, result);
		goto done;
	}

	uuid_t dom_prov;
	result = kern_nexus_get_default_domain_provider(
		NEXUS_TYPE_KERNEL_PIPE, &dom_prov);
	if (result) {
		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_get_default_domain_provider failed: %d\n",
		    __FUNCTION__, result);
		goto done;
	}

	struct kern_nexus_provider_init prov_init = {
		.nxpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
		.nxpi_flags = NXPIF_VIRTUAL_DEVICE,
		.nxpi_pre_connect = ipsec_nexus_pre_connect,
		.nxpi_connected = ipsec_nexus_connected,
		.nxpi_pre_disconnect = ipsec_nexus_pre_disconnect,
		.nxpi_disconnected = ipsec_nexus_disconnected,
		.nxpi_ring_init = ipsec_kpipe_ring_init,
		.nxpi_ring_fini = ipsec_kpipe_ring_fini,
		.nxpi_slot_init = NULL,
		.nxpi_slot_fini = NULL,
		.nxpi_sync_tx = ipsec_kpipe_sync_tx,
		.nxpi_sync_rx = ipsec_kpipe_sync_rx,
		.nxpi_tx_doorbell = NULL,
	};

	result = kern_nexus_attr_create(&nxa);
	if (result) {
		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_attr_create failed: %d\n",
		    __FUNCTION__, result);
		goto done;
	}

	uint64_t slot_buffer_size = IPSEC_IF_DEFAULT_SLOT_SIZE;
	result = kern_nexus_attr_set(nxa, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
	VERIFY(result == 0);

	// Reset ring size for kernel pipe nexus to limit memory usage
	// Note: It's better to have less on slots on the kpipe TX ring than the netif
	// so back pressure is applied at the AQM layer
	uint64_t ring_size =
	    pcb->ipsec_kpipe_tx_ring_size != 0 ? pcb->ipsec_kpipe_tx_ring_size :
	    pcb->ipsec_netif_ring_size != 0 ? pcb->ipsec_netif_ring_size :
	    if_ipsec_ring_size;
	result = kern_nexus_attr_set(nxa, NEXUS_ATTR_TX_SLOTS, ring_size);
	VERIFY(result == 0);

	ring_size =
	    pcb->ipsec_kpipe_rx_ring_size != 0 ? pcb->ipsec_kpipe_rx_ring_size :
	    pcb->ipsec_netif_ring_size != 0 ? pcb->ipsec_netif_ring_size :
	    if_ipsec_ring_size;
	result = kern_nexus_attr_set(nxa, NEXUS_ATTR_RX_SLOTS, ring_size);
	VERIFY(result == 0);

	result = kern_nexus_controller_register_provider(ipsec_ncd,
	    dom_prov,
	    (const uint8_t *)"com.apple.nexus.ipsec.kpipe",
	    &prov_init,
	    sizeof(prov_init),
	    nxa,
	    &ipsec_kpipe_uuid);
	if (result) {
		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_controller_register_provider failed: %d\n",
		    __FUNCTION__, result);
		goto done;
	}

done:
	// Reached on success too; attributes are always freed here.
	if (nxa) {
		kern_nexus_attr_destroy(nxa);
	}

	// On failure, roll back completely so the next caller retries setup.
	if (result) {
		if (ipsec_ncd) {
			kern_nexus_controller_destroy(ipsec_ncd);
			ipsec_ncd = NULL;
		}
		ipsec_ncd_refcount = 0;
	}

	lck_mtx_unlock(&ipsec_lock);

	return result;
}
2329 
2330 static void
ipsec_unregister_kernel_pipe_nexus(void)2331 ipsec_unregister_kernel_pipe_nexus(void)
2332 {
2333 	lck_mtx_lock(&ipsec_lock);
2334 
2335 	VERIFY(ipsec_ncd_refcount > 0);
2336 
2337 	if (--ipsec_ncd_refcount == 0) {
2338 		kern_nexus_controller_destroy(ipsec_ncd);
2339 		ipsec_ncd = NULL;
2340 	}
2341 
2342 	lck_mtx_unlock(&ipsec_lock);
2343 }
2344 
/* This structure only holds onto kpipe channels that need to be
 * freed in the future, but are cleared from the pcb under lock
 */
struct ipsec_detached_channels {
	int count;                              /* number of valid entries in uuids[]; 0 when nothing was detached */
	kern_pbufpool_t pp;                     /* kpipe packet pool taken from the pcb; destroyed by ipsec_free_channels() */
	uuid_t uuids[IPSEC_IF_MAX_RING_COUNT];  /* kpipe provider-instance UUIDs pending release */
};
2353 
/*
 * Transfer all kpipe channel state (instance UUIDs and the packet pool)
 * out of the pcb into 'dc' so it can be freed later — outside the pcb
 * lock — by ipsec_free_channels(). Clears IPSEC_FLAGS_KPIPE_ALLOCATED.
 * Must be called with the pcb lock held exclusively.
 */
static void
ipsec_detach_channels(struct ipsec_pcb *pcb, struct ipsec_detached_channels *dc)
{
	LCK_RW_ASSERT(&pcb->ipsec_pcb_lock, LCK_RW_TYPE_EXCLUSIVE);

	if (!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
		// Nothing was ever allocated: every UUID slot must be empty.
		for (int i = 0; i < IPSEC_IF_MAX_RING_COUNT; i++) {
			VERIFY(uuid_is_null(pcb->ipsec_kpipe_uuid[i]));
		}
		dc->count = 0;
		return;
	}

	dc->count = pcb->ipsec_kpipe_count;

	VERIFY(dc->count >= 0);
	VERIFY(dc->count <= IPSEC_IF_MAX_RING_COUNT);

	// Move the first 'count' UUIDs out of the pcb...
	for (int i = 0; i < dc->count; i++) {
		VERIFY(!uuid_is_null(pcb->ipsec_kpipe_uuid[i]));
		uuid_copy(dc->uuids[i], pcb->ipsec_kpipe_uuid[i]);
		uuid_clear(pcb->ipsec_kpipe_uuid[i]);
	}
	// ...and sanity-check that the remaining slots were never used.
	for (int i = dc->count; i < IPSEC_IF_MAX_RING_COUNT; i++) {
		VERIFY(uuid_is_null(pcb->ipsec_kpipe_uuid[i]));
	}

	// The kpipe packet pool exists iff at least one channel exists.
	if (dc->count) {
		VERIFY(pcb->ipsec_kpipe_pp);
	} else {
		VERIFY(!pcb->ipsec_kpipe_pp);
	}

	dc->pp = pcb->ipsec_kpipe_pp;

	pcb->ipsec_kpipe_pp = NULL;

	ipsec_flag_clr(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED);
}
2393 
2394 static void
ipsec_free_channels(struct ipsec_detached_channels * dc)2395 ipsec_free_channels(struct ipsec_detached_channels *dc)
2396 {
2397 	if (!dc->count) {
2398 		return;
2399 	}
2400 
2401 	for (int i = 0; i < dc->count; i++) {
2402 		errno_t result;
2403 		result = kern_nexus_controller_free_provider_instance(ipsec_ncd, dc->uuids[i]);
2404 		VERIFY(!result);
2405 	}
2406 
2407 	VERIFY(dc->pp);
2408 	kern_pbufpool_destroy(dc->pp);
2409 
2410 	ipsec_unregister_kernel_pipe_nexus();
2411 
2412 	memset(dc, 0, sizeof(*dc));
2413 }
2414 
2415 static errno_t
ipsec_enable_channel(struct ipsec_pcb * pcb,struct proc * proc)2416 ipsec_enable_channel(struct ipsec_pcb *pcb, struct proc *proc)
2417 {
2418 	struct kern_nexus_init init;
2419 	struct kern_pbufpool_init pp_init;
2420 	errno_t result;
2421 
2422 	kauth_cred_t cred = kauth_cred_get();
2423 	result = priv_check_cred(cred, PRIV_SKYWALK_REGISTER_KERNEL_PIPE, 0);
2424 	if (result) {
2425 		return result;
2426 	}
2427 
2428 	VERIFY(pcb->ipsec_kpipe_count);
2429 	VERIFY(!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED));
2430 
2431 	result = ipsec_register_kernel_pipe_nexus(pcb);
2432 
2433 	lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
2434 
2435 	if (result) {
2436 		os_log_error(OS_LOG_DEFAULT, "%s: %s failed to register kernel pipe nexus\n",
2437 		    __func__, pcb->ipsec_if_xname);
2438 		goto done;
2439 	}
2440 
2441 	VERIFY(ipsec_ncd);
2442 
2443 	bzero(&pp_init, sizeof(pp_init));
2444 	pp_init.kbi_version = KERN_PBUFPOOL_CURRENT_VERSION;
2445 	pp_init.kbi_flags |= KBIF_VIRTUAL_DEVICE;
2446 	// Note: We only needs are many packets as can be held in the tx and rx rings
2447 	pp_init.kbi_packets = pcb->ipsec_netif_ring_size * 2 * pcb->ipsec_kpipe_count;
2448 	pp_init.kbi_bufsize = pcb->ipsec_slot_size;
2449 	pp_init.kbi_buf_seg_size = IPSEC_IF_DEFAULT_BUF_SEG_SIZE;
2450 	pp_init.kbi_max_frags = 1;
2451 	pp_init.kbi_flags |= KBIF_QUANTUM;
2452 	(void) snprintf((char *)pp_init.kbi_name, sizeof(pp_init.kbi_name),
2453 	    "com.apple.kpipe.%s", pcb->ipsec_if_xname);
2454 	pp_init.kbi_ctx = NULL;
2455 	pp_init.kbi_ctx_retain = NULL;
2456 	pp_init.kbi_ctx_release = NULL;
2457 
2458 	result = kern_pbufpool_create(&pp_init, &pcb->ipsec_kpipe_pp,
2459 	    NULL);
2460 	if (result != 0) {
2461 		os_log_error(OS_LOG_DEFAULT, "%s: %s pbufbool create failed, error %d\n",
2462 		    __func__, pcb->ipsec_if_xname, result);
2463 		goto done;
2464 	}
2465 
2466 	bzero(&init, sizeof(init));
2467 	init.nxi_version = KERN_NEXUS_CURRENT_VERSION;
2468 	init.nxi_tx_pbufpool = pcb->ipsec_kpipe_pp;
2469 
2470 	for (unsigned int i = 0; i < pcb->ipsec_kpipe_count; i++) {
2471 		VERIFY(uuid_is_null(pcb->ipsec_kpipe_uuid[i]));
2472 		result = kern_nexus_controller_alloc_provider_instance(ipsec_ncd,
2473 		    ipsec_kpipe_uuid, pcb, NULL, &pcb->ipsec_kpipe_uuid[i], &init);
2474 
2475 		if (result == 0) {
2476 			nexus_port_t port = NEXUS_PORT_KERNEL_PIPE_CLIENT;
2477 			const bool has_proc_uuid = !uuid_is_null(pcb->ipsec_kpipe_proc_uuid);
2478 			pid_t pid = pcb->ipsec_kpipe_pid;
2479 			if (!pid && !has_proc_uuid) {
2480 				pid = proc_pid(proc);
2481 			}
2482 			result = kern_nexus_controller_bind_provider_instance(ipsec_ncd,
2483 			    pcb->ipsec_kpipe_uuid[i], &port,
2484 			    pid, has_proc_uuid ? pcb->ipsec_kpipe_proc_uuid : NULL, NULL,
2485 			    0, has_proc_uuid ? NEXUS_BIND_EXEC_UUID:NEXUS_BIND_PID);
2486 		}
2487 
2488 		if (result) {
2489 			/* Unwind all of them on error */
2490 			for (int j = 0; j < IPSEC_IF_MAX_RING_COUNT; j++) {
2491 				if (!uuid_is_null(pcb->ipsec_kpipe_uuid[j])) {
2492 					kern_nexus_controller_free_provider_instance(ipsec_ncd,
2493 					    pcb->ipsec_kpipe_uuid[j]);
2494 					uuid_clear(pcb->ipsec_kpipe_uuid[j]);
2495 				}
2496 			}
2497 			goto done;
2498 		}
2499 	}
2500 
2501 done:
2502 	lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
2503 
2504 	if (result) {
2505 		if (pcb->ipsec_kpipe_pp != NULL) {
2506 			kern_pbufpool_destroy(pcb->ipsec_kpipe_pp);
2507 			pcb->ipsec_kpipe_pp = NULL;
2508 		}
2509 		ipsec_unregister_kernel_pipe_nexus();
2510 	} else {
2511 		ipsec_flag_set(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED);
2512 	}
2513 
2514 	return result;
2515 }
2516 
2517 #endif // IPSEC_NEXUS
2518 
2519 
2520 /* Kernel control functions */
2521 
2522 static inline int
ipsec_find_by_unit(u_int32_t unit)2523 ipsec_find_by_unit(u_int32_t unit)
2524 {
2525 	struct ipsec_pcb *next_pcb = NULL;
2526 	int found = 0;
2527 
2528 	TAILQ_FOREACH(next_pcb, &ipsec_head, ipsec_chain) {
2529 		if (next_pcb->ipsec_unit == unit) {
2530 			found = 1;
2531 			break;
2532 		}
2533 	}
2534 
2535 	return found;
2536 }
2537 
/*
 * Release all resources owned by an ipsec pcb: any queued input mbufs,
 * the pcb's locks, its entry on the global pcb list, and the pcb
 * allocation itself. 'locked' indicates whether the caller already
 * holds ipsec_lock (the global list lock); when false, it is taken
 * just around the list removal.
 */
static inline void
ipsec_free_pcb(struct ipsec_pcb *pcb, bool locked)
{
#if IPSEC_NEXUS
	mbuf_freem_list(pcb->ipsec_input_chain);
	pcb->ipsec_input_chain_count = 0;
	lck_mtx_destroy(&pcb->ipsec_input_chain_lock, &ipsec_lck_grp);
	lck_mtx_destroy(&pcb->ipsec_kpipe_encrypt_lock, &ipsec_lck_grp);
	lck_mtx_destroy(&pcb->ipsec_kpipe_decrypt_lock, &ipsec_lck_grp);
#endif // IPSEC_NEXUS
	lck_mtx_destroy(&pcb->ipsec_pcb_data_move_lock, &ipsec_lck_grp);
	lck_rw_destroy(&pcb->ipsec_pcb_lock, &ipsec_lck_grp);
	if (!locked) {
		lck_mtx_lock(&ipsec_lock);
	}
	TAILQ_REMOVE(&ipsec_head, pcb, ipsec_chain);
	if (!locked) {
		lck_mtx_unlock(&ipsec_lock);
	}
	zfree(ipsec_pcb_zone, pcb);
}
2559 
/*
 * Allocate a new ipsec pcb and reserve a kernel-control unit and a
 * unique interface id for it. When *unit is 0 the first free unit is
 * chosen; otherwise the requested unit must be unused (EBUSY if taken).
 * On success the pcb is linked into the global list — which is kept
 * sorted by unique id — and returned through *unitinfo.
 */
static errno_t
ipsec_ctl_setup(u_int32_t *unit, void **unitinfo)
{
	if (unit == NULL || unitinfo == NULL) {
		return EINVAL;
	}

	lck_mtx_lock(&ipsec_lock);

	/* Find next available unit */
	if (*unit == 0) {
		*unit = 1;
		while (*unit != ctl_maxunit) {
			if (ipsec_find_by_unit(*unit)) {
				(*unit)++;
			} else {
				break;
			}
		}
		if (*unit == ctl_maxunit) {
			lck_mtx_unlock(&ipsec_lock);
			return EBUSY;
		}
	} else if (ipsec_find_by_unit(*unit)) {
		lck_mtx_unlock(&ipsec_lock);
		return EBUSY;
	}

	/* Find some open interface id */
	u_int32_t chosen_unique_id = 1;
	struct ipsec_pcb *next_pcb = TAILQ_LAST(&ipsec_head, ipsec_list);
	if (next_pcb != NULL) {
		/* List was not empty, add one to the last item */
		chosen_unique_id = next_pcb->ipsec_unique_id + 1;
		next_pcb = NULL;

		/*
		 * If this wrapped the id number, start looking at
		 * the front of the list for an unused id.
		 */
		if (chosen_unique_id == 0) {
			/* Find the next unused ID */
			chosen_unique_id = 1;
			TAILQ_FOREACH(next_pcb, &ipsec_head, ipsec_chain) {
				if (next_pcb->ipsec_unique_id > chosen_unique_id) {
					/* We found a gap */
					break;
				}

				chosen_unique_id = next_pcb->ipsec_unique_id + 1;
			}
		}
	}

	struct ipsec_pcb *pcb = zalloc_flags(ipsec_pcb_zone, Z_WAITOK | Z_ZERO);

	*unitinfo = pcb;
	pcb->ipsec_unit = *unit;
	pcb->ipsec_unique_id = chosen_unique_id;

	/*
	 * next_pcb is non-NULL only when the gap search above broke out of
	 * its loop; inserting before it keeps the list sorted by unique id.
	 * Otherwise the new id is the largest, so append at the tail.
	 */
	if (next_pcb != NULL) {
		TAILQ_INSERT_BEFORE(next_pcb, pcb, ipsec_chain);
	} else {
		TAILQ_INSERT_TAIL(&ipsec_head, pcb, ipsec_chain);
	}

	lck_mtx_unlock(&ipsec_lock);

	return 0;
}
2630 
/*
 * Kernel-control bind handler. Creates the pcb on demand when bind is
 * called before setup, records the control reference and unit, applies
 * default tunables, and initializes the pcb's locks.
 *
 * NOTE(review): the return value of ipsec_ctl_setup() is ignored here;
 * on setup failure *unitinfo remains NULL and EINVAL is returned by the
 * check below.
 */
static errno_t
ipsec_ctl_bind(kern_ctl_ref kctlref,
    struct sockaddr_ctl *sac,
    void **unitinfo)
{
	if (*unitinfo == NULL) {
		u_int32_t unit = 0;
		(void)ipsec_ctl_setup(&unit, unitinfo);
	}

	struct ipsec_pcb *pcb = (struct ipsec_pcb *)*unitinfo;
	if (pcb == NULL) {
		return EINVAL;
	}

	/* Setup the protocol control block */
	pcb->ipsec_ctlref = kctlref;
	// Adopt the unit requested by the client socket
	pcb->ipsec_unit = sac->sc_unit;
	pcb->ipsec_output_service_class = MBUF_SC_OAM;

#if IPSEC_NEXUS
	// Default Skywalk tunables for this interface
	pcb->ipsec_use_netif = false;
	pcb->ipsec_slot_size = IPSEC_IF_DEFAULT_SLOT_SIZE;
	pcb->ipsec_netif_ring_size = if_ipsec_ring_size;
	pcb->ipsec_tx_fsw_ring_size = if_ipsec_tx_fsw_ring_size;
	pcb->ipsec_rx_fsw_ring_size = if_ipsec_rx_fsw_ring_size;
#endif // IPSEC_NEXUS

	lck_rw_init(&pcb->ipsec_pcb_lock, &ipsec_lck_grp, &ipsec_lck_attr);
	lck_mtx_init(&pcb->ipsec_pcb_data_move_lock, &ipsec_lck_grp, &ipsec_lck_attr);
#if IPSEC_NEXUS
	pcb->ipsec_input_chain_count = 0;
	lck_mtx_init(&pcb->ipsec_input_chain_lock, &ipsec_lck_grp, &ipsec_lck_attr);
	lck_mtx_init(&pcb->ipsec_kpipe_encrypt_lock, &ipsec_lck_grp, &ipsec_lck_attr);
	lck_mtx_init(&pcb->ipsec_kpipe_decrypt_lock, &ipsec_lck_grp, &ipsec_lck_attr);
#endif // IPSEC_NEXUS

	return 0;
}
2670 
/*
 * ipsec_ctl_connect - kernel control "connect" handler.
 *
 * Creates and attaches the network interface backing this control socket.
 * Ensures the pcb is bound (calling ipsec_ctl_bind() if needed), then either
 * attaches a Skywalk native netif + flowswitch (IPSEC_NEXUS with
 * ipsec_use_netif set) or allocates and attaches a classic ifnet.
 *
 * On failure before the interface is attached, the pcb is freed and
 * *unitinfo is cleared.  After a successful ifnet/nexus attach, cleanup is
 * instead deferred to ipsec_detached() once the last reference drops.
 *
 * Returns 0 on success or an errno on failure.
 */
static errno_t
ipsec_ctl_connect(kern_ctl_ref kctlref,
    struct sockaddr_ctl *sac,
    void **unitinfo)
{
	struct ifnet_init_eparams ipsec_init = {};
	errno_t result = 0;

	/* No pcb yet: this connect arrived without a prior explicit bind. */
	if (*unitinfo == NULL) {
		(void)ipsec_ctl_bind(kctlref, sac, unitinfo);
	}

	struct ipsec_pcb *pcb = *unitinfo;
	if (pcb == NULL) {
		return EINVAL;
	}

	/* Handle case where ipsec_ctl_setup() was called, but ipsec_ctl_bind() was not */
	if (pcb->ipsec_ctlref == NULL) {
		(void)ipsec_ctl_bind(kctlref, sac, unitinfo);
	}

	/* Control units are 1-based; interface names/ids are 0-based. */
	snprintf(pcb->ipsec_if_xname, sizeof(pcb->ipsec_if_xname), "ipsec%d", pcb->ipsec_unit - 1);
	snprintf(pcb->ipsec_unique_name, sizeof(pcb->ipsec_unique_name), "ipsecid%d", pcb->ipsec_unique_id - 1);
	os_log(OS_LOG_DEFAULT, "ipsec_ctl_connect: creating interface %s (id %s)\n", pcb->ipsec_if_xname, pcb->ipsec_unique_name);

	/* Create the interface */
	bzero(&ipsec_init, sizeof(ipsec_init));
	ipsec_init.ver = IFNET_INIT_CURRENT_VERSION;
	ipsec_init.len = sizeof(ipsec_init);

#if IPSEC_NEXUS
	if (pcb->ipsec_use_netif) {
		/* Skywalk-native: no legacy start callback; rings drive output. */
		ipsec_init.flags = (IFNET_INIT_SKYWALK_NATIVE | IFNET_INIT_NX_NOAUTO);
	} else
#endif // IPSEC_NEXUS
	{
		ipsec_init.flags = IFNET_INIT_NX_NOAUTO;
		ipsec_init.start = ipsec_start;
	}
	ipsec_init.name = "ipsec";
	ipsec_init.unit = pcb->ipsec_unit - 1;
	ipsec_init.uniqueid = pcb->ipsec_unique_name;
	ipsec_init.uniqueid_len = (uint32_t)strlen(pcb->ipsec_unique_name);
	ipsec_init.family = IFNET_FAMILY_IPSEC;
	ipsec_init.type = IFT_OTHER;
	ipsec_init.demux = ipsec_demux;
	ipsec_init.add_proto = ipsec_add_proto;
	ipsec_init.del_proto = ipsec_del_proto;
	ipsec_init.softc = pcb;
	ipsec_init.ioctl = ipsec_ioctl;
	ipsec_init.free = ipsec_detached;

#if IPSEC_NEXUS
	/* We don't support kpipes without a netif */
	if (pcb->ipsec_kpipe_count && !pcb->ipsec_use_netif) {
		result = ENOTSUP;
		os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - kpipe requires netif: failed %d\n", result);
		ipsec_free_pcb(pcb, false);
		*unitinfo = NULL;
		return result;
	}

	if (if_ipsec_debug != 0) {
		printf("%s: %s%d use_netif %d kpipe_count %d slot_size %u ring_size %u "
		    "kpipe_tx_ring_size %u kpipe_rx_ring_size %u\n",
		    __func__,
		    ipsec_init.name, ipsec_init.unit,
		    pcb->ipsec_use_netif,
		    pcb->ipsec_kpipe_count,
		    pcb->ipsec_slot_size,
		    pcb->ipsec_netif_ring_size,
		    pcb->ipsec_kpipe_tx_ring_size,
		    pcb->ipsec_kpipe_rx_ring_size);
	}
	if (pcb->ipsec_use_netif) {
		/* Kpipe channels must exist before the netif is attached. */
		if (pcb->ipsec_kpipe_count) {
			result = ipsec_enable_channel(pcb, current_proc());
			if (result) {
				os_log_error(OS_LOG_DEFAULT, "%s: %s failed to enable channels\n",
				    __func__, pcb->ipsec_if_xname);
				ipsec_free_pcb(pcb, false);
				*unitinfo = NULL;
				return result;
			}
		}

		result = ipsec_nexus_ifattach(pcb, &ipsec_init, &pcb->ipsec_ifp);
		if (result != 0) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - ipsec_nexus_ifattach failed: %d\n", result);
			ipsec_free_pcb(pcb, false);
			*unitinfo = NULL;
			return result;
		}

		result = ipsec_flowswitch_attach(pcb);
		if (result != 0) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - ipsec_flowswitch_attach failed: %d\n", result);
			// Do not call ipsec_free_pcb(). We will be attached already, and will be freed later
			// in ipsec_detached().
			*unitinfo = NULL;
			return result;
		}

		/* Attach to bpf */
		bpfattach(pcb->ipsec_ifp, DLT_RAW, 0);
	} else
#endif // IPSEC_NEXUS
	{
		/* Classic (non-Skywalk) path: allocate, attach, then bpf. */
		result = ifnet_allocate_extended(&ipsec_init, &pcb->ipsec_ifp);
		if (result != 0) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - ifnet_allocate failed: %d\n", result);
			ipsec_free_pcb(pcb, false);
			*unitinfo = NULL;
			return result;
		}
		ipsec_ifnet_set_attrs(pcb->ipsec_ifp);

		/* Attach the interface */
		result = ifnet_attach(pcb->ipsec_ifp, NULL);
		if (result != 0) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - ifnet_attach failed: %d\n", result);
			/* Allocated but never attached: drop our reference explicitly. */
			ifnet_release(pcb->ipsec_ifp);
			ipsec_free_pcb(pcb, false);
			*unitinfo = NULL;
			return result;
		}

		/* Attach to bpf */
		bpfattach(pcb->ipsec_ifp, DLT_NULL, 0);
	}

#if IPSEC_NEXUS
	/*
	 * Mark the data path as ready.
	 * If kpipe nexus is being used then the data path is marked ready only when a kpipe channel is connected.
	 */
	if (pcb->ipsec_kpipe_count == 0) {
		lck_mtx_lock(&pcb->ipsec_pcb_data_move_lock);
		IPSEC_SET_DATA_PATH_READY(pcb);
		lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
	}
#endif

	/* The interface's resources are allocated; mark it as running */
	ifnet_set_flags(pcb->ipsec_ifp, IFF_RUNNING, IFF_RUNNING);

	return 0;
}
2820 
2821 static errno_t
ipsec_detach_ip(ifnet_t interface,protocol_family_t protocol,socket_t pf_socket)2822 ipsec_detach_ip(ifnet_t                         interface,
2823     protocol_family_t       protocol,
2824     socket_t                        pf_socket)
2825 {
2826 	errno_t result = EPROTONOSUPPORT;
2827 
2828 	/* Attempt a detach */
2829 	if (protocol == PF_INET) {
2830 		struct ifreq    ifr;
2831 
2832 		bzero(&ifr, sizeof(ifr));
2833 		snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s%d",
2834 		    ifnet_name(interface), ifnet_unit(interface));
2835 
2836 		result = sock_ioctl(pf_socket, SIOCPROTODETACH, &ifr);
2837 	} else if (protocol == PF_INET6) {
2838 		struct in6_ifreq        ifr6;
2839 
2840 		bzero(&ifr6, sizeof(ifr6));
2841 		snprintf(ifr6.ifr_name, sizeof(ifr6.ifr_name), "%s%d",
2842 		    ifnet_name(interface), ifnet_unit(interface));
2843 
2844 		result = sock_ioctl(pf_socket, SIOCPROTODETACH_IN6, &ifr6);
2845 	}
2846 
2847 	return result;
2848 }
2849 
2850 static void
ipsec_remove_address(ifnet_t interface,protocol_family_t protocol,ifaddr_t address,socket_t pf_socket)2851 ipsec_remove_address(ifnet_t                            interface,
2852     protocol_family_t      protocol,
2853     ifaddr_t                       address,
2854     socket_t                       pf_socket)
2855 {
2856 	errno_t result = 0;
2857 
2858 	/* Attempt a detach */
2859 	if (protocol == PF_INET) {
2860 		struct ifreq    ifr;
2861 
2862 		bzero(&ifr, sizeof(ifr));
2863 		snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s%d",
2864 		    ifnet_name(interface), ifnet_unit(interface));
2865 		result = ifaddr_address(address, &ifr.ifr_addr, sizeof(ifr.ifr_addr));
2866 		if (result != 0) {
2867 			os_log_error(OS_LOG_DEFAULT, "ipsec_remove_address - ifaddr_address failed: %d", result);
2868 		} else {
2869 			result = sock_ioctl(pf_socket, SIOCDIFADDR, &ifr);
2870 			if (result != 0) {
2871 				os_log_error(OS_LOG_DEFAULT, "ipsec_remove_address - SIOCDIFADDR failed: %d", result);
2872 			}
2873 		}
2874 	} else if (protocol == PF_INET6) {
2875 		struct in6_ifreq        ifr6;
2876 
2877 		bzero(&ifr6, sizeof(ifr6));
2878 		snprintf(ifr6.ifr_name, sizeof(ifr6.ifr_name), "%s%d",
2879 		    ifnet_name(interface), ifnet_unit(interface));
2880 		result = ifaddr_address(address, (struct sockaddr*)&ifr6.ifr_addr,
2881 		    sizeof(ifr6.ifr_addr));
2882 		if (result != 0) {
2883 			os_log_error(OS_LOG_DEFAULT, "ipsec_remove_address - ifaddr_address failed (v6): %d",
2884 			    result);
2885 		} else {
2886 			result = sock_ioctl(pf_socket, SIOCDIFADDR_IN6, &ifr6);
2887 			if (result != 0) {
2888 				os_log_error(OS_LOG_DEFAULT, "ipsec_remove_address - SIOCDIFADDR_IN6 failed: %d",
2889 				    result);
2890 			}
2891 		}
2892 	}
2893 }
2894 
2895 static void
ipsec_cleanup_family(ifnet_t interface,protocol_family_t protocol)2896 ipsec_cleanup_family(ifnet_t                            interface,
2897     protocol_family_t      protocol)
2898 {
2899 	errno_t         result = 0;
2900 	socket_t        pf_socket = NULL;
2901 	ifaddr_t        *addresses = NULL;
2902 	int                     i;
2903 
2904 	if (protocol != PF_INET && protocol != PF_INET6) {
2905 		os_log_error(OS_LOG_DEFAULT, "ipsec_cleanup_family - invalid protocol family %d\n", protocol);
2906 		return;
2907 	}
2908 
2909 	/* Create a socket for removing addresses and detaching the protocol */
2910 	result = sock_socket(protocol, SOCK_DGRAM, 0, NULL, NULL, &pf_socket);
2911 	if (result != 0) {
2912 		if (result != EAFNOSUPPORT) {
2913 			os_log_error(OS_LOG_DEFAULT, "ipsec_cleanup_family - failed to create %s socket: %d\n",
2914 			    protocol == PF_INET ? "IP" : "IPv6", result);
2915 		}
2916 		goto cleanup;
2917 	}
2918 
2919 	/* always set SS_PRIV, we want to close and detach regardless */
2920 	sock_setpriv(pf_socket, 1);
2921 
2922 	result = ipsec_detach_ip(interface, protocol, pf_socket);
2923 	if (result == 0 || result == ENXIO) {
2924 		/* We are done! We either detached or weren't attached. */
2925 		goto cleanup;
2926 	} else if (result != EBUSY) {
2927 		/* Uh, not really sure what happened here... */
2928 		os_log_error(OS_LOG_DEFAULT, "ipsec_cleanup_family - ipsec_detach_ip failed: %d\n", result);
2929 		goto cleanup;
2930 	}
2931 
2932 	/*
2933 	 * At this point, we received an EBUSY error. This means there are
2934 	 * addresses attached. We should detach them and then try again.
2935 	 */
2936 	result = ifnet_get_address_list_family(interface, &addresses, (sa_family_t)protocol);
2937 	if (result != 0) {
2938 		os_log_error(OS_LOG_DEFAULT, "fnet_get_address_list_family(%s%d, 0xblah, %s) - failed: %d\n",
2939 		    ifnet_name(interface), ifnet_unit(interface),
2940 		    protocol == PF_INET ? "PF_INET" : "PF_INET6", result);
2941 		goto cleanup;
2942 	}
2943 
2944 	for (i = 0; addresses[i] != 0; i++) {
2945 		ipsec_remove_address(interface, protocol, addresses[i], pf_socket);
2946 	}
2947 	ifnet_free_address_list(addresses);
2948 	addresses = NULL;
2949 
2950 	/*
2951 	 * The addresses should be gone, we should try the remove again.
2952 	 */
2953 	result = ipsec_detach_ip(interface, protocol, pf_socket);
2954 	if (result != 0 && result != ENXIO) {
2955 		os_log_error(OS_LOG_DEFAULT, "ipsec_cleanup_family - ipsec_detach_ip failed: %d\n", result);
2956 	}
2957 
2958 cleanup:
2959 	if (pf_socket != NULL) {
2960 		sock_close(pf_socket);
2961 	}
2962 
2963 	if (addresses != NULL) {
2964 		ifnet_free_address_list(addresses);
2965 	}
2966 }
2967 
/*
 * ipsec_ctl_disconnect - kernel control "disconnect" handler.
 *
 * Tears down the pcb and (if one was created) the backing interface.
 * Ordering matters here: data-path threads are drained first, nexus rings
 * are stopped, kpipe channels are detached under the pcb lock, and only
 * then is the ifnet detached and the protocols/addresses cleaned up.
 *
 * Always returns 0; ifnet_detach() failure on the nexus path is fatal
 * (panic) because continuing would leave a half-detached native netif.
 */
static errno_t
ipsec_ctl_disconnect(__unused kern_ctl_ref      kctlref,
    __unused u_int32_t             unit,
    void                                   *unitinfo)
{
	struct ipsec_pcb *pcb = unitinfo;
	ifnet_t ifp = NULL;
	errno_t result = 0;

	if (pcb == NULL) {
		return EINVAL;
	}

	/* Wait until all threads in the data paths are done. */
	ipsec_wait_data_move_drain(pcb);

#if IPSEC_NEXUS
	// Tell the nexus to stop all rings
	if (pcb->ipsec_netif_nexus != NULL) {
		kern_nexus_stop(pcb->ipsec_netif_nexus);
	}
#endif // IPSEC_NEXUS

	lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);

#if IPSEC_NEXUS
	if (if_ipsec_debug != 0) {
		printf("ipsec_ctl_disconnect: detaching interface %s (id %s)\n",
		    pcb->ipsec_if_xname, pcb->ipsec_unique_name);
	}

	/*
	 * Detach kpipe channels under the lock; the detached set is freed
	 * later, after the lock is dropped.
	 */
	struct ipsec_detached_channels dc;
	ipsec_detach_channels(pcb, &dc);
#endif // IPSEC_NEXUS

	/* The control ref is gone; prevents further control-path use. */
	pcb->ipsec_ctlref = NULL;

	ifp = pcb->ipsec_ifp;
	if (ifp != NULL) {
#if IPSEC_NEXUS
		if (pcb->ipsec_netif_nexus != NULL) {
			/*
			 * Quiesce the interface and flush any pending outbound packets.
			 */
			if_down(ifp);

			/*
			 * Suspend data movement and wait for IO threads to exit.
			 * We can't rely on the logic in dlil_quiesce_and_detach_nexuses() to
			 * do this because ipsec nexuses are attached/detached separately.
			 */
			ifnet_datamov_suspend_and_drain(ifp);
			if ((result = ifnet_detach(ifp)) != 0) {
				panic("ipsec_ctl_disconnect - ifnet_detach failed: %d", result);
				/* NOT REACHED */
			}

			/*
			 * We want to do everything in our power to ensure that the interface
			 * really goes away when the socket is closed. We must remove IP/IPv6
			 * addresses and detach the protocols. Finally, we can remove and
			 * release the interface.
			 */
			key_delsp_for_ipsec_if(ifp);

			ipsec_cleanup_family(ifp, AF_INET);
			ipsec_cleanup_family(ifp, AF_INET6);

			lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);

			ipsec_free_channels(&dc);

			ipsec_nexus_detach(pcb);

			/* Decrement refcnt added by ifnet_datamov_suspend_and_drain(). */
			ifnet_datamov_resume(ifp);
		} else
#endif // IPSEC_NEXUS
		{
			/* Non-nexus path: drop the lock before the (blocking) cleanup. */
			lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);

#if IPSEC_NEXUS
			ipsec_free_channels(&dc);
#endif // IPSEC_NEXUS

			/*
			 * We want to do everything in our power to ensure that the interface
			 * really goes away when the socket is closed. We must remove IP/IPv6
			 * addresses and detach the protocols. Finally, we can remove and
			 * release the interface.
			 */
			key_delsp_for_ipsec_if(ifp);

			ipsec_cleanup_family(ifp, AF_INET);
			ipsec_cleanup_family(ifp, AF_INET6);

			/*
			 * Detach now; ipsec_detach() will be called asynchronously once
			 * the I/O reference count drops to 0.  There we will invoke
			 * ifnet_release().
			 */
			if ((result = ifnet_detach(ifp)) != 0) {
				os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_disconnect - ifnet_detach failed: %d\n", result);
			}
		}
	} else {
		// Bound, but not connected
		lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
		ipsec_free_pcb(pcb, false);
	}

	return 0;
}
3081 
3082 static errno_t
ipsec_ctl_send(__unused kern_ctl_ref kctlref,__unused u_int32_t unit,__unused void * unitinfo,mbuf_t m,__unused int flags)3083 ipsec_ctl_send(__unused kern_ctl_ref    kctlref,
3084     __unused u_int32_t           unit,
3085     __unused void                        *unitinfo,
3086     mbuf_t                  m,
3087     __unused int                 flags)
3088 {
3089 	/* Receive messages from the control socket. Currently unused. */
3090 	mbuf_freem(m);
3091 	return 0;
3092 }
3093 
3094 static errno_t
ipsec_ctl_setopt(__unused kern_ctl_ref kctlref,__unused u_int32_t unit,void * unitinfo,int opt,void * data,size_t len)3095 ipsec_ctl_setopt(__unused kern_ctl_ref  kctlref,
3096     __unused u_int32_t             unit,
3097     void                                   *unitinfo,
3098     int                                            opt,
3099     void                                   *data,
3100     size_t                                 len)
3101 {
3102 	errno_t                                 result = 0;
3103 	struct ipsec_pcb                        *pcb = unitinfo;
3104 	if (pcb == NULL) {
3105 		return EINVAL;
3106 	}
3107 
3108 	/* check for privileges for privileged options */
3109 	switch (opt) {
3110 	case IPSEC_OPT_FLAGS:
3111 	case IPSEC_OPT_EXT_IFDATA_STATS:
3112 	case IPSEC_OPT_SET_DELEGATE_INTERFACE:
3113 	case IPSEC_OPT_OUTPUT_TRAFFIC_CLASS:
3114 	case IPSEC_OPT_OUTPUT_DSCP_MAPPING:
3115 		if (kauth_cred_issuser(kauth_cred_get()) == 0) {
3116 			return EPERM;
3117 		}
3118 		break;
3119 	}
3120 
3121 	switch (opt) {
3122 	case IPSEC_OPT_FLAGS: {
3123 		if (len != sizeof(u_int32_t)) {
3124 			result = EMSGSIZE;
3125 		} else {
3126 			pcb->ipsec_external_flags = *(u_int32_t *)data;
3127 		}
3128 		break;
3129 	}
3130 
3131 	case IPSEC_OPT_EXT_IFDATA_STATS: {
3132 		if (len != sizeof(int)) {
3133 			result = EMSGSIZE;
3134 			break;
3135 		}
3136 		if (pcb->ipsec_ifp == NULL) {
3137 			// Only can set after connecting
3138 			result = EINVAL;
3139 			break;
3140 		}
3141 		pcb->ipsec_ext_ifdata_stats = (*(int *)data) ? 1 : 0;
3142 		break;
3143 	}
3144 
3145 	case IPSEC_OPT_INC_IFDATA_STATS_IN:
3146 	case IPSEC_OPT_INC_IFDATA_STATS_OUT: {
3147 		struct ipsec_stats_param *utsp = (struct ipsec_stats_param *)data;
3148 
3149 		if (utsp == NULL || len < sizeof(struct ipsec_stats_param)) {
3150 			result = EINVAL;
3151 			break;
3152 		}
3153 		if (pcb->ipsec_ifp == NULL) {
3154 			// Only can set after connecting
3155 			result = EINVAL;
3156 			break;
3157 		}
3158 		if (!pcb->ipsec_ext_ifdata_stats) {
3159 			result = EINVAL;
3160 			break;
3161 		}
3162 		if (opt == IPSEC_OPT_INC_IFDATA_STATS_IN) {
3163 			ifnet_stat_increment_in(pcb->ipsec_ifp, (uint32_t)utsp->utsp_packets,
3164 			    (uint32_t)utsp->utsp_bytes, (uint32_t)utsp->utsp_errors);
3165 		} else {
3166 			ifnet_stat_increment_out(pcb->ipsec_ifp, (uint32_t)utsp->utsp_packets,
3167 			    (uint32_t)utsp->utsp_bytes, (uint32_t)utsp->utsp_errors);
3168 		}
3169 		break;
3170 	}
3171 
3172 	case IPSEC_OPT_SET_DELEGATE_INTERFACE: {
3173 		ifnet_t del_ifp = NULL;
3174 		char name[IFNAMSIZ];
3175 
3176 		if (len > IFNAMSIZ - 1) {
3177 			result = EMSGSIZE;
3178 			break;
3179 		}
3180 		if (pcb->ipsec_ifp == NULL) {
3181 			// Only can set after connecting
3182 			result = EINVAL;
3183 			break;
3184 		}
3185 		if (len != 0) {                   /* if len==0, del_ifp will be NULL causing the delegate to be removed */
3186 			bcopy(data, name, len);
3187 			name[len] = 0;
3188 			result = ifnet_find_by_name(name, &del_ifp);
3189 		}
3190 		if (result == 0) {
3191 			os_log_error(OS_LOG_DEFAULT, "%s IPSEC_OPT_SET_DELEGATE_INTERFACE %s to %s\n",
3192 			    __func__, pcb->ipsec_ifp->if_xname,
3193 			    del_ifp ? del_ifp->if_xname : "NULL");
3194 
3195 			result = ifnet_set_delegate(pcb->ipsec_ifp, del_ifp);
3196 			if (del_ifp) {
3197 				ifnet_release(del_ifp);
3198 			}
3199 		}
3200 		break;
3201 	}
3202 
3203 	case IPSEC_OPT_OUTPUT_TRAFFIC_CLASS: {
3204 		if (len != sizeof(int)) {
3205 			result = EMSGSIZE;
3206 			break;
3207 		}
3208 		if (pcb->ipsec_ifp == NULL) {
3209 			// Only can set after connecting
3210 			result = EINVAL;
3211 			break;
3212 		}
3213 		mbuf_svc_class_t output_service_class = so_tc2msc(*(int *)data);
3214 		if (output_service_class == MBUF_SC_UNSPEC) {
3215 			pcb->ipsec_output_service_class = MBUF_SC_OAM;
3216 		} else {
3217 			pcb->ipsec_output_service_class = output_service_class;
3218 		}
3219 		os_log_error(OS_LOG_DEFAULT, "%s IPSEC_OPT_OUTPUT_TRAFFIC_CLASS %s svc %d\n",
3220 		    __func__, pcb->ipsec_ifp->if_xname,
3221 		    pcb->ipsec_output_service_class);
3222 		break;
3223 	}
3224 
3225 #if IPSEC_NEXUS
3226 	case IPSEC_OPT_ENABLE_CHANNEL: {
3227 		if (len != sizeof(int)) {
3228 			result = EMSGSIZE;
3229 			break;
3230 		}
3231 		if (pcb->ipsec_ifp != NULL) {
3232 			// Only can set before connecting
3233 			result = EINVAL;
3234 			break;
3235 		}
3236 		if ((*(int *)data) != 0 &&
3237 		    (*(int *)data) != 1 &&
3238 		    (*(int *)data) != IPSEC_IF_WMM_RING_COUNT) {
3239 			result = EINVAL;
3240 			break;
3241 		}
3242 		lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
3243 		pcb->ipsec_kpipe_count = *(int *)data;
3244 		lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
3245 		break;
3246 	}
3247 
3248 	case IPSEC_OPT_CHANNEL_BIND_PID: {
3249 		if (len != sizeof(pid_t)) {
3250 			result = EMSGSIZE;
3251 			break;
3252 		}
3253 		if (pcb->ipsec_ifp != NULL) {
3254 			// Only can set before connecting
3255 			result = EINVAL;
3256 			break;
3257 		}
3258 		lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
3259 		pcb->ipsec_kpipe_pid = *(pid_t *)data;
3260 		lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
3261 		break;
3262 	}
3263 
3264 	case IPSEC_OPT_CHANNEL_BIND_UUID: {
3265 		if (len != sizeof(uuid_t)) {
3266 			result = EMSGSIZE;
3267 			break;
3268 		}
3269 		if (pcb->ipsec_ifp != NULL) {
3270 			// Only can set before connecting
3271 			result = EINVAL;
3272 			break;
3273 		}
3274 		lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
3275 		uuid_copy(pcb->ipsec_kpipe_proc_uuid, *((uuid_t *)data));
3276 		lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
3277 		break;
3278 	}
3279 
3280 	case IPSEC_OPT_ENABLE_FLOWSWITCH: {
3281 		if (len != sizeof(int)) {
3282 			result = EMSGSIZE;
3283 			break;
3284 		}
3285 		if (pcb->ipsec_ifp == NULL) {
3286 			// Only can set after connecting
3287 			result = EINVAL;
3288 			break;
3289 		}
3290 		if (!if_is_fsw_transport_netagent_enabled()) {
3291 			result = ENOTSUP;
3292 			break;
3293 		}
3294 		if (uuid_is_null(pcb->ipsec_nx.fsw_agent)) {
3295 			result = ENOENT;
3296 			break;
3297 		}
3298 
3299 		uint32_t flags = netagent_get_flags(pcb->ipsec_nx.fsw_agent);
3300 
3301 		if (*(int *)data) {
3302 			flags |= (NETAGENT_FLAG_NEXUS_PROVIDER |
3303 			    NETAGENT_FLAG_NEXUS_LISTENER);
3304 			result = netagent_set_flags(pcb->ipsec_nx.fsw_agent, flags);
3305 			pcb->ipsec_needs_netagent = true;
3306 		} else {
3307 			pcb->ipsec_needs_netagent = false;
3308 			flags &= ~(NETAGENT_FLAG_NEXUS_PROVIDER |
3309 			    NETAGENT_FLAG_NEXUS_LISTENER);
3310 			result = netagent_set_flags(pcb->ipsec_nx.fsw_agent, flags);
3311 		}
3312 		break;
3313 	}
3314 
3315 	case IPSEC_OPT_INPUT_FRAG_SIZE: {
3316 		if (len != sizeof(u_int32_t)) {
3317 			result = EMSGSIZE;
3318 			break;
3319 		}
3320 		u_int32_t input_frag_size = *(u_int32_t *)data;
3321 		if (input_frag_size <= sizeof(struct ip6_hdr)) {
3322 			pcb->ipsec_frag_size_set = FALSE;
3323 			pcb->ipsec_input_frag_size = 0;
3324 		} else {
3325 			pcb->ipsec_frag_size_set = TRUE;
3326 			pcb->ipsec_input_frag_size = input_frag_size;
3327 		}
3328 		break;
3329 	}
3330 	case IPSEC_OPT_ENABLE_NETIF: {
3331 		if (len != sizeof(int)) {
3332 			result = EMSGSIZE;
3333 			break;
3334 		}
3335 		if (pcb->ipsec_ifp != NULL) {
3336 			// Only can set before connecting
3337 			result = EINVAL;
3338 			break;
3339 		}
3340 		lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
3341 		pcb->ipsec_use_netif = !!(*(int *)data);
3342 		lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
3343 		break;
3344 	}
3345 	case IPSEC_OPT_SLOT_SIZE: {
3346 		if (len != sizeof(u_int32_t)) {
3347 			result = EMSGSIZE;
3348 			break;
3349 		}
3350 		if (pcb->ipsec_ifp != NULL) {
3351 			// Only can set before connecting
3352 			result = EINVAL;
3353 			break;
3354 		}
3355 		u_int32_t slot_size = *(u_int32_t *)data;
3356 		if (slot_size < IPSEC_IF_MIN_SLOT_SIZE ||
3357 		    slot_size > IPSEC_IF_MAX_SLOT_SIZE) {
3358 			return EINVAL;
3359 		}
3360 		pcb->ipsec_slot_size = slot_size;
3361 		if (if_ipsec_debug != 0) {
3362 			printf("%s: IPSEC_OPT_SLOT_SIZE %u\n", __func__, slot_size);
3363 		}
3364 		break;
3365 	}
3366 	case IPSEC_OPT_NETIF_RING_SIZE: {
3367 		if (len != sizeof(u_int32_t)) {
3368 			result = EMSGSIZE;
3369 			break;
3370 		}
3371 		if (pcb->ipsec_ifp != NULL) {
3372 			// Only can set before connecting
3373 			result = EINVAL;
3374 			break;
3375 		}
3376 		u_int32_t ring_size = *(u_int32_t *)data;
3377 		if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
3378 		    ring_size > IPSEC_IF_MAX_RING_SIZE) {
3379 			return EINVAL;
3380 		}
3381 		pcb->ipsec_netif_ring_size = ring_size;
3382 		if (if_ipsec_debug != 0) {
3383 			printf("%s: IPSEC_OPT_NETIF_RING_SIZE %u\n", __func__, ring_size);
3384 		}
3385 		break;
3386 	}
3387 	case IPSEC_OPT_TX_FSW_RING_SIZE: {
3388 		if (len != sizeof(u_int32_t)) {
3389 			result = EMSGSIZE;
3390 			break;
3391 		}
3392 		if (pcb->ipsec_ifp != NULL) {
3393 			// Only can set before connecting
3394 			result = EINVAL;
3395 			break;
3396 		}
3397 		u_int32_t ring_size = *(u_int32_t *)data;
3398 		if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
3399 		    ring_size > IPSEC_IF_MAX_RING_SIZE) {
3400 			return EINVAL;
3401 		}
3402 		pcb->ipsec_tx_fsw_ring_size = ring_size;
3403 		if (if_ipsec_debug != 0) {
3404 			printf("%s: IPSEC_OPT_TX_FSW_RING_SIZE %u\n", __func__, ring_size);
3405 		}
3406 		break;
3407 	}
3408 	case IPSEC_OPT_RX_FSW_RING_SIZE: {
3409 		if (len != sizeof(u_int32_t)) {
3410 			result = EMSGSIZE;
3411 			break;
3412 		}
3413 		if (pcb->ipsec_ifp != NULL) {
3414 			// Only can set before connecting
3415 			result = EINVAL;
3416 			break;
3417 		}
3418 		u_int32_t ring_size = *(u_int32_t *)data;
3419 		if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
3420 		    ring_size > IPSEC_IF_MAX_RING_SIZE) {
3421 			return EINVAL;
3422 		}
3423 		pcb->ipsec_rx_fsw_ring_size = ring_size;
3424 		if (if_ipsec_debug != 0) {
3425 			printf("%s: IPSEC_OPT_TX_FSW_RING_SIZE %u\n", __func__, ring_size);
3426 		}
3427 		break;
3428 	}
3429 	case IPSEC_OPT_KPIPE_TX_RING_SIZE: {
3430 		if (len != sizeof(u_int32_t)) {
3431 			result = EMSGSIZE;
3432 			break;
3433 		}
3434 		if (pcb->ipsec_ifp != NULL) {
3435 			// Only can set before connecting
3436 			result = EINVAL;
3437 			break;
3438 		}
3439 		u_int32_t ring_size = *(u_int32_t *)data;
3440 		if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
3441 		    ring_size > IPSEC_IF_MAX_RING_SIZE) {
3442 			return EINVAL;
3443 		}
3444 		pcb->ipsec_kpipe_tx_ring_size = ring_size;
3445 		if (if_ipsec_debug != 0) {
3446 			printf("%s: IPSEC_OPT_KPIPE_TX_RING_SIZE %u\n", __func__, ring_size);
3447 		}
3448 		break;
3449 	}
3450 	case IPSEC_OPT_KPIPE_RX_RING_SIZE: {
3451 		if (len != sizeof(u_int32_t)) {
3452 			result = EMSGSIZE;
3453 			break;
3454 		}
3455 		if (pcb->ipsec_ifp != NULL) {
3456 			// Only can set before connecting
3457 			result = EINVAL;
3458 			break;
3459 		}
3460 		u_int32_t ring_size = *(u_int32_t *)data;
3461 		if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
3462 		    ring_size > IPSEC_IF_MAX_RING_SIZE) {
3463 			return EINVAL;
3464 		}
3465 		pcb->ipsec_kpipe_rx_ring_size = ring_size;
3466 		if (if_ipsec_debug != 0) {
3467 			printf("%s: IPSEC_OPT_KPIPE_RX_RING_SIZE %u\n", __func__, ring_size);
3468 		}
3469 		break;
3470 	}
3471 	case IPSEC_OPT_OUTPUT_DSCP_MAPPING: {
3472 		if (len != sizeof(int)) {
3473 			result = EMSGSIZE;
3474 			break;
3475 		}
3476 		if (pcb->ipsec_ifp == NULL) {
3477 			// Only can set after connecting
3478 			result = EINVAL;
3479 			break;
3480 		}
3481 
3482 		ipsec_dscp_mapping_t output_dscp_mapping = (ipsec_dscp_mapping_t)(*(int *)data);
3483 		if (output_dscp_mapping > IPSEC_DSCP_MAPPING_LEGACY) {
3484 			return EINVAL;
3485 		}
3486 
3487 		pcb->ipsec_output_dscp_mapping = output_dscp_mapping;
3488 
3489 		os_log(OS_LOG_DEFAULT, "%s IPSEC_OPT_OUTPUT_DSCP_MAPPING %s DSCP %d\n",
3490 		    __func__, pcb->ipsec_ifp->if_xname,
3491 		    pcb->ipsec_output_dscp_mapping);
3492 		break;
3493 	}
3494 
3495 #endif // IPSEC_NEXUS
3496 
3497 	default: {
3498 		result = ENOPROTOOPT;
3499 		break;
3500 	}
3501 	}
3502 
3503 	return result;
3504 }
3505 
3506 static errno_t
ipsec_ctl_getopt(__unused kern_ctl_ref kctlref,__unused u_int32_t unit,void * unitinfo,int opt,void * data,size_t * len)3507 ipsec_ctl_getopt(__unused kern_ctl_ref kctlref,
3508     __unused u_int32_t unit,
3509     void *unitinfo,
3510     int opt,
3511     void *data,
3512     size_t *len)
3513 {
3514 	errno_t result = 0;
3515 	struct ipsec_pcb *pcb = unitinfo;
3516 	if (pcb == NULL) {
3517 		return EINVAL;
3518 	}
3519 
3520 	switch (opt) {
3521 	case IPSEC_OPT_FLAGS: {
3522 		if (*len != sizeof(u_int32_t)) {
3523 			result = EMSGSIZE;
3524 		} else {
3525 			*(u_int32_t *)data = pcb->ipsec_external_flags;
3526 		}
3527 		break;
3528 	}
3529 
3530 	case IPSEC_OPT_EXT_IFDATA_STATS: {
3531 		if (*len != sizeof(int)) {
3532 			result = EMSGSIZE;
3533 		} else {
3534 			*(int *)data = (pcb->ipsec_ext_ifdata_stats) ? 1 : 0;
3535 		}
3536 		break;
3537 	}
3538 
3539 	case IPSEC_OPT_IFNAME: {
3540 		if (*len < MIN(strlen(pcb->ipsec_if_xname) + 1, sizeof(pcb->ipsec_if_xname))) {
3541 			result = EMSGSIZE;
3542 		} else {
3543 			if (pcb->ipsec_ifp == NULL) {
3544 				// Only can get after connecting
3545 				result = EINVAL;
3546 				break;
3547 			}
3548 			*len = scnprintf(data, *len, "%s", pcb->ipsec_if_xname) + 1;
3549 		}
3550 		break;
3551 	}
3552 
3553 	case IPSEC_OPT_OUTPUT_TRAFFIC_CLASS: {
3554 		if (*len != sizeof(int)) {
3555 			result = EMSGSIZE;
3556 		} else {
3557 			*(int *)data = so_svc2tc(pcb->ipsec_output_service_class);
3558 		}
3559 		break;
3560 	}
3561 
3562 #if IPSEC_NEXUS
3563 
3564 	case IPSEC_OPT_ENABLE_CHANNEL: {
3565 		if (*len != sizeof(int)) {
3566 			result = EMSGSIZE;
3567 		} else {
3568 			lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
3569 			*(int *)data = pcb->ipsec_kpipe_count;
3570 			lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
3571 		}
3572 		break;
3573 	}
3574 
3575 	case IPSEC_OPT_CHANNEL_BIND_PID: {
3576 		if (*len != sizeof(pid_t)) {
3577 			result = EMSGSIZE;
3578 		} else {
3579 			lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
3580 			*(pid_t *)data = pcb->ipsec_kpipe_pid;
3581 			lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
3582 		}
3583 		break;
3584 	}
3585 
3586 	case IPSEC_OPT_CHANNEL_BIND_UUID: {
3587 		if (*len != sizeof(uuid_t)) {
3588 			result = EMSGSIZE;
3589 		} else {
3590 			lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
3591 			uuid_copy(*((uuid_t *)data), pcb->ipsec_kpipe_proc_uuid);
3592 			lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
3593 		}
3594 		break;
3595 	}
3596 
3597 	case IPSEC_OPT_ENABLE_FLOWSWITCH: {
3598 		if (*len != sizeof(int)) {
3599 			result = EMSGSIZE;
3600 		} else {
3601 			*(int *)data = if_check_netagent(pcb->ipsec_ifp, pcb->ipsec_nx.fsw_agent);
3602 		}
3603 		break;
3604 	}
3605 
3606 	case IPSEC_OPT_ENABLE_NETIF: {
3607 		if (*len != sizeof(int)) {
3608 			result = EMSGSIZE;
3609 		} else {
3610 			lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
3611 			*(int *)data = !!pcb->ipsec_use_netif;
3612 			lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
3613 		}
3614 		break;
3615 	}
3616 
3617 	case IPSEC_OPT_GET_CHANNEL_UUID: {
3618 		lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
3619 		if (!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
3620 			result = ENXIO;
3621 		} else if (*len != sizeof(uuid_t) * pcb->ipsec_kpipe_count) {
3622 			result = EMSGSIZE;
3623 		} else {
3624 			for (unsigned int i = 0; i < pcb->ipsec_kpipe_count; i++) {
3625 				uuid_copy(((uuid_t *)data)[i], pcb->ipsec_kpipe_uuid[i]);
3626 			}
3627 		}
3628 		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
3629 		break;
3630 	}
3631 
3632 	case IPSEC_OPT_INPUT_FRAG_SIZE: {
3633 		if (*len != sizeof(u_int32_t)) {
3634 			result = EMSGSIZE;
3635 		} else {
3636 			*(u_int32_t *)data = pcb->ipsec_input_frag_size;
3637 		}
3638 		break;
3639 	}
3640 	case IPSEC_OPT_SLOT_SIZE: {
3641 		if (*len != sizeof(u_int32_t)) {
3642 			result = EMSGSIZE;
3643 		} else {
3644 			*(u_int32_t *)data = pcb->ipsec_slot_size;
3645 		}
3646 		break;
3647 	}
3648 	case IPSEC_OPT_NETIF_RING_SIZE: {
3649 		if (*len != sizeof(u_int32_t)) {
3650 			result = EMSGSIZE;
3651 		} else {
3652 			*(u_int32_t *)data = pcb->ipsec_netif_ring_size;
3653 		}
3654 		break;
3655 	}
3656 	case IPSEC_OPT_TX_FSW_RING_SIZE: {
3657 		if (*len != sizeof(u_int32_t)) {
3658 			result = EMSGSIZE;
3659 		} else {
3660 			*(u_int32_t *)data = pcb->ipsec_tx_fsw_ring_size;
3661 		}
3662 		break;
3663 	}
3664 	case IPSEC_OPT_RX_FSW_RING_SIZE: {
3665 		if (*len != sizeof(u_int32_t)) {
3666 			result = EMSGSIZE;
3667 		} else {
3668 			*(u_int32_t *)data = pcb->ipsec_rx_fsw_ring_size;
3669 		}
3670 		break;
3671 	}
3672 	case IPSEC_OPT_KPIPE_TX_RING_SIZE: {
3673 		if (*len != sizeof(u_int32_t)) {
3674 			result = EMSGSIZE;
3675 		} else {
3676 			*(u_int32_t *)data = pcb->ipsec_kpipe_tx_ring_size;
3677 		}
3678 		break;
3679 	}
3680 	case IPSEC_OPT_KPIPE_RX_RING_SIZE: {
3681 		if (*len != sizeof(u_int32_t)) {
3682 			result = EMSGSIZE;
3683 		} else {
3684 			*(u_int32_t *)data = pcb->ipsec_kpipe_rx_ring_size;
3685 		}
3686 		break;
3687 	}
3688 
3689 #endif // IPSEC_NEXUS
3690 
3691 	default: {
3692 		result = ENOPROTOOPT;
3693 		break;
3694 	}
3695 	}
3696 
3697 	return result;
3698 }
3699 
3700 /* Network Interface functions */
/*
 * ipsec_output - if_output handler for the ipsec virtual interface.
 *
 * Validates the outbound packet, runs it through the IPsec transform
 * (ipsec4/6_interface_output), then hands the encapsulated result to
 * ip_output()/ip6_output() with flow-advisory out-args. Ownership of
 * `data` always transfers: on success it is consumed by the IP output
 * path (or absorbed by cross-family tunneling), on failure it is freed
 * in ipsec_output_err.
 *
 * Returns 0 on success, ENOBUFS when flow control was signalled (output
 * is also disabled on the interface), or an errno on validation or
 * encryption failure.
 */
static errno_t
ipsec_output(ifnet_t interface,
    mbuf_t data)
{
	struct ipsec_pcb *pcb = ifnet_softc(interface);
	struct ipsec_output_state ipsec_state;
	struct route ro;
	struct route_in6 ro6;
	size_t length;
	struct ip *ip = NULL;
	struct ip6_hdr *ip6 = NULL;
	struct ip_out_args ipoa;
	struct ip6_out_args ip6oa;
	int error = 0;
	u_int ip_version = 0;
	int flags = 0;
	struct flowadv *adv = NULL;

	// Make sure this packet isn't looping through the interface
	if (necp_get_last_interface_index_from_packet(data) == interface->if_index) {
		error = EINVAL;
		goto ipsec_output_err;
	}

	// Mark the interface so NECP can evaluate tunnel policy
	necp_mark_packet_from_interface(data, interface);

	/* The version nibble is read below via mtod(); require a whole IPv4
	 * header in the first mbuf before touching it. */
	if (data->m_len < sizeof(*ip)) {
		os_log_error(OS_LOG_DEFAULT, "ipsec_output: first mbuf length shorter than IP header length: %d.\n", data->m_len);
		IPSEC_STAT_INCREMENT(ipsecstat.out_inval);
		error = EINVAL;
		goto ipsec_output_err;
	}

	ip = mtod(data, struct ip *);
	ip_version = ip->ip_v;

	switch (ip_version) {
	case 4: {
		u_int8_t ip_hlen = 0;
#ifdef _IP_VHL
		ip_hlen = _IP_VHL_HL(ip->ip_vhl) << 2;
#else
		ip_hlen = (uint8_t)(ip->ip_hl << 2);
#endif
		if (ip_hlen < sizeof(*ip)) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_output: Bad ip header length %d.\n", ip_hlen);
			IPSEC_STAT_INCREMENT(ipsecstat.out_inval);
			error = EINVAL;
			goto ipsec_output_err;
		}
#if IPSEC_NEXUS
		/* In netif (Skywalk) mode the nexus path does its own tap. */
		if (!pcb->ipsec_use_netif)
#endif // IPSEC_NEXUS
		{
			int af = AF_INET;
			bpf_tap_out(pcb->ipsec_ifp, DLT_NULL, data, &af, sizeof(af));
		}

		/* Apply encryption */
		memset(&ipsec_state, 0, sizeof(ipsec_state));
		ipsec_state.m = data;
		ipsec_state.dst = (struct sockaddr *)&ip->ip_dst;
		memset(&ipsec_state.ro, 0, sizeof(ipsec_state.ro));
		ipsec_state.dscp_mapping = pcb->ipsec_output_dscp_mapping;

		error = ipsec4_interface_output(&ipsec_state, interface);
		/* Tunneled in IPv6 - packet is gone */
		if (error == 0 && ipsec_state.tunneled == 6) {
			goto done;
		}

		/* The transform may have replaced the mbuf chain. */
		data = ipsec_state.m;
		if (error || data == NULL) {
			if (error) {
				os_log_error(OS_LOG_DEFAULT, "ipsec_output: ipsec4_output error %d.\n", error);
			}
			goto ipsec_output_err;
		}

		/* Set traffic class, set flow */
		m_set_service_class(data, pcb->ipsec_output_service_class);
		data->m_pkthdr.pkt_flowsrc = FLOWSRC_IFNET;
#if SKYWALK
		data->m_pkthdr.pkt_mpriv_srcid = interface->if_flowhash;
#else /* !SKYWALK */
		data->m_pkthdr.pkt_flowid = interface->if_flowhash;
#endif /* !SKYWALK */
		data->m_pkthdr.pkt_proto = ip->ip_p;
		data->m_pkthdr.pkt_flags = (PKTF_FLOW_ID | PKTF_FLOW_ADV | PKTF_FLOW_LOCALSRC);

		/* Flip endian-ness for ip_output */
		ip = mtod(data, struct ip *);
		NTOHS(ip->ip_len);
		NTOHS(ip->ip_off);

		/* Increment statistics */
		length = mbuf_pkthdr_len(data);
		ifnet_stat_increment_out(interface, 1, (uint16_t)length, 0);

		/* Send to ip_output */
		memset(&ro, 0, sizeof(ro));

		flags = (IP_OUTARGS |   /* Passing out args to specify interface */
		    IP_NOIPSEC);                        /* To ensure the packet doesn't go through ipsec twice */

		memset(&ipoa, 0, sizeof(ipoa));
		ipoa.ipoa_flowadv.code = 0;
		ipoa.ipoa_flags = IPOAF_SELECT_SRCIF | IPOAF_BOUND_SRCADDR;
		if (ipsec_state.outgoing_if) {
			ipoa.ipoa_boundif = ipsec_state.outgoing_if;
			ipoa.ipoa_flags |= IPOAF_BOUND_IF;
		}
		ipsec_set_ipoa_for_interface(pcb->ipsec_ifp, &ipoa);

		adv = &ipoa.ipoa_flowadv;

		/* ip_output consumes the mbuf regardless of its return value. */
		(void)ip_output(data, NULL, &ro, flags, NULL, &ipoa);
		data = NULL;

		/* Flow advisory: stop dequeueing until the flow is resumed. */
		if (adv->code == FADV_FLOW_CONTROLLED || adv->code == FADV_SUSPENDED) {
			error = ENOBUFS;
			ifnet_disable_output(interface);
		}

		goto done;
	}
	case 6: {
		if (data->m_len < sizeof(*ip6)) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_output: first mbuf length shorter than IPv6 header length: %d.\n", data->m_len);
			IPSEC_STAT_INCREMENT(ipsec6stat.out_inval);
			error = EINVAL;
			goto ipsec_output_err;
		}
#if IPSEC_NEXUS
		if (!pcb->ipsec_use_netif)
#endif // IPSEC_NEXUS
		{
			int af = AF_INET6;
			bpf_tap_out(pcb->ipsec_ifp, DLT_NULL, data, &af, sizeof(af));
		}

		data = ipsec6_splithdr(data);
		if (data == NULL) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_output: ipsec6_splithdr returned NULL\n");
			/* NOTE(review): `error` is still 0 here, so this failure
			 * returns success to the caller — confirm intentional. */
			goto ipsec_output_err;
		}

		ip6 = mtod(data, struct ip6_hdr *);

		memset(&ipsec_state, 0, sizeof(ipsec_state));
		ipsec_state.m = data;
		ipsec_state.dst = (struct sockaddr *)&ip6->ip6_dst;
		memset(&ipsec_state.ro, 0, sizeof(ipsec_state.ro));
		ipsec_state.dscp_mapping = pcb->ipsec_output_dscp_mapping;

		error = ipsec6_interface_output(&ipsec_state, interface, &ip6->ip6_nxt, ipsec_state.m);
		if (error == 0 && ipsec_state.tunneled == 4) {          /* tunneled in IPv4 - packet is gone */
			goto done;
		}
		data = ipsec_state.m;
		if (error || data == NULL) {
			if (error) {
				os_log_error(OS_LOG_DEFAULT, "ipsec_output: ipsec6_output error %d\n", error);
			}
			goto ipsec_output_err;
		}

		/* Set traffic class, set flow */
		m_set_service_class(data, pcb->ipsec_output_service_class);
		data->m_pkthdr.pkt_flowsrc = FLOWSRC_IFNET;
#if SKYWALK
		data->m_pkthdr.pkt_mpriv_srcid = interface->if_flowhash;
#else /* !SKYWALK */
		data->m_pkthdr.pkt_flowid = interface->if_flowhash;
#endif /* !SKYWALK */
		data->m_pkthdr.pkt_proto = ip6->ip6_nxt;
		data->m_pkthdr.pkt_flags = (PKTF_FLOW_ID | PKTF_FLOW_ADV | PKTF_FLOW_LOCALSRC);

		/* Increment statistics */
		length = mbuf_pkthdr_len(data);
		ifnet_stat_increment_out(interface, 1, (uint16_t)length, 0);

		/* Send to ip6_output */
		memset(&ro6, 0, sizeof(ro6));

		flags = IPV6_OUTARGS;

		memset(&ip6oa, 0, sizeof(ip6oa));
		ip6oa.ip6oa_flowadv.code = 0;
		ip6oa.ip6oa_flags = IP6OAF_SELECT_SRCIF | IP6OAF_BOUND_SRCADDR;
		if (ipsec_state.outgoing_if) {
			ip6oa.ip6oa_boundif = ipsec_state.outgoing_if;
			ip6oa.ip6oa_flags |= IP6OAF_BOUND_IF;
			ip6_output_setsrcifscope(data, ipsec_state.outgoing_if, NULL);
			ip6_output_setdstifscope(data, ipsec_state.outgoing_if, NULL);
		} else {
			ip6_output_setsrcifscope(data, IFSCOPE_UNKNOWN, NULL);
			ip6_output_setdstifscope(data, IFSCOPE_UNKNOWN, NULL);
		}
		ipsec_set_ip6oa_for_interface(pcb->ipsec_ifp, &ip6oa);

		adv = &ip6oa.ip6oa_flowadv;

		/* ip6_output consumes the mbuf regardless of its return value. */
		(void) ip6_output(data, NULL, &ro6, flags, NULL, NULL, &ip6oa);
		data = NULL;

		if (adv->code == FADV_FLOW_CONTROLLED || adv->code == FADV_SUSPENDED) {
			error = ENOBUFS;
			ifnet_disable_output(interface);
		}

		goto done;
	}
	default: {
		os_log_error(OS_LOG_DEFAULT, "ipsec_output: Received unknown packet version %d.\n", ip_version);
		error = EINVAL;
		goto ipsec_output_err;
	}
	}

done:
	return error;

ipsec_output_err:
	if (data) {
		mbuf_freem(data);
	}
	goto done;
}
3931 
3932 static void
ipsec_start(ifnet_t interface)3933 ipsec_start(ifnet_t     interface)
3934 {
3935 	mbuf_t data;
3936 	struct ipsec_pcb *pcb = ifnet_softc(interface);
3937 
3938 	VERIFY(pcb != NULL);
3939 	for (;;) {
3940 		if (ifnet_dequeue(interface, &data) != 0) {
3941 			break;
3942 		}
3943 		if (ipsec_output(interface, data) != 0) {
3944 			break;
3945 		}
3946 	}
3947 }
3948 
3949 /* Network Interface functions */
3950 static errno_t
ipsec_demux(__unused ifnet_t interface,mbuf_t data,__unused char * frame_header,protocol_family_t * protocol)3951 ipsec_demux(__unused ifnet_t    interface,
3952     mbuf_t                          data,
3953     __unused char           *frame_header,
3954     protocol_family_t       *protocol)
3955 {
3956 	struct ip *ip;
3957 	u_int ip_version;
3958 
3959 	while (data != NULL && mbuf_len(data) < 1) {
3960 		data = mbuf_next(data);
3961 	}
3962 
3963 	if (data == NULL) {
3964 		return ENOENT;
3965 	}
3966 
3967 	ip = mtod(data, struct ip *);
3968 	ip_version = ip->ip_v;
3969 
3970 	switch (ip_version) {
3971 	case 4:
3972 		*protocol = PF_INET;
3973 		return 0;
3974 	case 6:
3975 		*protocol = PF_INET6;
3976 		return 0;
3977 	default:
3978 		*protocol = PF_UNSPEC;
3979 		break;
3980 	}
3981 
3982 	return 0;
3983 }
3984 
3985 static errno_t
ipsec_add_proto(__unused ifnet_t interface,protocol_family_t protocol,__unused const struct ifnet_demux_desc * demux_array,__unused u_int32_t demux_count)3986 ipsec_add_proto(__unused ifnet_t                                                interface,
3987     protocol_family_t                                               protocol,
3988     __unused const struct ifnet_demux_desc  *demux_array,
3989     __unused u_int32_t                                              demux_count)
3990 {
3991 	switch (protocol) {
3992 	case PF_INET:
3993 		return 0;
3994 	case PF_INET6:
3995 		return 0;
3996 	default:
3997 		break;
3998 	}
3999 
4000 	return ENOPROTOOPT;
4001 }
4002 
4003 static errno_t
ipsec_del_proto(__unused ifnet_t interface,__unused protocol_family_t protocol)4004 ipsec_del_proto(__unused ifnet_t                        interface,
4005     __unused protocol_family_t      protocol)
4006 {
4007 	return 0;
4008 }
4009 
/*
 * ipsec_ioctl - interface ioctl handler.
 *
 * Supports SIOCSIFMTU (with a slot-size ceiling in netif/Skywalk mode),
 * SIOCSIFFLAGS (handled by ifioctl() itself) and SIOCSIFSUBFAMILY.
 * Everything else returns EOPNOTSUPP.
 */
static errno_t
ipsec_ioctl(ifnet_t interface,
    u_long command,
    void *data)
{
#if IPSEC_NEXUS
	struct ipsec_pcb *pcb = ifnet_softc(interface);
#endif
	errno_t result = 0;

	switch (command) {
	case SIOCSIFMTU: {
#if IPSEC_NEXUS
		if (pcb->ipsec_use_netif) {
			// Make sure we can fit packets in the channel buffers
			if (((uint64_t)((struct ifreq*)data)->ifr_mtu) > pcb->ipsec_slot_size) {
				result = EINVAL;
			} else {
				ifnet_set_mtu(interface, (uint32_t)((struct ifreq*)data)->ifr_mtu);
			}
		} else
#endif // IPSEC_NEXUS
		{
			/* Non-netif mode: no buffer constraint, accept the MTU as-is. */
			ifnet_set_mtu(interface, ((struct ifreq*)data)->ifr_mtu);
		}
		break;
	}

	case SIOCSIFFLAGS:
		/* ifioctl() takes care of it */
		break;

	case SIOCSIFSUBFAMILY: {
		uint32_t subfamily;

		/* Map the request's IFRTYPE_SUBFAMILY_* to IFNET_SUBFAMILY_*. */
		subfamily = ((struct ifreq*)data)->ifr_type.ift_subfamily;
		switch (subfamily) {
		case IFRTYPE_SUBFAMILY_BLUETOOTH:
			interface->if_subfamily = IFNET_SUBFAMILY_BLUETOOTH;
			break;
		case IFRTYPE_SUBFAMILY_WIFI:
			interface->if_subfamily = IFNET_SUBFAMILY_WIFI;
			break;
		case IFRTYPE_SUBFAMILY_QUICKRELAY:
			interface->if_subfamily = IFNET_SUBFAMILY_QUICKRELAY;
			break;
		case IFRTYPE_SUBFAMILY_DEFAULT:
			interface->if_subfamily = IFNET_SUBFAMILY_DEFAULT;
			break;
		default:
			result = EINVAL;
			break;
		}
		break;
	}

	default:
		result = EOPNOTSUPP;
	}

	return result;
}
4072 
/*
 * ipsec_detached - ifnet detach-complete callback.
 *
 * Drops an ifnet reference, frees the pcb, and disposes of the ifnet.
 * NOTE(review): ipsec_free_pcb() and ifnet_dispose() run with ipsec_lock
 * held and in this exact order — do not reorder without checking the
 * pcb allocator's locking assumptions.
 */
static void
ipsec_detached(ifnet_t interface)
{
	struct ipsec_pcb *pcb = ifnet_softc(interface);

	(void)ifnet_release(interface);
	lck_mtx_lock(&ipsec_lock);
	ipsec_free_pcb(pcb, true);
	(void)ifnet_dispose(interface);
	lck_mtx_unlock(&ipsec_lock);
}
4084 
4085 /* Protocol Handlers */
4086 
/*
 * ipsec_proto_input - deliver an inbound packet to the protocol stack.
 *
 * Stamps the receive interface, taps BPF/pktap in non-netif mode, and
 * hands the mbuf to proto_input(). Ownership of `m` always transfers:
 * proto_input() consumes it on success, and it is freed here on failure.
 * Always returns 0.
 */
static errno_t
ipsec_proto_input(ifnet_t interface,
    protocol_family_t     protocol,
    mbuf_t m,
    __unused char *frame_header)
{
	mbuf_pkthdr_setrcvif(m, interface);

#if IPSEC_NEXUS
	struct ipsec_pcb *pcb = ifnet_softc(interface);
	/* Netif (Skywalk) mode taps packets elsewhere in the nexus path. */
	if (!pcb->ipsec_use_netif)
#endif // IPSEC_NEXUS
	{
		uint32_t af = 0;
		struct ip *ip = mtod(m, struct ip *);
		if (ip->ip_v == 4) {
			af = AF_INET;
		} else if (ip->ip_v == 6) {
			af = AF_INET6;
		}
		bpf_tap_in(interface, DLT_NULL, m, &af, sizeof(af));
		pktap_input(interface, protocol, m, NULL);
	}

	/* Capture the length before proto_input(): on success the mbuf is
	 * consumed and must not be touched afterwards. */
	int32_t pktlen = m->m_pkthdr.len;
	if (proto_input(protocol, m) != 0) {
		ifnet_stat_increment_in(interface, 0, 0, 1);
		m_freem(m);
	} else {
		ifnet_stat_increment_in(interface, 1, pktlen, 0);
	}

	return 0;
}
4121 
4122 static errno_t
ipsec_proto_pre_output(__unused ifnet_t interface,protocol_family_t protocol,__unused mbuf_t * packet,__unused const struct sockaddr * dest,__unused void * route,__unused char * frame_type,__unused char * link_layer_dest)4123 ipsec_proto_pre_output(__unused ifnet_t interface,
4124     protocol_family_t    protocol,
4125     __unused mbuf_t              *packet,
4126     __unused const struct sockaddr *dest,
4127     __unused void *route,
4128     __unused char *frame_type,
4129     __unused char *link_layer_dest)
4130 {
4131 	*(protocol_family_t *)(void *)frame_type = protocol;
4132 	return 0;
4133 }
4134 
4135 static errno_t
ipsec_attach_proto(ifnet_t interface,protocol_family_t protocol)4136 ipsec_attach_proto(ifnet_t                              interface,
4137     protocol_family_t    protocol)
4138 {
4139 	struct ifnet_attach_proto_param proto;
4140 	errno_t                                                 result;
4141 
4142 	bzero(&proto, sizeof(proto));
4143 	proto.input = ipsec_proto_input;
4144 	proto.pre_output = ipsec_proto_pre_output;
4145 
4146 	result = ifnet_attach_protocol(interface, protocol, &proto);
4147 	if (result != 0 && result != EEXIST) {
4148 		os_log_error(OS_LOG_DEFAULT, "ipsec_attach_inet - ifnet_attach_protocol %d failed: %d\n",
4149 		    protocol, result);
4150 	}
4151 
4152 	return result;
4153 }
4154 
/*
 * ipsec_inject_inbound_packet - inject a decrypted packet (or chain) into
 * the interface's input path.
 *
 * Netif (Skywalk) mode: appends the chain to pcb->ipsec_input_chain under
 * ipsec_input_chain_lock (with ipsec_pcb_lock held shared), then kicks the
 * RX ring so the nexus drains it. Returns ENXIO if the data path is
 * stopped and ENOSPC if the pending-input backlog limit is exceeded; in
 * those cases the caller retains ownership of `packet`.
 *
 * Non-netif mode: classifies the packet and delivers it synchronously via
 * ipsec_proto_input(), which takes ownership.
 */
errno_t
ipsec_inject_inbound_packet(ifnet_t     interface,
    mbuf_t      packet)
{
#if IPSEC_NEXUS
	struct ipsec_pcb *pcb = ifnet_softc(interface);

	if (pcb->ipsec_use_netif) {
		/* Take a data-path reference; fails once teardown has begun. */
		if (!ipsec_data_move_begin(pcb)) {
			os_log_info(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__,
			    if_name(pcb->ipsec_ifp));
			return ENXIO;
		}

		lck_rw_lock_shared(&pcb->ipsec_pcb_lock);

		lck_mtx_lock(&pcb->ipsec_input_chain_lock);

		/* Bound the backlog so a stalled reader can't pin memory. */
		if (pcb->ipsec_input_chain_count > (u_int32_t)if_ipsec_max_pending_input) {
			lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
			lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
			ipsec_data_move_end(pcb);
			return ENOSPC;
		}

		/* Splice the incoming chain onto the tail of the input chain. */
		if (pcb->ipsec_input_chain != NULL) {
			pcb->ipsec_input_chain_last->m_nextpkt = packet;
		} else {
			pcb->ipsec_input_chain = packet;
		}
		pcb->ipsec_input_chain_count++;
		/* Walk to the end of the injected chain, counting each packet,
		 * so ipsec_input_chain_last tracks the true tail. */
		while (packet->m_nextpkt) {
			VERIFY(packet != packet->m_nextpkt);
			packet = packet->m_nextpkt;
			pcb->ipsec_input_chain_count++;
		}
		pcb->ipsec_input_chain_last = packet;
		lck_mtx_unlock(&pcb->ipsec_input_chain_lock);

		/* Snapshot the ring under the pcb lock, notify after dropping it. */
		kern_channel_ring_t rx_ring = pcb->ipsec_netif_rxring[0];
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);

		if (rx_ring != NULL) {
			kern_channel_notify(rx_ring, 0);
		}

		ipsec_data_move_end(pcb);
		return 0;
	} else
#endif // IPSEC_NEXUS
	{
		errno_t error;
		protocol_family_t protocol;
		if ((error = ipsec_demux(interface, packet, NULL, &protocol)) != 0) {
			return error;
		}

		return ipsec_proto_input(interface, protocol, packet, NULL);
	}
}
4215 
4216 void
ipsec_set_pkthdr_for_interface(ifnet_t interface,mbuf_t packet,int family,uint32_t flowid)4217 ipsec_set_pkthdr_for_interface(ifnet_t interface, mbuf_t packet, int family,
4218     uint32_t flowid)
4219 {
4220 #pragma unused (flowid)
4221 	if (packet != NULL && interface != NULL) {
4222 		struct ipsec_pcb *pcb = ifnet_softc(interface);
4223 		if (pcb != NULL) {
4224 			/* Set traffic class, set flow */
4225 			m_set_service_class(packet, pcb->ipsec_output_service_class);
4226 			packet->m_pkthdr.pkt_flowsrc = FLOWSRC_IFNET;
4227 #if SKYWALK
4228 			packet->m_pkthdr.pkt_mpriv_srcid = interface->if_flowhash;
4229 			packet->m_pkthdr.pkt_flowid = flowid;
4230 #else /* !SKYWALK */
4231 			packet->m_pkthdr.pkt_flowid = interface->if_flowhash;
4232 #endif /* !SKYWALK */
4233 			if (family == AF_INET) {
4234 				struct ip *ip = mtod(packet, struct ip *);
4235 				packet->m_pkthdr.pkt_proto = ip->ip_p;
4236 			} else if (family == AF_INET6) {
4237 				struct ip6_hdr *ip6 = mtod(packet, struct ip6_hdr *);
4238 				packet->m_pkthdr.pkt_proto = ip6->ip6_nxt;
4239 			}
4240 			packet->m_pkthdr.pkt_flags = (PKTF_FLOW_ID | PKTF_FLOW_ADV | PKTF_FLOW_LOCALSRC);
4241 		}
4242 	}
4243 }
4244 
4245 void
ipsec_set_ipoa_for_interface(ifnet_t interface,struct ip_out_args * ipoa)4246 ipsec_set_ipoa_for_interface(ifnet_t interface, struct ip_out_args *ipoa)
4247 {
4248 	struct ipsec_pcb *pcb;
4249 
4250 	if (interface == NULL || ipoa == NULL) {
4251 		return;
4252 	}
4253 	pcb = ifnet_softc(interface);
4254 
4255 	if (net_qos_policy_restricted == 0) {
4256 		ipoa->ipoa_flags |= IPOAF_QOSMARKING_ALLOWED;
4257 		ipoa->ipoa_sotc = so_svc2tc(pcb->ipsec_output_service_class);
4258 	} else if (pcb->ipsec_output_service_class != MBUF_SC_VO ||
4259 	    net_qos_policy_restrict_avapps != 0) {
4260 		ipoa->ipoa_flags &= ~IPOAF_QOSMARKING_ALLOWED;
4261 	} else {
4262 		ipoa->ipoa_flags |= IP6OAF_QOSMARKING_ALLOWED;
4263 		ipoa->ipoa_sotc = SO_TC_VO;
4264 	}
4265 }
4266 
4267 void
ipsec_set_ip6oa_for_interface(ifnet_t interface,struct ip6_out_args * ip6oa)4268 ipsec_set_ip6oa_for_interface(ifnet_t interface, struct ip6_out_args *ip6oa)
4269 {
4270 	struct ipsec_pcb *pcb;
4271 
4272 	if (interface == NULL || ip6oa == NULL) {
4273 		return;
4274 	}
4275 	pcb = ifnet_softc(interface);
4276 
4277 	if (net_qos_policy_restricted == 0) {
4278 		ip6oa->ip6oa_flags |= IPOAF_QOSMARKING_ALLOWED;
4279 		ip6oa->ip6oa_sotc = so_svc2tc(pcb->ipsec_output_service_class);
4280 	} else if (pcb->ipsec_output_service_class != MBUF_SC_VO ||
4281 	    net_qos_policy_restrict_avapps != 0) {
4282 		ip6oa->ip6oa_flags &= ~IPOAF_QOSMARKING_ALLOWED;
4283 	} else {
4284 		ip6oa->ip6oa_flags |= IP6OAF_QOSMARKING_ALLOWED;
4285 		ip6oa->ip6oa_sotc = SO_TC_VO;
4286 	}
4287 }
4288 
4289 static boolean_t
ipsec_data_move_begin(struct ipsec_pcb * pcb)4290 ipsec_data_move_begin(struct ipsec_pcb *pcb)
4291 {
4292 	boolean_t ret = 0;
4293 
4294 	lck_mtx_lock_spin(&pcb->ipsec_pcb_data_move_lock);
4295 	if ((ret = IPSEC_IS_DATA_PATH_READY(pcb))) {
4296 		pcb->ipsec_pcb_data_move++;
4297 	}
4298 	lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
4299 
4300 	return ret;
4301 }
4302 
4303 static void
ipsec_data_move_end(struct ipsec_pcb * pcb)4304 ipsec_data_move_end(struct ipsec_pcb *pcb)
4305 {
4306 	lck_mtx_lock_spin(&pcb->ipsec_pcb_data_move_lock);
4307 	VERIFY(pcb->ipsec_pcb_data_move > 0);
4308 	/*
4309 	 * if there's no more thread moving data, wakeup any
4310 	 * drainers that's blocked waiting for this.
4311 	 */
4312 	if (--pcb->ipsec_pcb_data_move == 0 && pcb->ipsec_pcb_drainers > 0) {
4313 		wakeup(&(pcb->ipsec_pcb_data_move));
4314 	}
4315 	lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
4316 }
4317 
/*
 * ipsec_data_move_drain - block until every data-path thread has exited.
 *
 * The caller must already have cleared the data-path-ready flag (see
 * ipsec_wait_data_move_drain()), so no new movers can enter; this sleeps
 * on the mover count until it reaches zero. Woken by
 * ipsec_data_move_end().
 */
static void
ipsec_data_move_drain(struct ipsec_pcb *pcb)
{
	lck_mtx_lock(&pcb->ipsec_pcb_data_move_lock);
	/* data path must already be marked as not ready */
	VERIFY(!IPSEC_IS_DATA_PATH_READY(pcb));
	pcb->ipsec_pcb_drainers++;
	/* msleep() drops and reacquires the lock around each wait. */
	while (pcb->ipsec_pcb_data_move != 0) {
		(void)msleep(&(pcb->ipsec_pcb_data_move), &pcb->ipsec_pcb_data_move_lock,
		    (PZERO - 1), __func__, NULL);
	}
	VERIFY(!IPSEC_IS_DATA_PATH_READY(pcb));
	VERIFY(pcb->ipsec_pcb_drainers > 0);
	pcb->ipsec_pcb_drainers--;
	lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
}
4334 
4335 static void
ipsec_wait_data_move_drain(struct ipsec_pcb * pcb)4336 ipsec_wait_data_move_drain(struct ipsec_pcb *pcb)
4337 {
4338 	/*
4339 	 * Mark the data path as not usable.
4340 	 */
4341 	lck_mtx_lock(&pcb->ipsec_pcb_data_move_lock);
4342 	IPSEC_CLR_DATA_PATH_READY(pcb);
4343 	lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
4344 
4345 	/* Wait until all threads in the data paths are done. */
4346 	ipsec_data_move_drain(pcb);
4347 }
4348