xref: /xnu-10063.121.3/bsd/net/if_ipsec.c (revision 2c2f96dc2b9a4408a43d3150ae9c105355ca3daa)
1 /*
2  * Copyright (c) 2012-2024 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 
30 #include <sys/systm.h>
31 #include <sys/kern_control.h>
32 #include <net/kpi_protocol.h>
33 #include <net/kpi_interface.h>
34 #include <sys/socket.h>
35 #include <sys/socketvar.h>
36 #include <net/if.h>
37 #include <net/if_types.h>
38 #include <net/bpf.h>
39 #include <net/if_ipsec.h>
40 #include <sys/mbuf.h>
41 #include <sys/sockio.h>
42 #include <netinet/in.h>
43 #include <netinet/ip6.h>
44 #include <netinet6/in6_var.h>
45 #include <netinet6/ip6_var.h>
46 #include <sys/kauth.h>
47 #include <netinet6/ipsec.h>
48 #include <netinet6/ipsec6.h>
49 #include <netinet6/esp.h>
50 #include <netinet6/esp6.h>
51 #include <netinet/ip.h>
52 #include <net/flowadv.h>
53 #include <net/necp.h>
54 #include <netkey/key.h>
55 #include <net/pktap.h>
56 #include <kern/zalloc.h>
57 #include <os/log.h>
58 
59 #if SKYWALK
60 #include <skywalk/os_skywalk_private.h>
61 #include <skywalk/nexus/flowswitch/nx_flowswitch.h>
62 #include <skywalk/nexus/netif/nx_netif.h>
63 #define IPSEC_NEXUS 1
64 #else // SKYWALK
65 #define IPSEC_NEXUS 0
66 #endif // SKYWALK
67 
68 extern int net_qos_policy_restricted;
69 extern int net_qos_policy_restrict_avapps;
70 
71 /* Kernel Control functions */
72 static errno_t  ipsec_ctl_setup(u_int32_t *unit, void **unitinfo);
73 static errno_t  ipsec_ctl_bind(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
74     void **unitinfo);
75 static errno_t  ipsec_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
76     void **unitinfo);
77 static errno_t  ipsec_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t unit,
78     void *unitinfo);
79 static errno_t  ipsec_ctl_send(kern_ctl_ref kctlref, u_int32_t unit,
80     void *unitinfo, mbuf_t m, int flags);
81 static errno_t  ipsec_ctl_getopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo,
82     int opt, void *data, size_t *len);
83 static errno_t  ipsec_ctl_setopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo,
84     int opt, void *data, size_t len);
85 
86 /* Network Interface functions */
87 static void     ipsec_start(ifnet_t     interface);
88 static errno_t  ipsec_output(ifnet_t interface, mbuf_t data);
89 static errno_t  ipsec_demux(ifnet_t interface, mbuf_t data, char *frame_header,
90     protocol_family_t *protocol);
91 static errno_t  ipsec_add_proto(ifnet_t interface, protocol_family_t protocol,
92     const struct ifnet_demux_desc *demux_array,
93     u_int32_t demux_count);
94 static errno_t  ipsec_del_proto(ifnet_t interface, protocol_family_t protocol);
95 static errno_t  ipsec_ioctl(ifnet_t interface, u_long cmd, void *data);
96 static void             ipsec_detached(ifnet_t interface);
97 
98 /* Protocol handlers */
99 static errno_t  ipsec_attach_proto(ifnet_t interface, protocol_family_t proto);
100 static errno_t  ipsec_proto_input(ifnet_t interface, protocol_family_t protocol,
101     mbuf_t m, char *frame_header);
102 static errno_t ipsec_proto_pre_output(ifnet_t interface, protocol_family_t protocol,
103     mbuf_t *packet, const struct sockaddr *dest, void *route,
104     char *frame_type, char *link_layer_dest);
105 
106 static kern_ctl_ref     ipsec_kctlref;
107 static LCK_ATTR_DECLARE(ipsec_lck_attr, 0, 0);
108 static LCK_GRP_DECLARE(ipsec_lck_grp, "ipsec");
109 static LCK_MTX_DECLARE_ATTR(ipsec_lock, &ipsec_lck_grp, &ipsec_lck_attr);
110 
111 #if IPSEC_NEXUS
112 
113 SYSCTL_DECL(_net_ipsec);
114 SYSCTL_NODE(_net, OID_AUTO, ipsec, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "IPsec");
115 static int if_ipsec_verify_interface_creation = 0;
116 SYSCTL_INT(_net_ipsec, OID_AUTO, verify_interface_creation, CTLFLAG_RW | CTLFLAG_LOCKED, &if_ipsec_verify_interface_creation, 0, "");
117 
118 #define IPSEC_IF_VERIFY(_e)             if (__improbable(if_ipsec_verify_interface_creation)) { VERIFY(_e); }
119 
120 #define IPSEC_IF_DEFAULT_SLOT_SIZE 2048
121 #define IPSEC_IF_DEFAULT_RING_SIZE 64
122 #define IPSEC_IF_DEFAULT_TX_FSW_RING_SIZE 64
123 #define IPSEC_IF_DEFAULT_RX_FSW_RING_SIZE 128
124 #define IPSEC_IF_DEFAULT_BUF_SEG_SIZE   skmem_usr_buf_seg_size
125 
126 #define IPSEC_IF_WMM_RING_COUNT NEXUS_NUM_WMM_QUEUES
127 #define IPSEC_IF_MAX_RING_COUNT IPSEC_IF_WMM_RING_COUNT
128 #define IPSEC_NETIF_WMM_TX_RING_COUNT IPSEC_IF_WMM_RING_COUNT
129 #define IPSEC_NETIF_WMM_RX_RING_COUNT 1
130 #define IPSEC_NETIF_MAX_TX_RING_COUNT IPSEC_NETIF_WMM_TX_RING_COUNT
131 #define IPSEC_NETIF_MAX_RX_RING_COUNT IPSEC_NETIF_WMM_RX_RING_COUNT
132 
133 #define IPSEC_IF_MIN_RING_SIZE 8
134 #define IPSEC_IF_MAX_RING_SIZE 1024
135 
136 #define IPSEC_IF_MIN_SLOT_SIZE 1024
137 #define IPSEC_IF_MAX_SLOT_SIZE (16 * 1024)
138 
139 #define IPSEC_DEFAULT_MAX_PENDING_INPUT_COUNT 512
140 
141 #define IPSEC_KPIPE_FLAG_WAKE_PKT 0x01
142 
143 static uint32_t ipsec_kpipe_mbuf;
144 
145 static int if_ipsec_max_pending_input = IPSEC_DEFAULT_MAX_PENDING_INPUT_COUNT;
146 
147 static int sysctl_if_ipsec_ring_size SYSCTL_HANDLER_ARGS;
148 static int sysctl_if_ipsec_tx_fsw_ring_size SYSCTL_HANDLER_ARGS;
149 static int sysctl_if_ipsec_rx_fsw_ring_size SYSCTL_HANDLER_ARGS;
150 
151 static int if_ipsec_ring_size = IPSEC_IF_DEFAULT_RING_SIZE;
152 static int if_ipsec_tx_fsw_ring_size = IPSEC_IF_DEFAULT_TX_FSW_RING_SIZE;
153 static int if_ipsec_rx_fsw_ring_size = IPSEC_IF_DEFAULT_RX_FSW_RING_SIZE;
154 
155 SYSCTL_INT(_net_ipsec, OID_AUTO, max_pending_input, CTLFLAG_LOCKED | CTLFLAG_RW, &if_ipsec_max_pending_input, 0, "");
156 SYSCTL_PROC(_net_ipsec, OID_AUTO, ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
157     &if_ipsec_ring_size, IPSEC_IF_DEFAULT_RING_SIZE, &sysctl_if_ipsec_ring_size, "I", "");
158 SYSCTL_PROC(_net_ipsec, OID_AUTO, tx_fsw_ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
159     &if_ipsec_tx_fsw_ring_size, IPSEC_IF_DEFAULT_TX_FSW_RING_SIZE, &sysctl_if_ipsec_tx_fsw_ring_size, "I", "");
160 SYSCTL_PROC(_net_ipsec, OID_AUTO, rx_fsw_ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
161     &if_ipsec_rx_fsw_ring_size, IPSEC_IF_DEFAULT_RX_FSW_RING_SIZE, &sysctl_if_ipsec_rx_fsw_ring_size, "I", "");
162 
163 static int if_ipsec_debug = 0;
164 SYSCTL_INT(_net_ipsec, OID_AUTO, debug, CTLFLAG_LOCKED | CTLFLAG_RW, &if_ipsec_debug, 0, "");
165 
166 static errno_t
167 ipsec_register_nexus(void);
168 
169 typedef struct ipsec_nx {
170 	uuid_t if_provider;
171 	uuid_t if_instance;
172 	uuid_t fsw_provider;
173 	uuid_t fsw_instance;
174 	uuid_t fsw_device;
175 	uuid_t fsw_agent;
176 } *ipsec_nx_t;
177 
178 static nexus_controller_t ipsec_ncd;
179 static int ipsec_ncd_refcount;
180 static uuid_t ipsec_kpipe_uuid;
181 
182 #endif // IPSEC_NEXUS
183 
184 /* Control block allocated for each kernel control connection */
185 struct ipsec_pcb {
186 	TAILQ_ENTRY(ipsec_pcb)  ipsec_chain;
187 	kern_ctl_ref            ipsec_ctlref;
188 	ifnet_t                 ipsec_ifp;
189 	u_int32_t               ipsec_unit;
190 	u_int32_t               ipsec_unique_id;
191 	// These external flags can be set with IPSEC_OPT_FLAGS
192 	u_int32_t               ipsec_external_flags;
193 	// These internal flags are only used within this driver
194 	u_int32_t               ipsec_internal_flags;
195 	u_int32_t               ipsec_input_frag_size;
196 	bool                    ipsec_frag_size_set;
197 	int                     ipsec_ext_ifdata_stats;
198 	mbuf_svc_class_t        ipsec_output_service_class;
199 	char                    ipsec_if_xname[IFXNAMSIZ];
200 	char                    ipsec_unique_name[IFXNAMSIZ];
201 	// PCB lock protects state fields, like ipsec_kpipe_count
202 	decl_lck_rw_data(, ipsec_pcb_lock);
203 	// lock to protect ipsec_pcb_data_move & ipsec_pcb_drainers
204 	decl_lck_mtx_data(, ipsec_pcb_data_move_lock);
205 	u_int32_t               ipsec_pcb_data_move; /* number of data moving contexts */
206 	u_int32_t               ipsec_pcb_drainers; /* number of threads waiting to drain */
207 	u_int32_t               ipsec_pcb_data_path_state; /* internal state of interface data path */
208 	ipsec_dscp_mapping_t    ipsec_output_dscp_mapping;
209 
210 #if IPSEC_NEXUS
211 	lck_mtx_t               ipsec_input_chain_lock;
212 	lck_mtx_t               ipsec_kpipe_encrypt_lock;
213 	lck_mtx_t               ipsec_kpipe_decrypt_lock;
214 	struct mbuf *           ipsec_input_chain;
215 	struct mbuf *           ipsec_input_chain_last;
216 	u_int32_t               ipsec_input_chain_count;
217 	// Input chain lock protects the list of input mbufs
218 	// The input chain lock must be taken AFTER the PCB lock if both are held
219 	struct ipsec_nx         ipsec_nx;
220 	u_int32_t               ipsec_kpipe_count;
221 	pid_t                   ipsec_kpipe_pid;
222 	uuid_t                  ipsec_kpipe_proc_uuid;
223 	uuid_t                  ipsec_kpipe_uuid[IPSEC_IF_MAX_RING_COUNT];
224 	void *                  ipsec_kpipe_rxring[IPSEC_IF_MAX_RING_COUNT];
225 	void *                  ipsec_kpipe_txring[IPSEC_IF_MAX_RING_COUNT];
226 	kern_pbufpool_t         ipsec_kpipe_pp;
227 	u_int32_t               ipsec_kpipe_tx_ring_size;
228 	u_int32_t               ipsec_kpipe_rx_ring_size;
229 
230 	kern_nexus_t            ipsec_netif_nexus;
231 	kern_pbufpool_t         ipsec_netif_pp;
232 	void *                  ipsec_netif_rxring[IPSEC_NETIF_MAX_RX_RING_COUNT];
233 	void *                  ipsec_netif_txring[IPSEC_NETIF_MAX_TX_RING_COUNT];
234 	uint64_t                ipsec_netif_txring_size;
235 
236 	u_int32_t               ipsec_slot_size;
237 	u_int32_t               ipsec_netif_ring_size;
238 	u_int32_t               ipsec_tx_fsw_ring_size;
239 	u_int32_t               ipsec_rx_fsw_ring_size;
240 	bool                    ipsec_use_netif;
241 	bool                    ipsec_needs_netagent;
242 #endif // IPSEC_NEXUS
243 };
244 
245 /* These are internal flags not exposed outside this file */
246 #define IPSEC_FLAGS_KPIPE_ALLOCATED 1
247 
248 /* data movement refcounting functions */
249 static boolean_t ipsec_data_move_begin(struct ipsec_pcb *pcb);
250 static void ipsec_data_move_end(struct ipsec_pcb *pcb);
251 static void ipsec_wait_data_move_drain(struct ipsec_pcb *pcb);
252 
253 /* Data path states */
254 #define IPSEC_PCB_DATA_PATH_READY    0x1
255 
256 /* Macros to set/clear/test data path states */
257 #define IPSEC_SET_DATA_PATH_READY(_pcb) ((_pcb)->ipsec_pcb_data_path_state |= IPSEC_PCB_DATA_PATH_READY)
258 #define IPSEC_CLR_DATA_PATH_READY(_pcb) ((_pcb)->ipsec_pcb_data_path_state &= ~IPSEC_PCB_DATA_PATH_READY)
259 #define IPSEC_IS_DATA_PATH_READY(_pcb) (((_pcb)->ipsec_pcb_data_path_state & IPSEC_PCB_DATA_PATH_READY) != 0)
260 
261 #if IPSEC_NEXUS
262 /* Macros to clear/set/test flags. */
263 static inline void
ipsec_flag_set(struct ipsec_pcb * pcb,uint32_t flag)264 ipsec_flag_set(struct ipsec_pcb *pcb, uint32_t flag)
265 {
266 	pcb->ipsec_internal_flags |= flag;
267 }
268 static inline void
ipsec_flag_clr(struct ipsec_pcb * pcb,uint32_t flag)269 ipsec_flag_clr(struct ipsec_pcb *pcb, uint32_t flag)
270 {
271 	pcb->ipsec_internal_flags &= ~flag;
272 }
273 
274 static inline bool
ipsec_flag_isset(struct ipsec_pcb * pcb,uint32_t flag)275 ipsec_flag_isset(struct ipsec_pcb *pcb, uint32_t flag)
276 {
277 	return !!(pcb->ipsec_internal_flags & flag);
278 }
279 #endif // IPSEC_NEXUS
280 
281 TAILQ_HEAD(ipsec_list, ipsec_pcb) ipsec_head;
282 
283 static KALLOC_TYPE_DEFINE(ipsec_pcb_zone, struct ipsec_pcb, NET_KT_DEFAULT);
284 
285 #define IPSECQ_MAXLEN 256
286 
287 #if IPSEC_NEXUS
288 static int
289 sysctl_if_ipsec_ring_size SYSCTL_HANDLER_ARGS
290 {
291 #pragma unused(arg1, arg2)
292 	int value = if_ipsec_ring_size;
293 
294 	int error = sysctl_handle_int(oidp, &value, 0, req);
295 	if (error || !req->newptr) {
296 		return error;
297 	}
298 
299 	if (value < IPSEC_IF_MIN_RING_SIZE ||
300 	    value > IPSEC_IF_MAX_RING_SIZE) {
301 		return EINVAL;
302 	}
303 
304 	if_ipsec_ring_size = value;
305 
306 	return 0;
307 }
308 
309 static int
310 sysctl_if_ipsec_tx_fsw_ring_size SYSCTL_HANDLER_ARGS
311 {
312 #pragma unused(arg1, arg2)
313 	int value = if_ipsec_tx_fsw_ring_size;
314 
315 	int error = sysctl_handle_int(oidp, &value, 0, req);
316 	if (error || !req->newptr) {
317 		return error;
318 	}
319 
320 	if (value < IPSEC_IF_MIN_RING_SIZE ||
321 	    value > IPSEC_IF_MAX_RING_SIZE) {
322 		return EINVAL;
323 	}
324 
325 	if_ipsec_tx_fsw_ring_size = value;
326 
327 	return 0;
328 }
329 
330 static int
331 sysctl_if_ipsec_rx_fsw_ring_size SYSCTL_HANDLER_ARGS
332 {
333 #pragma unused(arg1, arg2)
334 	int value = if_ipsec_rx_fsw_ring_size;
335 
336 	int error = sysctl_handle_int(oidp, &value, 0, req);
337 	if (error || !req->newptr) {
338 		return error;
339 	}
340 
341 	if (value < IPSEC_IF_MIN_RING_SIZE ||
342 	    value > IPSEC_IF_MAX_RING_SIZE) {
343 		return EINVAL;
344 	}
345 
346 	if_ipsec_rx_fsw_ring_size = value;
347 
348 	return 0;
349 }
350 
351 
352 static inline bool
ipsec_in_wmm_mode(struct ipsec_pcb * pcb)353 ipsec_in_wmm_mode(struct ipsec_pcb *pcb)
354 {
355 	return pcb->ipsec_kpipe_count == IPSEC_IF_WMM_RING_COUNT;
356 }
357 
358 #endif // IPSEC_NEXUS
359 
360 errno_t
ipsec_register_control(void)361 ipsec_register_control(void)
362 {
363 	struct kern_ctl_reg     kern_ctl;
364 	errno_t                 result = 0;
365 
366 #if (DEVELOPMENT || DEBUG)
367 	(void)PE_parse_boot_argn("ipsec_kpipe_mbuf", &ipsec_kpipe_mbuf,
368 	    sizeof(ipsec_kpipe_mbuf));
369 #endif /* DEVELOPMENT || DEBUG */
370 
371 #if IPSEC_NEXUS
372 	ipsec_register_nexus();
373 #endif // IPSEC_NEXUS
374 
375 	TAILQ_INIT(&ipsec_head);
376 
377 	bzero(&kern_ctl, sizeof(kern_ctl));
378 	strlcpy(kern_ctl.ctl_name, IPSEC_CONTROL_NAME, sizeof(kern_ctl.ctl_name));
379 	kern_ctl.ctl_name[sizeof(kern_ctl.ctl_name) - 1] = 0;
380 	kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED | CTL_FLAG_REG_SETUP; /* Require root */
381 	kern_ctl.ctl_sendsize = 64 * 1024;
382 	kern_ctl.ctl_recvsize = 64 * 1024;
383 	kern_ctl.ctl_setup = ipsec_ctl_setup;
384 	kern_ctl.ctl_bind = ipsec_ctl_bind;
385 	kern_ctl.ctl_connect = ipsec_ctl_connect;
386 	kern_ctl.ctl_disconnect = ipsec_ctl_disconnect;
387 	kern_ctl.ctl_send = ipsec_ctl_send;
388 	kern_ctl.ctl_setopt = ipsec_ctl_setopt;
389 	kern_ctl.ctl_getopt = ipsec_ctl_getopt;
390 
391 	result = ctl_register(&kern_ctl, &ipsec_kctlref);
392 	if (result != 0) {
393 		os_log_error(OS_LOG_DEFAULT, "ipsec_register_control - ctl_register failed: %d\n", result);
394 		return result;
395 	}
396 
397 	/* Register the protocol plumbers */
398 	if ((result = proto_register_plumber(PF_INET, IFNET_FAMILY_IPSEC,
399 	    ipsec_attach_proto, NULL)) != 0) {
400 		os_log_error(OS_LOG_DEFAULT, "ipsec_register_control - proto_register_plumber(PF_INET, IFNET_FAMILY_IPSEC) failed: %d\n",
401 		    result);
402 		ctl_deregister(ipsec_kctlref);
403 		return result;
404 	}
405 
406 	/* Register the protocol plumbers */
407 	if ((result = proto_register_plumber(PF_INET6, IFNET_FAMILY_IPSEC,
408 	    ipsec_attach_proto, NULL)) != 0) {
409 		proto_unregister_plumber(PF_INET, IFNET_FAMILY_IPSEC);
410 		ctl_deregister(ipsec_kctlref);
411 		os_log_error(OS_LOG_DEFAULT, "ipsec_register_control - proto_register_plumber(PF_INET6, IFNET_FAMILY_IPSEC) failed: %d\n",
412 		    result);
413 		return result;
414 	}
415 
416 	return 0;
417 }
418 
419 /* Helpers */
420 int
ipsec_interface_isvalid(ifnet_t interface)421 ipsec_interface_isvalid(ifnet_t interface)
422 {
423 	struct ipsec_pcb *pcb = NULL;
424 
425 	if (interface == NULL) {
426 		return 0;
427 	}
428 
429 	pcb = ifnet_softc(interface);
430 
431 	if (pcb == NULL) {
432 		return 0;
433 	}
434 
435 	/* When ctl disconnects, ipsec_unit is set to 0 */
436 	if (pcb->ipsec_unit == 0) {
437 		return 0;
438 	}
439 
440 	return 1;
441 }
442 
443 #if IPSEC_NEXUS
444 boolean_t
ipsec_interface_needs_netagent(ifnet_t interface)445 ipsec_interface_needs_netagent(ifnet_t interface)
446 {
447 	struct ipsec_pcb *pcb = NULL;
448 
449 	if (interface == NULL) {
450 		return FALSE;
451 	}
452 
453 	pcb = ifnet_softc(interface);
454 
455 	if (pcb == NULL) {
456 		return FALSE;
457 	}
458 
459 	return pcb->ipsec_needs_netagent == true;
460 }
461 #endif // IPSEC_NEXUS
462 
463 static errno_t
ipsec_ifnet_set_attrs(ifnet_t ifp)464 ipsec_ifnet_set_attrs(ifnet_t ifp)
465 {
466 	/* Set flags and additional information. */
467 	ifnet_set_mtu(ifp, 1500);
468 	ifnet_set_flags(ifp, IFF_UP | IFF_MULTICAST | IFF_POINTOPOINT, 0xffff);
469 
470 	/* The interface must generate its own IPv6 LinkLocal address,
471 	 * if possible following the recommendation of RFC2472 to the 64bit interface ID
472 	 */
473 	ifnet_set_eflags(ifp, IFEF_NOAUTOIPV6LL, IFEF_NOAUTOIPV6LL);
474 
475 #if !IPSEC_NEXUS
476 	/* Reset the stats in case as the interface may have been recycled */
477 	struct ifnet_stats_param stats;
478 	bzero(&stats, sizeof(struct ifnet_stats_param));
479 	ifnet_set_stat(ifp, &stats);
480 #endif // !IPSEC_NEXUS
481 
482 	return 0;
483 }
484 
485 #if IPSEC_NEXUS
486 
487 static uuid_t ipsec_nx_dom_prov;
488 
489 static errno_t
ipsec_nxdp_init(__unused kern_nexus_domain_provider_t domprov)490 ipsec_nxdp_init(__unused kern_nexus_domain_provider_t domprov)
491 {
492 	return 0;
493 }
494 
495 static void
ipsec_nxdp_fini(__unused kern_nexus_domain_provider_t domprov)496 ipsec_nxdp_fini(__unused kern_nexus_domain_provider_t domprov)
497 {
498 	// Ignore
499 }
500 
501 static errno_t
ipsec_register_nexus(void)502 ipsec_register_nexus(void)
503 {
504 	const struct kern_nexus_domain_provider_init dp_init = {
505 		.nxdpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
506 		.nxdpi_flags = 0,
507 		.nxdpi_init = ipsec_nxdp_init,
508 		.nxdpi_fini = ipsec_nxdp_fini
509 	};
510 	errno_t err = 0;
511 
512 	/* ipsec_nxdp_init() is called before this function returns */
513 	err = kern_nexus_register_domain_provider(NEXUS_TYPE_NET_IF,
514 	    (const uint8_t *) "com.apple.ipsec",
515 	    &dp_init, sizeof(dp_init),
516 	    &ipsec_nx_dom_prov);
517 	if (err != 0) {
518 		os_log_error(OS_LOG_DEFAULT, "%s: failed to register domain provider\n", __func__);
519 		return err;
520 	}
521 	return 0;
522 }
523 
524 static errno_t
ipsec_netif_prepare(kern_nexus_t nexus,ifnet_t ifp)525 ipsec_netif_prepare(kern_nexus_t nexus, ifnet_t ifp)
526 {
527 	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
528 	pcb->ipsec_netif_nexus = nexus;
529 	return ipsec_ifnet_set_attrs(ifp);
530 }
531 
532 static errno_t
ipsec_nexus_pre_connect(kern_nexus_provider_t nxprov,proc_t p,kern_nexus_t nexus,nexus_port_t nexus_port,kern_channel_t channel,void ** ch_ctx)533 ipsec_nexus_pre_connect(kern_nexus_provider_t nxprov,
534     proc_t p, kern_nexus_t nexus,
535     nexus_port_t nexus_port, kern_channel_t channel, void **ch_ctx)
536 {
537 #pragma unused(nxprov, p)
538 #pragma unused(nexus, nexus_port, channel, ch_ctx)
539 	return 0;
540 }
541 
542 static errno_t
ipsec_nexus_connected(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel)543 ipsec_nexus_connected(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
544     kern_channel_t channel)
545 {
546 #pragma unused(nxprov, channel)
547 	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
548 	boolean_t ok = ifnet_is_attached(pcb->ipsec_ifp, 1);
549 	/* Mark the data path as ready */
550 	if (ok) {
551 		lck_mtx_lock(&pcb->ipsec_pcb_data_move_lock);
552 		IPSEC_SET_DATA_PATH_READY(pcb);
553 		lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
554 	}
555 	return ok ? 0 : ENXIO;
556 }
557 
558 static void
ipsec_nexus_pre_disconnect(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel)559 ipsec_nexus_pre_disconnect(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
560     kern_channel_t channel)
561 {
562 #pragma unused(nxprov, channel)
563 	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
564 
565 	VERIFY(pcb->ipsec_kpipe_count != 0);
566 
567 	/* Wait until all threads in the data paths are done. */
568 	ipsec_wait_data_move_drain(pcb);
569 }
570 
571 static void
ipsec_netif_pre_disconnect(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel)572 ipsec_netif_pre_disconnect(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
573     kern_channel_t channel)
574 {
575 #pragma unused(nxprov, channel)
576 	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
577 
578 	/* Wait until all threads in the data paths are done. */
579 	ipsec_wait_data_move_drain(pcb);
580 }
581 
582 static void
ipsec_nexus_disconnected(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel)583 ipsec_nexus_disconnected(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
584     kern_channel_t channel)
585 {
586 #pragma unused(nxprov, channel)
587 	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
588 	if (pcb->ipsec_netif_nexus == nexus) {
589 		pcb->ipsec_netif_nexus = NULL;
590 	}
591 	ifnet_decr_iorefcnt(pcb->ipsec_ifp);
592 }
593 
594 static errno_t
ipsec_kpipe_ring_init(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel,kern_channel_ring_t ring,boolean_t is_tx_ring,void ** ring_ctx)595 ipsec_kpipe_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
596     kern_channel_t channel, kern_channel_ring_t ring, boolean_t is_tx_ring,
597     void **ring_ctx)
598 {
599 #pragma unused(nxprov)
600 #pragma unused(channel)
601 	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
602 	uint8_t ring_idx;
603 
604 	for (ring_idx = 0; ring_idx < pcb->ipsec_kpipe_count; ring_idx++) {
605 		if (!uuid_compare(channel->ch_info->cinfo_nx_uuid, pcb->ipsec_kpipe_uuid[ring_idx])) {
606 			break;
607 		}
608 	}
609 
610 	if (ring_idx == pcb->ipsec_kpipe_count) {
611 		uuid_string_t uuidstr;
612 		uuid_unparse(channel->ch_info->cinfo_nx_uuid, uuidstr);
613 		os_log_error(OS_LOG_DEFAULT, "%s: %s cannot find channel %s\n", __func__, pcb->ipsec_if_xname, uuidstr);
614 		return ENOENT;
615 	}
616 
617 	*ring_ctx = (void *)(uintptr_t)ring_idx;
618 
619 	if (!is_tx_ring) {
620 		VERIFY(pcb->ipsec_kpipe_rxring[ring_idx] == NULL);
621 		pcb->ipsec_kpipe_rxring[ring_idx] = ring;
622 	} else {
623 		VERIFY(pcb->ipsec_kpipe_txring[ring_idx] == NULL);
624 		pcb->ipsec_kpipe_txring[ring_idx] = ring;
625 	}
626 	return 0;
627 }
628 
629 static void
ipsec_kpipe_ring_fini(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring)630 ipsec_kpipe_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
631     kern_channel_ring_t ring)
632 {
633 #pragma unused(nxprov)
634 	bool found = false;
635 	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
636 
637 	for (unsigned int i = 0; i < pcb->ipsec_kpipe_count; i++) {
638 		if (pcb->ipsec_kpipe_rxring[i] == ring) {
639 			pcb->ipsec_kpipe_rxring[i] = NULL;
640 			found = true;
641 		} else if (pcb->ipsec_kpipe_txring[i] == ring) {
642 			pcb->ipsec_kpipe_txring[i] = NULL;
643 			found = true;
644 		}
645 	}
646 	VERIFY(found);
647 }
648 
649 static errno_t
ipsec_kpipe_sync_tx(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t tx_ring,uint32_t flags)650 ipsec_kpipe_sync_tx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
651     kern_channel_ring_t tx_ring, uint32_t flags)
652 {
653 #pragma unused(nxprov)
654 #pragma unused(flags)
655 	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
656 
657 	if (!ipsec_data_move_begin(pcb)) {
658 		os_log_info(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
659 		return 0;
660 	}
661 
662 	lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
663 
664 	if (!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
665 		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
666 		ipsec_data_move_end(pcb);
667 		return 0;
668 	}
669 
670 	VERIFY(pcb->ipsec_kpipe_count);
671 
672 	kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
673 	if (tx_slot == NULL) {
674 		// Nothing to write, bail
675 		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
676 		ipsec_data_move_end(pcb);
677 		return 0;
678 	}
679 
680 	// Signal the netif ring to read
681 	kern_channel_ring_t rx_ring = pcb->ipsec_netif_rxring[0];
682 	lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
683 
684 	if (rx_ring != NULL) {
685 		kern_channel_notify(rx_ring, 0);
686 	}
687 
688 	ipsec_data_move_end(pcb);
689 	return 0;
690 }
691 
692 static mbuf_t
ipsec_encrypt_mbuf(ifnet_t interface,mbuf_t data)693 ipsec_encrypt_mbuf(ifnet_t interface,
694     mbuf_t data)
695 {
696 	struct ipsec_output_state ipsec_state;
697 	int error = 0;
698 	uint32_t af;
699 
700 	// Make sure this packet isn't looping through the interface
701 	if (necp_get_last_interface_index_from_packet(data) == interface->if_index) {
702 		error = -1;
703 		goto ipsec_output_err;
704 	}
705 
706 	// Mark the interface so NECP can evaluate tunnel policy
707 	necp_mark_packet_from_interface(data, interface);
708 
709 	struct ip *ip = mtod(data, struct ip *);
710 	u_int ip_version = ip->ip_v;
711 
712 	switch (ip_version) {
713 	case 4: {
714 		af = AF_INET;
715 
716 		memset(&ipsec_state, 0, sizeof(ipsec_state));
717 		ipsec_state.m = data;
718 		ipsec_state.dst = (struct sockaddr *)&ip->ip_dst;
719 		memset(&ipsec_state.ro, 0, sizeof(ipsec_state.ro));
720 
721 		error = ipsec4_interface_output(&ipsec_state, interface);
722 		if (error == 0 && ipsec_state.tunneled == 6) {
723 			// Tunneled in IPv6 - packet is gone
724 			// TODO: Don't lose mbuf
725 			data = NULL;
726 			goto done;
727 		}
728 
729 		data = ipsec_state.m;
730 		if (error || data == NULL) {
731 			if (error) {
732 				os_log_error(OS_LOG_DEFAULT, "ipsec_encrypt_mbuf: ipsec4_output error %d\n", error);
733 			}
734 			goto ipsec_output_err;
735 		}
736 		goto done;
737 	}
738 	case 6: {
739 		af = AF_INET6;
740 
741 		data = ipsec6_splithdr(data);
742 		if (data == NULL) {
743 			os_log_error(OS_LOG_DEFAULT, "ipsec_encrypt_mbuf: ipsec6_splithdr returned NULL\n");
744 			goto ipsec_output_err;
745 		}
746 
747 		struct ip6_hdr *ip6 = mtod(data, struct ip6_hdr *);
748 
749 		memset(&ipsec_state, 0, sizeof(ipsec_state));
750 		ipsec_state.m = data;
751 		ipsec_state.dst = (struct sockaddr *)&ip6->ip6_dst;
752 		memset(&ipsec_state.ro, 0, sizeof(ipsec_state.ro));
753 
754 		error = ipsec6_interface_output(&ipsec_state, interface, &ip6->ip6_nxt, ipsec_state.m);
755 		if (error == 0 && ipsec_state.tunneled == 4) {
756 			// Tunneled in IPv4 - packet is gone
757 			// TODO: Don't lose mbuf
758 			data = NULL;
759 			goto done;
760 		}
761 		data = ipsec_state.m;
762 		if (error || data == NULL) {
763 			if (error) {
764 				os_log_error(OS_LOG_DEFAULT, "ipsec_encrypt_mbuf: ipsec6_output error %d\n", error);
765 			}
766 			goto ipsec_output_err;
767 		}
768 		goto done;
769 	}
770 	default: {
771 		os_log_error(OS_LOG_DEFAULT, "ipsec_encrypt_mbuf: Received unknown packet version %d\n", ip_version);
772 		error = -1;
773 		goto ipsec_output_err;
774 	}
775 	}
776 
777 done:
778 	return data;
779 
780 ipsec_output_err:
781 	if (data) {
782 		mbuf_freem(data);
783 	}
784 	return NULL;
785 }
786 
787 static errno_t
ipsec_kpipe_sync_rx_mbuf(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t rx_ring,uint32_t flags)788 ipsec_kpipe_sync_rx_mbuf(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
789     kern_channel_ring_t rx_ring, uint32_t flags)
790 {
791 #pragma unused(nxprov)
792 #pragma unused(flags)
793 	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
794 	struct kern_channel_ring_stat_increment rx_ring_stats;
795 	uint8_t ring_idx = (uint8_t)(uintptr_t)kern_channel_ring_get_context(rx_ring);
796 
797 	if (!ipsec_data_move_begin(pcb)) {
798 		os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
799 		return 0;
800 	}
801 
802 	lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
803 
804 	if (!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
805 		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
806 		ipsec_data_move_end(pcb);
807 		return 0;
808 	}
809 
810 	VERIFY(pcb->ipsec_kpipe_count);
811 	VERIFY(ring_idx <= pcb->ipsec_kpipe_count);
812 
813 	// Reclaim user-released slots
814 	(void) kern_channel_reclaim(rx_ring);
815 
816 	uint32_t avail = kern_channel_available_slot_count(rx_ring);
817 	if (avail == 0) {
818 		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
819 		os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d no room in rx_ring\n", __func__,
820 		    pcb->ipsec_if_xname, rx_ring->ckr_name, ring_idx);
821 		ipsec_data_move_end(pcb);
822 		return 0;
823 	}
824 
825 	kern_channel_ring_t tx_ring = pcb->ipsec_netif_txring[ring_idx];
826 	if (tx_ring == NULL) {
827 		// Net-If TX ring not set up yet, nothing to read
828 		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
829 		os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d bad netif_txring 1\n", __func__,
830 		    pcb->ipsec_if_xname, rx_ring->ckr_name, ring_idx);
831 		ipsec_data_move_end(pcb);
832 		return 0;
833 	}
834 
835 	struct netif_stats *nifs = &NX_NETIF_PRIVATE(pcb->ipsec_netif_nexus)->nif_stats;
836 
837 	// Unlock ipsec before entering ring
838 	lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
839 
840 	(void)kr_enter(tx_ring, TRUE);
841 
842 	// Lock again after entering and validate
843 	lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
844 	if (tx_ring != pcb->ipsec_netif_txring[ring_idx]) {
845 		// Ring no longer valid
846 		// Unlock first, then exit ring
847 		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
848 		kr_exit(tx_ring);
849 		os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d bad netif_txring 2\n", __func__,
850 		    pcb->ipsec_if_xname, rx_ring->ckr_name, ring_idx);
851 		ipsec_data_move_end(pcb);
852 		return 0;
853 	}
854 
855 	struct kern_channel_ring_stat_increment tx_ring_stats;
856 	bzero(&tx_ring_stats, sizeof(tx_ring_stats));
857 	kern_channel_slot_t tx_pslot = NULL;
858 	kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
859 	if (tx_slot == NULL) {
860 		// Nothing to read, don't bother signalling
861 		// Unlock first, then exit ring
862 		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
863 		kr_exit(tx_ring);
864 		ipsec_data_move_end(pcb);
865 		return 0;
866 	}
867 
868 	struct kern_pbufpool *rx_pp = rx_ring->ckr_pp;
869 	VERIFY(rx_pp != NULL);
870 	struct kern_pbufpool *tx_pp = tx_ring->ckr_pp;
871 	VERIFY(tx_pp != NULL);
872 	bzero(&rx_ring_stats, sizeof(rx_ring_stats));
873 	kern_channel_slot_t rx_pslot = NULL;
874 	kern_channel_slot_t rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);
875 	kern_packet_t tx_chain_ph = 0;
876 
877 	while (rx_slot != NULL && tx_slot != NULL) {
878 		size_t length = 0;
879 		mbuf_t data = NULL;
880 		errno_t error = 0;
881 
882 		// Allocate rx packet
883 		kern_packet_t rx_ph = 0;
884 		error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
885 		if (__improbable(error != 0)) {
886 			os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s: failed to allocate packet\n",
887 			    pcb->ipsec_ifp->if_xname);
888 			break;
889 		}
890 
891 		kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);
892 
893 		if (tx_ph == 0) {
894 			// Advance TX ring
895 			tx_pslot = tx_slot;
896 			tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
897 			kern_pbufpool_free(rx_pp, rx_ph);
898 			continue;
899 		}
900 		(void) kern_channel_slot_detach_packet(tx_ring, tx_slot, tx_ph);
901 		if (tx_chain_ph != 0) {
902 			kern_packet_append(tx_ph, tx_chain_ph);
903 		}
904 		tx_chain_ph = tx_ph;
905 
906 		// Advance TX ring
907 		tx_pslot = tx_slot;
908 		tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
909 
910 		kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
911 		VERIFY(tx_buf != NULL);
912 		uint8_t *tx_baddr = kern_buflet_get_data_address(tx_buf);
913 		VERIFY(tx_baddr != NULL);
914 		tx_baddr += kern_buflet_get_data_offset(tx_buf);
915 
916 		bpf_tap_packet_out(pcb->ipsec_ifp, DLT_RAW, tx_ph, NULL, 0);
917 
918 		length = MIN(kern_packet_get_data_length(tx_ph),
919 		    pcb->ipsec_slot_size);
920 
921 		// Increment TX stats
922 		tx_ring_stats.kcrsi_slots_transferred++;
923 		tx_ring_stats.kcrsi_bytes_transferred += length;
924 
925 		if (length > 0) {
926 			error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, &data);
927 			if (error == 0) {
928 				error = mbuf_copyback(data, 0, length, tx_baddr, MBUF_DONTWAIT);
929 				if (error == 0) {
930 					// Encrypt and send packet
931 					lck_mtx_lock(&pcb->ipsec_kpipe_encrypt_lock);
932 					data = ipsec_encrypt_mbuf(pcb->ipsec_ifp, data);
933 					lck_mtx_unlock(&pcb->ipsec_kpipe_encrypt_lock);
934 				} else {
935 					os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s - mbuf_copyback(%zu) error %d\n", pcb->ipsec_ifp->if_xname, length, error);
936 					STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
937 					STATS_INC(nifs, NETIF_STATS_DROP);
938 					mbuf_freem(data);
939 					data = NULL;
940 				}
941 			} else {
942 				os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s - mbuf_gethdr error %d\n", pcb->ipsec_ifp->if_xname, error);
943 				STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
944 				STATS_INC(nifs, NETIF_STATS_DROP);
945 			}
946 		} else {
947 			os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s - 0 length packet\n", pcb->ipsec_ifp->if_xname);
948 			STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
949 			STATS_INC(nifs, NETIF_STATS_DROP);
950 		}
951 
952 		if (data == NULL) {
953 			os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s: no encrypted packet to send\n", pcb->ipsec_ifp->if_xname);
954 			kern_pbufpool_free(rx_pp, rx_ph);
955 			break;
956 		}
957 
958 		length = mbuf_pkthdr_len(data);
959 		if (length > PP_BUF_SIZE_DEF(rx_pp)) {
960 			// Flush data
961 			mbuf_freem(data);
962 			kern_pbufpool_free(rx_pp, rx_ph);
963 			os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s: encrypted packet length %zu > %u\n",
964 			    pcb->ipsec_ifp->if_xname, length, PP_BUF_SIZE_DEF(rx_pp));
965 			continue;
966 		}
967 
968 		// Fillout rx packet
969 		kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
970 		VERIFY(rx_buf != NULL);
971 		void *rx_baddr = kern_buflet_get_data_address(rx_buf);
972 		VERIFY(rx_baddr != NULL);
973 
974 		// Copy-in data from mbuf to buflet
975 		mbuf_copydata(data, 0, length, (void *)rx_baddr);
976 		kern_packet_clear_flow_uuid(rx_ph);     // Zero flow id
977 
978 		// Finalize and attach the packet
979 		error = kern_buflet_set_data_offset(rx_buf, 0);
980 		VERIFY(error == 0);
981 		error = kern_buflet_set_data_length(rx_buf, (uint16_t)length);
982 		VERIFY(error == 0);
983 		error = kern_packet_finalize(rx_ph);
984 		VERIFY(error == 0);
985 		error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
986 		VERIFY(error == 0);
987 
988 		STATS_INC(nifs, NETIF_STATS_TX_PACKETS);
989 		STATS_INC(nifs, NETIF_STATS_TX_COPY_DIRECT);
990 
991 		rx_ring_stats.kcrsi_slots_transferred++;
992 		rx_ring_stats.kcrsi_bytes_transferred += length;
993 
994 		if (!pcb->ipsec_ext_ifdata_stats) {
995 			ifnet_stat_increment_out(pcb->ipsec_ifp, 1, (uint16_t)length, 0);
996 		}
997 
998 		mbuf_freem(data);
999 
1000 		rx_pslot = rx_slot;
1001 		rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
1002 	}
1003 
1004 	if (rx_pslot) {
1005 		kern_channel_advance_slot(rx_ring, rx_pslot);
1006 		kern_channel_increment_ring_net_stats(rx_ring, pcb->ipsec_ifp, &rx_ring_stats);
1007 	}
1008 
1009 	if (tx_chain_ph != 0) {
1010 		kern_pbufpool_free_chain(tx_pp, tx_chain_ph);
1011 	}
1012 
1013 	if (tx_pslot) {
1014 		kern_channel_advance_slot(tx_ring, tx_pslot);
1015 		kern_channel_increment_ring_net_stats(tx_ring, pcb->ipsec_ifp, &tx_ring_stats);
1016 		(void)kern_channel_reclaim(tx_ring);
1017 	}
1018 
1019 	/* always reenable output */
1020 	errno_t error = ifnet_enable_output(pcb->ipsec_ifp);
1021 	if (error != 0) {
1022 		os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx: ifnet_enable_output returned error %d\n", error);
1023 	}
1024 
1025 	// Unlock first, then exit ring
1026 	lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
1027 
1028 	if (tx_pslot != NULL) {
1029 		kern_channel_notify(tx_ring, 0);
1030 	}
1031 	kr_exit(tx_ring);
1032 
1033 	ipsec_data_move_end(pcb);
1034 	return 0;
1035 }
1036 
1037 static errno_t
ipsec_encrypt_kpipe_pkt(ifnet_t interface,kern_packet_t sph,kern_packet_t dph)1038 ipsec_encrypt_kpipe_pkt(ifnet_t interface, kern_packet_t sph,
1039     kern_packet_t dph)
1040 {
1041 	uint8_t *sbaddr = NULL;
1042 	int err = 0;
1043 	uint32_t slen = 0;
1044 
1045 	VERIFY(interface != NULL);
1046 	VERIFY(sph != 0);
1047 	VERIFY(dph != 0);
1048 
1049 	kern_buflet_t sbuf = __packet_get_next_buflet(sph, NULL);
1050 	VERIFY(sbuf != NULL);
1051 	slen = __buflet_get_data_length(sbuf);
1052 
1053 	if (__improbable(slen < sizeof(struct ip))) {
1054 		os_log_error(OS_LOG_DEFAULT, "ipsec encrypt kpipe pkt: source "
1055 		    "buffer shorter than ip header, %u\n", slen);
1056 		return EINVAL;
1057 	}
1058 
1059 	MD_BUFLET_ADDR(SK_PTR_ADDR_KPKT(sph), sbaddr);
1060 	struct ip *ip = (struct ip *)(void *)sbaddr;
1061 	ASSERT(IP_HDR_ALIGNED_P(ip));
1062 
1063 	u_int ip_vers = ip->ip_v;
1064 	switch (ip_vers) {
1065 	case IPVERSION: {
1066 		err = ipsec4_interface_kpipe_output(interface, sph, dph);
1067 		if (__improbable(err != 0)) {
1068 			os_log_error(OS_LOG_DEFAULT, "ipsec4 interface kpipe "
1069 			    "output error %d\n", err);
1070 			return err;
1071 		}
1072 		break;
1073 	}
1074 	case 6: {
1075 		err = ipsec6_interface_kpipe_output(interface, sph, dph);
1076 		if (__improbable(err != 0)) {
1077 			os_log_error(OS_LOG_DEFAULT, "ipsec6 interface kpipe "
1078 			    "output error %d\n", err);
1079 			return err;
1080 		}
1081 		break;
1082 	}
1083 	default: {
1084 		os_log_error(OS_LOG_DEFAULT, "received unknown packet version: %d\n",
1085 		    ip_vers);
1086 		return EINVAL;
1087 	}
1088 	}
1089 
1090 	return err;
1091 }
1092 
1093 static errno_t
ipsec_kpipe_sync_rx_packet(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t rx_ring,uint32_t flags)1094 ipsec_kpipe_sync_rx_packet(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
1095     kern_channel_ring_t rx_ring, uint32_t flags)
1096 {
1097 #pragma unused(nxprov)
1098 #pragma unused(flags)
1099 	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
1100 	struct kern_channel_ring_stat_increment rx_ring_stats;
1101 	uint8_t ring_idx = (uint8_t)(uintptr_t)kern_channel_ring_get_context(rx_ring);
1102 
1103 	if (!ipsec_data_move_begin(pcb)) {
1104 		os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
1105 		return 0;
1106 	}
1107 
1108 	lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
1109 
1110 	if (!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
1111 		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
1112 		ipsec_data_move_end(pcb);
1113 		return 0;
1114 	}
1115 
1116 	VERIFY(pcb->ipsec_kpipe_count);
1117 	VERIFY(ring_idx <= pcb->ipsec_kpipe_count);
1118 
1119 	// Reclaim user-released slots
1120 	(void) kern_channel_reclaim(rx_ring);
1121 
1122 	uint32_t avail = kern_channel_available_slot_count(rx_ring);
1123 	if (avail == 0) {
1124 		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
1125 		os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d no room in rx_ring\n", __func__,
1126 		    pcb->ipsec_if_xname, rx_ring->ckr_name, ring_idx);
1127 		ipsec_data_move_end(pcb);
1128 		return 0;
1129 	}
1130 
1131 	kern_channel_ring_t tx_ring = pcb->ipsec_netif_txring[ring_idx];
1132 	if (tx_ring == NULL) {
1133 		// Net-If TX ring not set up yet, nothing to read
1134 		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
1135 		os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d bad netif_txring 1\n", __func__,
1136 		    pcb->ipsec_if_xname, rx_ring->ckr_name, ring_idx);
1137 		ipsec_data_move_end(pcb);
1138 		return 0;
1139 	}
1140 
1141 	struct netif_stats *nifs = &NX_NETIF_PRIVATE(pcb->ipsec_netif_nexus)->nif_stats;
1142 
1143 	// Unlock ipsec before entering ring
1144 	lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
1145 
1146 	(void)kr_enter(tx_ring, TRUE);
1147 
1148 	// Lock again after entering and validate
1149 	lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
1150 	if (tx_ring != pcb->ipsec_netif_txring[ring_idx]) {
1151 		// Ring no longer valid
1152 		// Unlock first, then exit ring
1153 		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
1154 		kr_exit(tx_ring);
1155 		os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d bad netif_txring 2\n", __func__,
1156 		    pcb->ipsec_if_xname, rx_ring->ckr_name, ring_idx);
1157 		ipsec_data_move_end(pcb);
1158 		return 0;
1159 	}
1160 
1161 	struct kern_channel_ring_stat_increment tx_ring_stats;
1162 	bzero(&tx_ring_stats, sizeof(tx_ring_stats));
1163 	kern_channel_slot_t tx_pslot = NULL;
1164 	kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
1165 	if (tx_slot == NULL) {
1166 		// Nothing to read, don't bother signalling
1167 		// Unlock first, then exit ring
1168 		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
1169 		kr_exit(tx_ring);
1170 		ipsec_data_move_end(pcb);
1171 		return 0;
1172 	}
1173 
1174 	struct kern_pbufpool *rx_pp = rx_ring->ckr_pp;
1175 	VERIFY(rx_pp != NULL);
1176 	struct kern_pbufpool *tx_pp = tx_ring->ckr_pp;
1177 	VERIFY(tx_pp != NULL);
1178 	bzero(&rx_ring_stats, sizeof(rx_ring_stats));
1179 	kern_channel_slot_t rx_pslot = NULL;
1180 	kern_channel_slot_t rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);
1181 	kern_packet_t tx_chain_ph = 0;
1182 
1183 	while (rx_slot != NULL && tx_slot != NULL) {
1184 		size_t tx_pkt_length = 0;
1185 		errno_t error = 0;
1186 
1187 		// Allocate rx packet
1188 		kern_packet_t rx_ph = 0;
1189 		error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
1190 		if (__improbable(error != 0)) {
1191 			os_log_info(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s: "
1192 			    "failed to allocate packet\n", pcb->ipsec_ifp->if_xname);
1193 			STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
1194 			STATS_INC(nifs, NETIF_STATS_DROP);
1195 			break;
1196 		}
1197 
1198 		kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);
1199 		if (__improbable(tx_ph == 0)) {
1200 			// Advance TX ring
1201 			tx_pslot = tx_slot;
1202 			tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
1203 			kern_pbufpool_free(rx_pp, rx_ph);
1204 			continue;
1205 		}
1206 
1207 		(void) kern_channel_slot_detach_packet(tx_ring, tx_slot, tx_ph);
1208 		if (tx_chain_ph != 0) {
1209 			kern_packet_append(tx_ph, tx_chain_ph);
1210 		}
1211 		tx_chain_ph = tx_ph;
1212 
1213 		// Advance TX ring
1214 		tx_pslot = tx_slot;
1215 		tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
1216 
1217 		bpf_tap_packet_out(pcb->ipsec_ifp, DLT_RAW, tx_ph, NULL, 0);
1218 
1219 		tx_pkt_length = kern_packet_get_data_length(tx_ph);
1220 		if (tx_pkt_length == 0 || tx_pkt_length > pcb->ipsec_slot_size) {
1221 			os_log_info(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s: "
1222 			    "packet length %zu", pcb->ipsec_ifp->if_xname,
1223 			    tx_pkt_length);
1224 			kern_pbufpool_free(rx_pp, rx_ph);
1225 			STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
1226 			STATS_INC(nifs, NETIF_STATS_DROP);
1227 			continue;
1228 		}
1229 
1230 		// Increment TX stats
1231 		tx_ring_stats.kcrsi_slots_transferred++;
1232 		tx_ring_stats.kcrsi_bytes_transferred += tx_pkt_length;
1233 
1234 		// Encrypt packet
1235 		lck_mtx_lock(&pcb->ipsec_kpipe_encrypt_lock);
1236 		error = ipsec_encrypt_kpipe_pkt(pcb->ipsec_ifp, tx_ph, rx_ph);
1237 		lck_mtx_unlock(&pcb->ipsec_kpipe_encrypt_lock);
1238 		if (__improbable(error != 0)) {
1239 			os_log_info(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s: "
1240 			    "failed to encrypt packet", pcb->ipsec_ifp->if_xname);
1241 			kern_pbufpool_free(rx_pp, rx_ph);
1242 			STATS_INC(nifs, NETIF_STATS_DROP);
1243 			continue;
1244 		}
1245 
1246 		kern_packet_clear_flow_uuid(rx_ph);         // Zero flow id
1247 		// Finalize and attach the packet
1248 		kern_buflet_t rx_buf = __packet_get_next_buflet(rx_ph, NULL);
1249 		error = kern_buflet_set_data_offset(rx_buf, 0);
1250 		VERIFY(error == 0);
1251 		error = kern_packet_finalize(rx_ph);
1252 		VERIFY(error == 0);
1253 		error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
1254 		VERIFY(error == 0);
1255 
1256 		STATS_INC(nifs, NETIF_STATS_TX_PACKETS);
1257 		STATS_INC(nifs, NETIF_STATS_TX_COPY_DIRECT);
1258 
1259 		rx_ring_stats.kcrsi_slots_transferred++;
1260 		rx_ring_stats.kcrsi_bytes_transferred += kern_packet_get_data_length(rx_ph);
1261 
1262 		if (!pcb->ipsec_ext_ifdata_stats) {
1263 			ifnet_stat_increment_out(pcb->ipsec_ifp, 1,
1264 			    kern_packet_get_data_length(rx_ph), 0);
1265 		}
1266 
1267 		rx_pslot = rx_slot;
1268 		rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
1269 	}
1270 
1271 	if (rx_pslot) {
1272 		kern_channel_advance_slot(rx_ring, rx_pslot);
1273 		kern_channel_increment_ring_net_stats(rx_ring, pcb->ipsec_ifp, &rx_ring_stats);
1274 	}
1275 
1276 	if (tx_chain_ph != 0) {
1277 		kern_pbufpool_free_chain(tx_pp, tx_chain_ph);
1278 	}
1279 
1280 	if (tx_pslot) {
1281 		kern_channel_advance_slot(tx_ring, tx_pslot);
1282 		kern_channel_increment_ring_net_stats(tx_ring, pcb->ipsec_ifp, &tx_ring_stats);
1283 		(void)kern_channel_reclaim(tx_ring);
1284 	}
1285 
1286 	/* always reenable output */
1287 	errno_t error = ifnet_enable_output(pcb->ipsec_ifp);
1288 	if (error != 0) {
1289 		os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx: ifnet_enable_output returned error %d\n", error);
1290 	}
1291 
1292 	// Unlock first, then exit ring
1293 	lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
1294 
1295 	if (tx_pslot != NULL) {
1296 		kern_channel_notify(tx_ring, 0);
1297 	}
1298 	kr_exit(tx_ring);
1299 
1300 	ipsec_data_move_end(pcb);
1301 	return 0;
1302 }
1303 
1304 static errno_t
ipsec_kpipe_sync_rx(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t rx_ring,uint32_t flags)1305 ipsec_kpipe_sync_rx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
1306     kern_channel_ring_t rx_ring, uint32_t flags)
1307 {
1308 	if (__improbable(ipsec_kpipe_mbuf == 1)) {
1309 		return ipsec_kpipe_sync_rx_mbuf(nxprov, nexus, rx_ring, flags);
1310 	} else {
1311 		return ipsec_kpipe_sync_rx_packet(nxprov, nexus, rx_ring, flags);
1312 	}
1313 }
1314 
1315 static uint8_t
ipsec_find_tx_ring_by_svc(kern_packet_svc_class_t svc_class)1316 ipsec_find_tx_ring_by_svc(kern_packet_svc_class_t svc_class)
1317 {
1318 	switch (svc_class) {
1319 	case KPKT_SC_VO: {
1320 		return 0;
1321 	}
1322 	case KPKT_SC_VI: {
1323 		return 1;
1324 	}
1325 	case KPKT_SC_BE: {
1326 		return 2;
1327 	}
1328 	case KPKT_SC_BK: {
1329 		return 3;
1330 	}
1331 	default: {
1332 		VERIFY(0);
1333 		return 0;
1334 	}
1335 	}
1336 }
1337 
1338 static errno_t
ipsec_netif_ring_init(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel,kern_channel_ring_t ring,boolean_t is_tx_ring,void ** ring_ctx)1339 ipsec_netif_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
1340     kern_channel_t channel, kern_channel_ring_t ring, boolean_t is_tx_ring,
1341     void **ring_ctx)
1342 {
1343 #pragma unused(nxprov)
1344 #pragma unused(channel)
1345 	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
1346 
1347 	if (!is_tx_ring) {
1348 		VERIFY(pcb->ipsec_netif_rxring[0] == NULL);
1349 		pcb->ipsec_netif_rxring[0] = ring;
1350 	} else {
1351 		uint8_t ring_idx = 0;
1352 		if (ipsec_in_wmm_mode(pcb)) {
1353 			int err;
1354 			kern_packet_svc_class_t svc_class;
1355 			err = kern_channel_get_service_class(ring, &svc_class);
1356 			VERIFY(err == 0);
1357 			ring_idx = ipsec_find_tx_ring_by_svc(svc_class);
1358 			VERIFY(ring_idx < IPSEC_IF_WMM_RING_COUNT);
1359 		}
1360 
1361 		*ring_ctx = (void *)(uintptr_t)ring_idx;
1362 
1363 		VERIFY(pcb->ipsec_netif_txring[ring_idx] == NULL);
1364 		pcb->ipsec_netif_txring[ring_idx] = ring;
1365 	}
1366 	return 0;
1367 }
1368 
1369 static void
ipsec_netif_ring_fini(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring)1370 ipsec_netif_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
1371     kern_channel_ring_t ring)
1372 {
1373 #pragma unused(nxprov)
1374 	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
1375 	bool found = false;
1376 
1377 	for (int i = 0; i < IPSEC_NETIF_MAX_RX_RING_COUNT; i++) {
1378 		if (pcb->ipsec_netif_rxring[i] == ring) {
1379 			pcb->ipsec_netif_rxring[i] = NULL;
1380 			VERIFY(!found);
1381 			found = true;
1382 		}
1383 	}
1384 	for (int i = 0; i < IPSEC_NETIF_MAX_TX_RING_COUNT; i++) {
1385 		if (pcb->ipsec_netif_txring[i] == ring) {
1386 			pcb->ipsec_netif_txring[i] = NULL;
1387 			VERIFY(!found);
1388 			found = true;
1389 		}
1390 	}
1391 	VERIFY(found);
1392 }
1393 
1394 static bool
ipsec_netif_check_policy(ifnet_t interface,mbuf_t data)1395 ipsec_netif_check_policy(ifnet_t interface, mbuf_t data)
1396 {
1397 	necp_kernel_policy_result necp_result = 0;
1398 	necp_kernel_policy_result_parameter necp_result_parameter = {};
1399 	uint32_t necp_matched_policy_id = 0;
1400 	struct ip_out_args args4 = { };
1401 	struct ip6_out_args args6 = { };
1402 
1403 	// This packet has been marked with IP level policy, do not mark again.
1404 	if (data && data->m_pkthdr.necp_mtag.necp_policy_id >= NECP_KERNEL_POLICY_ID_FIRST_VALID_IP) {
1405 		return true;
1406 	}
1407 
1408 	size_t length = mbuf_pkthdr_len(data);
1409 	if (length < sizeof(struct ip)) {
1410 		return false;
1411 	}
1412 
1413 	struct ip *ip = mtod(data, struct ip *);
1414 	u_int ip_version = ip->ip_v;
1415 	switch (ip_version) {
1416 	case 4: {
1417 		if (interface != NULL) {
1418 			args4.ipoa_flags |= IPOAF_BOUND_IF;
1419 			args4.ipoa_boundif = interface->if_index;
1420 		}
1421 		necp_matched_policy_id = necp_ip_output_find_policy_match(data, IP_OUTARGS, &args4, NULL,
1422 		    &necp_result, &necp_result_parameter);
1423 		break;
1424 	}
1425 	case 6: {
1426 		if (interface != NULL) {
1427 			args6.ip6oa_flags |= IP6OAF_BOUND_IF;
1428 			args6.ip6oa_boundif = interface->if_index;
1429 		}
1430 		necp_matched_policy_id = necp_ip6_output_find_policy_match(data, IPV6_OUTARGS, &args6, NULL,
1431 		    &necp_result, &necp_result_parameter);
1432 		break;
1433 	}
1434 	default: {
1435 		return false;
1436 	}
1437 	}
1438 
1439 	if (necp_result == NECP_KERNEL_POLICY_RESULT_DROP ||
1440 	    necp_result == NECP_KERNEL_POLICY_RESULT_SOCKET_DIVERT) {
1441 		/* Drop and flow divert packets should be blocked at the IP layer */
1442 		return false;
1443 	}
1444 
1445 	necp_mark_packet_from_ip(data, necp_matched_policy_id);
1446 	return true;
1447 }
1448 
1449 static errno_t
ipsec_netif_sync_tx(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t tx_ring,uint32_t flags)1450 ipsec_netif_sync_tx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
1451     kern_channel_ring_t tx_ring, uint32_t flags)
1452 {
1453 #pragma unused(nxprov)
1454 #pragma unused(flags)
1455 	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
1456 
1457 	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
1458 
1459 	if (!ipsec_data_move_begin(pcb)) {
1460 		os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
1461 		return 0;
1462 	}
1463 
1464 	lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
1465 
1466 	struct kern_channel_ring_stat_increment tx_ring_stats;
1467 	bzero(&tx_ring_stats, sizeof(tx_ring_stats));
1468 	kern_channel_slot_t tx_pslot = NULL;
1469 	kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
1470 	kern_packet_t tx_chain_ph = 0;
1471 
1472 	STATS_INC(nifs, NETIF_STATS_TX_SYNC);
1473 
1474 	if (tx_slot == NULL) {
1475 		// Nothing to write, don't bother signalling
1476 		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
1477 		ipsec_data_move_end(pcb);
1478 		return 0;
1479 	}
1480 
1481 	if (pcb->ipsec_kpipe_count &&
1482 	    ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
1483 		// Select the corresponding kpipe rx ring
1484 		uint8_t ring_idx = (uint8_t)(uintptr_t)kern_channel_ring_get_context(tx_ring);
1485 		VERIFY(ring_idx < IPSEC_IF_MAX_RING_COUNT);
1486 		kern_channel_ring_t rx_ring = pcb->ipsec_kpipe_rxring[ring_idx];
1487 
1488 		// Unlock while calling notify
1489 		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
1490 
1491 		// Signal the kernel pipe ring to read
1492 		if (rx_ring != NULL) {
1493 			kern_channel_notify(rx_ring, 0);
1494 		}
1495 
1496 		ipsec_data_move_end(pcb);
1497 		return 0;
1498 	}
1499 
1500 	// If we're here, we're injecting into the BSD stack
1501 	while (tx_slot != NULL) {
1502 		size_t length = 0;
1503 		mbuf_t data = NULL;
1504 
1505 		kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);
1506 
1507 		if (tx_ph == 0) {
1508 			// Advance TX ring
1509 			tx_pslot = tx_slot;
1510 			tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
1511 			continue;
1512 		}
1513 		(void) kern_channel_slot_detach_packet(tx_ring, tx_slot, tx_ph);
1514 		if (tx_chain_ph != 0) {
1515 			kern_packet_append(tx_ph, tx_chain_ph);
1516 		}
1517 		tx_chain_ph = tx_ph;
1518 
1519 		// Advance TX ring
1520 		tx_pslot = tx_slot;
1521 		tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
1522 
1523 		kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
1524 		VERIFY(tx_buf != NULL);
1525 		uint8_t *tx_baddr = kern_buflet_get_data_address(tx_buf);
1526 		VERIFY(tx_baddr != 0);
1527 		tx_baddr += kern_buflet_get_data_offset(tx_buf);
1528 
1529 		bpf_tap_packet_out(pcb->ipsec_ifp, DLT_RAW, tx_ph, NULL, 0);
1530 
1531 		length = MIN(kern_packet_get_data_length(tx_ph),
1532 		    pcb->ipsec_slot_size);
1533 
1534 		if (length > 0) {
1535 			errno_t error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, &data);
1536 			if (error == 0) {
1537 				error = mbuf_copyback(data, 0, length, tx_baddr, MBUF_DONTWAIT);
1538 				if (error == 0) {
1539 					// Mark packet from policy
1540 					uint32_t policy_id = kern_packet_get_policy_id(tx_ph);
1541 					uint32_t skip_policy_id = kern_packet_get_skip_policy_id(tx_ph);
1542 					necp_mark_packet_from_ip_with_skip(data, policy_id, skip_policy_id);
1543 
1544 					// Check policy with NECP
1545 					if (!ipsec_netif_check_policy(pcb->ipsec_ifp, data)) {
1546 						os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - failed policy check\n", pcb->ipsec_ifp->if_xname);
1547 						STATS_INC(nifs, NETIF_STATS_DROP);
1548 						mbuf_freem(data);
1549 						data = NULL;
1550 					} else {
1551 						// Send through encryption
1552 						error = ipsec_output(pcb->ipsec_ifp, data);
1553 						if (error != 0) {
1554 							os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - ipsec_output error %d\n", pcb->ipsec_ifp->if_xname, error);
1555 						}
1556 					}
1557 				} else {
1558 					os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - mbuf_copyback(%zu) error %d\n", pcb->ipsec_ifp->if_xname, length, error);
1559 					STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
1560 					STATS_INC(nifs, NETIF_STATS_DROP);
1561 					mbuf_freem(data);
1562 					data = NULL;
1563 				}
1564 			} else {
1565 				os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - mbuf_gethdr error %d\n", pcb->ipsec_ifp->if_xname, error);
1566 				STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
1567 				STATS_INC(nifs, NETIF_STATS_DROP);
1568 			}
1569 		} else {
1570 			os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - 0 length packet\n", pcb->ipsec_ifp->if_xname);
1571 			STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
1572 			STATS_INC(nifs, NETIF_STATS_DROP);
1573 		}
1574 
1575 		if (data == NULL) {
1576 			os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s: no encrypted packet to send\n", pcb->ipsec_ifp->if_xname);
1577 			break;
1578 		}
1579 
1580 		STATS_INC(nifs, NETIF_STATS_TX_PACKETS);
1581 		STATS_INC(nifs, NETIF_STATS_TX_COPY_MBUF);
1582 
1583 		tx_ring_stats.kcrsi_slots_transferred++;
1584 		tx_ring_stats.kcrsi_bytes_transferred += length;
1585 	}
1586 
1587 	if (tx_chain_ph != 0) {
1588 		kern_pbufpool_free_chain(tx_ring->ckr_pp, tx_chain_ph);
1589 	}
1590 
1591 	if (tx_pslot) {
1592 		kern_channel_advance_slot(tx_ring, tx_pslot);
1593 		kern_channel_increment_ring_net_stats(tx_ring, pcb->ipsec_ifp, &tx_ring_stats);
1594 		(void)kern_channel_reclaim(tx_ring);
1595 	}
1596 
1597 	lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
1598 	ipsec_data_move_end(pcb);
1599 
1600 	return 0;
1601 }
1602 
1603 static errno_t
ipsec_netif_tx_doorbell_one(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring,uint32_t flags,uint8_t ring_idx)1604 ipsec_netif_tx_doorbell_one(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
1605     kern_channel_ring_t ring, uint32_t flags, uint8_t ring_idx)
1606 {
1607 #pragma unused(nxprov)
1608 	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
1609 	boolean_t more = false;
1610 	errno_t rc = 0;
1611 
1612 	VERIFY((flags & KERN_NEXUS_TXDOORBELLF_ASYNC_REFILL) == 0);
1613 
1614 	/*
1615 	 * Refill and sync the ring; we may be racing against another thread doing
1616 	 * an RX sync that also wants to do kr_enter(), and so use the blocking
1617 	 * variant here.
1618 	 */
1619 	rc = kern_channel_tx_refill_canblock(ring, UINT32_MAX, UINT32_MAX, true, &more);
1620 	if (rc != 0 && rc != EAGAIN && rc != EBUSY) {
1621 		os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s tx refill failed %d\n", __func__,
1622 		    pcb->ipsec_if_xname, ring->ckr_name, rc);
1623 	}
1624 
1625 	(void) kr_enter(ring, TRUE);
1626 	lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
1627 	if (ring != pcb->ipsec_netif_txring[ring_idx]) {
1628 		// ring no longer valid
1629 		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
1630 		kr_exit(ring);
1631 		os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d bad netif_txring 3\n", __func__,
1632 		    pcb->ipsec_if_xname, ring->ckr_name, ring_idx);
1633 		return ENXIO;
1634 	}
1635 
1636 	if (pcb->ipsec_kpipe_count) {
1637 		uint32_t tx_available = kern_channel_available_slot_count(ring);
1638 		if (pcb->ipsec_netif_txring_size > 0 &&
1639 		    tx_available >= pcb->ipsec_netif_txring_size - 1) {
1640 			// No room left in tx ring, disable output for now
1641 			errno_t error = ifnet_disable_output(pcb->ipsec_ifp);
1642 			if (error != 0) {
1643 				os_log_error(OS_LOG_DEFAULT, "ipsec_netif_tx_doorbell: ifnet_disable_output returned error %d\n", error);
1644 			}
1645 		}
1646 	}
1647 
1648 	if (pcb->ipsec_kpipe_count) {
1649 		kern_channel_ring_t rx_ring = pcb->ipsec_kpipe_rxring[ring_idx];
1650 
1651 		// Unlock while calling notify
1652 		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
1653 		// Signal the kernel pipe ring to read
1654 		if (rx_ring != NULL) {
1655 			kern_channel_notify(rx_ring, 0);
1656 		}
1657 	} else {
1658 		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
1659 	}
1660 
1661 	kr_exit(ring);
1662 
1663 	return 0;
1664 }
1665 
1666 static errno_t
ipsec_netif_tx_doorbell(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring,__unused uint32_t flags)1667 ipsec_netif_tx_doorbell(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
1668     kern_channel_ring_t ring, __unused uint32_t flags)
1669 {
1670 	errno_t ret = 0;
1671 	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
1672 
1673 	if (!ipsec_data_move_begin(pcb)) {
1674 		os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
1675 		return 0;
1676 	}
1677 
1678 	if (ipsec_in_wmm_mode(pcb)) {
1679 		for (uint8_t i = 0; i < IPSEC_IF_WMM_RING_COUNT; i++) {
1680 			kern_channel_ring_t nring = pcb->ipsec_netif_txring[i];
1681 			ret = ipsec_netif_tx_doorbell_one(nxprov, nexus, nring, flags, i);
1682 			if (ret) {
1683 				break;
1684 			}
1685 		}
1686 	} else {
1687 		ret = ipsec_netif_tx_doorbell_one(nxprov, nexus, ring, flags, 0);
1688 	}
1689 
1690 	ipsec_data_move_end(pcb);
1691 	return ret;
1692 }
1693 
1694 static errno_t
ipsec_netif_sync_rx_mbuf(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t rx_ring,uint32_t flags)1695 ipsec_netif_sync_rx_mbuf(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
1696     kern_channel_ring_t rx_ring, uint32_t flags)
1697 {
1698 #pragma unused(nxprov)
1699 #pragma unused(flags)
1700 	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
1701 	struct kern_channel_ring_stat_increment rx_ring_stats;
1702 
1703 	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
1704 
1705 	if (!ipsec_data_move_begin(pcb)) {
1706 		os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
1707 		return 0;
1708 	}
1709 
1710 	lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
1711 
1712 	// Reclaim user-released slots
1713 	(void) kern_channel_reclaim(rx_ring);
1714 
1715 	STATS_INC(nifs, NETIF_STATS_RX_SYNC);
1716 
1717 	uint32_t avail = kern_channel_available_slot_count(rx_ring);
1718 	if (avail == 0) {
1719 		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
1720 		ipsec_data_move_end(pcb);
1721 		return 0;
1722 	}
1723 
1724 	struct kern_pbufpool *rx_pp = rx_ring->ckr_pp;
1725 	VERIFY(rx_pp != NULL);
1726 	bzero(&rx_ring_stats, sizeof(rx_ring_stats));
1727 	kern_channel_slot_t rx_pslot = NULL;
1728 	kern_channel_slot_t rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);
1729 
1730 	while (rx_slot != NULL) {
1731 		// Check for a waiting packet
1732 		lck_mtx_lock(&pcb->ipsec_input_chain_lock);
1733 		mbuf_t data = pcb->ipsec_input_chain;
1734 		if (data == NULL) {
1735 			lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
1736 			break;
1737 		}
1738 
1739 		// Allocate rx packet
1740 		kern_packet_t rx_ph = 0;
1741 		errno_t error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
1742 		if (__improbable(error != 0)) {
1743 			STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_PKT);
1744 			STATS_INC(nifs, NETIF_STATS_DROP);
1745 			lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
1746 			break;
1747 		}
1748 
1749 		// Advance waiting packets
1750 		if (pcb->ipsec_input_chain_count > 0) {
1751 			pcb->ipsec_input_chain_count--;
1752 		}
1753 		pcb->ipsec_input_chain = data->m_nextpkt;
1754 		data->m_nextpkt = NULL;
1755 		if (pcb->ipsec_input_chain == NULL) {
1756 			pcb->ipsec_input_chain_last = NULL;
1757 		}
1758 		lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
1759 
1760 		size_t length = mbuf_pkthdr_len(data);
1761 
1762 		if (length < sizeof(struct ip)) {
1763 			// Flush data
1764 			mbuf_freem(data);
1765 			kern_pbufpool_free(rx_pp, rx_ph);
1766 			STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
1767 			STATS_INC(nifs, NETIF_STATS_DROP);
1768 			os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: legacy decrypted packet length cannot hold IP %zu < %zu\n",
1769 			    pcb->ipsec_ifp->if_xname, length, sizeof(struct ip));
1770 			continue;
1771 		}
1772 
1773 		uint32_t af = 0;
1774 		struct ip *ip = mtod(data, struct ip *);
1775 		u_int ip_version = ip->ip_v;
1776 		switch (ip_version) {
1777 		case 4: {
1778 			af = AF_INET;
1779 			break;
1780 		}
1781 		case 6: {
1782 			af = AF_INET6;
1783 			break;
1784 		}
1785 		default: {
1786 			os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: legacy unknown ip version %u\n",
1787 			    pcb->ipsec_ifp->if_xname, ip_version);
1788 			break;
1789 		}
1790 		}
1791 
1792 		if (length > PP_BUF_SIZE_DEF(rx_pp) ||
1793 		    (pcb->ipsec_frag_size_set && length > pcb->ipsec_input_frag_size)) {
1794 			// We need to fragment to send up into the netif
1795 
1796 			u_int32_t fragment_mtu = PP_BUF_SIZE_DEF(rx_pp);
1797 			if (pcb->ipsec_frag_size_set &&
1798 			    pcb->ipsec_input_frag_size < PP_BUF_SIZE_DEF(rx_pp)) {
1799 				fragment_mtu = pcb->ipsec_input_frag_size;
1800 			}
1801 
1802 			mbuf_t fragment_chain = NULL;
1803 			switch (af) {
1804 			case AF_INET: {
1805 				// ip_fragment expects the length in host order
1806 				ip->ip_len = ntohs(ip->ip_len);
1807 
1808 				// ip_fragment will modify the original data, don't free
1809 				int fragment_error = ip_fragment(data, pcb->ipsec_ifp, fragment_mtu, TRUE);
1810 				if (fragment_error == 0 && data != NULL) {
1811 					fragment_chain = data;
1812 				} else {
1813 					STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
1814 					STATS_INC(nifs, NETIF_STATS_DROP);
1815 					os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: failed to fragment IPv4 packet of length %zu (%d)\n",
1816 					    pcb->ipsec_ifp->if_xname, length, fragment_error);
1817 				}
1818 				break;
1819 			}
1820 			case AF_INET6: {
1821 				if (length < sizeof(struct ip6_hdr)) {
1822 					mbuf_freem(data);
1823 					STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
1824 					STATS_INC(nifs, NETIF_STATS_DROP);
1825 					os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: failed to fragment IPv6 packet of length %zu < %zu\n",
1826 					    pcb->ipsec_ifp->if_xname, length, sizeof(struct ip6_hdr));
1827 				} else {
1828 					// ip6_do_fragmentation will free the original data on success only
1829 					struct ip6_hdr *ip6 = mtod(data, struct ip6_hdr *);
1830 
1831 					int fragment_error = ip6_do_fragmentation(&data, 0, pcb->ipsec_ifp, sizeof(struct ip6_hdr),
1832 					    ip6, NULL, fragment_mtu, ip6->ip6_nxt, htonl(ip6_randomid((uint64_t)data)));
1833 					if (fragment_error == 0 && data != NULL) {
1834 						fragment_chain = data;
1835 					} else {
1836 						mbuf_freem(data);
1837 						STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
1838 						STATS_INC(nifs, NETIF_STATS_DROP);
1839 						os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: failed to fragment IPv6 packet of length %zu (%d)\n",
1840 						    pcb->ipsec_ifp->if_xname, length, fragment_error);
1841 					}
1842 				}
1843 				break;
1844 			}
1845 			default: {
1846 				// Cannot fragment unknown families
1847 				mbuf_freem(data);
1848 				STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
1849 				STATS_INC(nifs, NETIF_STATS_DROP);
1850 				os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: uknown legacy decrypted packet length %zu > %u\n",
1851 				    pcb->ipsec_ifp->if_xname, length, PP_BUF_SIZE_DEF(rx_pp));
1852 				break;
1853 			}
1854 			}
1855 
1856 			if (fragment_chain != NULL) {
1857 				// Add fragments to chain before continuing
1858 				lck_mtx_lock(&pcb->ipsec_input_chain_lock);
1859 				if (pcb->ipsec_input_chain != NULL) {
1860 					pcb->ipsec_input_chain_last->m_nextpkt = fragment_chain;
1861 				} else {
1862 					pcb->ipsec_input_chain = fragment_chain;
1863 				}
1864 				pcb->ipsec_input_chain_count++;
1865 				while (fragment_chain->m_nextpkt) {
1866 					VERIFY(fragment_chain != fragment_chain->m_nextpkt);
1867 					fragment_chain = fragment_chain->m_nextpkt;
1868 					pcb->ipsec_input_chain_count++;
1869 				}
1870 				pcb->ipsec_input_chain_last = fragment_chain;
1871 				lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
1872 			}
1873 
1874 			// Make sure to free unused rx packet
1875 			kern_pbufpool_free(rx_pp, rx_ph);
1876 
1877 			continue;
1878 		}
1879 
1880 		mbuf_pkthdr_setrcvif(data, pcb->ipsec_ifp);
1881 
1882 		// Fillout rx packet
1883 		kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
1884 		VERIFY(rx_buf != NULL);
1885 		void *rx_baddr = kern_buflet_get_data_address(rx_buf);
1886 		VERIFY(rx_baddr != NULL);
1887 
1888 		// Copy-in data from mbuf to buflet
1889 		mbuf_copydata(data, 0, length, (void *)rx_baddr);
1890 		kern_packet_clear_flow_uuid(rx_ph);     // Zero flow id
1891 
1892 		// Finalize and attach the packet
1893 		error = kern_buflet_set_data_offset(rx_buf, 0);
1894 		VERIFY(error == 0);
1895 		error = kern_buflet_set_data_length(rx_buf, (uint16_t)length);
1896 		VERIFY(error == 0);
1897 		error = kern_packet_set_headroom(rx_ph, 0);
1898 		VERIFY(error == 0);
1899 		error = kern_packet_finalize(rx_ph);
1900 		VERIFY(error == 0);
1901 		error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
1902 		VERIFY(error == 0);
1903 
1904 		STATS_INC(nifs, NETIF_STATS_RX_PACKETS);
1905 		STATS_INC(nifs, NETIF_STATS_RX_COPY_MBUF);
1906 		bpf_tap_packet_in(pcb->ipsec_ifp, DLT_RAW, rx_ph, NULL, 0);
1907 
1908 		rx_ring_stats.kcrsi_slots_transferred++;
1909 		rx_ring_stats.kcrsi_bytes_transferred += length;
1910 
1911 		if (!pcb->ipsec_ext_ifdata_stats) {
1912 			ifnet_stat_increment_in(pcb->ipsec_ifp, 1, (uint16_t)length, 0);
1913 		}
1914 
1915 		mbuf_freem(data);
1916 
1917 		// Advance ring
1918 		rx_pslot = rx_slot;
1919 		rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
1920 	}
1921 
1922 	for (uint8_t ring_idx = 0; ring_idx < pcb->ipsec_kpipe_count; ring_idx++) {
1923 		struct kern_channel_ring_stat_increment tx_ring_stats;
1924 		bzero(&tx_ring_stats, sizeof(tx_ring_stats));
1925 		kern_channel_ring_t tx_ring = pcb->ipsec_kpipe_txring[ring_idx];
1926 		kern_channel_slot_t tx_pslot = NULL;
1927 		kern_channel_slot_t tx_slot = NULL;
1928 		if (tx_ring == NULL) {
1929 			// Net-If TX ring not set up yet, nothing to read
1930 			goto done;
1931 		}
1932 		// Unlock ipsec before entering ring
1933 		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
1934 
1935 		(void)kr_enter(tx_ring, TRUE);
1936 
1937 		// Lock again after entering and validate
1938 		lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
1939 
1940 		if (tx_ring != pcb->ipsec_kpipe_txring[ring_idx]) {
1941 			goto done;
1942 		}
1943 
1944 		tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
1945 		if (tx_slot == NULL) {
1946 			// Nothing to read, don't bother signalling
1947 			goto done;
1948 		}
1949 
1950 		while (rx_slot != NULL && tx_slot != NULL) {
1951 			size_t length = 0;
1952 			mbuf_t data = NULL;
1953 			errno_t error = 0;
1954 			uint32_t af;
1955 
1956 			// Allocate rx packet
1957 			kern_packet_t rx_ph = 0;
1958 			error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
1959 			if (__improbable(error != 0)) {
1960 				STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_PKT);
1961 				STATS_INC(nifs, NETIF_STATS_DROP);
1962 				break;
1963 			}
1964 
1965 			kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);
1966 
1967 			// Advance TX ring
1968 			tx_pslot = tx_slot;
1969 			tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
1970 
1971 			if (tx_ph == 0) {
1972 				kern_pbufpool_free(rx_pp, rx_ph);
1973 				continue;
1974 			}
1975 
1976 			kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
1977 			VERIFY(tx_buf != NULL);
1978 			uint8_t *tx_baddr = kern_buflet_get_data_address(tx_buf);
1979 			VERIFY(tx_baddr != 0);
1980 			tx_baddr += kern_buflet_get_data_offset(tx_buf);
1981 
1982 			length = MIN(kern_packet_get_data_length(tx_ph),
1983 			    pcb->ipsec_slot_size);
1984 
1985 			// Increment TX stats
1986 			tx_ring_stats.kcrsi_slots_transferred++;
1987 			tx_ring_stats.kcrsi_bytes_transferred += length;
1988 
1989 			if (length >= sizeof(struct ip)) {
1990 				error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, &data);
1991 				if (error == 0) {
1992 					error = mbuf_copyback(data, 0, length, tx_baddr, MBUF_DONTWAIT);
1993 					if (error == 0) {
1994 						// Check for wake packet flag
1995 						uuid_t flow_uuid;
1996 						kern_packet_get_flow_uuid(tx_ph, &flow_uuid);
1997 						u_int8_t *id_8 = (u_int8_t *)(uintptr_t)flow_uuid;
1998 						if ((id_8[0] & IPSEC_KPIPE_FLAG_WAKE_PKT) == IPSEC_KPIPE_FLAG_WAKE_PKT) {
1999 							os_log_info(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: wake packet flag is set\n",
2000 							    pcb->ipsec_ifp->if_xname);
2001 							data->m_pkthdr.pkt_flags |= PKTF_WAKE_PKT;
2002 						}
2003 
2004 						lck_mtx_lock(&pcb->ipsec_kpipe_decrypt_lock);
2005 						struct ip *ip = mtod(data, struct ip *);
2006 						u_int ip_version = ip->ip_v;
2007 						switch (ip_version) {
2008 						case 4: {
2009 							af = AF_INET;
2010 							ip->ip_len = ntohs(ip->ip_len) - sizeof(struct ip);
2011 							ip->ip_off = ntohs(ip->ip_off);
2012 
2013 							if (length < ip->ip_len) {
2014 								os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: IPv4 packet length too short (%zu < %u)\n",
2015 								    pcb->ipsec_ifp->if_xname, length, ip->ip_len);
2016 								STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
2017 								STATS_INC(nifs, NETIF_STATS_DROP);
2018 								mbuf_freem(data);
2019 								data = NULL;
2020 							} else {
2021 								data = esp4_input_extended(data, sizeof(struct ip), pcb->ipsec_ifp);
2022 							}
2023 							break;
2024 						}
2025 						case 6: {
2026 							if (length < sizeof(struct ip6_hdr)) {
2027 								os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: IPv6 packet length too short for header %zu\n",
2028 								    pcb->ipsec_ifp->if_xname, length);
2029 								STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
2030 								STATS_INC(nifs, NETIF_STATS_DROP);
2031 								mbuf_freem(data);
2032 								data = NULL;
2033 							} else {
2034 								af = AF_INET6;
2035 								struct ip6_hdr *ip6 = mtod(data, struct ip6_hdr *);
2036 								const size_t ip6_len = sizeof(*ip6) + ntohs(ip6->ip6_plen);
2037 								if (length < ip6_len) {
2038 									os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: IPv6 packet length too short (%zu < %zu)\n",
2039 									    pcb->ipsec_ifp->if_xname, length, ip6_len);
2040 									STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
2041 									STATS_INC(nifs, NETIF_STATS_DROP);
2042 									mbuf_freem(data);
2043 									data = NULL;
2044 								} else {
2045 									int offset = sizeof(struct ip6_hdr);
2046 									esp6_input_extended(&data, &offset, ip6->ip6_nxt, pcb->ipsec_ifp);
2047 								}
2048 							}
2049 							break;
2050 						}
2051 						default: {
2052 							os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: unknown ip version %u\n",
2053 							    pcb->ipsec_ifp->if_xname, ip_version);
2054 							STATS_INC(nifs, NETIF_STATS_DROP);
2055 							mbuf_freem(data);
2056 							data = NULL;
2057 							break;
2058 						}
2059 						}
2060 						lck_mtx_unlock(&pcb->ipsec_kpipe_decrypt_lock);
2061 					} else {
2062 						os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s - mbuf_copyback(%zu) error %d\n", pcb->ipsec_ifp->if_xname, length, error);
2063 						STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
2064 						STATS_INC(nifs, NETIF_STATS_DROP);
2065 						mbuf_freem(data);
2066 						data = NULL;
2067 					}
2068 				} else {
2069 					os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s - mbuf_gethdr error %d\n", pcb->ipsec_ifp->if_xname, error);
2070 					STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
2071 					STATS_INC(nifs, NETIF_STATS_DROP);
2072 				}
2073 			} else {
2074 				os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s - bad packet length %zu\n", pcb->ipsec_ifp->if_xname, length);
2075 				STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
2076 				STATS_INC(nifs, NETIF_STATS_DROP);
2077 			}
2078 
2079 			if (data == NULL) {
2080 				// Failed to get decrypted data
2081 				kern_pbufpool_free(rx_pp, rx_ph);
2082 				continue;
2083 			}
2084 
2085 			length = mbuf_pkthdr_len(data);
2086 			if (length > PP_BUF_SIZE_DEF(rx_pp)) {
2087 				// Flush data
2088 				mbuf_freem(data);
2089 				kern_pbufpool_free(rx_pp, rx_ph);
2090 				STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
2091 				STATS_INC(nifs, NETIF_STATS_DROP);
2092 				os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: decrypted packet length %zu > %u\n",
2093 				    pcb->ipsec_ifp->if_xname, length, PP_BUF_SIZE_DEF(rx_pp));
2094 				continue;
2095 			}
2096 
2097 			mbuf_pkthdr_setrcvif(data, pcb->ipsec_ifp);
2098 
2099 			// Fillout rx packet
2100 			kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
2101 			VERIFY(rx_buf != NULL);
2102 			void *rx_baddr = kern_buflet_get_data_address(rx_buf);
2103 			VERIFY(rx_baddr != NULL);
2104 
2105 			// Copy-in data from mbuf to buflet
2106 			mbuf_copydata(data, 0, length, (void *)rx_baddr);
2107 			kern_packet_clear_flow_uuid(rx_ph);     // Zero flow id
2108 
2109 			// Finalize and attach the packet
2110 			error = kern_buflet_set_data_offset(rx_buf, 0);
2111 			VERIFY(error == 0);
2112 			error = kern_buflet_set_data_length(rx_buf, (uint16_t)length);
2113 			VERIFY(error == 0);
2114 			error = kern_packet_set_link_header_offset(rx_ph, 0);
2115 			VERIFY(error == 0);
2116 			error = kern_packet_set_network_header_offset(rx_ph, 0);
2117 			VERIFY(error == 0);
2118 			error = kern_packet_finalize(rx_ph);
2119 			VERIFY(error == 0);
2120 			error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
2121 			VERIFY(error == 0);
2122 
2123 			STATS_INC(nifs, NETIF_STATS_RX_PACKETS);
2124 			STATS_INC(nifs, NETIF_STATS_RX_COPY_DIRECT);
2125 			bpf_tap_packet_in(pcb->ipsec_ifp, DLT_RAW, rx_ph, NULL, 0);
2126 
2127 			rx_ring_stats.kcrsi_slots_transferred++;
2128 			rx_ring_stats.kcrsi_bytes_transferred += length;
2129 
2130 			if (!pcb->ipsec_ext_ifdata_stats) {
2131 				ifnet_stat_increment_in(pcb->ipsec_ifp, 1, (uint16_t)length, 0);
2132 			}
2133 
2134 			mbuf_freem(data);
2135 
2136 			rx_pslot = rx_slot;
2137 			rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
2138 		}
2139 
2140 done:
2141 		if (tx_pslot) {
2142 			kern_channel_advance_slot(tx_ring, tx_pslot);
2143 			kern_channel_increment_ring_net_stats(tx_ring, pcb->ipsec_ifp, &tx_ring_stats);
2144 			(void)kern_channel_reclaim(tx_ring);
2145 		}
2146 
2147 		// Unlock first, then exit ring
2148 		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
2149 		if (tx_ring != NULL) {
2150 			if (tx_pslot != NULL) {
2151 				kern_channel_notify(tx_ring, 0);
2152 			}
2153 			kr_exit(tx_ring);
2154 		}
2155 
2156 		lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
2157 	}
2158 
2159 	if (rx_pslot) {
2160 		kern_channel_advance_slot(rx_ring, rx_pslot);
2161 		kern_channel_increment_ring_net_stats(rx_ring, pcb->ipsec_ifp, &rx_ring_stats);
2162 	}
2163 
2164 
2165 	lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
2166 
2167 	ipsec_data_move_end(pcb);
2168 	return 0;
2169 }
2170 
2171 static errno_t
ipsec_transform_kpipe_pkt_to_netif_pkt(struct ipsec_pcb * pcb,struct kern_channel_ring_stat_increment * tx_ring_stats,struct netif_stats * nifs,kern_packet_t kpipe_ph,kern_packet_t netif_ph)2172 ipsec_transform_kpipe_pkt_to_netif_pkt(struct ipsec_pcb *pcb,
2173     struct kern_channel_ring_stat_increment *tx_ring_stats,
2174     struct netif_stats *nifs, kern_packet_t kpipe_ph, kern_packet_t netif_ph)
2175 {
2176 	kern_buflet_t kpipe_buf = NULL, netif_buf = NULL;
2177 	uint8_t *kpipe_baddr = NULL, *netif_baddr = NULL;
2178 	uuid_t flow_uuid;
2179 	size_t iphlen = 0;
2180 	uint32_t kpipe_buf_len = 0, netif_buf_lim = 0;
2181 	int err = 0;
2182 
2183 	VERIFY(kpipe_ph != 0);
2184 	VERIFY(netif_ph != 0);
2185 	VERIFY(pcb != NULL);
2186 	VERIFY(tx_ring_stats != NULL);
2187 	VERIFY(nifs != NULL);
2188 
2189 	kpipe_buf = kern_packet_get_next_buflet(kpipe_ph, NULL);
2190 	VERIFY(kpipe_buf != NULL);
2191 	kpipe_baddr = kern_buflet_get_data_address(kpipe_buf);
2192 	VERIFY(kpipe_baddr != NULL);
2193 	kpipe_baddr += kern_buflet_get_data_offset(kpipe_buf);
2194 	kpipe_buf_len = kern_buflet_get_data_length(kpipe_buf);
2195 
2196 	netif_buf = kern_packet_get_next_buflet(netif_ph, NULL);
2197 	VERIFY(netif_buf != NULL);
2198 	netif_baddr = kern_buflet_get_data_address(netif_buf);
2199 	VERIFY(netif_baddr != NULL);
2200 	netif_baddr += kern_buflet_get_data_offset(netif_buf);
2201 	netif_buf_lim = __buflet_get_data_limit(netif_buf);
2202 	netif_buf_lim -= __buflet_get_data_offset(netif_buf);
2203 
2204 	if (kpipe_buf_len > pcb->ipsec_slot_size) {
2205 		os_log_info(OS_LOG_DEFAULT,
2206 		    "ipsec_transform_kpipe_pkt_to_netif_pkt %s: kpipe buffer length "
2207 		    "%u > pcb ipsec slot size %u", pcb->ipsec_ifp->if_xname,
2208 		    kpipe_buf_len, pcb->ipsec_slot_size);
2209 		STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
2210 		err = EMSGSIZE;
2211 		goto bad;
2212 	}
2213 
2214 	tx_ring_stats->kcrsi_slots_transferred++;
2215 	tx_ring_stats->kcrsi_bytes_transferred += kpipe_buf_len;
2216 
2217 	if (__improbable(kpipe_buf_len < sizeof(struct ip))) {
2218 		os_log_info(OS_LOG_DEFAULT, "ipsec_transform_kpipe_pkt_to_netif_pkt %s - bad "
2219 		    "packet length %u\n", pcb->ipsec_ifp->if_xname, kpipe_buf_len);
2220 		STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
2221 		err = EBADMSG;
2222 		goto bad;
2223 	}
2224 
2225 	struct ip *ip = (struct ip *)(void *)kpipe_baddr;
2226 	ASSERT(IP_HDR_ALIGNED_P(ip));
2227 
2228 	u_int ip_vers = ip->ip_v;
2229 	switch (ip_vers) {
2230 	case IPVERSION: {
2231 #ifdef _IP_VHL
2232 		iphlen = IP_VHL_HL(ip->ip_vhl) << 2;
2233 #else /* _IP_VHL */
2234 		iphlen = ip->ip_hl << 2;
2235 #endif /* _IP_VHL */
2236 		break;
2237 	}
2238 	case 6: {
2239 		iphlen = sizeof(struct ip6_hdr);
2240 		break;
2241 	}
2242 	default: {
2243 		os_log_info(OS_LOG_DEFAULT, "ipsec_transform_kpipe_pkt_to_netif_pkt %s - bad "
2244 		    "ip version %u\n", pcb->ipsec_ifp->if_xname, ip_vers);
2245 		err = EBADMSG;
2246 		goto bad;
2247 	}
2248 	}
2249 
2250 	if (__improbable(kpipe_buf_len < iphlen)) {
2251 		os_log_info(OS_LOG_DEFAULT, "ipsec_transform_kpipe_pkt_to_netif_pkt %s - bad "
2252 		    "packet length %u\n", pcb->ipsec_ifp->if_xname, kpipe_buf_len);
2253 		STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
2254 		err = EBADMSG;
2255 		goto bad;
2256 	}
2257 
2258 	if (__improbable(netif_buf_lim < iphlen)) {
2259 		os_log_info(OS_LOG_DEFAULT, "ipsec_transform_kpipe_pkt_to_netif_pkt %s - netif "
2260 		    "buffer length %u too short\n", pcb->ipsec_ifp->if_xname, netif_buf_lim);
2261 		STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
2262 		err = EBADMSG;
2263 		goto bad;
2264 	}
2265 
2266 	memcpy(netif_baddr, kpipe_baddr, iphlen);
2267 	__buflet_set_data_length(netif_buf, (uint16_t)iphlen);
2268 
2269 	lck_mtx_lock(&pcb->ipsec_kpipe_decrypt_lock);
2270 	err = esp_kpipe_input(pcb->ipsec_ifp, kpipe_ph, netif_ph);
2271 	lck_mtx_unlock(&pcb->ipsec_kpipe_decrypt_lock);
2272 
2273 	if (__improbable((err != 0))) {
2274 		goto bad;
2275 	}
2276 
2277 	kern_packet_get_flow_uuid(kpipe_ph, &flow_uuid);
2278 	uint8_t *id_8 = (uint8_t *)(uintptr_t)flow_uuid;
2279 	if (__improbable((id_8[0] & IPSEC_KPIPE_FLAG_WAKE_PKT) == IPSEC_KPIPE_FLAG_WAKE_PKT)) {
2280 		os_log_info(OS_LOG_DEFAULT, "ipsec_transform_kpipe_pkt_to_netif_pkt %s: wake packet "
2281 		    "flag is set\n", pcb->ipsec_ifp->if_xname);
2282 		__packet_set_wake_flag(netif_ph);
2283 	}
2284 
2285 	kern_packet_clear_flow_uuid(netif_ph);
2286 	err = kern_buflet_set_data_offset(netif_buf, 0);
2287 	VERIFY(err == 0);
2288 	err = kern_packet_set_link_header_offset(netif_ph, 0);
2289 	VERIFY(err == 0);
2290 	err = kern_packet_set_network_header_offset(netif_ph, 0);
2291 	VERIFY(err == 0);
2292 	err = kern_packet_finalize(netif_ph);
2293 	VERIFY(err == 0);
2294 
2295 	return 0;
2296 bad:
2297 	STATS_INC(nifs, NETIF_STATS_DROP);
2298 	return err;
2299 }
2300 
2301 
2302 static errno_t
ipsec_netif_sync_rx_packet(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t rx_ring,uint32_t flags)2303 ipsec_netif_sync_rx_packet(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
2304     kern_channel_ring_t rx_ring, uint32_t flags)
2305 {
2306 #pragma unused(nxprov)
2307 #pragma unused(flags)
2308 	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
2309 	struct kern_channel_ring_stat_increment rx_ring_stats;
2310 
2311 	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
2312 
2313 	if (!ipsec_data_move_begin(pcb)) {
2314 		os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
2315 		return 0;
2316 	}
2317 
2318 	lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
2319 
2320 	// Reclaim user-released slots
2321 	(void) kern_channel_reclaim(rx_ring);
2322 
2323 	STATS_INC(nifs, NETIF_STATS_RX_SYNC);
2324 
2325 	uint32_t avail = kern_channel_available_slot_count(rx_ring);
2326 	if (avail == 0) {
2327 		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
2328 		ipsec_data_move_end(pcb);
2329 		return 0;
2330 	}
2331 
2332 	struct kern_pbufpool *rx_pp = rx_ring->ckr_pp;
2333 	VERIFY(rx_pp != NULL);
2334 	bzero(&rx_ring_stats, sizeof(rx_ring_stats));
2335 	kern_channel_slot_t rx_pslot = NULL;
2336 	kern_channel_slot_t rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);
2337 
2338 	while (rx_slot != NULL) {
2339 		// Check for a waiting packet
2340 		lck_mtx_lock(&pcb->ipsec_input_chain_lock);
2341 		mbuf_t data = pcb->ipsec_input_chain;
2342 		if (data == NULL) {
2343 			lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
2344 			break;
2345 		}
2346 
2347 		// Allocate rx packet
2348 		kern_packet_t rx_ph = 0;
2349 		errno_t error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
2350 		if (__improbable(error != 0)) {
2351 			STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_PKT);
2352 			STATS_INC(nifs, NETIF_STATS_DROP);
2353 			lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
2354 			break;
2355 		}
2356 
2357 		// Advance waiting packets
2358 		if (pcb->ipsec_input_chain_count > 0) {
2359 			pcb->ipsec_input_chain_count--;
2360 		}
2361 		pcb->ipsec_input_chain = data->m_nextpkt;
2362 		data->m_nextpkt = NULL;
2363 		if (pcb->ipsec_input_chain == NULL) {
2364 			pcb->ipsec_input_chain_last = NULL;
2365 		}
2366 		lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
2367 
2368 		size_t length = mbuf_pkthdr_len(data);
2369 
2370 		if (length < sizeof(struct ip)) {
2371 			// Flush data
2372 			mbuf_freem(data);
2373 			kern_pbufpool_free(rx_pp, rx_ph);
2374 			STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
2375 			STATS_INC(nifs, NETIF_STATS_DROP);
2376 			os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: legacy decrypted packet length cannot hold IP %zu < %zu\n",
2377 			    pcb->ipsec_ifp->if_xname, length, sizeof(struct ip));
2378 			continue;
2379 		}
2380 
2381 		uint32_t af = 0;
2382 		struct ip *ip = mtod(data, struct ip *);
2383 		u_int ip_version = ip->ip_v;
2384 		switch (ip_version) {
2385 		case 4: {
2386 			af = AF_INET;
2387 			break;
2388 		}
2389 		case 6: {
2390 			af = AF_INET6;
2391 			break;
2392 		}
2393 		default: {
2394 			os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: legacy unknown ip version %u\n",
2395 			    pcb->ipsec_ifp->if_xname, ip_version);
2396 			break;
2397 		}
2398 		}
2399 
2400 		if (length > PP_BUF_SIZE_DEF(rx_pp) ||
2401 		    (pcb->ipsec_frag_size_set && length > pcb->ipsec_input_frag_size)) {
2402 			// We need to fragment to send up into the netif
2403 
2404 			u_int32_t fragment_mtu = PP_BUF_SIZE_DEF(rx_pp);
2405 			if (pcb->ipsec_frag_size_set &&
2406 			    pcb->ipsec_input_frag_size < PP_BUF_SIZE_DEF(rx_pp)) {
2407 				fragment_mtu = pcb->ipsec_input_frag_size;
2408 			}
2409 
2410 			mbuf_t fragment_chain = NULL;
2411 			switch (af) {
2412 			case AF_INET: {
2413 				// ip_fragment expects the length in host order
2414 				ip->ip_len = ntohs(ip->ip_len);
2415 
2416 				// ip_fragment will modify the original data, don't free
2417 				int fragment_error = ip_fragment(data, pcb->ipsec_ifp, fragment_mtu, TRUE);
2418 				if (fragment_error == 0 && data != NULL) {
2419 					fragment_chain = data;
2420 				} else {
2421 					STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
2422 					STATS_INC(nifs, NETIF_STATS_DROP);
2423 					os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: failed to fragment IPv4 packet of length %zu (%d)\n",
2424 					    pcb->ipsec_ifp->if_xname, length, fragment_error);
2425 				}
2426 				break;
2427 			}
2428 			case AF_INET6: {
2429 				if (length < sizeof(struct ip6_hdr)) {
2430 					mbuf_freem(data);
2431 					STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
2432 					STATS_INC(nifs, NETIF_STATS_DROP);
2433 					os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: failed to fragment IPv6 packet of length %zu < %zu\n",
2434 					    pcb->ipsec_ifp->if_xname, length, sizeof(struct ip6_hdr));
2435 				} else {
2436 					// ip6_do_fragmentation will free the original data on success only
2437 					struct ip6_hdr *ip6 = mtod(data, struct ip6_hdr *);
2438 
2439 					int fragment_error = ip6_do_fragmentation(&data, 0, pcb->ipsec_ifp, sizeof(struct ip6_hdr),
2440 					    ip6, NULL, fragment_mtu, ip6->ip6_nxt, htonl(ip6_randomid((uint64_t)data)));
2441 					if (fragment_error == 0 && data != NULL) {
2442 						fragment_chain = data;
2443 					} else {
2444 						mbuf_freem(data);
2445 						STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
2446 						STATS_INC(nifs, NETIF_STATS_DROP);
2447 						os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: failed to fragment IPv6 packet of length %zu (%d)\n",
2448 						    pcb->ipsec_ifp->if_xname, length, fragment_error);
2449 					}
2450 				}
2451 				break;
2452 			}
2453 			default: {
2454 				// Cannot fragment unknown families
2455 				mbuf_freem(data);
2456 				STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
2457 				STATS_INC(nifs, NETIF_STATS_DROP);
2458 				os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: uknown legacy decrypted packet length %zu > %u\n",
2459 				    pcb->ipsec_ifp->if_xname, length, PP_BUF_SIZE_DEF(rx_pp));
2460 				break;
2461 			}
2462 			}
2463 
2464 			if (fragment_chain != NULL) {
2465 				// Add fragments to chain before continuing
2466 				lck_mtx_lock(&pcb->ipsec_input_chain_lock);
2467 				if (pcb->ipsec_input_chain != NULL) {
2468 					pcb->ipsec_input_chain_last->m_nextpkt = fragment_chain;
2469 				} else {
2470 					pcb->ipsec_input_chain = fragment_chain;
2471 				}
2472 				pcb->ipsec_input_chain_count++;
2473 				while (fragment_chain->m_nextpkt) {
2474 					VERIFY(fragment_chain != fragment_chain->m_nextpkt);
2475 					fragment_chain = fragment_chain->m_nextpkt;
2476 					pcb->ipsec_input_chain_count++;
2477 				}
2478 				pcb->ipsec_input_chain_last = fragment_chain;
2479 				lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
2480 			}
2481 
2482 			// Make sure to free unused rx packet
2483 			kern_pbufpool_free(rx_pp, rx_ph);
2484 
2485 			continue;
2486 		}
2487 
2488 		mbuf_pkthdr_setrcvif(data, pcb->ipsec_ifp);
2489 
2490 		// Fillout rx packet
2491 		kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
2492 		VERIFY(rx_buf != NULL);
2493 		void *rx_baddr = kern_buflet_get_data_address(rx_buf);
2494 		VERIFY(rx_baddr != NULL);
2495 
2496 		// Copy-in data from mbuf to buflet
2497 		mbuf_copydata(data, 0, length, (void *)rx_baddr);
2498 		kern_packet_clear_flow_uuid(rx_ph);         // Zero flow id
2499 
2500 		// Finalize and attach the packet
2501 		error = kern_buflet_set_data_offset(rx_buf, 0);
2502 		VERIFY(error == 0);
2503 		error = kern_buflet_set_data_length(rx_buf, (uint16_t)length);
2504 		VERIFY(error == 0);
2505 		error = kern_packet_set_headroom(rx_ph, 0);
2506 		VERIFY(error == 0);
2507 		error = kern_packet_finalize(rx_ph);
2508 		VERIFY(error == 0);
2509 		error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
2510 		VERIFY(error == 0);
2511 
2512 		STATS_INC(nifs, NETIF_STATS_RX_PACKETS);
2513 		STATS_INC(nifs, NETIF_STATS_RX_COPY_MBUF);
2514 		bpf_tap_packet_in(pcb->ipsec_ifp, DLT_RAW, rx_ph, NULL, 0);
2515 
2516 		rx_ring_stats.kcrsi_slots_transferred++;
2517 		rx_ring_stats.kcrsi_bytes_transferred += length;
2518 
2519 		if (!pcb->ipsec_ext_ifdata_stats) {
2520 			ifnet_stat_increment_in(pcb->ipsec_ifp, 1, (uint16_t)length, 0);
2521 		}
2522 
2523 		mbuf_freem(data);
2524 
2525 		// Advance ring
2526 		rx_pslot = rx_slot;
2527 		rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
2528 	}
2529 
2530 	for (uint8_t ring_idx = 0; ring_idx < pcb->ipsec_kpipe_count; ring_idx++) {
2531 		struct kern_channel_ring_stat_increment tx_ring_stats = {};
2532 		kern_channel_slot_t tx_pslot = NULL;
2533 		kern_channel_slot_t tx_slot = NULL;
2534 
2535 		kern_channel_ring_t tx_ring = pcb->ipsec_kpipe_txring[ring_idx];
2536 		if (tx_ring == NULL) {
2537 			// Net-If TX ring not set up yet, nothing to read
2538 			goto done;
2539 		}
2540 
2541 		// Unlock ipsec before entering ring
2542 		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
2543 
2544 		(void)kr_enter(tx_ring, TRUE);
2545 
2546 		// Lock again after entering and validate
2547 		lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
2548 
2549 		if (tx_ring != pcb->ipsec_kpipe_txring[ring_idx]) {
2550 			goto done;
2551 		}
2552 
2553 		tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
2554 		if (tx_slot == NULL) {
2555 			// Nothing to read, don't bother signalling
2556 			goto done;
2557 		}
2558 
2559 		while (rx_slot != NULL && tx_slot != NULL) {
2560 			errno_t error = 0;
2561 
2562 			// Allocate rx packet
2563 			kern_packet_t rx_ph = 0;
2564 			error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
2565 			if (__improbable(error != 0)) {
2566 				STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_PKT);
2567 				STATS_INC(nifs, NETIF_STATS_DROP);
2568 				break;
2569 			}
2570 
2571 			kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);
2572 			tx_pslot = tx_slot;
2573 			tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
2574 			if (tx_ph == 0) {
2575 				kern_pbufpool_free(rx_pp, rx_ph);
2576 				continue;
2577 			}
2578 
2579 			error = ipsec_transform_kpipe_pkt_to_netif_pkt(pcb,
2580 			    &tx_ring_stats, nifs, tx_ph, rx_ph);
2581 			if (error != 0) {
2582 				// Failed to get decrypted packet
2583 				kern_pbufpool_free(rx_pp, rx_ph);
2584 				continue;
2585 			}
2586 
2587 			error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
2588 			VERIFY(error == 0);
2589 
2590 			STATS_INC(nifs, NETIF_STATS_RX_PACKETS);
2591 			STATS_INC(nifs, NETIF_STATS_RX_COPY_DIRECT);
2592 
2593 			bpf_tap_packet_in(pcb->ipsec_ifp, DLT_RAW, rx_ph, NULL, 0);
2594 
2595 			rx_ring_stats.kcrsi_slots_transferred++;
2596 			rx_ring_stats.kcrsi_bytes_transferred += kern_packet_get_data_length(rx_ph);
2597 
2598 			if (!pcb->ipsec_ext_ifdata_stats) {
2599 				ifnet_stat_increment_in(pcb->ipsec_ifp, 1,
2600 				    kern_packet_get_data_length(rx_ph), 0);
2601 			}
2602 
2603 			rx_pslot = rx_slot;
2604 			rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
2605 		}
2606 
2607 done:
2608 		if (tx_pslot) {
2609 			kern_channel_advance_slot(tx_ring, tx_pslot);
2610 			kern_channel_increment_ring_net_stats(tx_ring, pcb->ipsec_ifp, &tx_ring_stats);
2611 			(void)kern_channel_reclaim(tx_ring);
2612 		}
2613 
2614 		// Unlock first, then exit ring
2615 		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
2616 		if (tx_ring != NULL) {
2617 			if (tx_pslot != NULL) {
2618 				kern_channel_notify(tx_ring, 0);
2619 			}
2620 			kr_exit(tx_ring);
2621 		}
2622 
2623 		lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
2624 	}
2625 
2626 	if (rx_pslot) {
2627 		kern_channel_advance_slot(rx_ring, rx_pslot);
2628 		kern_channel_increment_ring_net_stats(rx_ring, pcb->ipsec_ifp, &rx_ring_stats);
2629 	}
2630 
2631 
2632 	lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
2633 
2634 	ipsec_data_move_end(pcb);
2635 	return 0;
2636 }
2637 
2638 static errno_t
ipsec_netif_sync_rx(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t rx_ring,uint32_t flags)2639 ipsec_netif_sync_rx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
2640     kern_channel_ring_t rx_ring, uint32_t flags)
2641 {
2642 	if (__improbable(ipsec_kpipe_mbuf == 1)) {
2643 		return ipsec_netif_sync_rx_mbuf(nxprov, nexus, rx_ring, flags);
2644 	} else {
2645 		return ipsec_netif_sync_rx_packet(nxprov, nexus, rx_ring, flags);
2646 	}
2647 }
2648 
2649 static errno_t
ipsec_nexus_ifattach(struct ipsec_pcb * pcb,struct ifnet_init_eparams * init_params,struct ifnet ** ifp)2650 ipsec_nexus_ifattach(struct ipsec_pcb *pcb,
2651     struct ifnet_init_eparams *init_params,
2652     struct ifnet **ifp)
2653 {
2654 	errno_t err;
2655 	nexus_controller_t controller = kern_nexus_shared_controller();
2656 	struct kern_nexus_net_init net_init;
2657 	struct kern_pbufpool_init pp_init;
2658 
2659 	nexus_name_t provider_name;
2660 	snprintf((char *)provider_name, sizeof(provider_name),
2661 	    "com.apple.netif.%s", pcb->ipsec_if_xname);
2662 
2663 	struct kern_nexus_provider_init prov_init = {
2664 		.nxpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
2665 		.nxpi_flags = NXPIF_VIRTUAL_DEVICE,
2666 		.nxpi_pre_connect = ipsec_nexus_pre_connect,
2667 		.nxpi_connected = ipsec_nexus_connected,
2668 		.nxpi_pre_disconnect = ipsec_netif_pre_disconnect,
2669 		.nxpi_disconnected = ipsec_nexus_disconnected,
2670 		.nxpi_ring_init = ipsec_netif_ring_init,
2671 		.nxpi_ring_fini = ipsec_netif_ring_fini,
2672 		.nxpi_slot_init = NULL,
2673 		.nxpi_slot_fini = NULL,
2674 		.nxpi_sync_tx = ipsec_netif_sync_tx,
2675 		.nxpi_sync_rx = ipsec_netif_sync_rx,
2676 		.nxpi_tx_doorbell = ipsec_netif_tx_doorbell,
2677 	};
2678 
2679 	nexus_attr_t nxa = NULL;
2680 	err = kern_nexus_attr_create(&nxa);
2681 	IPSEC_IF_VERIFY(err == 0);
2682 	if (err != 0) {
2683 		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_attr_create failed: %d\n",
2684 		    __func__, err);
2685 		goto failed;
2686 	}
2687 
2688 	uint64_t slot_buffer_size = pcb->ipsec_slot_size;
2689 	err = kern_nexus_attr_set(nxa, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
2690 	VERIFY(err == 0);
2691 
2692 	// Reset ring size for netif nexus to limit memory usage
2693 	uint64_t ring_size = pcb->ipsec_netif_ring_size;
2694 	err = kern_nexus_attr_set(nxa, NEXUS_ATTR_TX_SLOTS, ring_size);
2695 	VERIFY(err == 0);
2696 	err = kern_nexus_attr_set(nxa, NEXUS_ATTR_RX_SLOTS, ring_size);
2697 	VERIFY(err == 0);
2698 
2699 	assert(err == 0);
2700 
2701 	if (ipsec_in_wmm_mode(pcb)) {
2702 		os_log(OS_LOG_DEFAULT, "%s: %s enabling wmm mode\n",
2703 		    __func__, pcb->ipsec_if_xname);
2704 
2705 		init_params->output_sched_model = IFNET_SCHED_MODEL_DRIVER_MANAGED;
2706 
2707 		err = kern_nexus_attr_set(nxa, NEXUS_ATTR_TX_RINGS,
2708 		    IPSEC_NETIF_WMM_TX_RING_COUNT);
2709 		VERIFY(err == 0);
2710 		err = kern_nexus_attr_set(nxa, NEXUS_ATTR_RX_RINGS,
2711 		    IPSEC_NETIF_WMM_RX_RING_COUNT);
2712 		VERIFY(err == 0);
2713 
2714 		err = kern_nexus_attr_set(nxa, NEXUS_ATTR_QMAP, NEXUS_QMAP_TYPE_WMM);
2715 		VERIFY(err == 0);
2716 	}
2717 
2718 	pcb->ipsec_netif_txring_size = ring_size;
2719 
2720 	bzero(&pp_init, sizeof(pp_init));
2721 	pp_init.kbi_version = KERN_PBUFPOOL_CURRENT_VERSION;
2722 	pp_init.kbi_flags |= KBIF_VIRTUAL_DEVICE;
2723 	// Note: we need more packets than can be held in the tx and rx rings because
2724 	// packets can also be in the AQM queue(s)
2725 	pp_init.kbi_packets = pcb->ipsec_netif_ring_size * (2 * pcb->ipsec_kpipe_count + 1);
2726 	pp_init.kbi_bufsize = pcb->ipsec_slot_size;
2727 	pp_init.kbi_buf_seg_size = IPSEC_IF_DEFAULT_BUF_SEG_SIZE;
2728 	pp_init.kbi_max_frags = 1;
2729 	(void) snprintf((char *)pp_init.kbi_name, sizeof(pp_init.kbi_name),
2730 	    "%s", provider_name);
2731 	pp_init.kbi_ctx = NULL;
2732 	pp_init.kbi_ctx_retain = NULL;
2733 	pp_init.kbi_ctx_release = NULL;
2734 
2735 	err = kern_pbufpool_create(&pp_init, &pcb->ipsec_netif_pp, NULL);
2736 	if (err != 0) {
2737 		os_log_error(OS_LOG_DEFAULT, "%s pbufbool create failed, error %d\n", __func__, err);
2738 		goto failed;
2739 	}
2740 
2741 	err = kern_nexus_controller_register_provider(controller,
2742 	    ipsec_nx_dom_prov,
2743 	    provider_name,
2744 	    &prov_init,
2745 	    sizeof(prov_init),
2746 	    nxa,
2747 	    &pcb->ipsec_nx.if_provider);
2748 	IPSEC_IF_VERIFY(err == 0);
2749 	if (err != 0) {
2750 		os_log_error(OS_LOG_DEFAULT, "%s register provider failed, error %d\n",
2751 		    __func__, err);
2752 		goto failed;
2753 	}
2754 
2755 	bzero(&net_init, sizeof(net_init));
2756 	net_init.nxneti_version = KERN_NEXUS_NET_CURRENT_VERSION;
2757 	net_init.nxneti_flags = 0;
2758 	net_init.nxneti_eparams = init_params;
2759 	net_init.nxneti_lladdr = NULL;
2760 	net_init.nxneti_prepare = ipsec_netif_prepare;
2761 	net_init.nxneti_rx_pbufpool = pcb->ipsec_netif_pp;
2762 	net_init.nxneti_tx_pbufpool = pcb->ipsec_netif_pp;
2763 	err = kern_nexus_controller_alloc_net_provider_instance(controller,
2764 	    pcb->ipsec_nx.if_provider,
2765 	    pcb,
2766 	    NULL,
2767 	    &pcb->ipsec_nx.if_instance,
2768 	    &net_init,
2769 	    ifp);
2770 	IPSEC_IF_VERIFY(err == 0);
2771 	if (err != 0) {
2772 		os_log_error(OS_LOG_DEFAULT, "%s alloc_net_provider_instance failed, %d\n",
2773 		    __func__, err);
2774 		kern_nexus_controller_deregister_provider(controller,
2775 		    pcb->ipsec_nx.if_provider);
2776 		uuid_clear(pcb->ipsec_nx.if_provider);
2777 		goto failed;
2778 	}
2779 
2780 failed:
2781 	if (nxa) {
2782 		kern_nexus_attr_destroy(nxa);
2783 	}
2784 	if (err && pcb->ipsec_netif_pp != NULL) {
2785 		kern_pbufpool_destroy(pcb->ipsec_netif_pp);
2786 		pcb->ipsec_netif_pp = NULL;
2787 	}
2788 	return err;
2789 }
2790 
2791 static void
ipsec_detach_provider_and_instance(uuid_t provider,uuid_t instance)2792 ipsec_detach_provider_and_instance(uuid_t provider, uuid_t instance)
2793 {
2794 	nexus_controller_t controller = kern_nexus_shared_controller();
2795 	errno_t err;
2796 
2797 	if (!uuid_is_null(instance)) {
2798 		err = kern_nexus_controller_free_provider_instance(controller,
2799 		    instance);
2800 		if (err != 0) {
2801 			os_log_error(OS_LOG_DEFAULT, "%s free_provider_instance failed %d\n",
2802 			    __func__, err);
2803 		}
2804 		uuid_clear(instance);
2805 	}
2806 	if (!uuid_is_null(provider)) {
2807 		err = kern_nexus_controller_deregister_provider(controller,
2808 		    provider);
2809 		if (err != 0) {
2810 			os_log_error(OS_LOG_DEFAULT, "%s deregister_provider %d\n", __func__, err);
2811 		}
2812 		uuid_clear(provider);
2813 	}
2814 	return;
2815 }
2816 
2817 static void
ipsec_nexus_detach(struct ipsec_pcb * pcb)2818 ipsec_nexus_detach(struct ipsec_pcb *pcb)
2819 {
2820 	ipsec_nx_t nx = &pcb->ipsec_nx;
2821 	nexus_controller_t controller = kern_nexus_shared_controller();
2822 	errno_t err;
2823 
2824 	if (!uuid_is_null(nx->fsw_device)) {
2825 		err = kern_nexus_ifdetach(controller,
2826 		    nx->fsw_instance,
2827 		    nx->fsw_device);
2828 		if (err != 0) {
2829 			os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_ifdetach ms device failed %d\n",
2830 			    __func__, err);
2831 		}
2832 	}
2833 
2834 	ipsec_detach_provider_and_instance(nx->fsw_provider,
2835 	    nx->fsw_instance);
2836 	ipsec_detach_provider_and_instance(nx->if_provider,
2837 	    nx->if_instance);
2838 
2839 	if (pcb->ipsec_netif_pp != NULL) {
2840 		kern_pbufpool_destroy(pcb->ipsec_netif_pp);
2841 		pcb->ipsec_netif_pp = NULL;
2842 	}
2843 	memset(nx, 0, sizeof(*nx));
2844 }
2845 
2846 static errno_t
ipsec_create_fs_provider_and_instance(struct ipsec_pcb * pcb,const char * type_name,const char * ifname,uuid_t * provider,uuid_t * instance)2847 ipsec_create_fs_provider_and_instance(struct ipsec_pcb *pcb,
2848     const char *type_name,
2849     const char *ifname,
2850     uuid_t *provider, uuid_t *instance)
2851 {
2852 	nexus_attr_t attr = NULL;
2853 	nexus_controller_t controller = kern_nexus_shared_controller();
2854 	uuid_t dom_prov;
2855 	errno_t err;
2856 	struct kern_nexus_init init;
2857 	nexus_name_t    provider_name;
2858 
2859 	err = kern_nexus_get_default_domain_provider(NEXUS_TYPE_FLOW_SWITCH,
2860 	    &dom_prov);
2861 	IPSEC_IF_VERIFY(err == 0);
2862 	if (err != 0) {
2863 		os_log_error(OS_LOG_DEFAULT, "%s can't get %s provider, error %d\n",
2864 		    __func__, type_name, err);
2865 		goto failed;
2866 	}
2867 
2868 	err = kern_nexus_attr_create(&attr);
2869 	IPSEC_IF_VERIFY(err == 0);
2870 	if (err != 0) {
2871 		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_attr_create failed: %d\n",
2872 		    __func__, err);
2873 		goto failed;
2874 	}
2875 
2876 	uint64_t slot_buffer_size = pcb->ipsec_slot_size;
2877 	err = kern_nexus_attr_set(attr, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
2878 	VERIFY(err == 0);
2879 
2880 	// Reset ring size for flowswitch nexus to limit memory usage. Larger RX than netif.
2881 	uint64_t tx_ring_size = pcb->ipsec_tx_fsw_ring_size;
2882 	err = kern_nexus_attr_set(attr, NEXUS_ATTR_TX_SLOTS, tx_ring_size);
2883 	VERIFY(err == 0);
2884 	uint64_t rx_ring_size = pcb->ipsec_rx_fsw_ring_size;
2885 	err = kern_nexus_attr_set(attr, NEXUS_ATTR_RX_SLOTS, rx_ring_size);
2886 	VERIFY(err == 0);
2887 	/*
2888 	 * Configure flowswitch to use super-packet (multi-buflet).
2889 	 * This allows flowswitch to perform intra-stack packet aggregation.
2890 	 */
2891 	err = kern_nexus_attr_set(attr, NEXUS_ATTR_MAX_FRAGS,
2892 	    NX_FSW_TCP_RX_AGG_ENABLED() ? NX_PBUF_FRAGS_MAX : 1);
2893 	VERIFY(err == 0);
2894 
2895 	snprintf((char *)provider_name, sizeof(provider_name),
2896 	    "com.apple.%s.%s", type_name, ifname);
2897 	err = kern_nexus_controller_register_provider(controller,
2898 	    dom_prov,
2899 	    provider_name,
2900 	    NULL,
2901 	    0,
2902 	    attr,
2903 	    provider);
2904 	kern_nexus_attr_destroy(attr);
2905 	attr = NULL;
2906 	IPSEC_IF_VERIFY(err == 0);
2907 	if (err != 0) {
2908 		os_log_error(OS_LOG_DEFAULT, "%s register %s provider failed, error %d\n",
2909 		    __func__, type_name, err);
2910 		goto failed;
2911 	}
2912 	bzero(&init, sizeof(init));
2913 	init.nxi_version = KERN_NEXUS_CURRENT_VERSION;
2914 	err = kern_nexus_controller_alloc_provider_instance(controller,
2915 	    *provider,
2916 	    NULL, NULL,
2917 	    instance, &init);
2918 	IPSEC_IF_VERIFY(err == 0);
2919 	if (err != 0) {
2920 		os_log_error(OS_LOG_DEFAULT, "%s alloc_provider_instance %s failed, %d\n",
2921 		    __func__, type_name, err);
2922 		kern_nexus_controller_deregister_provider(controller,
2923 		    *provider);
2924 		uuid_clear(*provider);
2925 	}
2926 failed:
2927 	return err;
2928 }
2929 
2930 static errno_t
ipsec_flowswitch_attach(struct ipsec_pcb * pcb)2931 ipsec_flowswitch_attach(struct ipsec_pcb *pcb)
2932 {
2933 	nexus_controller_t controller = kern_nexus_shared_controller();
2934 	errno_t err = 0;
2935 	ipsec_nx_t nx = &pcb->ipsec_nx;
2936 
2937 	// Allocate flowswitch
2938 	err = ipsec_create_fs_provider_and_instance(pcb,
2939 	    "flowswitch",
2940 	    pcb->ipsec_ifp->if_xname,
2941 	    &nx->fsw_provider,
2942 	    &nx->fsw_instance);
2943 	if (err != 0) {
2944 		os_log_error(OS_LOG_DEFAULT, "%s: failed to create bridge provider and instance\n",
2945 		    __func__);
2946 		goto failed;
2947 	}
2948 
2949 	// Attach flowswitch to device port
2950 	err = kern_nexus_ifattach(controller, nx->fsw_instance,
2951 	    NULL, nx->if_instance,
2952 	    FALSE, &nx->fsw_device);
2953 	if (err != 0) {
2954 		os_log_error(OS_LOG_DEFAULT, "%s kern_nexus_ifattach ms device %d\n", __func__, err);
2955 		goto failed;
2956 	}
2957 
2958 	// Extract the agent UUID and save for later
2959 	struct kern_nexus *flowswitch_nx = nx_find(nx->fsw_instance, false);
2960 	if (flowswitch_nx != NULL) {
2961 		struct nx_flowswitch *flowswitch = NX_FSW_PRIVATE(flowswitch_nx);
2962 		if (flowswitch != NULL) {
2963 			FSW_RLOCK(flowswitch);
2964 			uuid_copy(nx->fsw_agent, flowswitch->fsw_agent_uuid);
2965 			FSW_UNLOCK(flowswitch);
2966 		} else {
2967 			os_log_error(OS_LOG_DEFAULT, "ipsec_flowswitch_attach - flowswitch is NULL\n");
2968 		}
2969 		nx_release(flowswitch_nx);
2970 	} else {
2971 		os_log_error(OS_LOG_DEFAULT, "ipsec_flowswitch_attach - unable to find flowswitch nexus\n");
2972 	}
2973 
2974 	return 0;
2975 
2976 failed:
2977 	ipsec_nexus_detach(pcb);
2978 
2979 	errno_t detach_error = 0;
2980 	if ((detach_error = ifnet_detach(pcb->ipsec_ifp)) != 0) {
2981 		panic("ipsec_flowswitch_attach - ifnet_detach failed: %d", detach_error);
2982 		/* NOT REACHED */
2983 	}
2984 
2985 	return err;
2986 }
2987 
2988 #pragma mark Kernel Pipe Nexus
2989 
2990 static errno_t
ipsec_register_kernel_pipe_nexus(struct ipsec_pcb * pcb)2991 ipsec_register_kernel_pipe_nexus(struct ipsec_pcb *pcb)
2992 {
2993 	nexus_attr_t nxa = NULL;
2994 	errno_t result;
2995 
2996 	lck_mtx_lock(&ipsec_lock);
2997 	if (ipsec_ncd_refcount++) {
2998 		lck_mtx_unlock(&ipsec_lock);
2999 		return 0;
3000 	}
3001 
3002 	result = kern_nexus_controller_create(&ipsec_ncd);
3003 	if (result) {
3004 		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_controller_create failed: %d\n",
3005 		    __FUNCTION__, result);
3006 		goto done;
3007 	}
3008 
3009 	uuid_t dom_prov;
3010 	result = kern_nexus_get_default_domain_provider(
3011 		NEXUS_TYPE_KERNEL_PIPE, &dom_prov);
3012 	if (result) {
3013 		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_get_default_domain_provider failed: %d\n",
3014 		    __FUNCTION__, result);
3015 		goto done;
3016 	}
3017 
3018 	struct kern_nexus_provider_init prov_init = {
3019 		.nxpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
3020 		.nxpi_flags = NXPIF_VIRTUAL_DEVICE,
3021 		.nxpi_pre_connect = ipsec_nexus_pre_connect,
3022 		.nxpi_connected = ipsec_nexus_connected,
3023 		.nxpi_pre_disconnect = ipsec_nexus_pre_disconnect,
3024 		.nxpi_disconnected = ipsec_nexus_disconnected,
3025 		.nxpi_ring_init = ipsec_kpipe_ring_init,
3026 		.nxpi_ring_fini = ipsec_kpipe_ring_fini,
3027 		.nxpi_slot_init = NULL,
3028 		.nxpi_slot_fini = NULL,
3029 		.nxpi_sync_tx = ipsec_kpipe_sync_tx,
3030 		.nxpi_sync_rx = ipsec_kpipe_sync_rx,
3031 		.nxpi_tx_doorbell = NULL,
3032 	};
3033 
3034 	result = kern_nexus_attr_create(&nxa);
3035 	if (result) {
3036 		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_attr_create failed: %d\n",
3037 		    __FUNCTION__, result);
3038 		goto done;
3039 	}
3040 
3041 	uint64_t slot_buffer_size = IPSEC_IF_DEFAULT_SLOT_SIZE;
3042 	result = kern_nexus_attr_set(nxa, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
3043 	VERIFY(result == 0);
3044 
3045 	// Reset ring size for kernel pipe nexus to limit memory usage
3046 	// Note: It's better to have less on slots on the kpipe TX ring than the netif
3047 	// so back pressure is applied at the AQM layer
3048 	uint64_t ring_size =
3049 	    pcb->ipsec_kpipe_tx_ring_size != 0 ? pcb->ipsec_kpipe_tx_ring_size :
3050 	    pcb->ipsec_netif_ring_size != 0 ? pcb->ipsec_netif_ring_size :
3051 	    if_ipsec_ring_size;
3052 	result = kern_nexus_attr_set(nxa, NEXUS_ATTR_TX_SLOTS, ring_size);
3053 	VERIFY(result == 0);
3054 
3055 	ring_size =
3056 	    pcb->ipsec_kpipe_rx_ring_size != 0 ? pcb->ipsec_kpipe_rx_ring_size :
3057 	    pcb->ipsec_netif_ring_size != 0 ? pcb->ipsec_netif_ring_size :
3058 	    if_ipsec_ring_size;
3059 	result = kern_nexus_attr_set(nxa, NEXUS_ATTR_RX_SLOTS, ring_size);
3060 	VERIFY(result == 0);
3061 
3062 	result = kern_nexus_controller_register_provider(ipsec_ncd,
3063 	    dom_prov,
3064 	    (const uint8_t *)"com.apple.nexus.ipsec.kpipe",
3065 	    &prov_init,
3066 	    sizeof(prov_init),
3067 	    nxa,
3068 	    &ipsec_kpipe_uuid);
3069 	if (result) {
3070 		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_controller_register_provider failed: %d\n",
3071 		    __FUNCTION__, result);
3072 		goto done;
3073 	}
3074 
3075 done:
3076 	if (nxa) {
3077 		kern_nexus_attr_destroy(nxa);
3078 	}
3079 
3080 	if (result) {
3081 		if (ipsec_ncd) {
3082 			kern_nexus_controller_destroy(ipsec_ncd);
3083 			ipsec_ncd = NULL;
3084 		}
3085 		ipsec_ncd_refcount = 0;
3086 	}
3087 
3088 	lck_mtx_unlock(&ipsec_lock);
3089 
3090 	return result;
3091 }
3092 
3093 static void
ipsec_unregister_kernel_pipe_nexus(void)3094 ipsec_unregister_kernel_pipe_nexus(void)
3095 {
3096 	lck_mtx_lock(&ipsec_lock);
3097 
3098 	VERIFY(ipsec_ncd_refcount > 0);
3099 
3100 	if (--ipsec_ncd_refcount == 0) {
3101 		kern_nexus_controller_destroy(ipsec_ncd);
3102 		ipsec_ncd = NULL;
3103 	}
3104 
3105 	lck_mtx_unlock(&ipsec_lock);
3106 }
3107 
3108 /* This structure only holds onto kpipe channels that need to be
3109  * freed in the future, but are cleared from the pcb under lock
3110  */
3111 struct ipsec_detached_channels {
3112 	int count;
3113 	kern_pbufpool_t pp;
3114 	uuid_t uuids[IPSEC_IF_MAX_RING_COUNT];
3115 };
3116 
3117 static void
ipsec_detach_channels(struct ipsec_pcb * pcb,struct ipsec_detached_channels * dc)3118 ipsec_detach_channels(struct ipsec_pcb *pcb, struct ipsec_detached_channels *dc)
3119 {
3120 	LCK_RW_ASSERT(&pcb->ipsec_pcb_lock, LCK_RW_TYPE_EXCLUSIVE);
3121 
3122 	if (!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
3123 		for (int i = 0; i < IPSEC_IF_MAX_RING_COUNT; i++) {
3124 			VERIFY(uuid_is_null(pcb->ipsec_kpipe_uuid[i]));
3125 		}
3126 		dc->count = 0;
3127 		return;
3128 	}
3129 
3130 	dc->count = pcb->ipsec_kpipe_count;
3131 
3132 	VERIFY(dc->count >= 0);
3133 	VERIFY(dc->count <= IPSEC_IF_MAX_RING_COUNT);
3134 
3135 	for (int i = 0; i < dc->count; i++) {
3136 		VERIFY(!uuid_is_null(pcb->ipsec_kpipe_uuid[i]));
3137 		uuid_copy(dc->uuids[i], pcb->ipsec_kpipe_uuid[i]);
3138 		uuid_clear(pcb->ipsec_kpipe_uuid[i]);
3139 	}
3140 	for (int i = dc->count; i < IPSEC_IF_MAX_RING_COUNT; i++) {
3141 		VERIFY(uuid_is_null(pcb->ipsec_kpipe_uuid[i]));
3142 	}
3143 
3144 	if (dc->count) {
3145 		VERIFY(pcb->ipsec_kpipe_pp);
3146 	} else {
3147 		VERIFY(!pcb->ipsec_kpipe_pp);
3148 	}
3149 
3150 	dc->pp = pcb->ipsec_kpipe_pp;
3151 
3152 	pcb->ipsec_kpipe_pp = NULL;
3153 
3154 	ipsec_flag_clr(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED);
3155 }
3156 
3157 static void
ipsec_free_channels(struct ipsec_detached_channels * dc)3158 ipsec_free_channels(struct ipsec_detached_channels *dc)
3159 {
3160 	if (!dc->count) {
3161 		return;
3162 	}
3163 
3164 	for (int i = 0; i < dc->count; i++) {
3165 		errno_t result;
3166 		result = kern_nexus_controller_free_provider_instance(ipsec_ncd, dc->uuids[i]);
3167 		VERIFY(!result);
3168 	}
3169 
3170 	VERIFY(dc->pp);
3171 	kern_pbufpool_destroy(dc->pp);
3172 
3173 	ipsec_unregister_kernel_pipe_nexus();
3174 
3175 	memset(dc, 0, sizeof(*dc));
3176 }
3177 
3178 static errno_t
ipsec_enable_channel(struct ipsec_pcb * pcb,struct proc * proc)3179 ipsec_enable_channel(struct ipsec_pcb *pcb, struct proc *proc)
3180 {
3181 	struct kern_nexus_init init;
3182 	struct kern_pbufpool_init pp_init;
3183 	errno_t result;
3184 
3185 	kauth_cred_t cred = kauth_cred_get();
3186 	result = priv_check_cred(cred, PRIV_SKYWALK_REGISTER_KERNEL_PIPE, 0);
3187 	if (result) {
3188 		return result;
3189 	}
3190 
3191 	VERIFY(pcb->ipsec_kpipe_count);
3192 	VERIFY(!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED));
3193 
3194 	result = ipsec_register_kernel_pipe_nexus(pcb);
3195 
3196 	lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
3197 
3198 	if (result) {
3199 		os_log_error(OS_LOG_DEFAULT, "%s: %s failed to register kernel pipe nexus\n",
3200 		    __func__, pcb->ipsec_if_xname);
3201 		goto done;
3202 	}
3203 
3204 	VERIFY(ipsec_ncd);
3205 
3206 	bzero(&pp_init, sizeof(pp_init));
3207 	pp_init.kbi_version = KERN_PBUFPOOL_CURRENT_VERSION;
3208 	pp_init.kbi_flags |= KBIF_VIRTUAL_DEVICE;
3209 	// Note: We only needs are many packets as can be held in the tx and rx rings
3210 	pp_init.kbi_packets = pcb->ipsec_netif_ring_size * 2 * pcb->ipsec_kpipe_count;
3211 	pp_init.kbi_bufsize = pcb->ipsec_slot_size;
3212 	pp_init.kbi_buf_seg_size = IPSEC_IF_DEFAULT_BUF_SEG_SIZE;
3213 	pp_init.kbi_max_frags = 1;
3214 	pp_init.kbi_flags |= KBIF_QUANTUM;
3215 	(void) snprintf((char *)pp_init.kbi_name, sizeof(pp_init.kbi_name),
3216 	    "com.apple.kpipe.%s", pcb->ipsec_if_xname);
3217 	pp_init.kbi_ctx = NULL;
3218 	pp_init.kbi_ctx_retain = NULL;
3219 	pp_init.kbi_ctx_release = NULL;
3220 
3221 	result = kern_pbufpool_create(&pp_init, &pcb->ipsec_kpipe_pp,
3222 	    NULL);
3223 	if (result != 0) {
3224 		os_log_error(OS_LOG_DEFAULT, "%s: %s pbufbool create failed, error %d\n",
3225 		    __func__, pcb->ipsec_if_xname, result);
3226 		goto done;
3227 	}
3228 
3229 	bzero(&init, sizeof(init));
3230 	init.nxi_version = KERN_NEXUS_CURRENT_VERSION;
3231 	init.nxi_tx_pbufpool = pcb->ipsec_kpipe_pp;
3232 
3233 	for (unsigned int i = 0; i < pcb->ipsec_kpipe_count; i++) {
3234 		VERIFY(uuid_is_null(pcb->ipsec_kpipe_uuid[i]));
3235 		result = kern_nexus_controller_alloc_provider_instance(ipsec_ncd,
3236 		    ipsec_kpipe_uuid, pcb, NULL, &pcb->ipsec_kpipe_uuid[i], &init);
3237 
3238 		if (result == 0) {
3239 			nexus_port_t port = NEXUS_PORT_KERNEL_PIPE_CLIENT;
3240 			const bool has_proc_uuid = !uuid_is_null(pcb->ipsec_kpipe_proc_uuid);
3241 			pid_t pid = pcb->ipsec_kpipe_pid;
3242 			if (!pid && !has_proc_uuid) {
3243 				pid = proc_pid(proc);
3244 			}
3245 			result = kern_nexus_controller_bind_provider_instance(ipsec_ncd,
3246 			    pcb->ipsec_kpipe_uuid[i], &port,
3247 			    pid, has_proc_uuid ? pcb->ipsec_kpipe_proc_uuid : NULL, NULL,
3248 			    0, has_proc_uuid ? NEXUS_BIND_EXEC_UUID:NEXUS_BIND_PID);
3249 		}
3250 
3251 		if (result) {
3252 			/* Unwind all of them on error */
3253 			for (int j = 0; j < IPSEC_IF_MAX_RING_COUNT; j++) {
3254 				if (!uuid_is_null(pcb->ipsec_kpipe_uuid[j])) {
3255 					kern_nexus_controller_free_provider_instance(ipsec_ncd,
3256 					    pcb->ipsec_kpipe_uuid[j]);
3257 					uuid_clear(pcb->ipsec_kpipe_uuid[j]);
3258 				}
3259 			}
3260 			goto done;
3261 		}
3262 	}
3263 
3264 done:
3265 	lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
3266 
3267 	if (result) {
3268 		if (pcb->ipsec_kpipe_pp != NULL) {
3269 			kern_pbufpool_destroy(pcb->ipsec_kpipe_pp);
3270 			pcb->ipsec_kpipe_pp = NULL;
3271 		}
3272 		ipsec_unregister_kernel_pipe_nexus();
3273 	} else {
3274 		ipsec_flag_set(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED);
3275 	}
3276 
3277 	return result;
3278 }
3279 
3280 #endif // IPSEC_NEXUS
3281 
3282 
3283 /* Kernel control functions */
3284 
3285 static inline int
ipsec_find_by_unit(u_int32_t unit)3286 ipsec_find_by_unit(u_int32_t unit)
3287 {
3288 	struct ipsec_pcb *next_pcb = NULL;
3289 	int found = 0;
3290 
3291 	TAILQ_FOREACH(next_pcb, &ipsec_head, ipsec_chain) {
3292 		if (next_pcb->ipsec_unit == unit) {
3293 			found = 1;
3294 			break;
3295 		}
3296 	}
3297 
3298 	return found;
3299 }
3300 
3301 static inline void
ipsec_free_pcb(struct ipsec_pcb * pcb,bool locked)3302 ipsec_free_pcb(struct ipsec_pcb *pcb, bool locked)
3303 {
3304 #if IPSEC_NEXUS
3305 	mbuf_freem_list(pcb->ipsec_input_chain);
3306 	pcb->ipsec_input_chain_count = 0;
3307 	lck_mtx_destroy(&pcb->ipsec_input_chain_lock, &ipsec_lck_grp);
3308 	lck_mtx_destroy(&pcb->ipsec_kpipe_encrypt_lock, &ipsec_lck_grp);
3309 	lck_mtx_destroy(&pcb->ipsec_kpipe_decrypt_lock, &ipsec_lck_grp);
3310 #endif // IPSEC_NEXUS
3311 	lck_mtx_destroy(&pcb->ipsec_pcb_data_move_lock, &ipsec_lck_grp);
3312 	lck_rw_destroy(&pcb->ipsec_pcb_lock, &ipsec_lck_grp);
3313 	if (!locked) {
3314 		lck_mtx_lock(&ipsec_lock);
3315 	}
3316 	TAILQ_REMOVE(&ipsec_head, pcb, ipsec_chain);
3317 	if (!locked) {
3318 		lck_mtx_unlock(&ipsec_lock);
3319 	}
3320 	zfree(ipsec_pcb_zone, pcb);
3321 }
3322 
3323 static errno_t
ipsec_ctl_setup(u_int32_t * unit,void ** unitinfo)3324 ipsec_ctl_setup(u_int32_t *unit, void **unitinfo)
3325 {
3326 	if (unit == NULL || unitinfo == NULL) {
3327 		return EINVAL;
3328 	}
3329 
3330 	lck_mtx_lock(&ipsec_lock);
3331 
3332 	/* Find next available unit */
3333 	if (*unit == 0) {
3334 		*unit = 1;
3335 		while (*unit != ctl_maxunit) {
3336 			if (ipsec_find_by_unit(*unit)) {
3337 				(*unit)++;
3338 			} else {
3339 				break;
3340 			}
3341 		}
3342 		if (*unit == ctl_maxunit) {
3343 			lck_mtx_unlock(&ipsec_lock);
3344 			return EBUSY;
3345 		}
3346 	} else if (ipsec_find_by_unit(*unit)) {
3347 		lck_mtx_unlock(&ipsec_lock);
3348 		return EBUSY;
3349 	}
3350 
3351 	/* Find some open interface id */
3352 	u_int32_t chosen_unique_id = 1;
3353 	struct ipsec_pcb *next_pcb = TAILQ_LAST(&ipsec_head, ipsec_list);
3354 	if (next_pcb != NULL) {
3355 		/* List was not empty, add one to the last item */
3356 		chosen_unique_id = next_pcb->ipsec_unique_id + 1;
3357 		next_pcb = NULL;
3358 
3359 		/*
3360 		 * If this wrapped the id number, start looking at
3361 		 * the front of the list for an unused id.
3362 		 */
3363 		if (chosen_unique_id == 0) {
3364 			/* Find the next unused ID */
3365 			chosen_unique_id = 1;
3366 			TAILQ_FOREACH(next_pcb, &ipsec_head, ipsec_chain) {
3367 				if (next_pcb->ipsec_unique_id > chosen_unique_id) {
3368 					/* We found a gap */
3369 					break;
3370 				}
3371 
3372 				chosen_unique_id = next_pcb->ipsec_unique_id + 1;
3373 			}
3374 		}
3375 	}
3376 
3377 	struct ipsec_pcb *pcb = zalloc_flags(ipsec_pcb_zone, Z_WAITOK | Z_ZERO);
3378 
3379 	*unitinfo = pcb;
3380 	pcb->ipsec_unit = *unit;
3381 	pcb->ipsec_unique_id = chosen_unique_id;
3382 
3383 	if (next_pcb != NULL) {
3384 		TAILQ_INSERT_BEFORE(next_pcb, pcb, ipsec_chain);
3385 	} else {
3386 		TAILQ_INSERT_TAIL(&ipsec_head, pcb, ipsec_chain);
3387 	}
3388 
3389 	lck_mtx_unlock(&ipsec_lock);
3390 
3391 	return 0;
3392 }
3393 
3394 static errno_t
ipsec_ctl_bind(kern_ctl_ref kctlref,struct sockaddr_ctl * sac,void ** unitinfo)3395 ipsec_ctl_bind(kern_ctl_ref kctlref,
3396     struct sockaddr_ctl *sac,
3397     void **unitinfo)
3398 {
3399 	if (*unitinfo == NULL) {
3400 		u_int32_t unit = 0;
3401 		(void)ipsec_ctl_setup(&unit, unitinfo);
3402 	}
3403 
3404 	struct ipsec_pcb *pcb = (struct ipsec_pcb *)*unitinfo;
3405 	if (pcb == NULL) {
3406 		return EINVAL;
3407 	}
3408 
3409 	if (pcb->ipsec_ctlref != NULL) {
3410 		// Return if bind was already called
3411 		return EINVAL;
3412 	}
3413 
3414 	/* Setup the protocol control block */
3415 	pcb->ipsec_ctlref = kctlref;
3416 	pcb->ipsec_unit = sac->sc_unit;
3417 	pcb->ipsec_output_service_class = MBUF_SC_OAM;
3418 
3419 #if IPSEC_NEXUS
3420 	pcb->ipsec_use_netif = false;
3421 	pcb->ipsec_slot_size = IPSEC_IF_DEFAULT_SLOT_SIZE;
3422 	pcb->ipsec_netif_ring_size = if_ipsec_ring_size;
3423 	pcb->ipsec_tx_fsw_ring_size = if_ipsec_tx_fsw_ring_size;
3424 	pcb->ipsec_rx_fsw_ring_size = if_ipsec_rx_fsw_ring_size;
3425 #endif // IPSEC_NEXUS
3426 
3427 	lck_rw_init(&pcb->ipsec_pcb_lock, &ipsec_lck_grp, &ipsec_lck_attr);
3428 	lck_mtx_init(&pcb->ipsec_pcb_data_move_lock, &ipsec_lck_grp, &ipsec_lck_attr);
3429 #if IPSEC_NEXUS
3430 	pcb->ipsec_input_chain_count = 0;
3431 	lck_mtx_init(&pcb->ipsec_input_chain_lock, &ipsec_lck_grp, &ipsec_lck_attr);
3432 	lck_mtx_init(&pcb->ipsec_kpipe_encrypt_lock, &ipsec_lck_grp, &ipsec_lck_attr);
3433 	lck_mtx_init(&pcb->ipsec_kpipe_decrypt_lock, &ipsec_lck_grp, &ipsec_lck_attr);
3434 #endif // IPSEC_NEXUS
3435 
3436 	return 0;
3437 }
3438 
3439 static errno_t
ipsec_ctl_connect(kern_ctl_ref kctlref,struct sockaddr_ctl * sac,void ** unitinfo)3440 ipsec_ctl_connect(kern_ctl_ref kctlref,
3441     struct sockaddr_ctl *sac,
3442     void **unitinfo)
3443 {
3444 	struct ifnet_init_eparams ipsec_init = {};
3445 	errno_t result = 0;
3446 
3447 	if (*unitinfo == NULL) {
3448 		(void)ipsec_ctl_bind(kctlref, sac, unitinfo);
3449 	}
3450 
3451 	struct ipsec_pcb *pcb = *unitinfo;
3452 	if (pcb == NULL) {
3453 		return EINVAL;
3454 	}
3455 
3456 	/* Handle case where ipsec_ctl_setup() was called, but ipsec_ctl_bind() was not */
3457 	if (pcb->ipsec_ctlref == NULL) {
3458 		(void)ipsec_ctl_bind(kctlref, sac, unitinfo);
3459 	}
3460 
3461 	snprintf(pcb->ipsec_if_xname, sizeof(pcb->ipsec_if_xname), "ipsec%d", pcb->ipsec_unit - 1);
3462 	snprintf(pcb->ipsec_unique_name, sizeof(pcb->ipsec_unique_name), "ipsecid%d", pcb->ipsec_unique_id - 1);
3463 	os_log(OS_LOG_DEFAULT, "ipsec_ctl_connect: creating interface %s (id %s)\n", pcb->ipsec_if_xname, pcb->ipsec_unique_name);
3464 
3465 	/* Create the interface */
3466 	bzero(&ipsec_init, sizeof(ipsec_init));
3467 	ipsec_init.ver = IFNET_INIT_CURRENT_VERSION;
3468 	ipsec_init.len = sizeof(ipsec_init);
3469 
3470 #if IPSEC_NEXUS
3471 	if (pcb->ipsec_use_netif) {
3472 		ipsec_init.flags = (IFNET_INIT_SKYWALK_NATIVE | IFNET_INIT_NX_NOAUTO);
3473 	} else
3474 #endif // IPSEC_NEXUS
3475 	{
3476 		ipsec_init.flags = IFNET_INIT_NX_NOAUTO;
3477 		ipsec_init.start = ipsec_start;
3478 	}
3479 	ipsec_init.name = "ipsec";
3480 	ipsec_init.unit = pcb->ipsec_unit - 1;
3481 	ipsec_init.uniqueid = pcb->ipsec_unique_name;
3482 	ipsec_init.uniqueid_len = (uint32_t)strlen(pcb->ipsec_unique_name);
3483 	ipsec_init.family = IFNET_FAMILY_IPSEC;
3484 	ipsec_init.type = IFT_OTHER;
3485 	ipsec_init.demux = ipsec_demux;
3486 	ipsec_init.add_proto = ipsec_add_proto;
3487 	ipsec_init.del_proto = ipsec_del_proto;
3488 	ipsec_init.softc = pcb;
3489 	ipsec_init.ioctl = ipsec_ioctl;
3490 	ipsec_init.free = ipsec_detached;
3491 
3492 #if IPSEC_NEXUS
3493 	/* We don't support kpipes without a netif */
3494 	if (pcb->ipsec_kpipe_count && !pcb->ipsec_use_netif) {
3495 		result = ENOTSUP;
3496 		os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - kpipe requires netif: failed %d\n", result);
3497 		ipsec_free_pcb(pcb, false);
3498 		*unitinfo = NULL;
3499 		return result;
3500 	}
3501 
3502 	if (if_ipsec_debug != 0) {
3503 		printf("%s: %s%d use_netif %d kpipe_count %d slot_size %u ring_size %u "
3504 		    "kpipe_tx_ring_size %u kpipe_rx_ring_size %u\n",
3505 		    __func__,
3506 		    ipsec_init.name, ipsec_init.unit,
3507 		    pcb->ipsec_use_netif,
3508 		    pcb->ipsec_kpipe_count,
3509 		    pcb->ipsec_slot_size,
3510 		    pcb->ipsec_netif_ring_size,
3511 		    pcb->ipsec_kpipe_tx_ring_size,
3512 		    pcb->ipsec_kpipe_rx_ring_size);
3513 	}
3514 	if (pcb->ipsec_use_netif) {
3515 		if (pcb->ipsec_kpipe_count) {
3516 			result = ipsec_enable_channel(pcb, current_proc());
3517 			if (result) {
3518 				os_log_error(OS_LOG_DEFAULT, "%s: %s failed to enable channels\n",
3519 				    __func__, pcb->ipsec_if_xname);
3520 				ipsec_free_pcb(pcb, false);
3521 				*unitinfo = NULL;
3522 				return result;
3523 			}
3524 		}
3525 
3526 		result = ipsec_nexus_ifattach(pcb, &ipsec_init, &pcb->ipsec_ifp);
3527 		if (result != 0) {
3528 			os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - ipsec_nexus_ifattach failed: %d\n", result);
3529 			ipsec_free_pcb(pcb, false);
3530 			*unitinfo = NULL;
3531 			return result;
3532 		}
3533 
3534 		result = ipsec_flowswitch_attach(pcb);
3535 		if (result != 0) {
3536 			os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - ipsec_flowswitch_attach failed: %d\n", result);
3537 			// Do not call ipsec_free_pcb(). We will be attached already, and will be freed later
3538 			// in ipsec_detached().
3539 			*unitinfo = NULL;
3540 			return result;
3541 		}
3542 
3543 		/* Attach to bpf */
3544 		bpfattach(pcb->ipsec_ifp, DLT_RAW, 0);
3545 	} else
3546 #endif // IPSEC_NEXUS
3547 	{
3548 		result = ifnet_allocate_extended(&ipsec_init, &pcb->ipsec_ifp);
3549 		if (result != 0) {
3550 			os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - ifnet_allocate failed: %d\n", result);
3551 			ipsec_free_pcb(pcb, false);
3552 			*unitinfo = NULL;
3553 			return result;
3554 		}
3555 		ipsec_ifnet_set_attrs(pcb->ipsec_ifp);
3556 
3557 		/* Attach the interface */
3558 		result = ifnet_attach(pcb->ipsec_ifp, NULL);
3559 		if (result != 0) {
3560 			os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - ifnet_attach failed: %d\n", result);
3561 			ifnet_release(pcb->ipsec_ifp);
3562 			ipsec_free_pcb(pcb, false);
3563 			*unitinfo = NULL;
3564 			return result;
3565 		}
3566 
3567 		/* Attach to bpf */
3568 		bpfattach(pcb->ipsec_ifp, DLT_NULL, 0);
3569 	}
3570 
3571 #if IPSEC_NEXUS
3572 	/*
3573 	 * Mark the data path as ready.
3574 	 * If kpipe nexus is being used then the data path is marked ready only when a kpipe channel is connected.
3575 	 */
3576 	if (pcb->ipsec_kpipe_count == 0) {
3577 		lck_mtx_lock(&pcb->ipsec_pcb_data_move_lock);
3578 		IPSEC_SET_DATA_PATH_READY(pcb);
3579 		lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
3580 	}
3581 #endif
3582 
3583 	/* The interfaces resoures allocated, mark it as running */
3584 	ifnet_set_flags(pcb->ipsec_ifp, IFF_RUNNING, IFF_RUNNING);
3585 
3586 	return 0;
3587 }
3588 
3589 static errno_t
ipsec_detach_ip(ifnet_t interface,protocol_family_t protocol,socket_t pf_socket)3590 ipsec_detach_ip(ifnet_t                         interface,
3591     protocol_family_t       protocol,
3592     socket_t                        pf_socket)
3593 {
3594 	errno_t result = EPROTONOSUPPORT;
3595 
3596 	/* Attempt a detach */
3597 	if (protocol == PF_INET) {
3598 		struct ifreq    ifr;
3599 
3600 		bzero(&ifr, sizeof(ifr));
3601 		snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s%d",
3602 		    ifnet_name(interface), ifnet_unit(interface));
3603 
3604 		result = sock_ioctl(pf_socket, SIOCPROTODETACH, &ifr);
3605 	} else if (protocol == PF_INET6) {
3606 		struct in6_ifreq        ifr6;
3607 
3608 		bzero(&ifr6, sizeof(ifr6));
3609 		snprintf(ifr6.ifr_name, sizeof(ifr6.ifr_name), "%s%d",
3610 		    ifnet_name(interface), ifnet_unit(interface));
3611 
3612 		result = sock_ioctl(pf_socket, SIOCPROTODETACH_IN6, &ifr6);
3613 	}
3614 
3615 	return result;
3616 }
3617 
3618 static void
ipsec_remove_address(ifnet_t interface,protocol_family_t protocol,ifaddr_t address,socket_t pf_socket)3619 ipsec_remove_address(ifnet_t                            interface,
3620     protocol_family_t      protocol,
3621     ifaddr_t                       address,
3622     socket_t                       pf_socket)
3623 {
3624 	errno_t result = 0;
3625 
3626 	/* Attempt a detach */
3627 	if (protocol == PF_INET) {
3628 		struct ifreq    ifr;
3629 
3630 		bzero(&ifr, sizeof(ifr));
3631 		snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s%d",
3632 		    ifnet_name(interface), ifnet_unit(interface));
3633 		result = ifaddr_address(address, &ifr.ifr_addr, sizeof(ifr.ifr_addr));
3634 		if (result != 0) {
3635 			os_log_error(OS_LOG_DEFAULT, "ipsec_remove_address - ifaddr_address failed: %d", result);
3636 		} else {
3637 			result = sock_ioctl(pf_socket, SIOCDIFADDR, &ifr);
3638 			if (result != 0) {
3639 				os_log_error(OS_LOG_DEFAULT, "ipsec_remove_address - SIOCDIFADDR failed: %d", result);
3640 			}
3641 		}
3642 	} else if (protocol == PF_INET6) {
3643 		struct in6_ifreq        ifr6;
3644 
3645 		bzero(&ifr6, sizeof(ifr6));
3646 		snprintf(ifr6.ifr_name, sizeof(ifr6.ifr_name), "%s%d",
3647 		    ifnet_name(interface), ifnet_unit(interface));
3648 		result = ifaddr_address(address, (struct sockaddr*)&ifr6.ifr_addr,
3649 		    sizeof(ifr6.ifr_addr));
3650 		if (result != 0) {
3651 			os_log_error(OS_LOG_DEFAULT, "ipsec_remove_address - ifaddr_address failed (v6): %d",
3652 			    result);
3653 		} else {
3654 			result = sock_ioctl(pf_socket, SIOCDIFADDR_IN6, &ifr6);
3655 			if (result != 0) {
3656 				os_log_error(OS_LOG_DEFAULT, "ipsec_remove_address - SIOCDIFADDR_IN6 failed: %d",
3657 				    result);
3658 			}
3659 		}
3660 	}
3661 }
3662 
3663 static void
ipsec_cleanup_family(ifnet_t interface,protocol_family_t protocol)3664 ipsec_cleanup_family(ifnet_t                            interface,
3665     protocol_family_t      protocol)
3666 {
3667 	errno_t         result = 0;
3668 	socket_t        pf_socket = NULL;
3669 	ifaddr_t        *addresses = NULL;
3670 	int                     i;
3671 
3672 	if (protocol != PF_INET && protocol != PF_INET6) {
3673 		os_log_error(OS_LOG_DEFAULT, "ipsec_cleanup_family - invalid protocol family %d\n", protocol);
3674 		return;
3675 	}
3676 
3677 	/* Create a socket for removing addresses and detaching the protocol */
3678 	result = sock_socket(protocol, SOCK_DGRAM, 0, NULL, NULL, &pf_socket);
3679 	if (result != 0) {
3680 		if (result != EAFNOSUPPORT) {
3681 			os_log_error(OS_LOG_DEFAULT, "ipsec_cleanup_family - failed to create %s socket: %d\n",
3682 			    protocol == PF_INET ? "IP" : "IPv6", result);
3683 		}
3684 		goto cleanup;
3685 	}
3686 
3687 	/* always set SS_PRIV, we want to close and detach regardless */
3688 	sock_setpriv(pf_socket, 1);
3689 
3690 	result = ipsec_detach_ip(interface, protocol, pf_socket);
3691 	if (result == 0 || result == ENXIO) {
3692 		/* We are done! We either detached or weren't attached. */
3693 		goto cleanup;
3694 	} else if (result != EBUSY) {
3695 		/* Uh, not really sure what happened here... */
3696 		os_log_error(OS_LOG_DEFAULT, "ipsec_cleanup_family - ipsec_detach_ip failed: %d\n", result);
3697 		goto cleanup;
3698 	}
3699 
3700 	/*
3701 	 * At this point, we received an EBUSY error. This means there are
3702 	 * addresses attached. We should detach them and then try again.
3703 	 */
3704 	result = ifnet_get_address_list_family(interface, &addresses, (sa_family_t)protocol);
3705 	if (result != 0) {
3706 		os_log_error(OS_LOG_DEFAULT, "fnet_get_address_list_family(%s%d, 0xblah, %s) - failed: %d\n",
3707 		    ifnet_name(interface), ifnet_unit(interface),
3708 		    protocol == PF_INET ? "PF_INET" : "PF_INET6", result);
3709 		goto cleanup;
3710 	}
3711 
3712 	for (i = 0; addresses[i] != 0; i++) {
3713 		ipsec_remove_address(interface, protocol, addresses[i], pf_socket);
3714 	}
3715 	ifnet_free_address_list(addresses);
3716 	addresses = NULL;
3717 
3718 	/*
3719 	 * The addresses should be gone, we should try the remove again.
3720 	 */
3721 	result = ipsec_detach_ip(interface, protocol, pf_socket);
3722 	if (result != 0 && result != ENXIO) {
3723 		os_log_error(OS_LOG_DEFAULT, "ipsec_cleanup_family - ipsec_detach_ip failed: %d\n", result);
3724 	}
3725 
3726 cleanup:
3727 	if (pf_socket != NULL) {
3728 		sock_close(pf_socket);
3729 	}
3730 
3731 	if (addresses != NULL) {
3732 		ifnet_free_address_list(addresses);
3733 	}
3734 }
3735 
3736 static errno_t
ipsec_ctl_disconnect(__unused kern_ctl_ref kctlref,__unused u_int32_t unit,void * unitinfo)3737 ipsec_ctl_disconnect(__unused kern_ctl_ref      kctlref,
3738     __unused u_int32_t             unit,
3739     void                                   *unitinfo)
3740 {
3741 	struct ipsec_pcb *pcb = unitinfo;
3742 	ifnet_t ifp = NULL;
3743 	errno_t result = 0;
3744 
3745 	if (pcb == NULL) {
3746 		return EINVAL;
3747 	}
3748 
3749 	/* Wait until all threads in the data paths are done. */
3750 	ipsec_wait_data_move_drain(pcb);
3751 
3752 #if IPSEC_NEXUS
3753 	// Tell the nexus to stop all rings
3754 	if (pcb->ipsec_netif_nexus != NULL) {
3755 		kern_nexus_stop(pcb->ipsec_netif_nexus);
3756 	}
3757 #endif // IPSEC_NEXUS
3758 
3759 	lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
3760 
3761 #if IPSEC_NEXUS
3762 	if (if_ipsec_debug != 0) {
3763 		printf("ipsec_ctl_disconnect: detaching interface %s (id %s)\n",
3764 		    pcb->ipsec_if_xname, pcb->ipsec_unique_name);
3765 	}
3766 
3767 	struct ipsec_detached_channels dc;
3768 	ipsec_detach_channels(pcb, &dc);
3769 #endif // IPSEC_NEXUS
3770 
3771 	pcb->ipsec_ctlref = NULL;
3772 
3773 	ifp = pcb->ipsec_ifp;
3774 	if (ifp != NULL) {
3775 #if IPSEC_NEXUS
3776 		if (pcb->ipsec_netif_nexus != NULL) {
3777 			/*
3778 			 * Quiesce the interface and flush any pending outbound packets.
3779 			 */
3780 			if_down(ifp);
3781 
3782 			/*
3783 			 * Suspend data movement and wait for IO threads to exit.
3784 			 * We can't rely on the logic in dlil_quiesce_and_detach_nexuses() to
3785 			 * do this because ipsec nexuses are attached/detached separately.
3786 			 */
3787 			ifnet_datamov_suspend_and_drain(ifp);
3788 			if ((result = ifnet_detach(ifp)) != 0) {
3789 				panic("ipsec_ctl_disconnect - ifnet_detach failed: %d", result);
3790 				/* NOT REACHED */
3791 			}
3792 
3793 			/*
3794 			 * We want to do everything in our power to ensure that the interface
3795 			 * really goes away when the socket is closed. We must remove IP/IPv6
3796 			 * addresses and detach the protocols. Finally, we can remove and
3797 			 * release the interface.
3798 			 */
3799 			key_delsp_for_ipsec_if(ifp);
3800 
3801 			ipsec_cleanup_family(ifp, AF_INET);
3802 			ipsec_cleanup_family(ifp, AF_INET6);
3803 
3804 			lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
3805 
3806 			ipsec_free_channels(&dc);
3807 
3808 			ipsec_nexus_detach(pcb);
3809 
3810 			/* Decrement refcnt added by ifnet_datamov_suspend_and_drain(). */
3811 			ifnet_datamov_resume(ifp);
3812 		} else
3813 #endif // IPSEC_NEXUS
3814 		{
3815 			lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
3816 
3817 #if IPSEC_NEXUS
3818 			ipsec_free_channels(&dc);
3819 #endif // IPSEC_NEXUS
3820 
3821 			/*
3822 			 * We want to do everything in our power to ensure that the interface
3823 			 * really goes away when the socket is closed. We must remove IP/IPv6
3824 			 * addresses and detach the protocols. Finally, we can remove and
3825 			 * release the interface.
3826 			 */
3827 			key_delsp_for_ipsec_if(ifp);
3828 
3829 			ipsec_cleanup_family(ifp, AF_INET);
3830 			ipsec_cleanup_family(ifp, AF_INET6);
3831 
3832 			/*
3833 			 * Detach now; ipsec_detach() will be called asynchronously once
3834 			 * the I/O reference count drops to 0.  There we will invoke
3835 			 * ifnet_release().
3836 			 */
3837 			if ((result = ifnet_detach(ifp)) != 0) {
3838 				os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_disconnect - ifnet_detach failed: %d\n", result);
3839 			}
3840 		}
3841 	} else {
3842 		// Bound, but not connected
3843 		lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
3844 		ipsec_free_pcb(pcb, false);
3845 	}
3846 
3847 	return 0;
3848 }
3849 
3850 static errno_t
ipsec_ctl_send(__unused kern_ctl_ref kctlref,__unused u_int32_t unit,__unused void * unitinfo,mbuf_t m,__unused int flags)3851 ipsec_ctl_send(__unused kern_ctl_ref    kctlref,
3852     __unused u_int32_t           unit,
3853     __unused void                        *unitinfo,
3854     mbuf_t                  m,
3855     __unused int                 flags)
3856 {
3857 	/* Receive messages from the control socket. Currently unused. */
3858 	mbuf_freem(m);
3859 	return 0;
3860 }
3861 
3862 static errno_t
ipsec_ctl_setopt(__unused kern_ctl_ref kctlref,__unused u_int32_t unit,void * unitinfo,int opt,void * data,size_t len)3863 ipsec_ctl_setopt(__unused kern_ctl_ref  kctlref,
3864     __unused u_int32_t             unit,
3865     void                                   *unitinfo,
3866     int                                            opt,
3867     void                                   *data,
3868     size_t                                 len)
3869 {
3870 	errno_t                                 result = 0;
3871 	struct ipsec_pcb                        *pcb = unitinfo;
3872 	if (pcb == NULL) {
3873 		return EINVAL;
3874 	}
3875 
3876 	/* check for privileges for privileged options */
3877 	switch (opt) {
3878 	case IPSEC_OPT_FLAGS:
3879 	case IPSEC_OPT_EXT_IFDATA_STATS:
3880 	case IPSEC_OPT_SET_DELEGATE_INTERFACE:
3881 	case IPSEC_OPT_OUTPUT_TRAFFIC_CLASS:
3882 	case IPSEC_OPT_OUTPUT_DSCP_MAPPING:
3883 		if (kauth_cred_issuser(kauth_cred_get()) == 0) {
3884 			return EPERM;
3885 		}
3886 		break;
3887 	}
3888 
3889 	switch (opt) {
3890 	case IPSEC_OPT_FLAGS: {
3891 		if (len != sizeof(u_int32_t)) {
3892 			result = EMSGSIZE;
3893 		} else {
3894 			pcb->ipsec_external_flags = *(u_int32_t *)data;
3895 		}
3896 		break;
3897 	}
3898 
3899 	case IPSEC_OPT_EXT_IFDATA_STATS: {
3900 		if (len != sizeof(int)) {
3901 			result = EMSGSIZE;
3902 			break;
3903 		}
3904 		if (pcb->ipsec_ifp == NULL) {
3905 			// Only can set after connecting
3906 			result = EINVAL;
3907 			break;
3908 		}
3909 		pcb->ipsec_ext_ifdata_stats = (*(int *)data) ? 1 : 0;
3910 		break;
3911 	}
3912 
3913 	case IPSEC_OPT_INC_IFDATA_STATS_IN:
3914 	case IPSEC_OPT_INC_IFDATA_STATS_OUT: {
3915 		struct ipsec_stats_param *utsp = (struct ipsec_stats_param *)data;
3916 
3917 		if (utsp == NULL || len < sizeof(struct ipsec_stats_param)) {
3918 			result = EINVAL;
3919 			break;
3920 		}
3921 		if (pcb->ipsec_ifp == NULL) {
3922 			// Only can set after connecting
3923 			result = EINVAL;
3924 			break;
3925 		}
3926 		if (!pcb->ipsec_ext_ifdata_stats) {
3927 			result = EINVAL;
3928 			break;
3929 		}
3930 		if (opt == IPSEC_OPT_INC_IFDATA_STATS_IN) {
3931 			ifnet_stat_increment_in(pcb->ipsec_ifp, (uint32_t)utsp->utsp_packets,
3932 			    (uint32_t)utsp->utsp_bytes, (uint32_t)utsp->utsp_errors);
3933 		} else {
3934 			ifnet_stat_increment_out(pcb->ipsec_ifp, (uint32_t)utsp->utsp_packets,
3935 			    (uint32_t)utsp->utsp_bytes, (uint32_t)utsp->utsp_errors);
3936 		}
3937 		break;
3938 	}
3939 
3940 	case IPSEC_OPT_SET_DELEGATE_INTERFACE: {
3941 		ifnet_t del_ifp = NULL;
3942 		char name[IFNAMSIZ];
3943 
3944 		if (len > IFNAMSIZ - 1) {
3945 			result = EMSGSIZE;
3946 			break;
3947 		}
3948 		if (pcb->ipsec_ifp == NULL) {
3949 			// Only can set after connecting
3950 			result = EINVAL;
3951 			break;
3952 		}
3953 		if (len != 0) {                   /* if len==0, del_ifp will be NULL causing the delegate to be removed */
3954 			bcopy(data, name, len);
3955 			name[len] = 0;
3956 			result = ifnet_find_by_name(name, &del_ifp);
3957 		}
3958 		if (result == 0) {
3959 			os_log_error(OS_LOG_DEFAULT, "%s IPSEC_OPT_SET_DELEGATE_INTERFACE %s to %s\n",
3960 			    __func__, pcb->ipsec_ifp->if_xname,
3961 			    del_ifp ? del_ifp->if_xname : "NULL");
3962 
3963 			result = ifnet_set_delegate(pcb->ipsec_ifp, del_ifp);
3964 			if (del_ifp) {
3965 				ifnet_release(del_ifp);
3966 			}
3967 		}
3968 		break;
3969 	}
3970 
3971 	case IPSEC_OPT_OUTPUT_TRAFFIC_CLASS: {
3972 		if (len != sizeof(int)) {
3973 			result = EMSGSIZE;
3974 			break;
3975 		}
3976 		if (pcb->ipsec_ifp == NULL) {
3977 			// Only can set after connecting
3978 			result = EINVAL;
3979 			break;
3980 		}
3981 		mbuf_svc_class_t output_service_class = so_tc2msc(*(int *)data);
3982 		if (output_service_class == MBUF_SC_UNSPEC) {
3983 			pcb->ipsec_output_service_class = MBUF_SC_OAM;
3984 		} else {
3985 			pcb->ipsec_output_service_class = output_service_class;
3986 		}
3987 		os_log_error(OS_LOG_DEFAULT, "%s IPSEC_OPT_OUTPUT_TRAFFIC_CLASS %s svc %d\n",
3988 		    __func__, pcb->ipsec_ifp->if_xname,
3989 		    pcb->ipsec_output_service_class);
3990 		break;
3991 	}
3992 
3993 #if IPSEC_NEXUS
3994 	case IPSEC_OPT_ENABLE_CHANNEL: {
3995 		if (len != sizeof(int)) {
3996 			result = EMSGSIZE;
3997 			break;
3998 		}
3999 		if (pcb->ipsec_ifp != NULL) {
4000 			// Only can set before connecting
4001 			result = EINVAL;
4002 			break;
4003 		}
4004 		if ((*(int *)data) != 0 &&
4005 		    (*(int *)data) != 1 &&
4006 		    (*(int *)data) != IPSEC_IF_WMM_RING_COUNT) {
4007 			result = EINVAL;
4008 			break;
4009 		}
4010 		lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
4011 		pcb->ipsec_kpipe_count = *(int *)data;
4012 		lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
4013 		break;
4014 	}
4015 
4016 	case IPSEC_OPT_CHANNEL_BIND_PID: {
4017 		if (len != sizeof(pid_t)) {
4018 			result = EMSGSIZE;
4019 			break;
4020 		}
4021 		if (pcb->ipsec_ifp != NULL) {
4022 			// Only can set before connecting
4023 			result = EINVAL;
4024 			break;
4025 		}
4026 		lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
4027 		pcb->ipsec_kpipe_pid = *(pid_t *)data;
4028 		lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
4029 		break;
4030 	}
4031 
4032 	case IPSEC_OPT_CHANNEL_BIND_UUID: {
4033 		if (len != sizeof(uuid_t)) {
4034 			result = EMSGSIZE;
4035 			break;
4036 		}
4037 		if (pcb->ipsec_ifp != NULL) {
4038 			// Only can set before connecting
4039 			result = EINVAL;
4040 			break;
4041 		}
4042 		lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
4043 		uuid_copy(pcb->ipsec_kpipe_proc_uuid, *((uuid_t *)data));
4044 		lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
4045 		break;
4046 	}
4047 
4048 	case IPSEC_OPT_ENABLE_FLOWSWITCH: {
4049 		if (len != sizeof(int)) {
4050 			result = EMSGSIZE;
4051 			break;
4052 		}
4053 		if (pcb->ipsec_ifp == NULL) {
4054 			// Only can set after connecting
4055 			result = EINVAL;
4056 			break;
4057 		}
4058 		if (!if_is_fsw_transport_netagent_enabled()) {
4059 			result = ENOTSUP;
4060 			break;
4061 		}
4062 		if (uuid_is_null(pcb->ipsec_nx.fsw_agent)) {
4063 			result = ENOENT;
4064 			break;
4065 		}
4066 
4067 		uint32_t flags = netagent_get_flags(pcb->ipsec_nx.fsw_agent);
4068 
4069 		if (*(int *)data) {
4070 			flags |= (NETAGENT_FLAG_NEXUS_PROVIDER |
4071 			    NETAGENT_FLAG_NEXUS_LISTENER);
4072 			result = netagent_set_flags(pcb->ipsec_nx.fsw_agent, flags);
4073 			pcb->ipsec_needs_netagent = true;
4074 		} else {
4075 			pcb->ipsec_needs_netagent = false;
4076 			flags &= ~(NETAGENT_FLAG_NEXUS_PROVIDER |
4077 			    NETAGENT_FLAG_NEXUS_LISTENER);
4078 			result = netagent_set_flags(pcb->ipsec_nx.fsw_agent, flags);
4079 		}
4080 		break;
4081 	}
4082 
4083 	case IPSEC_OPT_INPUT_FRAG_SIZE: {
4084 		if (len != sizeof(u_int32_t)) {
4085 			result = EMSGSIZE;
4086 			break;
4087 		}
4088 		u_int32_t input_frag_size = *(u_int32_t *)data;
4089 		if (input_frag_size <= sizeof(struct ip6_hdr)) {
4090 			pcb->ipsec_frag_size_set = FALSE;
4091 			pcb->ipsec_input_frag_size = 0;
4092 		} else {
4093 			pcb->ipsec_frag_size_set = TRUE;
4094 			pcb->ipsec_input_frag_size = input_frag_size;
4095 		}
4096 		break;
4097 	}
4098 	case IPSEC_OPT_ENABLE_NETIF: {
4099 		if (len != sizeof(int)) {
4100 			result = EMSGSIZE;
4101 			break;
4102 		}
4103 		if (pcb->ipsec_ifp != NULL) {
4104 			// Only can set before connecting
4105 			result = EINVAL;
4106 			break;
4107 		}
4108 		lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
4109 		pcb->ipsec_use_netif = !!(*(int *)data);
4110 		lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
4111 		break;
4112 	}
4113 	case IPSEC_OPT_SLOT_SIZE: {
4114 		if (len != sizeof(u_int32_t)) {
4115 			result = EMSGSIZE;
4116 			break;
4117 		}
4118 		if (pcb->ipsec_ifp != NULL) {
4119 			// Only can set before connecting
4120 			result = EINVAL;
4121 			break;
4122 		}
4123 		u_int32_t slot_size = *(u_int32_t *)data;
4124 		if (slot_size < IPSEC_IF_MIN_SLOT_SIZE ||
4125 		    slot_size > IPSEC_IF_MAX_SLOT_SIZE) {
4126 			return EINVAL;
4127 		}
4128 		pcb->ipsec_slot_size = slot_size;
4129 		if (if_ipsec_debug != 0) {
4130 			printf("%s: IPSEC_OPT_SLOT_SIZE %u\n", __func__, slot_size);
4131 		}
4132 		break;
4133 	}
4134 	case IPSEC_OPT_NETIF_RING_SIZE: {
4135 		if (len != sizeof(u_int32_t)) {
4136 			result = EMSGSIZE;
4137 			break;
4138 		}
4139 		if (pcb->ipsec_ifp != NULL) {
4140 			// Only can set before connecting
4141 			result = EINVAL;
4142 			break;
4143 		}
4144 		u_int32_t ring_size = *(u_int32_t *)data;
4145 		if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
4146 		    ring_size > IPSEC_IF_MAX_RING_SIZE) {
4147 			return EINVAL;
4148 		}
4149 		pcb->ipsec_netif_ring_size = ring_size;
4150 		if (if_ipsec_debug != 0) {
4151 			printf("%s: IPSEC_OPT_NETIF_RING_SIZE %u\n", __func__, ring_size);
4152 		}
4153 		break;
4154 	}
4155 	case IPSEC_OPT_TX_FSW_RING_SIZE: {
4156 		if (len != sizeof(u_int32_t)) {
4157 			result = EMSGSIZE;
4158 			break;
4159 		}
4160 		if (pcb->ipsec_ifp != NULL) {
4161 			// Only can set before connecting
4162 			result = EINVAL;
4163 			break;
4164 		}
4165 		u_int32_t ring_size = *(u_int32_t *)data;
4166 		if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
4167 		    ring_size > IPSEC_IF_MAX_RING_SIZE) {
4168 			return EINVAL;
4169 		}
4170 		pcb->ipsec_tx_fsw_ring_size = ring_size;
4171 		if (if_ipsec_debug != 0) {
4172 			printf("%s: IPSEC_OPT_TX_FSW_RING_SIZE %u\n", __func__, ring_size);
4173 		}
4174 		break;
4175 	}
4176 	case IPSEC_OPT_RX_FSW_RING_SIZE: {
4177 		if (len != sizeof(u_int32_t)) {
4178 			result = EMSGSIZE;
4179 			break;
4180 		}
4181 		if (pcb->ipsec_ifp != NULL) {
4182 			// Only can set before connecting
4183 			result = EINVAL;
4184 			break;
4185 		}
4186 		u_int32_t ring_size = *(u_int32_t *)data;
4187 		if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
4188 		    ring_size > IPSEC_IF_MAX_RING_SIZE) {
4189 			return EINVAL;
4190 		}
4191 		pcb->ipsec_rx_fsw_ring_size = ring_size;
4192 		if (if_ipsec_debug != 0) {
4193 			printf("%s: IPSEC_OPT_TX_FSW_RING_SIZE %u\n", __func__, ring_size);
4194 		}
4195 		break;
4196 	}
4197 	case IPSEC_OPT_KPIPE_TX_RING_SIZE: {
4198 		if (len != sizeof(u_int32_t)) {
4199 			result = EMSGSIZE;
4200 			break;
4201 		}
4202 		if (pcb->ipsec_ifp != NULL) {
4203 			// Only can set before connecting
4204 			result = EINVAL;
4205 			break;
4206 		}
4207 		u_int32_t ring_size = *(u_int32_t *)data;
4208 		if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
4209 		    ring_size > IPSEC_IF_MAX_RING_SIZE) {
4210 			return EINVAL;
4211 		}
4212 		pcb->ipsec_kpipe_tx_ring_size = ring_size;
4213 		if (if_ipsec_debug != 0) {
4214 			printf("%s: IPSEC_OPT_KPIPE_TX_RING_SIZE %u\n", __func__, ring_size);
4215 		}
4216 		break;
4217 	}
4218 	case IPSEC_OPT_KPIPE_RX_RING_SIZE: {
4219 		if (len != sizeof(u_int32_t)) {
4220 			result = EMSGSIZE;
4221 			break;
4222 		}
4223 		if (pcb->ipsec_ifp != NULL) {
4224 			// Only can set before connecting
4225 			result = EINVAL;
4226 			break;
4227 		}
4228 		u_int32_t ring_size = *(u_int32_t *)data;
4229 		if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
4230 		    ring_size > IPSEC_IF_MAX_RING_SIZE) {
4231 			return EINVAL;
4232 		}
4233 		pcb->ipsec_kpipe_rx_ring_size = ring_size;
4234 		if (if_ipsec_debug != 0) {
4235 			printf("%s: IPSEC_OPT_KPIPE_RX_RING_SIZE %u\n", __func__, ring_size);
4236 		}
4237 		break;
4238 	}
4239 	case IPSEC_OPT_OUTPUT_DSCP_MAPPING: {
4240 		if (len != sizeof(int)) {
4241 			result = EMSGSIZE;
4242 			break;
4243 		}
4244 		if (pcb->ipsec_ifp == NULL) {
4245 			// Only can set after connecting
4246 			result = EINVAL;
4247 			break;
4248 		}
4249 
4250 		ipsec_dscp_mapping_t output_dscp_mapping = (ipsec_dscp_mapping_t)(*(int *)data);
4251 		if (output_dscp_mapping > IPSEC_DSCP_MAPPING_LEGACY) {
4252 			return EINVAL;
4253 		}
4254 
4255 		pcb->ipsec_output_dscp_mapping = output_dscp_mapping;
4256 
4257 		os_log(OS_LOG_DEFAULT, "%s IPSEC_OPT_OUTPUT_DSCP_MAPPING %s DSCP %d\n",
4258 		    __func__, pcb->ipsec_ifp->if_xname,
4259 		    pcb->ipsec_output_dscp_mapping);
4260 		break;
4261 	}
4262 
4263 #endif // IPSEC_NEXUS
4264 
4265 	default: {
4266 		result = ENOPROTOOPT;
4267 		break;
4268 	}
4269 	}
4270 
4271 	return result;
4272 }
4273 
4274 static errno_t
ipsec_ctl_getopt(__unused kern_ctl_ref kctlref,__unused u_int32_t unit,void * unitinfo,int opt,void * data,size_t * len)4275 ipsec_ctl_getopt(__unused kern_ctl_ref kctlref,
4276     __unused u_int32_t unit,
4277     void *unitinfo,
4278     int opt,
4279     void *data,
4280     size_t *len)
4281 {
4282 	errno_t result = 0;
4283 	struct ipsec_pcb *pcb = unitinfo;
4284 	if (pcb == NULL) {
4285 		return EINVAL;
4286 	}
4287 
4288 	switch (opt) {
4289 	case IPSEC_OPT_FLAGS: {
4290 		if (*len != sizeof(u_int32_t)) {
4291 			result = EMSGSIZE;
4292 		} else {
4293 			*(u_int32_t *)data = pcb->ipsec_external_flags;
4294 		}
4295 		break;
4296 	}
4297 
4298 	case IPSEC_OPT_EXT_IFDATA_STATS: {
4299 		if (*len != sizeof(int)) {
4300 			result = EMSGSIZE;
4301 		} else {
4302 			*(int *)data = (pcb->ipsec_ext_ifdata_stats) ? 1 : 0;
4303 		}
4304 		break;
4305 	}
4306 
4307 	case IPSEC_OPT_IFNAME: {
4308 		if (*len < MIN(strlen(pcb->ipsec_if_xname) + 1, sizeof(pcb->ipsec_if_xname))) {
4309 			result = EMSGSIZE;
4310 		} else {
4311 			if (pcb->ipsec_ifp == NULL) {
4312 				// Only can get after connecting
4313 				result = EINVAL;
4314 				break;
4315 			}
4316 			*len = scnprintf(data, *len, "%s", pcb->ipsec_if_xname) + 1;
4317 		}
4318 		break;
4319 	}
4320 
4321 	case IPSEC_OPT_OUTPUT_TRAFFIC_CLASS: {
4322 		if (*len != sizeof(int)) {
4323 			result = EMSGSIZE;
4324 		} else {
4325 			*(int *)data = so_svc2tc(pcb->ipsec_output_service_class);
4326 		}
4327 		break;
4328 	}
4329 
4330 #if IPSEC_NEXUS
4331 
4332 	case IPSEC_OPT_ENABLE_CHANNEL: {
4333 		if (*len != sizeof(int)) {
4334 			result = EMSGSIZE;
4335 		} else {
4336 			lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
4337 			*(int *)data = pcb->ipsec_kpipe_count;
4338 			lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
4339 		}
4340 		break;
4341 	}
4342 
4343 	case IPSEC_OPT_CHANNEL_BIND_PID: {
4344 		if (*len != sizeof(pid_t)) {
4345 			result = EMSGSIZE;
4346 		} else {
4347 			lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
4348 			*(pid_t *)data = pcb->ipsec_kpipe_pid;
4349 			lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
4350 		}
4351 		break;
4352 	}
4353 
4354 	case IPSEC_OPT_CHANNEL_BIND_UUID: {
4355 		if (*len != sizeof(uuid_t)) {
4356 			result = EMSGSIZE;
4357 		} else {
4358 			lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
4359 			uuid_copy(*((uuid_t *)data), pcb->ipsec_kpipe_proc_uuid);
4360 			lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
4361 		}
4362 		break;
4363 	}
4364 
4365 	case IPSEC_OPT_ENABLE_FLOWSWITCH: {
4366 		if (*len != sizeof(int)) {
4367 			result = EMSGSIZE;
4368 		} else {
4369 			*(int *)data = if_check_netagent(pcb->ipsec_ifp, pcb->ipsec_nx.fsw_agent);
4370 		}
4371 		break;
4372 	}
4373 
4374 	case IPSEC_OPT_ENABLE_NETIF: {
4375 		if (*len != sizeof(int)) {
4376 			result = EMSGSIZE;
4377 		} else {
4378 			lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
4379 			*(int *)data = !!pcb->ipsec_use_netif;
4380 			lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
4381 		}
4382 		break;
4383 	}
4384 
4385 	case IPSEC_OPT_GET_CHANNEL_UUID: {
4386 		lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
4387 		if (!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
4388 			result = ENXIO;
4389 		} else if (*len != sizeof(uuid_t) * pcb->ipsec_kpipe_count) {
4390 			result = EMSGSIZE;
4391 		} else {
4392 			for (unsigned int i = 0; i < pcb->ipsec_kpipe_count; i++) {
4393 				uuid_copy(((uuid_t *)data)[i], pcb->ipsec_kpipe_uuid[i]);
4394 			}
4395 		}
4396 		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
4397 		break;
4398 	}
4399 
4400 	case IPSEC_OPT_INPUT_FRAG_SIZE: {
4401 		if (*len != sizeof(u_int32_t)) {
4402 			result = EMSGSIZE;
4403 		} else {
4404 			*(u_int32_t *)data = pcb->ipsec_input_frag_size;
4405 		}
4406 		break;
4407 	}
4408 	case IPSEC_OPT_SLOT_SIZE: {
4409 		if (*len != sizeof(u_int32_t)) {
4410 			result = EMSGSIZE;
4411 		} else {
4412 			*(u_int32_t *)data = pcb->ipsec_slot_size;
4413 		}
4414 		break;
4415 	}
4416 	case IPSEC_OPT_NETIF_RING_SIZE: {
4417 		if (*len != sizeof(u_int32_t)) {
4418 			result = EMSGSIZE;
4419 		} else {
4420 			*(u_int32_t *)data = pcb->ipsec_netif_ring_size;
4421 		}
4422 		break;
4423 	}
4424 	case IPSEC_OPT_TX_FSW_RING_SIZE: {
4425 		if (*len != sizeof(u_int32_t)) {
4426 			result = EMSGSIZE;
4427 		} else {
4428 			*(u_int32_t *)data = pcb->ipsec_tx_fsw_ring_size;
4429 		}
4430 		break;
4431 	}
4432 	case IPSEC_OPT_RX_FSW_RING_SIZE: {
4433 		if (*len != sizeof(u_int32_t)) {
4434 			result = EMSGSIZE;
4435 		} else {
4436 			*(u_int32_t *)data = pcb->ipsec_rx_fsw_ring_size;
4437 		}
4438 		break;
4439 	}
4440 	case IPSEC_OPT_KPIPE_TX_RING_SIZE: {
4441 		if (*len != sizeof(u_int32_t)) {
4442 			result = EMSGSIZE;
4443 		} else {
4444 			*(u_int32_t *)data = pcb->ipsec_kpipe_tx_ring_size;
4445 		}
4446 		break;
4447 	}
4448 	case IPSEC_OPT_KPIPE_RX_RING_SIZE: {
4449 		if (*len != sizeof(u_int32_t)) {
4450 			result = EMSGSIZE;
4451 		} else {
4452 			*(u_int32_t *)data = pcb->ipsec_kpipe_rx_ring_size;
4453 		}
4454 		break;
4455 	}
4456 
4457 #endif // IPSEC_NEXUS
4458 
4459 	default: {
4460 		result = ENOPROTOOPT;
4461 		break;
4462 	}
4463 	}
4464 
4465 	return result;
4466 }
4467 
4468 /* Network Interface functions */
4469 static errno_t
ipsec_output(ifnet_t interface,mbuf_t data)4470 ipsec_output(ifnet_t interface,
4471     mbuf_t data)
4472 {
4473 	struct ipsec_pcb *pcb = ifnet_softc(interface);
4474 	struct ipsec_output_state ipsec_state;
4475 	struct route ro;
4476 	struct route_in6 ro6;
4477 	size_t length;
4478 	struct ip *ip = NULL;
4479 	struct ip6_hdr *ip6 = NULL;
4480 	struct ip_out_args ipoa;
4481 	struct ip6_out_args ip6oa;
4482 	int error = 0;
4483 	u_int ip_version = 0;
4484 	int flags = 0;
4485 	struct flowadv *adv = NULL;
4486 
4487 	// Make sure this packet isn't looping through the interface
4488 	if (necp_get_last_interface_index_from_packet(data) == interface->if_index) {
4489 		error = EINVAL;
4490 		goto ipsec_output_err;
4491 	}
4492 
4493 	// Mark the interface so NECP can evaluate tunnel policy
4494 	necp_mark_packet_from_interface(data, interface);
4495 
4496 	if (data->m_len < sizeof(*ip)) {
4497 		os_log_error(OS_LOG_DEFAULT, "ipsec_output: first mbuf length shorter than IP header length: %d.\n", data->m_len);
4498 		IPSEC_STAT_INCREMENT(ipsecstat.out_inval);
4499 		error = EINVAL;
4500 		goto ipsec_output_err;
4501 	}
4502 
4503 	ip = mtod(data, struct ip *);
4504 	ip_version = ip->ip_v;
4505 
4506 	switch (ip_version) {
4507 	case 4: {
4508 		u_int8_t ip_hlen = 0;
4509 #ifdef _IP_VHL
4510 		ip_hlen = _IP_VHL_HL(ip->ip_vhl) << 2;
4511 #else
4512 		ip_hlen = (uint8_t)(ip->ip_hl << 2);
4513 #endif
4514 		if (ip_hlen < sizeof(*ip)) {
4515 			os_log_error(OS_LOG_DEFAULT, "ipsec_output: Bad ip header length %d.\n", ip_hlen);
4516 			IPSEC_STAT_INCREMENT(ipsecstat.out_inval);
4517 			error = EINVAL;
4518 			goto ipsec_output_err;
4519 		}
4520 #if IPSEC_NEXUS
4521 		if (!pcb->ipsec_use_netif)
4522 #endif // IPSEC_NEXUS
4523 		{
4524 			int af = AF_INET;
4525 			bpf_tap_out(pcb->ipsec_ifp, DLT_NULL, data, &af, sizeof(af));
4526 		}
4527 
4528 		/* Apply encryption */
4529 		memset(&ipsec_state, 0, sizeof(ipsec_state));
4530 		ipsec_state.m = data;
4531 		ipsec_state.dst = (struct sockaddr *)&ip->ip_dst;
4532 		memset(&ipsec_state.ro, 0, sizeof(ipsec_state.ro));
4533 		ipsec_state.dscp_mapping = pcb->ipsec_output_dscp_mapping;
4534 
4535 		error = ipsec4_interface_output(&ipsec_state, interface);
4536 		/* Tunneled in IPv6 - packet is gone */
4537 		if (error == 0 && ipsec_state.tunneled == 6) {
4538 			goto done;
4539 		}
4540 
4541 		data = ipsec_state.m;
4542 		if (error || data == NULL) {
4543 			if (error) {
4544 				os_log_error(OS_LOG_DEFAULT, "ipsec_output: ipsec4_output error %d.\n", error);
4545 			}
4546 			goto ipsec_output_err;
4547 		}
4548 
4549 		/* Set traffic class, set flow */
4550 		m_set_service_class(data, pcb->ipsec_output_service_class);
4551 		data->m_pkthdr.pkt_flowsrc = FLOWSRC_IFNET;
4552 #if SKYWALK
4553 		data->m_pkthdr.pkt_mpriv_srcid = interface->if_flowhash;
4554 #else /* !SKYWALK */
4555 		data->m_pkthdr.pkt_flowid = interface->if_flowhash;
4556 #endif /* !SKYWALK */
4557 		data->m_pkthdr.pkt_proto = ip->ip_p;
4558 		data->m_pkthdr.pkt_flags = (PKTF_FLOW_ID | PKTF_FLOW_ADV | PKTF_FLOW_LOCALSRC);
4559 
4560 		/* Flip endian-ness for ip_output */
4561 		ip = mtod(data, struct ip *);
4562 		NTOHS(ip->ip_len);
4563 		NTOHS(ip->ip_off);
4564 
4565 		/* Increment statistics */
4566 		length = mbuf_pkthdr_len(data);
4567 		ifnet_stat_increment_out(interface, 1, (uint16_t)length, 0);
4568 
4569 		/* Send to ip_output */
4570 		memset(&ro, 0, sizeof(ro));
4571 
4572 		flags = (IP_OUTARGS |   /* Passing out args to specify interface */
4573 		    IP_NOIPSEC);                        /* To ensure the packet doesn't go through ipsec twice */
4574 
4575 		memset(&ipoa, 0, sizeof(ipoa));
4576 		ipoa.ipoa_flowadv.code = 0;
4577 		ipoa.ipoa_flags = IPOAF_SELECT_SRCIF | IPOAF_BOUND_SRCADDR;
4578 		if (ipsec_state.outgoing_if) {
4579 			ipoa.ipoa_boundif = ipsec_state.outgoing_if;
4580 			ipoa.ipoa_flags |= IPOAF_BOUND_IF;
4581 		}
4582 		ipsec_set_ipoa_for_interface(pcb->ipsec_ifp, &ipoa);
4583 
4584 		adv = &ipoa.ipoa_flowadv;
4585 
4586 		(void)ip_output(data, NULL, &ro, flags, NULL, &ipoa);
4587 		data = NULL;
4588 
4589 		if (adv->code == FADV_FLOW_CONTROLLED || adv->code == FADV_SUSPENDED) {
4590 			error = ENOBUFS;
4591 			ifnet_disable_output(interface);
4592 		}
4593 
4594 		goto done;
4595 	}
4596 	case 6: {
4597 		if (data->m_len < sizeof(*ip6)) {
4598 			os_log_error(OS_LOG_DEFAULT, "ipsec_output: first mbuf length shorter than IPv6 header length: %d.\n", data->m_len);
4599 			IPSEC_STAT_INCREMENT(ipsec6stat.out_inval);
4600 			error = EINVAL;
4601 			goto ipsec_output_err;
4602 		}
4603 #if IPSEC_NEXUS
4604 		if (!pcb->ipsec_use_netif)
4605 #endif // IPSEC_NEXUS
4606 		{
4607 			int af = AF_INET6;
4608 			bpf_tap_out(pcb->ipsec_ifp, DLT_NULL, data, &af, sizeof(af));
4609 		}
4610 
4611 		data = ipsec6_splithdr(data);
4612 		if (data == NULL) {
4613 			os_log_error(OS_LOG_DEFAULT, "ipsec_output: ipsec6_splithdr returned NULL\n");
4614 			goto ipsec_output_err;
4615 		}
4616 
4617 		ip6 = mtod(data, struct ip6_hdr *);
4618 
4619 		memset(&ipsec_state, 0, sizeof(ipsec_state));
4620 		ipsec_state.m = data;
4621 		ipsec_state.dst = (struct sockaddr *)&ip6->ip6_dst;
4622 		memset(&ipsec_state.ro, 0, sizeof(ipsec_state.ro));
4623 		ipsec_state.dscp_mapping = pcb->ipsec_output_dscp_mapping;
4624 
4625 		error = ipsec6_interface_output(&ipsec_state, interface, &ip6->ip6_nxt, ipsec_state.m);
4626 		if (error == 0 && ipsec_state.tunneled == 4) {          /* tunneled in IPv4 - packet is gone */
4627 			goto done;
4628 		}
4629 		data = ipsec_state.m;
4630 		if (error || data == NULL) {
4631 			if (error) {
4632 				os_log_error(OS_LOG_DEFAULT, "ipsec_output: ipsec6_output error %d\n", error);
4633 			}
4634 			goto ipsec_output_err;
4635 		}
4636 
4637 		/* Set traffic class, set flow */
4638 		m_set_service_class(data, pcb->ipsec_output_service_class);
4639 		data->m_pkthdr.pkt_flowsrc = FLOWSRC_IFNET;
4640 #if SKYWALK
4641 		data->m_pkthdr.pkt_mpriv_srcid = interface->if_flowhash;
4642 #else /* !SKYWALK */
4643 		data->m_pkthdr.pkt_flowid = interface->if_flowhash;
4644 #endif /* !SKYWALK */
4645 		data->m_pkthdr.pkt_proto = ip6->ip6_nxt;
4646 		data->m_pkthdr.pkt_flags = (PKTF_FLOW_ID | PKTF_FLOW_ADV | PKTF_FLOW_LOCALSRC);
4647 
4648 		/* Increment statistics */
4649 		length = mbuf_pkthdr_len(data);
4650 		ifnet_stat_increment_out(interface, 1, (uint16_t)length, 0);
4651 
4652 		/* Send to ip6_output */
4653 		memset(&ro6, 0, sizeof(ro6));
4654 
4655 		flags = IPV6_OUTARGS;
4656 
4657 		memset(&ip6oa, 0, sizeof(ip6oa));
4658 		ip6oa.ip6oa_flowadv.code = 0;
4659 		ip6oa.ip6oa_flags = IP6OAF_SELECT_SRCIF | IP6OAF_BOUND_SRCADDR;
4660 		if (ipsec_state.outgoing_if) {
4661 			ip6oa.ip6oa_boundif = ipsec_state.outgoing_if;
4662 			ip6oa.ip6oa_flags |= IP6OAF_BOUND_IF;
4663 			ip6_output_setsrcifscope(data, ipsec_state.outgoing_if, NULL);
4664 			ip6_output_setdstifscope(data, ipsec_state.outgoing_if, NULL);
4665 		} else {
4666 			ip6_output_setsrcifscope(data, IFSCOPE_UNKNOWN, NULL);
4667 			ip6_output_setdstifscope(data, IFSCOPE_UNKNOWN, NULL);
4668 		}
4669 		ipsec_set_ip6oa_for_interface(pcb->ipsec_ifp, &ip6oa);
4670 
4671 		adv = &ip6oa.ip6oa_flowadv;
4672 
4673 		(void) ip6_output(data, NULL, &ro6, flags, NULL, NULL, &ip6oa);
4674 		data = NULL;
4675 
4676 		if (adv->code == FADV_FLOW_CONTROLLED || adv->code == FADV_SUSPENDED) {
4677 			error = ENOBUFS;
4678 			ifnet_disable_output(interface);
4679 		}
4680 
4681 		goto done;
4682 	}
4683 	default: {
4684 		os_log_error(OS_LOG_DEFAULT, "ipsec_output: Received unknown packet version %d.\n", ip_version);
4685 		error = EINVAL;
4686 		goto ipsec_output_err;
4687 	}
4688 	}
4689 
4690 done:
4691 	return error;
4692 
4693 ipsec_output_err:
4694 	if (data) {
4695 		mbuf_freem(data);
4696 	}
4697 	goto done;
4698 }
4699 
4700 static void
ipsec_start(ifnet_t interface)4701 ipsec_start(ifnet_t     interface)
4702 {
4703 	mbuf_t data;
4704 	struct ipsec_pcb *pcb = ifnet_softc(interface);
4705 
4706 	VERIFY(pcb != NULL);
4707 	for (;;) {
4708 		if (ifnet_dequeue(interface, &data) != 0) {
4709 			break;
4710 		}
4711 		if (ipsec_output(interface, data) != 0) {
4712 			break;
4713 		}
4714 	}
4715 }
4716 
4717 /* Network Interface functions */
4718 static errno_t
ipsec_demux(__unused ifnet_t interface,mbuf_t data,__unused char * frame_header,protocol_family_t * protocol)4719 ipsec_demux(__unused ifnet_t    interface,
4720     mbuf_t                          data,
4721     __unused char           *frame_header,
4722     protocol_family_t       *protocol)
4723 {
4724 	struct ip *ip;
4725 	u_int ip_version;
4726 
4727 	while (data != NULL && mbuf_len(data) < 1) {
4728 		data = mbuf_next(data);
4729 	}
4730 
4731 	if (data == NULL) {
4732 		return ENOENT;
4733 	}
4734 
4735 	ip = mtod(data, struct ip *);
4736 	ip_version = ip->ip_v;
4737 
4738 	switch (ip_version) {
4739 	case 4:
4740 		*protocol = PF_INET;
4741 		return 0;
4742 	case 6:
4743 		*protocol = PF_INET6;
4744 		return 0;
4745 	default:
4746 		*protocol = PF_UNSPEC;
4747 		break;
4748 	}
4749 
4750 	return 0;
4751 }
4752 
4753 static errno_t
ipsec_add_proto(__unused ifnet_t interface,protocol_family_t protocol,__unused const struct ifnet_demux_desc * demux_array,__unused u_int32_t demux_count)4754 ipsec_add_proto(__unused ifnet_t                                                interface,
4755     protocol_family_t                                               protocol,
4756     __unused const struct ifnet_demux_desc  *demux_array,
4757     __unused u_int32_t                                              demux_count)
4758 {
4759 	switch (protocol) {
4760 	case PF_INET:
4761 		return 0;
4762 	case PF_INET6:
4763 		return 0;
4764 	default:
4765 		break;
4766 	}
4767 
4768 	return ENOPROTOOPT;
4769 }
4770 
4771 static errno_t
ipsec_del_proto(__unused ifnet_t interface,__unused protocol_family_t protocol)4772 ipsec_del_proto(__unused ifnet_t                        interface,
4773     __unused protocol_family_t      protocol)
4774 {
4775 	return 0;
4776 }
4777 
4778 static errno_t
ipsec_ioctl(ifnet_t interface,u_long command,void * data)4779 ipsec_ioctl(ifnet_t interface,
4780     u_long command,
4781     void *data)
4782 {
4783 #if IPSEC_NEXUS
4784 	struct ipsec_pcb *pcb = ifnet_softc(interface);
4785 #endif
4786 	errno_t result = 0;
4787 
4788 	switch (command) {
4789 	case SIOCSIFMTU: {
4790 #if IPSEC_NEXUS
4791 		if (pcb->ipsec_use_netif) {
4792 			// Make sure we can fit packets in the channel buffers
4793 			if (((uint64_t)((struct ifreq*)data)->ifr_mtu) > pcb->ipsec_slot_size) {
4794 				result = EINVAL;
4795 			} else {
4796 				ifnet_set_mtu(interface, (uint32_t)((struct ifreq*)data)->ifr_mtu);
4797 			}
4798 		} else
4799 #endif // IPSEC_NEXUS
4800 		{
4801 			ifnet_set_mtu(interface, ((struct ifreq*)data)->ifr_mtu);
4802 		}
4803 		break;
4804 	}
4805 
4806 	case SIOCSIFFLAGS:
4807 		/* ifioctl() takes care of it */
4808 		break;
4809 
4810 	case SIOCSIFSUBFAMILY: {
4811 		uint32_t subfamily;
4812 
4813 		subfamily = ((struct ifreq*)data)->ifr_type.ift_subfamily;
4814 		switch (subfamily) {
4815 		case IFRTYPE_SUBFAMILY_BLUETOOTH:
4816 			interface->if_subfamily = IFNET_SUBFAMILY_BLUETOOTH;
4817 			break;
4818 		case IFRTYPE_SUBFAMILY_WIFI:
4819 			interface->if_subfamily = IFNET_SUBFAMILY_WIFI;
4820 			break;
4821 		case IFRTYPE_SUBFAMILY_QUICKRELAY:
4822 			interface->if_subfamily = IFNET_SUBFAMILY_QUICKRELAY;
4823 			break;
4824 		case IFRTYPE_SUBFAMILY_DEFAULT:
4825 			interface->if_subfamily = IFNET_SUBFAMILY_DEFAULT;
4826 			break;
4827 		default:
4828 			result = EINVAL;
4829 			break;
4830 		}
4831 		break;
4832 	}
4833 
4834 	default:
4835 		result = EOPNOTSUPP;
4836 	}
4837 
4838 	return result;
4839 }
4840 
4841 static void
ipsec_detached(ifnet_t interface)4842 ipsec_detached(ifnet_t interface)
4843 {
4844 	struct ipsec_pcb *pcb = ifnet_softc(interface);
4845 
4846 	(void)ifnet_release(interface);
4847 	lck_mtx_lock(&ipsec_lock);
4848 	ipsec_free_pcb(pcb, true);
4849 	(void)ifnet_dispose(interface);
4850 	lck_mtx_unlock(&ipsec_lock);
4851 }
4852 
4853 /* Protocol Handlers */
4854 
4855 static errno_t
ipsec_proto_input(ifnet_t interface,protocol_family_t protocol,mbuf_t m,__unused char * frame_header)4856 ipsec_proto_input(ifnet_t interface,
4857     protocol_family_t     protocol,
4858     mbuf_t m,
4859     __unused char *frame_header)
4860 {
4861 	mbuf_pkthdr_setrcvif(m, interface);
4862 
4863 #if IPSEC_NEXUS
4864 	struct ipsec_pcb *pcb = ifnet_softc(interface);
4865 	if (!pcb->ipsec_use_netif)
4866 #endif // IPSEC_NEXUS
4867 	{
4868 		uint32_t af = 0;
4869 		struct ip *ip = mtod(m, struct ip *);
4870 		if (ip->ip_v == 4) {
4871 			af = AF_INET;
4872 		} else if (ip->ip_v == 6) {
4873 			af = AF_INET6;
4874 		}
4875 		bpf_tap_in(interface, DLT_NULL, m, &af, sizeof(af));
4876 		pktap_input(interface, protocol, m, NULL);
4877 	}
4878 
4879 	int32_t pktlen = m->m_pkthdr.len;
4880 	if (proto_input(protocol, m) != 0) {
4881 		ifnet_stat_increment_in(interface, 0, 0, 1);
4882 		m_freem(m);
4883 	} else {
4884 		ifnet_stat_increment_in(interface, 1, pktlen, 0);
4885 	}
4886 
4887 	return 0;
4888 }
4889 
4890 static errno_t
ipsec_proto_pre_output(__unused ifnet_t interface,protocol_family_t protocol,__unused mbuf_t * packet,__unused const struct sockaddr * dest,__unused void * route,__unused char * frame_type,__unused char * link_layer_dest)4891 ipsec_proto_pre_output(__unused ifnet_t interface,
4892     protocol_family_t    protocol,
4893     __unused mbuf_t              *packet,
4894     __unused const struct sockaddr *dest,
4895     __unused void *route,
4896     __unused char *frame_type,
4897     __unused char *link_layer_dest)
4898 {
4899 	*(protocol_family_t *)(void *)frame_type = protocol;
4900 	return 0;
4901 }
4902 
4903 static errno_t
ipsec_attach_proto(ifnet_t interface,protocol_family_t protocol)4904 ipsec_attach_proto(ifnet_t                              interface,
4905     protocol_family_t    protocol)
4906 {
4907 	struct ifnet_attach_proto_param proto;
4908 	errno_t                                                 result;
4909 
4910 	bzero(&proto, sizeof(proto));
4911 	proto.input = ipsec_proto_input;
4912 	proto.pre_output = ipsec_proto_pre_output;
4913 
4914 	result = ifnet_attach_protocol(interface, protocol, &proto);
4915 	if (result != 0 && result != EEXIST) {
4916 		os_log_error(OS_LOG_DEFAULT, "ipsec_attach_inet - ifnet_attach_protocol %d failed: %d\n",
4917 		    protocol, result);
4918 	}
4919 
4920 	return result;
4921 }
4922 
4923 errno_t
ipsec_inject_inbound_packet(ifnet_t interface,mbuf_t packet)4924 ipsec_inject_inbound_packet(ifnet_t     interface,
4925     mbuf_t      packet)
4926 {
4927 #if IPSEC_NEXUS
4928 	struct ipsec_pcb *pcb = ifnet_softc(interface);
4929 
4930 	if (pcb->ipsec_use_netif) {
4931 		if (!ipsec_data_move_begin(pcb)) {
4932 			os_log_info(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__,
4933 			    if_name(pcb->ipsec_ifp));
4934 			return ENXIO;
4935 		}
4936 
4937 		lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
4938 
4939 		lck_mtx_lock(&pcb->ipsec_input_chain_lock);
4940 
4941 		if (pcb->ipsec_input_chain_count > (u_int32_t)if_ipsec_max_pending_input) {
4942 			lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
4943 			lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
4944 			ipsec_data_move_end(pcb);
4945 			return ENOSPC;
4946 		}
4947 
4948 		if (pcb->ipsec_input_chain != NULL) {
4949 			pcb->ipsec_input_chain_last->m_nextpkt = packet;
4950 		} else {
4951 			pcb->ipsec_input_chain = packet;
4952 		}
4953 		pcb->ipsec_input_chain_count++;
4954 		while (packet->m_nextpkt) {
4955 			VERIFY(packet != packet->m_nextpkt);
4956 			packet = packet->m_nextpkt;
4957 			pcb->ipsec_input_chain_count++;
4958 		}
4959 		pcb->ipsec_input_chain_last = packet;
4960 		lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
4961 
4962 		kern_channel_ring_t rx_ring = pcb->ipsec_netif_rxring[0];
4963 		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
4964 
4965 		if (rx_ring != NULL) {
4966 			kern_channel_notify(rx_ring, 0);
4967 		}
4968 
4969 		ipsec_data_move_end(pcb);
4970 		return 0;
4971 	} else
4972 #endif // IPSEC_NEXUS
4973 	{
4974 		errno_t error;
4975 		protocol_family_t protocol;
4976 		if ((error = ipsec_demux(interface, packet, NULL, &protocol)) != 0) {
4977 			return error;
4978 		}
4979 
4980 		return ipsec_proto_input(interface, protocol, packet, NULL);
4981 	}
4982 }
4983 
4984 void
ipsec_set_pkthdr_for_interface(ifnet_t interface,mbuf_t packet,int family,uint32_t flowid)4985 ipsec_set_pkthdr_for_interface(ifnet_t interface, mbuf_t packet, int family,
4986     uint32_t flowid)
4987 {
4988 #pragma unused (flowid)
4989 	if (packet != NULL && interface != NULL) {
4990 		struct ipsec_pcb *pcb = ifnet_softc(interface);
4991 		if (pcb != NULL) {
4992 			/* Set traffic class, set flow */
4993 			m_set_service_class(packet, pcb->ipsec_output_service_class);
4994 			packet->m_pkthdr.pkt_flowsrc = FLOWSRC_IFNET;
4995 #if SKYWALK
4996 			packet->m_pkthdr.pkt_mpriv_srcid = interface->if_flowhash;
4997 			packet->m_pkthdr.pkt_flowid = flowid;
4998 #else /* !SKYWALK */
4999 			packet->m_pkthdr.pkt_flowid = interface->if_flowhash;
5000 #endif /* !SKYWALK */
5001 			if (family == AF_INET) {
5002 				struct ip *ip = mtod(packet, struct ip *);
5003 				packet->m_pkthdr.pkt_proto = ip->ip_p;
5004 			} else if (family == AF_INET6) {
5005 				struct ip6_hdr *ip6 = mtod(packet, struct ip6_hdr *);
5006 				packet->m_pkthdr.pkt_proto = ip6->ip6_nxt;
5007 			}
5008 			packet->m_pkthdr.pkt_flags = (PKTF_FLOW_ID | PKTF_FLOW_ADV | PKTF_FLOW_LOCALSRC);
5009 		}
5010 	}
5011 }
5012 
5013 void
ipsec_set_ipoa_for_interface(ifnet_t interface,struct ip_out_args * ipoa)5014 ipsec_set_ipoa_for_interface(ifnet_t interface, struct ip_out_args *ipoa)
5015 {
5016 	struct ipsec_pcb *pcb;
5017 
5018 	if (interface == NULL || ipoa == NULL) {
5019 		return;
5020 	}
5021 	pcb = ifnet_softc(interface);
5022 
5023 	if (net_qos_policy_restricted == 0) {
5024 		ipoa->ipoa_flags |= IPOAF_QOSMARKING_ALLOWED;
5025 		ipoa->ipoa_sotc = so_svc2tc(pcb->ipsec_output_service_class);
5026 	} else if (pcb->ipsec_output_service_class != MBUF_SC_VO ||
5027 	    net_qos_policy_restrict_avapps != 0) {
5028 		ipoa->ipoa_flags &= ~IPOAF_QOSMARKING_ALLOWED;
5029 	} else {
5030 		ipoa->ipoa_flags |= IP6OAF_QOSMARKING_ALLOWED;
5031 		ipoa->ipoa_sotc = SO_TC_VO;
5032 	}
5033 }
5034 
5035 void
ipsec_set_ip6oa_for_interface(ifnet_t interface,struct ip6_out_args * ip6oa)5036 ipsec_set_ip6oa_for_interface(ifnet_t interface, struct ip6_out_args *ip6oa)
5037 {
5038 	struct ipsec_pcb *pcb;
5039 
5040 	if (interface == NULL || ip6oa == NULL) {
5041 		return;
5042 	}
5043 	pcb = ifnet_softc(interface);
5044 
5045 	if (net_qos_policy_restricted == 0) {
5046 		ip6oa->ip6oa_flags |= IPOAF_QOSMARKING_ALLOWED;
5047 		ip6oa->ip6oa_sotc = so_svc2tc(pcb->ipsec_output_service_class);
5048 	} else if (pcb->ipsec_output_service_class != MBUF_SC_VO ||
5049 	    net_qos_policy_restrict_avapps != 0) {
5050 		ip6oa->ip6oa_flags &= ~IPOAF_QOSMARKING_ALLOWED;
5051 	} else {
5052 		ip6oa->ip6oa_flags |= IP6OAF_QOSMARKING_ALLOWED;
5053 		ip6oa->ip6oa_sotc = SO_TC_VO;
5054 	}
5055 }
5056 
5057 static boolean_t
ipsec_data_move_begin(struct ipsec_pcb * pcb)5058 ipsec_data_move_begin(struct ipsec_pcb *pcb)
5059 {
5060 	boolean_t ret = 0;
5061 
5062 	lck_mtx_lock_spin(&pcb->ipsec_pcb_data_move_lock);
5063 	if ((ret = IPSEC_IS_DATA_PATH_READY(pcb))) {
5064 		pcb->ipsec_pcb_data_move++;
5065 	}
5066 	lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
5067 
5068 	return ret;
5069 }
5070 
5071 static void
ipsec_data_move_end(struct ipsec_pcb * pcb)5072 ipsec_data_move_end(struct ipsec_pcb *pcb)
5073 {
5074 	lck_mtx_lock_spin(&pcb->ipsec_pcb_data_move_lock);
5075 	VERIFY(pcb->ipsec_pcb_data_move > 0);
5076 	/*
5077 	 * if there's no more thread moving data, wakeup any
5078 	 * drainers that's blocked waiting for this.
5079 	 */
5080 	if (--pcb->ipsec_pcb_data_move == 0 && pcb->ipsec_pcb_drainers > 0) {
5081 		wakeup(&(pcb->ipsec_pcb_data_move));
5082 	}
5083 	lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
5084 }
5085 
5086 static void
ipsec_data_move_drain(struct ipsec_pcb * pcb)5087 ipsec_data_move_drain(struct ipsec_pcb *pcb)
5088 {
5089 	lck_mtx_lock(&pcb->ipsec_pcb_data_move_lock);
5090 	/* data path must already be marked as not ready */
5091 	VERIFY(!IPSEC_IS_DATA_PATH_READY(pcb));
5092 	pcb->ipsec_pcb_drainers++;
5093 	while (pcb->ipsec_pcb_data_move != 0) {
5094 		(void)msleep(&(pcb->ipsec_pcb_data_move), &pcb->ipsec_pcb_data_move_lock,
5095 		    (PZERO - 1), __func__, NULL);
5096 	}
5097 	VERIFY(!IPSEC_IS_DATA_PATH_READY(pcb));
5098 	VERIFY(pcb->ipsec_pcb_drainers > 0);
5099 	pcb->ipsec_pcb_drainers--;
5100 	lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
5101 }
5102 
5103 static void
ipsec_wait_data_move_drain(struct ipsec_pcb * pcb)5104 ipsec_wait_data_move_drain(struct ipsec_pcb *pcb)
5105 {
5106 	/*
5107 	 * Mark the data path as not usable.
5108 	 */
5109 	lck_mtx_lock(&pcb->ipsec_pcb_data_move_lock);
5110 	IPSEC_CLR_DATA_PATH_READY(pcb);
5111 	lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
5112 
5113 	/* Wait until all threads in the data paths are done. */
5114 	ipsec_data_move_drain(pcb);
5115 }
5116