xref: /xnu-8796.141.3/bsd/net/if_ipsec.c (revision 1b191cb58250d0705d8a51287127505aa4bc0789)
1 /*
2  * Copyright (c) 2012-2022 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 
30 #include <sys/systm.h>
31 #include <sys/kern_control.h>
32 #include <net/kpi_protocol.h>
33 #include <net/kpi_interface.h>
34 #include <sys/socket.h>
35 #include <sys/socketvar.h>
36 #include <net/if.h>
37 #include <net/if_types.h>
38 #include <net/bpf.h>
39 #include <net/if_ipsec.h>
40 #include <sys/mbuf.h>
41 #include <sys/sockio.h>
42 #include <netinet/in.h>
43 #include <netinet/ip6.h>
44 #include <netinet6/in6_var.h>
45 #include <netinet6/ip6_var.h>
46 #include <sys/kauth.h>
47 #include <netinet6/ipsec.h>
48 #include <netinet6/ipsec6.h>
49 #include <netinet6/esp.h>
50 #include <netinet6/esp6.h>
51 #include <netinet/ip.h>
52 #include <net/flowadv.h>
53 #include <net/necp.h>
54 #include <netkey/key.h>
55 #include <net/pktap.h>
56 #include <kern/zalloc.h>
57 #include <os/log.h>
58 
59 #if SKYWALK
60 #include <skywalk/os_skywalk_private.h>
61 #include <skywalk/nexus/flowswitch/nx_flowswitch.h>
62 #include <skywalk/nexus/netif/nx_netif.h>
63 #define IPSEC_NEXUS 1
64 #else // SKYWALK
65 #define IPSEC_NEXUS 0
66 #endif // SKYWALK
67 
68 extern int net_qos_policy_restricted;
69 extern int net_qos_policy_restrict_avapps;
70 
71 /* Kernel Control functions */
72 static errno_t  ipsec_ctl_setup(u_int32_t *unit, void **unitinfo);
73 static errno_t  ipsec_ctl_bind(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
74     void **unitinfo);
75 static errno_t  ipsec_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
76     void **unitinfo);
77 static errno_t  ipsec_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t unit,
78     void *unitinfo);
79 static errno_t  ipsec_ctl_send(kern_ctl_ref kctlref, u_int32_t unit,
80     void *unitinfo, mbuf_t m, int flags);
81 static errno_t  ipsec_ctl_getopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo,
82     int opt, void *data, size_t *len);
83 static errno_t  ipsec_ctl_setopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo,
84     int opt, void *data, size_t len);
85 
86 /* Network Interface functions */
87 static void     ipsec_start(ifnet_t     interface);
88 static errno_t  ipsec_output(ifnet_t interface, mbuf_t data);
89 static errno_t  ipsec_demux(ifnet_t interface, mbuf_t data, char *frame_header,
90     protocol_family_t *protocol);
91 static errno_t  ipsec_add_proto(ifnet_t interface, protocol_family_t protocol,
92     const struct ifnet_demux_desc *demux_array,
93     u_int32_t demux_count);
94 static errno_t  ipsec_del_proto(ifnet_t interface, protocol_family_t protocol);
95 static errno_t  ipsec_ioctl(ifnet_t interface, u_long cmd, void *data);
96 static void             ipsec_detached(ifnet_t interface);
97 
98 /* Protocol handlers */
99 static errno_t  ipsec_attach_proto(ifnet_t interface, protocol_family_t proto);
100 static errno_t  ipsec_proto_input(ifnet_t interface, protocol_family_t protocol,
101     mbuf_t m, char *frame_header);
102 static errno_t ipsec_proto_pre_output(ifnet_t interface, protocol_family_t protocol,
103     mbuf_t *packet, const struct sockaddr *dest, void *route,
104     char *frame_type, char *link_layer_dest);
105 
106 static kern_ctl_ref     ipsec_kctlref;
107 static LCK_ATTR_DECLARE(ipsec_lck_attr, 0, 0);
108 static LCK_GRP_DECLARE(ipsec_lck_grp, "ipsec");
109 static LCK_MTX_DECLARE_ATTR(ipsec_lock, &ipsec_lck_grp, &ipsec_lck_attr);
110 
111 #if IPSEC_NEXUS
112 
113 SYSCTL_DECL(_net_ipsec);
114 SYSCTL_NODE(_net, OID_AUTO, ipsec, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "IPsec");
115 static int if_ipsec_verify_interface_creation = 0;
116 SYSCTL_INT(_net_ipsec, OID_AUTO, verify_interface_creation, CTLFLAG_RW | CTLFLAG_LOCKED, &if_ipsec_verify_interface_creation, 0, "");
117 
118 #define IPSEC_IF_VERIFY(_e)             if (__improbable(if_ipsec_verify_interface_creation)) { VERIFY(_e); }
119 
120 #define IPSEC_IF_DEFAULT_SLOT_SIZE 2048
121 #define IPSEC_IF_DEFAULT_RING_SIZE 64
122 #define IPSEC_IF_DEFAULT_TX_FSW_RING_SIZE 64
123 #define IPSEC_IF_DEFAULT_RX_FSW_RING_SIZE 128
124 #define IPSEC_IF_DEFAULT_BUF_SEG_SIZE   skmem_usr_buf_seg_size
125 
126 #define IPSEC_IF_WMM_RING_COUNT NEXUS_NUM_WMM_QUEUES
127 #define IPSEC_IF_MAX_RING_COUNT IPSEC_IF_WMM_RING_COUNT
128 #define IPSEC_NETIF_WMM_TX_RING_COUNT IPSEC_IF_WMM_RING_COUNT
129 #define IPSEC_NETIF_WMM_RX_RING_COUNT 1
130 #define IPSEC_NETIF_MAX_TX_RING_COUNT IPSEC_NETIF_WMM_TX_RING_COUNT
131 #define IPSEC_NETIF_MAX_RX_RING_COUNT IPSEC_NETIF_WMM_RX_RING_COUNT
132 
133 #define IPSEC_IF_MIN_RING_SIZE 8
134 #define IPSEC_IF_MAX_RING_SIZE 1024
135 
136 #define IPSEC_IF_MIN_SLOT_SIZE 1024
137 #define IPSEC_IF_MAX_SLOT_SIZE 4096
138 
139 #define IPSEC_DEFAULT_MAX_PENDING_INPUT_COUNT 512
140 
141 #define IPSEC_KPIPE_FLAG_WAKE_PKT 0x01
142 
143 static int if_ipsec_max_pending_input = IPSEC_DEFAULT_MAX_PENDING_INPUT_COUNT;
144 
145 static int sysctl_if_ipsec_ring_size SYSCTL_HANDLER_ARGS;
146 static int sysctl_if_ipsec_tx_fsw_ring_size SYSCTL_HANDLER_ARGS;
147 static int sysctl_if_ipsec_rx_fsw_ring_size SYSCTL_HANDLER_ARGS;
148 
149 static int if_ipsec_ring_size = IPSEC_IF_DEFAULT_RING_SIZE;
150 static int if_ipsec_tx_fsw_ring_size = IPSEC_IF_DEFAULT_TX_FSW_RING_SIZE;
151 static int if_ipsec_rx_fsw_ring_size = IPSEC_IF_DEFAULT_RX_FSW_RING_SIZE;
152 
153 SYSCTL_INT(_net_ipsec, OID_AUTO, max_pending_input, CTLFLAG_LOCKED | CTLFLAG_RW, &if_ipsec_max_pending_input, 0, "");
154 SYSCTL_PROC(_net_ipsec, OID_AUTO, ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
155     &if_ipsec_ring_size, IPSEC_IF_DEFAULT_RING_SIZE, &sysctl_if_ipsec_ring_size, "I", "");
156 SYSCTL_PROC(_net_ipsec, OID_AUTO, tx_fsw_ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
157     &if_ipsec_tx_fsw_ring_size, IPSEC_IF_DEFAULT_TX_FSW_RING_SIZE, &sysctl_if_ipsec_tx_fsw_ring_size, "I", "");
158 SYSCTL_PROC(_net_ipsec, OID_AUTO, rx_fsw_ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
159     &if_ipsec_rx_fsw_ring_size, IPSEC_IF_DEFAULT_RX_FSW_RING_SIZE, &sysctl_if_ipsec_rx_fsw_ring_size, "I", "");
160 
161 static int if_ipsec_debug = 0;
162 SYSCTL_INT(_net_ipsec, OID_AUTO, debug, CTLFLAG_LOCKED | CTLFLAG_RW, &if_ipsec_debug, 0, "");
163 
164 static errno_t
165 ipsec_register_nexus(void);
166 
/*
 * Per-interface collection of skywalk nexus identifiers: the netif
 * provider/instance pair plus the flowswitch provider, instance,
 * device port, and netagent UUIDs.  Embedded in struct ipsec_pcb.
 */
typedef struct ipsec_nx {
	uuid_t if_provider;    /* netif nexus provider */
	uuid_t if_instance;    /* netif nexus instance */
	uuid_t fsw_provider;   /* flowswitch nexus provider */
	uuid_t fsw_instance;   /* flowswitch nexus instance */
	uuid_t fsw_device;     /* flowswitch device port attachment */
	uuid_t fsw_agent;      /* flowswitch netagent */
} *ipsec_nx_t;
175 
176 static nexus_controller_t ipsec_ncd;
177 static int ipsec_ncd_refcount;
178 static uuid_t ipsec_kpipe_uuid;
179 
180 #endif // IPSEC_NEXUS
181 
182 /* Control block allocated for each kernel control connection */
/*
 * Control block allocated for each kernel control connection (one per
 * ipsec interface instance).  Allocated from ipsec_pcb_zone and linked
 * on the global ipsec_head list.
 */
struct ipsec_pcb {
	TAILQ_ENTRY(ipsec_pcb)  ipsec_chain;     // linkage on global ipsec_head list
	kern_ctl_ref            ipsec_ctlref;    // owning kernel control reference
	ifnet_t                 ipsec_ifp;       // attached network interface
	u_int32_t               ipsec_unit;      // ctl unit; 0 after ctl disconnect (see ipsec_interface_isvalid)
	u_int32_t               ipsec_unique_id;
	// These external flags can be set with IPSEC_OPT_FLAGS
	u_int32_t               ipsec_external_flags;
	// These internal flags are only used within this driver
	u_int32_t               ipsec_internal_flags;
	u_int32_t               ipsec_input_frag_size;
	bool                    ipsec_frag_size_set;   // true once ipsec_input_frag_size has been configured
	int                     ipsec_ext_ifdata_stats;
	mbuf_svc_class_t        ipsec_output_service_class;
	char                    ipsec_if_xname[IFXNAMSIZ];
	char                    ipsec_unique_name[IFXNAMSIZ];
	// PCB lock protects state fields, like ipsec_kpipe_count
	decl_lck_rw_data(, ipsec_pcb_lock);
	// lock to protect ipsec_pcb_data_move & ipsec_pcb_drainers
	decl_lck_mtx_data(, ipsec_pcb_data_move_lock);
	u_int32_t               ipsec_pcb_data_move; /* number of data moving contexts */
	u_int32_t               ipsec_pcb_drainers; /* number of threads waiting to drain */
	u_int32_t               ipsec_pcb_data_path_state; /* internal state of interface data path */
	ipsec_dscp_mapping_t    ipsec_output_dscp_mapping;

#if IPSEC_NEXUS
	lck_mtx_t               ipsec_input_chain_lock;
	lck_mtx_t               ipsec_kpipe_encrypt_lock;   // serializes ipsec_encrypt_mbuf() on the kpipe path
	lck_mtx_t               ipsec_kpipe_decrypt_lock;
	struct mbuf *           ipsec_input_chain;          // head of pending input mbuf chain
	struct mbuf *           ipsec_input_chain_last;     // tail, for O(1) append
	u_int32_t               ipsec_input_chain_count;
	// Input chain lock protects the list of input mbufs
	// The input chain lock must be taken AFTER the PCB lock if both are held
	struct ipsec_nx         ipsec_nx;                   // skywalk nexus UUIDs for this interface
	u_int32_t               ipsec_kpipe_count;          // number of active kernel pipes; indexes arrays below
	pid_t                   ipsec_kpipe_pid;
	uuid_t                  ipsec_kpipe_proc_uuid;
	uuid_t                  ipsec_kpipe_uuid[IPSEC_IF_MAX_RING_COUNT];
	void *                  ipsec_kpipe_rxring[IPSEC_IF_MAX_RING_COUNT];
	void *                  ipsec_kpipe_txring[IPSEC_IF_MAX_RING_COUNT];
	kern_pbufpool_t         ipsec_kpipe_pp;
	u_int32_t               ipsec_kpipe_tx_ring_size;
	u_int32_t               ipsec_kpipe_rx_ring_size;

	kern_nexus_t            ipsec_netif_nexus;          // set in ipsec_netif_prepare, cleared on disconnect
	kern_pbufpool_t         ipsec_netif_pp;
	void *                  ipsec_netif_rxring[IPSEC_NETIF_MAX_RX_RING_COUNT];
	void *                  ipsec_netif_txring[IPSEC_NETIF_MAX_TX_RING_COUNT];
	uint64_t                ipsec_netif_txring_size;

	u_int32_t               ipsec_slot_size;            // caps per-packet copy length in kpipe sync
	u_int32_t               ipsec_netif_ring_size;
	u_int32_t               ipsec_tx_fsw_ring_size;
	u_int32_t               ipsec_rx_fsw_ring_size;
	bool                    ipsec_use_netif;
	bool                    ipsec_needs_netagent;       // reported via ipsec_interface_needs_netagent()
#endif // IPSEC_NEXUS
};
242 
243 /* These are internal flags not exposed outside this file */
244 #define IPSEC_FLAGS_KPIPE_ALLOCATED 1
245 
246 /* data movement refcounting functions */
247 static boolean_t ipsec_data_move_begin(struct ipsec_pcb *pcb);
248 static void ipsec_data_move_end(struct ipsec_pcb *pcb);
249 static void ipsec_wait_data_move_drain(struct ipsec_pcb *pcb);
250 
251 /* Data path states */
252 #define IPSEC_PCB_DATA_PATH_READY    0x1
253 
254 /* Macros to set/clear/test data path states */
255 #define IPSEC_SET_DATA_PATH_READY(_pcb) ((_pcb)->ipsec_pcb_data_path_state |= IPSEC_PCB_DATA_PATH_READY)
256 #define IPSEC_CLR_DATA_PATH_READY(_pcb) ((_pcb)->ipsec_pcb_data_path_state &= ~IPSEC_PCB_DATA_PATH_READY)
257 #define IPSEC_IS_DATA_PATH_READY(_pcb) (((_pcb)->ipsec_pcb_data_path_state & IPSEC_PCB_DATA_PATH_READY) != 0)
258 
259 #if IPSEC_NEXUS
260 /* Macros to clear/set/test flags. */
/* Set `flag' in the pcb's driver-internal flag word (not IPSEC_OPT_FLAGS).
 * NOTE(review): no lock taken here; callers appear responsible for
 * synchronization -- confirm against call sites. */
static inline void
ipsec_flag_set(struct ipsec_pcb *pcb, uint32_t flag)
{
	pcb->ipsec_internal_flags |= flag;
}
/* Clear `flag' in the pcb's driver-internal flag word. */
static inline void
ipsec_flag_clr(struct ipsec_pcb *pcb, uint32_t flag)
{
	pcb->ipsec_internal_flags &= ~flag;
}
271 
/* Return true if any bit of `flag' is set in the pcb's internal flags. */
static inline bool
ipsec_flag_isset(struct ipsec_pcb *pcb, uint32_t flag)
{
	return !!(pcb->ipsec_internal_flags & flag);
}
277 #endif // IPSEC_NEXUS
278 
279 TAILQ_HEAD(ipsec_list, ipsec_pcb) ipsec_head;
280 
281 static KALLOC_TYPE_DEFINE(ipsec_pcb_zone, struct ipsec_pcb, NET_KT_DEFAULT);
282 
283 #define IPSECQ_MAXLEN 256
284 
285 #if IPSEC_NEXUS
286 static int
287 sysctl_if_ipsec_ring_size SYSCTL_HANDLER_ARGS
288 {
289 #pragma unused(arg1, arg2)
290 	int value = if_ipsec_ring_size;
291 
292 	int error = sysctl_handle_int(oidp, &value, 0, req);
293 	if (error || !req->newptr) {
294 		return error;
295 	}
296 
297 	if (value < IPSEC_IF_MIN_RING_SIZE ||
298 	    value > IPSEC_IF_MAX_RING_SIZE) {
299 		return EINVAL;
300 	}
301 
302 	if_ipsec_ring_size = value;
303 
304 	return 0;
305 }
306 
307 static int
308 sysctl_if_ipsec_tx_fsw_ring_size SYSCTL_HANDLER_ARGS
309 {
310 #pragma unused(arg1, arg2)
311 	int value = if_ipsec_tx_fsw_ring_size;
312 
313 	int error = sysctl_handle_int(oidp, &value, 0, req);
314 	if (error || !req->newptr) {
315 		return error;
316 	}
317 
318 	if (value < IPSEC_IF_MIN_RING_SIZE ||
319 	    value > IPSEC_IF_MAX_RING_SIZE) {
320 		return EINVAL;
321 	}
322 
323 	if_ipsec_tx_fsw_ring_size = value;
324 
325 	return 0;
326 }
327 
328 static int
329 sysctl_if_ipsec_rx_fsw_ring_size SYSCTL_HANDLER_ARGS
330 {
331 #pragma unused(arg1, arg2)
332 	int value = if_ipsec_rx_fsw_ring_size;
333 
334 	int error = sysctl_handle_int(oidp, &value, 0, req);
335 	if (error || !req->newptr) {
336 		return error;
337 	}
338 
339 	if (value < IPSEC_IF_MIN_RING_SIZE ||
340 	    value > IPSEC_IF_MAX_RING_SIZE) {
341 		return EINVAL;
342 	}
343 
344 	if_ipsec_rx_fsw_ring_size = value;
345 
346 	return 0;
347 }
348 
349 
/* True when the pcb runs one kpipe per WMM access category
 * (IPSEC_IF_WMM_RING_COUNT rings) rather than a single pipe. */
static inline bool
ipsec_in_wmm_mode(struct ipsec_pcb *pcb)
{
	return pcb->ipsec_kpipe_count == IPSEC_IF_WMM_RING_COUNT;
}
355 
356 #endif // IPSEC_NEXUS
357 
/*
 * Register the ipsec kernel control and the PF_INET/PF_INET6 protocol
 * plumbers for IFNET_FAMILY_IPSEC.  On failure, anything registered
 * earlier in this function is deregistered again before returning the
 * error; on success returns 0.
 */
errno_t
ipsec_register_control(void)
{
	struct kern_ctl_reg     kern_ctl;
	errno_t                 result = 0;

#if IPSEC_NEXUS
	/* NOTE(review): return value ignored -- ipsec_register_nexus()
	 * logs its own failure, and control registration proceeds even
	 * without a nexus domain provider.  Confirm this is intentional. */
	ipsec_register_nexus();
#endif // IPSEC_NEXUS

	TAILQ_INIT(&ipsec_head);

	bzero(&kern_ctl, sizeof(kern_ctl));
	strlcpy(kern_ctl.ctl_name, IPSEC_CONTROL_NAME, sizeof(kern_ctl.ctl_name));
	kern_ctl.ctl_name[sizeof(kern_ctl.ctl_name) - 1] = 0;
	kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED | CTL_FLAG_REG_SETUP; /* Require root */
	kern_ctl.ctl_sendsize = 64 * 1024;
	kern_ctl.ctl_recvsize = 64 * 1024;
	kern_ctl.ctl_setup = ipsec_ctl_setup;
	kern_ctl.ctl_bind = ipsec_ctl_bind;
	kern_ctl.ctl_connect = ipsec_ctl_connect;
	kern_ctl.ctl_disconnect = ipsec_ctl_disconnect;
	kern_ctl.ctl_send = ipsec_ctl_send;
	kern_ctl.ctl_setopt = ipsec_ctl_setopt;
	kern_ctl.ctl_getopt = ipsec_ctl_getopt;

	result = ctl_register(&kern_ctl, &ipsec_kctlref);
	if (result != 0) {
		os_log_error(OS_LOG_DEFAULT, "ipsec_register_control - ctl_register failed: %d\n", result);
		return result;
	}

	/* Register the protocol plumbers */
	if ((result = proto_register_plumber(PF_INET, IFNET_FAMILY_IPSEC,
	    ipsec_attach_proto, NULL)) != 0) {
		os_log_error(OS_LOG_DEFAULT, "ipsec_register_control - proto_register_plumber(PF_INET, IFNET_FAMILY_IPSEC) failed: %d\n",
		    result);
		/* Undo the control registration before bailing. */
		ctl_deregister(ipsec_kctlref);
		return result;
	}

	/* Register the protocol plumbers */
	if ((result = proto_register_plumber(PF_INET6, IFNET_FAMILY_IPSEC,
	    ipsec_attach_proto, NULL)) != 0) {
		/* Undo both earlier registrations before bailing. */
		proto_unregister_plumber(PF_INET, IFNET_FAMILY_IPSEC);
		ctl_deregister(ipsec_kctlref);
		os_log_error(OS_LOG_DEFAULT, "ipsec_register_control - proto_register_plumber(PF_INET6, IFNET_FAMILY_IPSEC) failed: %d\n",
		    result);
		return result;
	}

	return 0;
}
411 
412 /* Helpers */
413 int
ipsec_interface_isvalid(ifnet_t interface)414 ipsec_interface_isvalid(ifnet_t interface)
415 {
416 	struct ipsec_pcb *pcb = NULL;
417 
418 	if (interface == NULL) {
419 		return 0;
420 	}
421 
422 	pcb = ifnet_softc(interface);
423 
424 	if (pcb == NULL) {
425 		return 0;
426 	}
427 
428 	/* When ctl disconnects, ipsec_unit is set to 0 */
429 	if (pcb->ipsec_unit == 0) {
430 		return 0;
431 	}
432 
433 	return 1;
434 }
435 
436 #if IPSEC_NEXUS
437 boolean_t
ipsec_interface_needs_netagent(ifnet_t interface)438 ipsec_interface_needs_netagent(ifnet_t interface)
439 {
440 	struct ipsec_pcb *pcb = NULL;
441 
442 	if (interface == NULL) {
443 		return FALSE;
444 	}
445 
446 	pcb = ifnet_softc(interface);
447 
448 	if (pcb == NULL) {
449 		return FALSE;
450 	}
451 
452 	return pcb->ipsec_needs_netagent == true;
453 }
454 #endif // IPSEC_NEXUS
455 
/*
 * Apply the standard ipsec interface attributes to `ifp': MTU, interface
 * flags, and IPv6 link-local behavior.  Always returns 0.
 */
static errno_t
ipsec_ifnet_set_attrs(ifnet_t ifp)
{
	/* Set flags and additional information. */
	ifnet_set_mtu(ifp, 1500);
	ifnet_set_flags(ifp, IFF_UP | IFF_MULTICAST | IFF_POINTOPOINT, 0xffff);

	/* The interface must generate its own IPv6 LinkLocal address,
	 * if possible following the recommendation of RFC2472 to the 64bit interface ID
	 */
	ifnet_set_eflags(ifp, IFEF_NOAUTOIPV6LL, IFEF_NOAUTOIPV6LL);

#if !IPSEC_NEXUS
	/* Reset the stats in case as the interface may have been recycled */
	struct ifnet_stats_param stats;
	bzero(&stats, sizeof(struct ifnet_stats_param));
	ifnet_set_stat(ifp, &stats);
#endif // !IPSEC_NEXUS

	return 0;
}
477 
478 #if IPSEC_NEXUS
479 
480 static uuid_t ipsec_nx_dom_prov;
481 
/* Nexus domain provider init callback; nothing to set up. */
static errno_t
ipsec_nxdp_init(__unused kern_nexus_domain_provider_t domprov)
{
	return 0;
}
487 
/* Nexus domain provider teardown callback; nothing to clean up. */
static void
ipsec_nxdp_fini(__unused kern_nexus_domain_provider_t domprov)
{
	// Ignore
}
493 
/*
 * Register "com.apple.ipsec" as a NET_IF nexus domain provider, storing
 * the resulting provider UUID in ipsec_nx_dom_prov.  Returns 0 on
 * success or the registration error.
 */
static errno_t
ipsec_register_nexus(void)
{
	const struct kern_nexus_domain_provider_init dp_init = {
		.nxdpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
		.nxdpi_flags = 0,
		.nxdpi_init = ipsec_nxdp_init,
		.nxdpi_fini = ipsec_nxdp_fini
	};
	errno_t err = 0;

	/* ipsec_nxdp_init() is called before this function returns */
	err = kern_nexus_register_domain_provider(NEXUS_TYPE_NET_IF,
	    (const uint8_t *) "com.apple.ipsec",
	    &dp_init, sizeof(dp_init),
	    &ipsec_nx_dom_prov);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s: failed to register domain provider\n", __func__);
		return err;
	}
	return 0;
}
516 
/*
 * netif prepare callback: remember the netif nexus on the pcb and apply
 * the standard ipsec interface attributes to the new ifnet.
 */
static errno_t
ipsec_netif_prepare(kern_nexus_t nexus, ifnet_t ifp)
{
	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
	pcb->ipsec_netif_nexus = nexus;
	return ipsec_ifnet_set_attrs(ifp);
}
524 
/* Channel pre-connect callback; no per-channel context needed, accept. */
static errno_t
ipsec_nexus_pre_connect(kern_nexus_provider_t nxprov,
    proc_t p, kern_nexus_t nexus,
    nexus_port_t nexus_port, kern_channel_t channel, void **ch_ctx)
{
#pragma unused(nxprov, p)
#pragma unused(nexus, nexus_port, channel, ch_ctx)
	return 0;
}
534 
/*
 * Channel connected callback: take an io refcount on the interface
 * (released in ipsec_nexus_disconnected) and mark the data path ready
 * so data-move begin calls succeed.  Returns ENXIO if the interface is
 * no longer attached.
 */
static errno_t
ipsec_nexus_connected(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_t channel)
{
#pragma unused(nxprov, channel)
	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
	/* The `1' argument acquires an io refcount on success. */
	boolean_t ok = ifnet_is_attached(pcb->ipsec_ifp, 1);
	/* Mark the data path as ready */
	if (ok) {
		lck_mtx_lock(&pcb->ipsec_pcb_data_move_lock);
		IPSEC_SET_DATA_PATH_READY(pcb);
		lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
	}
	return ok ? 0 : ENXIO;
}
550 
/*
 * kpipe channel pre-disconnect callback: block until every in-flight
 * data-path thread has drained before the channel is torn down.
 */
static void
ipsec_nexus_pre_disconnect(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_t channel)
{
#pragma unused(nxprov, channel)
	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);

	/* A kpipe channel implies at least one allocated kernel pipe. */
	VERIFY(pcb->ipsec_kpipe_count != 0);

	/* Wait until all threads in the data paths are done. */
	ipsec_wait_data_move_drain(pcb);
}
563 
/*
 * netif channel pre-disconnect callback: block until every in-flight
 * data-path thread has drained before the channel is torn down.
 */
static void
ipsec_netif_pre_disconnect(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_t channel)
{
#pragma unused(nxprov, channel)
	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);

	/* Wait until all threads in the data paths are done. */
	ipsec_wait_data_move_drain(pcb);
}
574 
/*
 * Channel disconnected callback: forget the netif nexus pointer if it
 * belongs to this nexus, and drop the io refcount taken by
 * ipsec_nexus_connected().
 */
static void
ipsec_nexus_disconnected(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_t channel)
{
#pragma unused(nxprov, channel)
	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
	if (pcb->ipsec_netif_nexus == nexus) {
		pcb->ipsec_netif_nexus = NULL;
	}
	/* Balances ifnet_is_attached(ifp, 1) in ipsec_nexus_connected(). */
	ifnet_decr_iorefcnt(pcb->ipsec_ifp);
}
586 
/*
 * kpipe ring init callback.  Locate which kernel pipe this channel
 * belongs to by matching the channel's nexus UUID against the pcb's
 * kpipe UUID table, stash that index as the ring context, and record
 * the ring pointer in the matching rx/tx slot.  Returns ENOENT when the
 * channel UUID is unknown.
 */
static errno_t
ipsec_kpipe_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_t channel, kern_channel_ring_t ring, boolean_t is_tx_ring,
    void **ring_ctx)
{
#pragma unused(nxprov)
#pragma unused(channel)
	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
	uint8_t ring_idx;

	/* Find the kpipe slot whose UUID matches this channel. */
	for (ring_idx = 0; ring_idx < pcb->ipsec_kpipe_count; ring_idx++) {
		if (!uuid_compare(channel->ch_info->cinfo_nx_uuid, pcb->ipsec_kpipe_uuid[ring_idx])) {
			break;
		}
	}

	if (ring_idx == pcb->ipsec_kpipe_count) {
		uuid_string_t uuidstr;
		uuid_unparse(channel->ch_info->cinfo_nx_uuid, uuidstr);
		os_log_error(OS_LOG_DEFAULT, "%s: %s cannot find channel %s\n", __func__, pcb->ipsec_if_xname, uuidstr);
		return ENOENT;
	}

	/* The ring context is the pipe index, smuggled as a pointer. */
	*ring_ctx = (void *)(uintptr_t)ring_idx;

	if (!is_tx_ring) {
		VERIFY(pcb->ipsec_kpipe_rxring[ring_idx] == NULL);
		pcb->ipsec_kpipe_rxring[ring_idx] = ring;
	} else {
		VERIFY(pcb->ipsec_kpipe_txring[ring_idx] == NULL);
		pcb->ipsec_kpipe_txring[ring_idx] = ring;
	}
	return 0;
}
621 
622 static void
ipsec_kpipe_ring_fini(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring)623 ipsec_kpipe_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
624     kern_channel_ring_t ring)
625 {
626 #pragma unused(nxprov)
627 	bool found = false;
628 	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
629 
630 	for (unsigned int i = 0; i < pcb->ipsec_kpipe_count; i++) {
631 		if (pcb->ipsec_kpipe_rxring[i] == ring) {
632 			pcb->ipsec_kpipe_rxring[i] = NULL;
633 			found = true;
634 		} else if (pcb->ipsec_kpipe_txring[i] == ring) {
635 			pcb->ipsec_kpipe_txring[i] = NULL;
636 			found = true;
637 		}
638 	}
639 	VERIFY(found);
640 }
641 
/*
 * kpipe TX sync callback.  No data is moved here: if the TX ring has
 * anything queued, kick the netif RX ring so the netif side pulls the
 * packets.  Always returns 0; bails early (still returning 0) when the
 * data path is stopped, the kpipe is not allocated, or the TX ring is
 * empty.
 */
static errno_t
ipsec_kpipe_sync_tx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t tx_ring, uint32_t flags)
{
#pragma unused(nxprov)
#pragma unused(flags)
	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);

	/* Take a data-move reference; fails when the pcb is draining. */
	if (!ipsec_data_move_begin(pcb)) {
		os_log_info(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
		return 0;
	}

	lck_rw_lock_shared(&pcb->ipsec_pcb_lock);

	if (!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		ipsec_data_move_end(pcb);
		return 0;
	}

	VERIFY(pcb->ipsec_kpipe_count);

	kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
	if (tx_slot == NULL) {
		// Nothing to write, bail
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		ipsec_data_move_end(pcb);
		return 0;
	}

	// Signal the netif ring to read
	// (capture the ring pointer under the PCB lock, notify after dropping it)
	kern_channel_ring_t rx_ring = pcb->ipsec_netif_rxring[0];
	lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);

	if (rx_ring != NULL) {
		kern_channel_notify(rx_ring, 0);
	}

	ipsec_data_move_end(pcb);
	return 0;
}
684 
/*
 * Encrypt an outbound mbuf through the IPsec stack for `interface'.
 *
 * Takes ownership of `data'.  Returns the encrypted mbuf on success.
 * Returns NULL both on error (the mbuf is freed) and when the packet
 * was re-tunneled into the other address family (the packet has been
 * handed off to that stack -- see the TODO comments below).  Callers
 * therefore cannot distinguish those two NULL cases.
 */
static mbuf_t
ipsec_encrypt_mbuf(ifnet_t interface,
    mbuf_t data)
{
	struct ipsec_output_state ipsec_state;
	int error = 0;
	uint32_t af;

	// Make sure this packet isn't looping through the interface
	if (necp_get_last_interface_index_from_packet(data) == interface->if_index) {
		error = -1;
		goto ipsec_output_err;
	}

	// Mark the interface so NECP can evaluate tunnel policy
	necp_mark_packet_from_interface(data, interface);

	// Dispatch on the IP version nibble of the (assumed contiguous) header
	struct ip *ip = mtod(data, struct ip *);
	u_int ip_version = ip->ip_v;

	switch (ip_version) {
	case 4: {
		af = AF_INET;

		memset(&ipsec_state, 0, sizeof(ipsec_state));
		ipsec_state.m = data;
		ipsec_state.dst = (struct sockaddr *)&ip->ip_dst;
		memset(&ipsec_state.ro, 0, sizeof(ipsec_state.ro));

		error = ipsec4_interface_output(&ipsec_state, interface);
		if (error == 0 && ipsec_state.tunneled == 6) {
			// Tunneled in IPv6 - packet is gone
			// TODO: Don't lose mbuf
			data = NULL;
			goto done;
		}

		// The output call may have replaced the mbuf
		data = ipsec_state.m;
		if (error || data == NULL) {
			if (error) {
				os_log_error(OS_LOG_DEFAULT, "ipsec_encrypt_mbuf: ipsec4_output error %d\n", error);
			}
			goto ipsec_output_err;
		}
		goto done;
	}
	case 6: {
		af = AF_INET6;

		// Pull the IPv6 header into its own mbuf; consumes data on failure
		data = ipsec6_splithdr(data);
		if (data == NULL) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_encrypt_mbuf: ipsec6_splithdr returned NULL\n");
			goto ipsec_output_err;
		}

		struct ip6_hdr *ip6 = mtod(data, struct ip6_hdr *);

		memset(&ipsec_state, 0, sizeof(ipsec_state));
		ipsec_state.m = data;
		ipsec_state.dst = (struct sockaddr *)&ip6->ip6_dst;
		memset(&ipsec_state.ro, 0, sizeof(ipsec_state.ro));

		error = ipsec6_interface_output(&ipsec_state, interface, &ip6->ip6_nxt, ipsec_state.m);
		if (error == 0 && ipsec_state.tunneled == 4) {
			// Tunneled in IPv4 - packet is gone
			// TODO: Don't lose mbuf
			data = NULL;
			goto done;
		}
		// The output call may have replaced the mbuf
		data = ipsec_state.m;
		if (error || data == NULL) {
			if (error) {
				os_log_error(OS_LOG_DEFAULT, "ipsec_encrypt_mbuf: ipsec6_output error %d\n", error);
			}
			goto ipsec_output_err;
		}
		goto done;
	}
	default: {
		os_log_error(OS_LOG_DEFAULT, "ipsec_encrypt_mbuf: Received unknown packet version %d\n", ip_version);
		error = -1;
		goto ipsec_output_err;
	}
	}

done:
	return data;

ipsec_output_err:
	if (data) {
		mbuf_freem(data);
	}
	return NULL;
}
779 
780 static errno_t
ipsec_kpipe_sync_rx(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t rx_ring,uint32_t flags)781 ipsec_kpipe_sync_rx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
782     kern_channel_ring_t rx_ring, uint32_t flags)
783 {
784 #pragma unused(nxprov)
785 #pragma unused(flags)
786 	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
787 	struct kern_channel_ring_stat_increment rx_ring_stats;
788 	uint8_t ring_idx = (uint8_t)(uintptr_t)kern_channel_ring_get_context(rx_ring);
789 
790 	if (!ipsec_data_move_begin(pcb)) {
791 		os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
792 		return 0;
793 	}
794 
795 	lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
796 
797 	if (!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
798 		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
799 		ipsec_data_move_end(pcb);
800 		return 0;
801 	}
802 
803 	VERIFY(pcb->ipsec_kpipe_count);
804 	VERIFY(ring_idx <= pcb->ipsec_kpipe_count);
805 
806 	// Reclaim user-released slots
807 	(void) kern_channel_reclaim(rx_ring);
808 
809 	uint32_t avail = kern_channel_available_slot_count(rx_ring);
810 	if (avail == 0) {
811 		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
812 		os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d no room in rx_ring\n", __func__,
813 		    pcb->ipsec_if_xname, rx_ring->ckr_name, ring_idx);
814 		ipsec_data_move_end(pcb);
815 		return 0;
816 	}
817 
818 	kern_channel_ring_t tx_ring = pcb->ipsec_netif_txring[ring_idx];
819 	if (tx_ring == NULL) {
820 		// Net-If TX ring not set up yet, nothing to read
821 		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
822 		os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d bad netif_txring 1\n", __func__,
823 		    pcb->ipsec_if_xname, rx_ring->ckr_name, ring_idx);
824 		ipsec_data_move_end(pcb);
825 		return 0;
826 	}
827 
828 	struct netif_stats *nifs = &NX_NETIF_PRIVATE(pcb->ipsec_netif_nexus)->nif_stats;
829 
830 	// Unlock ipsec before entering ring
831 	lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
832 
833 	(void)kr_enter(tx_ring, TRUE);
834 
835 	// Lock again after entering and validate
836 	lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
837 	if (tx_ring != pcb->ipsec_netif_txring[ring_idx]) {
838 		// Ring no longer valid
839 		// Unlock first, then exit ring
840 		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
841 		kr_exit(tx_ring);
842 		os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d bad netif_txring 2\n", __func__,
843 		    pcb->ipsec_if_xname, rx_ring->ckr_name, ring_idx);
844 		ipsec_data_move_end(pcb);
845 		return 0;
846 	}
847 
848 	struct kern_channel_ring_stat_increment tx_ring_stats;
849 	bzero(&tx_ring_stats, sizeof(tx_ring_stats));
850 	kern_channel_slot_t tx_pslot = NULL;
851 	kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
852 	if (tx_slot == NULL) {
853 		// Nothing to read, don't bother signalling
854 		// Unlock first, then exit ring
855 		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
856 		kr_exit(tx_ring);
857 		ipsec_data_move_end(pcb);
858 		return 0;
859 	}
860 
861 	struct kern_pbufpool *rx_pp = rx_ring->ckr_pp;
862 	VERIFY(rx_pp != NULL);
863 	struct kern_pbufpool *tx_pp = tx_ring->ckr_pp;
864 	VERIFY(tx_pp != NULL);
865 	bzero(&rx_ring_stats, sizeof(rx_ring_stats));
866 	kern_channel_slot_t rx_pslot = NULL;
867 	kern_channel_slot_t rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);
868 	kern_packet_t tx_chain_ph = 0;
869 
870 	while (rx_slot != NULL && tx_slot != NULL) {
871 		size_t length = 0;
872 		mbuf_t data = NULL;
873 		errno_t error = 0;
874 
875 		// Allocate rx packet
876 		kern_packet_t rx_ph = 0;
877 		error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
878 		if (__improbable(error != 0)) {
879 			os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s: failed to allocate packet\n",
880 			    pcb->ipsec_ifp->if_xname);
881 			break;
882 		}
883 
884 		kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);
885 
886 		if (tx_ph == 0) {
887 			// Advance TX ring
888 			tx_pslot = tx_slot;
889 			tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
890 			kern_pbufpool_free(rx_pp, rx_ph);
891 			continue;
892 		}
893 		(void) kern_channel_slot_detach_packet(tx_ring, tx_slot, tx_ph);
894 		if (tx_chain_ph != 0) {
895 			kern_packet_append(tx_ph, tx_chain_ph);
896 		}
897 		tx_chain_ph = tx_ph;
898 
899 		// Advance TX ring
900 		tx_pslot = tx_slot;
901 		tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
902 
903 		kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
904 		VERIFY(tx_buf != NULL);
905 		uint8_t *tx_baddr = kern_buflet_get_data_address(tx_buf);
906 		VERIFY(tx_baddr != NULL);
907 		tx_baddr += kern_buflet_get_data_offset(tx_buf);
908 
909 		bpf_tap_packet_out(pcb->ipsec_ifp, DLT_RAW, tx_ph, NULL, 0);
910 
911 		length = MIN(kern_packet_get_data_length(tx_ph),
912 		    pcb->ipsec_slot_size);
913 
914 		// Increment TX stats
915 		tx_ring_stats.kcrsi_slots_transferred++;
916 		tx_ring_stats.kcrsi_bytes_transferred += length;
917 
918 		if (length > 0) {
919 			error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, &data);
920 			if (error == 0) {
921 				error = mbuf_copyback(data, 0, length, tx_baddr, MBUF_DONTWAIT);
922 				if (error == 0) {
923 					// Encrypt and send packet
924 					lck_mtx_lock(&pcb->ipsec_kpipe_encrypt_lock);
925 					data = ipsec_encrypt_mbuf(pcb->ipsec_ifp, data);
926 					lck_mtx_unlock(&pcb->ipsec_kpipe_encrypt_lock);
927 				} else {
928 					os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s - mbuf_copyback(%zu) error %d\n", pcb->ipsec_ifp->if_xname, length, error);
929 					STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
930 					STATS_INC(nifs, NETIF_STATS_DROP);
931 					mbuf_freem(data);
932 					data = NULL;
933 				}
934 			} else {
935 				os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s - mbuf_gethdr error %d\n", pcb->ipsec_ifp->if_xname, error);
936 				STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
937 				STATS_INC(nifs, NETIF_STATS_DROP);
938 			}
939 		} else {
940 			os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s - 0 length packet\n", pcb->ipsec_ifp->if_xname);
941 			STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
942 			STATS_INC(nifs, NETIF_STATS_DROP);
943 		}
944 
945 		if (data == NULL) {
946 			os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s: no encrypted packet to send\n", pcb->ipsec_ifp->if_xname);
947 			kern_pbufpool_free(rx_pp, rx_ph);
948 			break;
949 		}
950 
951 		length = mbuf_pkthdr_len(data);
952 		if (length > PP_BUF_SIZE_DEF(rx_pp)) {
953 			// Flush data
954 			mbuf_freem(data);
955 			kern_pbufpool_free(rx_pp, rx_ph);
956 			os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s: encrypted packet length %zu > %u\n",
957 			    pcb->ipsec_ifp->if_xname, length, PP_BUF_SIZE_DEF(rx_pp));
958 			continue;
959 		}
960 
961 		// Fillout rx packet
962 		kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
963 		VERIFY(rx_buf != NULL);
964 		void *rx_baddr = kern_buflet_get_data_address(rx_buf);
965 		VERIFY(rx_baddr != NULL);
966 
967 		// Copy-in data from mbuf to buflet
968 		mbuf_copydata(data, 0, length, (void *)rx_baddr);
969 		kern_packet_clear_flow_uuid(rx_ph);     // Zero flow id
970 
971 		// Finalize and attach the packet
972 		error = kern_buflet_set_data_offset(rx_buf, 0);
973 		VERIFY(error == 0);
974 		error = kern_buflet_set_data_length(rx_buf, (uint16_t)length);
975 		VERIFY(error == 0);
976 		error = kern_packet_finalize(rx_ph);
977 		VERIFY(error == 0);
978 		error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
979 		VERIFY(error == 0);
980 
981 		STATS_INC(nifs, NETIF_STATS_TX_PACKETS);
982 		STATS_INC(nifs, NETIF_STATS_TX_COPY_DIRECT);
983 
984 		rx_ring_stats.kcrsi_slots_transferred++;
985 		rx_ring_stats.kcrsi_bytes_transferred += length;
986 
987 		if (!pcb->ipsec_ext_ifdata_stats) {
988 			ifnet_stat_increment_out(pcb->ipsec_ifp, 1, (uint16_t)length, 0);
989 		}
990 
991 		mbuf_freem(data);
992 
993 		rx_pslot = rx_slot;
994 		rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
995 	}
996 
997 	if (rx_pslot) {
998 		kern_channel_advance_slot(rx_ring, rx_pslot);
999 		kern_channel_increment_ring_net_stats(rx_ring, pcb->ipsec_ifp, &rx_ring_stats);
1000 	}
1001 
1002 	if (tx_chain_ph != 0) {
1003 		kern_pbufpool_free_chain(tx_pp, tx_chain_ph);
1004 	}
1005 
1006 	if (tx_pslot) {
1007 		kern_channel_advance_slot(tx_ring, tx_pslot);
1008 		kern_channel_increment_ring_net_stats(tx_ring, pcb->ipsec_ifp, &tx_ring_stats);
1009 		(void)kern_channel_reclaim(tx_ring);
1010 	}
1011 
1012 	/* always reenable output */
1013 	errno_t error = ifnet_enable_output(pcb->ipsec_ifp);
1014 	if (error != 0) {
1015 		os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx: ifnet_enable_output returned error %d\n", error);
1016 	}
1017 
1018 	// Unlock first, then exit ring
1019 	lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
1020 
1021 	if (tx_pslot != NULL) {
1022 		kern_channel_notify(tx_ring, 0);
1023 	}
1024 	kr_exit(tx_ring);
1025 
1026 	ipsec_data_move_end(pcb);
1027 	return 0;
1028 }
1029 
1030 static uint8_t
ipsec_find_tx_ring_by_svc(kern_packet_svc_class_t svc_class)1031 ipsec_find_tx_ring_by_svc(kern_packet_svc_class_t svc_class)
1032 {
1033 	switch (svc_class) {
1034 	case KPKT_SC_VO: {
1035 		return 0;
1036 	}
1037 	case KPKT_SC_VI: {
1038 		return 1;
1039 	}
1040 	case KPKT_SC_BE: {
1041 		return 2;
1042 	}
1043 	case KPKT_SC_BK: {
1044 		return 3;
1045 	}
1046 	default: {
1047 		VERIFY(0);
1048 		return 0;
1049 	}
1050 	}
1051 }
1052 
1053 static errno_t
ipsec_netif_ring_init(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel,kern_channel_ring_t ring,boolean_t is_tx_ring,void ** ring_ctx)1054 ipsec_netif_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
1055     kern_channel_t channel, kern_channel_ring_t ring, boolean_t is_tx_ring,
1056     void **ring_ctx)
1057 {
1058 #pragma unused(nxprov)
1059 #pragma unused(channel)
1060 	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
1061 
1062 	if (!is_tx_ring) {
1063 		VERIFY(pcb->ipsec_netif_rxring[0] == NULL);
1064 		pcb->ipsec_netif_rxring[0] = ring;
1065 	} else {
1066 		uint8_t ring_idx = 0;
1067 		if (ipsec_in_wmm_mode(pcb)) {
1068 			int err;
1069 			kern_packet_svc_class_t svc_class;
1070 			err = kern_channel_get_service_class(ring, &svc_class);
1071 			VERIFY(err == 0);
1072 			ring_idx = ipsec_find_tx_ring_by_svc(svc_class);
1073 			VERIFY(ring_idx < IPSEC_IF_WMM_RING_COUNT);
1074 		}
1075 
1076 		*ring_ctx = (void *)(uintptr_t)ring_idx;
1077 
1078 		VERIFY(pcb->ipsec_netif_txring[ring_idx] == NULL);
1079 		pcb->ipsec_netif_txring[ring_idx] = ring;
1080 	}
1081 	return 0;
1082 }
1083 
1084 static void
ipsec_netif_ring_fini(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring)1085 ipsec_netif_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
1086     kern_channel_ring_t ring)
1087 {
1088 #pragma unused(nxprov)
1089 	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
1090 	bool found = false;
1091 
1092 	for (int i = 0; i < IPSEC_NETIF_MAX_RX_RING_COUNT; i++) {
1093 		if (pcb->ipsec_netif_rxring[i] == ring) {
1094 			pcb->ipsec_netif_rxring[i] = NULL;
1095 			VERIFY(!found);
1096 			found = true;
1097 		}
1098 	}
1099 	for (int i = 0; i < IPSEC_NETIF_MAX_TX_RING_COUNT; i++) {
1100 		if (pcb->ipsec_netif_txring[i] == ring) {
1101 			pcb->ipsec_netif_txring[i] = NULL;
1102 			VERIFY(!found);
1103 			found = true;
1104 		}
1105 	}
1106 	VERIFY(found);
1107 }
1108 
1109 static bool
ipsec_netif_check_policy(ifnet_t interface,mbuf_t data)1110 ipsec_netif_check_policy(ifnet_t interface, mbuf_t data)
1111 {
1112 	necp_kernel_policy_result necp_result = 0;
1113 	necp_kernel_policy_result_parameter necp_result_parameter = {};
1114 	uint32_t necp_matched_policy_id = 0;
1115 	struct ip_out_args args4 = { };
1116 	struct ip6_out_args args6 = { };
1117 
1118 	// This packet has been marked with IP level policy, do not mark again.
1119 	if (data && data->m_pkthdr.necp_mtag.necp_policy_id >= NECP_KERNEL_POLICY_ID_FIRST_VALID_IP) {
1120 		return true;
1121 	}
1122 
1123 	size_t length = mbuf_pkthdr_len(data);
1124 	if (length < sizeof(struct ip)) {
1125 		return false;
1126 	}
1127 
1128 	struct ip *ip = mtod(data, struct ip *);
1129 	u_int ip_version = ip->ip_v;
1130 	switch (ip_version) {
1131 	case 4: {
1132 		if (interface != NULL) {
1133 			args4.ipoa_flags |= IPOAF_BOUND_IF;
1134 			args4.ipoa_boundif = interface->if_index;
1135 		}
1136 		necp_matched_policy_id = necp_ip_output_find_policy_match(data, IP_OUTARGS, &args4, NULL,
1137 		    &necp_result, &necp_result_parameter);
1138 		break;
1139 	}
1140 	case 6: {
1141 		if (interface != NULL) {
1142 			args6.ip6oa_flags |= IP6OAF_BOUND_IF;
1143 			args6.ip6oa_boundif = interface->if_index;
1144 		}
1145 		necp_matched_policy_id = necp_ip6_output_find_policy_match(data, IPV6_OUTARGS, &args6, NULL,
1146 		    &necp_result, &necp_result_parameter);
1147 		break;
1148 	}
1149 	default: {
1150 		return false;
1151 	}
1152 	}
1153 
1154 	if (necp_result == NECP_KERNEL_POLICY_RESULT_DROP ||
1155 	    necp_result == NECP_KERNEL_POLICY_RESULT_SOCKET_DIVERT) {
1156 		/* Drop and flow divert packets should be blocked at the IP layer */
1157 		return false;
1158 	}
1159 
1160 	necp_mark_packet_from_ip(data, necp_matched_policy_id);
1161 	return true;
1162 }
1163 
/*
 * Netif TX sync callback: drain packets the stack queued on the netif
 * TX ring.  If a kernel pipe is attached, the packets stay queued and
 * the matching kpipe RX ring is just notified so its client can pull
 * them.  Otherwise each packet is copied into an mbuf, policy-checked
 * with NECP, and handed to ipsec_output() for encryption/injection
 * into the BSD stack.  Always returns 0.
 */
static errno_t
ipsec_netif_sync_tx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t tx_ring, uint32_t flags)
{
#pragma unused(nxprov)
#pragma unused(flags)
	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);

	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;

	// Bail out early if the data path is stopped (e.g. teardown in progress).
	if (!ipsec_data_move_begin(pcb)) {
		os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
		return 0;
	}

	lck_rw_lock_shared(&pcb->ipsec_pcb_lock);

	struct kern_channel_ring_stat_increment tx_ring_stats;
	bzero(&tx_ring_stats, sizeof(tx_ring_stats));
	kern_channel_slot_t tx_pslot = NULL;
	kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
	kern_packet_t tx_chain_ph = 0;

	STATS_INC(nifs, NETIF_STATS_TX_SYNC);

	if (tx_slot == NULL) {
		// Nothing to write, don't bother signalling
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		ipsec_data_move_end(pcb);
		return 0;
	}

	if (pcb->ipsec_kpipe_count &&
	    ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
		// Select the corresponding kpipe rx ring
		// (the netif TX ring context holds the WMM ring index).
		uint8_t ring_idx = (uint8_t)(uintptr_t)kern_channel_ring_get_context(tx_ring);
		VERIFY(ring_idx < IPSEC_IF_MAX_RING_COUNT);
		kern_channel_ring_t rx_ring = pcb->ipsec_kpipe_rxring[ring_idx];

		// Unlock while calling notify
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);

		// Signal the kernel pipe ring to read
		if (rx_ring != NULL) {
			kern_channel_notify(rx_ring, 0);
		}

		ipsec_data_move_end(pcb);
		return 0;
	}

	// If we're here, we're injecting into the BSD stack
	while (tx_slot != NULL) {
		size_t length = 0;
		mbuf_t data = NULL;

		kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);

		if (tx_ph == 0) {
			// Advance TX ring
			tx_pslot = tx_slot;
			tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
			continue;
		}
		// Detach the packet and chain it for a single bulk free after
		// the loop; the payload is copied into an mbuf below.
		(void) kern_channel_slot_detach_packet(tx_ring, tx_slot, tx_ph);
		if (tx_chain_ph != 0) {
			kern_packet_append(tx_ph, tx_chain_ph);
		}
		tx_chain_ph = tx_ph;

		// Advance TX ring
		tx_pslot = tx_slot;
		tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);

		kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
		VERIFY(tx_buf != NULL);
		uint8_t *tx_baddr = kern_buflet_get_data_address(tx_buf);
		VERIFY(tx_baddr != 0);
		tx_baddr += kern_buflet_get_data_offset(tx_buf);

		bpf_tap_packet_out(pcb->ipsec_ifp, DLT_RAW, tx_ph, NULL, 0);

		// Clamp the copy to the configured slot size.
		length = MIN(kern_packet_get_data_length(tx_ph),
		    pcb->ipsec_slot_size);

		if (length > 0) {
			errno_t error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, &data);
			if (error == 0) {
				error = mbuf_copyback(data, 0, length, tx_baddr, MBUF_DONTWAIT);
				if (error == 0) {
					// Mark packet from policy
					uint32_t policy_id = kern_packet_get_policy_id(tx_ph);
					necp_mark_packet_from_ip(data, policy_id);

					// Check policy with NECP
					if (!ipsec_netif_check_policy(pcb->ipsec_ifp, data)) {
						os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - failed policy check\n", pcb->ipsec_ifp->if_xname);
						STATS_INC(nifs, NETIF_STATS_DROP);
						mbuf_freem(data);
						data = NULL;
					} else {
						// Send through encryption
						// (ipsec_output consumes the mbuf).
						error = ipsec_output(pcb->ipsec_ifp, data);
						if (error != 0) {
							os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - ipsec_output error %d\n", pcb->ipsec_ifp->if_xname, error);
						}
					}
				} else {
					os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - mbuf_copyback(%zu) error %d\n", pcb->ipsec_ifp->if_xname, length, error);
					STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
					STATS_INC(nifs, NETIF_STATS_DROP);
					mbuf_freem(data);
					data = NULL;
				}
			} else {
				os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - mbuf_gethdr error %d\n", pcb->ipsec_ifp->if_xname, error);
				STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
				STATS_INC(nifs, NETIF_STATS_DROP);
			}
		} else {
			os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - 0 length packet\n", pcb->ipsec_ifp->if_xname);
			STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
			STATS_INC(nifs, NETIF_STATS_DROP);
		}

		if (data == NULL) {
			// Stop on the first failed packet; remaining slots are
			// processed on a later sync.
			os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s: no encrypted packet to send\n", pcb->ipsec_ifp->if_xname);
			break;
		}

		STATS_INC(nifs, NETIF_STATS_TX_PACKETS);
		STATS_INC(nifs, NETIF_STATS_TX_COPY_MBUF);

		tx_ring_stats.kcrsi_slots_transferred++;
		tx_ring_stats.kcrsi_bytes_transferred += length;
	}

	// Free all detached TX packets in one pass.
	if (tx_chain_ph != 0) {
		kern_pbufpool_free_chain(tx_ring->ckr_pp, tx_chain_ph);
	}

	// Publish ring progress and stats only if we consumed any slot.
	if (tx_pslot) {
		kern_channel_advance_slot(tx_ring, tx_pslot);
		kern_channel_increment_ring_net_stats(tx_ring, pcb->ipsec_ifp, &tx_ring_stats);
		(void)kern_channel_reclaim(tx_ring);
	}

	lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
	ipsec_data_move_end(pcb);

	return 0;
}
1316 
/*
 * Service a single netif TX ring after a doorbell: refill/sync the
 * ring, optionally disable ifnet output for flow control when a kpipe
 * consumer is attached, and kick the matching kpipe RX ring so its
 * client reads the queued packets.  Returns ENXIO if the ring was
 * torn down while we were entering it, 0 otherwise.
 * Caller (ipsec_netif_tx_doorbell) holds the data-move reference.
 */
static errno_t
ipsec_netif_tx_doorbell_one(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t ring, uint32_t flags, uint8_t ring_idx)
{
#pragma unused(nxprov)
	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
	boolean_t more = false;
	errno_t rc = 0;

	VERIFY((flags & KERN_NEXUS_TXDOORBELLF_ASYNC_REFILL) == 0);

	/*
	 * Refill and sync the ring; we may be racing against another thread doing
	 * an RX sync that also wants to do kr_enter(), and so use the blocking
	 * variant here.
	 */
	rc = kern_channel_tx_refill_canblock(ring, UINT32_MAX, UINT32_MAX, true, &more);
	if (rc != 0 && rc != EAGAIN && rc != EBUSY) {
		os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s tx refill failed %d\n", __func__,
		    pcb->ipsec_if_xname, ring->ckr_name, rc);
	}

	(void) kr_enter(ring, TRUE);
	lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
	// Re-validate under the pcb lock: the ring may have been torn down
	// while we were blocked in kr_enter().
	if (ring != pcb->ipsec_netif_txring[ring_idx]) {
		// ring no longer valid
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		kr_exit(ring);
		os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d bad netif_txring 3\n", __func__,
		    pcb->ipsec_if_xname, ring->ckr_name, ring_idx);
		return ENXIO;
	}

	if (pcb->ipsec_kpipe_count) {
		uint32_t tx_available = kern_channel_available_slot_count(ring);
		if (pcb->ipsec_netif_txring_size > 0 &&
		    tx_available >= pcb->ipsec_netif_txring_size - 1) {
			// No room left in tx ring, disable output for now
			// (re-enabled by the kpipe RX sync path).
			errno_t error = ifnet_disable_output(pcb->ipsec_ifp);
			if (error != 0) {
				os_log_error(OS_LOG_DEFAULT, "ipsec_netif_tx_doorbell: ifnet_disable_output returned error %d\n", error);
			}
		}
	}

	if (pcb->ipsec_kpipe_count) {
		kern_channel_ring_t rx_ring = pcb->ipsec_kpipe_rxring[ring_idx];

		// Unlock while calling notify
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		// Signal the kernel pipe ring to read
		if (rx_ring != NULL) {
			kern_channel_notify(rx_ring, 0);
		}
	} else {
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
	}

	kr_exit(ring);

	return 0;
}
1379 
1380 static errno_t
ipsec_netif_tx_doorbell(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring,__unused uint32_t flags)1381 ipsec_netif_tx_doorbell(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
1382     kern_channel_ring_t ring, __unused uint32_t flags)
1383 {
1384 	errno_t ret = 0;
1385 	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
1386 
1387 	if (!ipsec_data_move_begin(pcb)) {
1388 		os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
1389 		return 0;
1390 	}
1391 
1392 	if (ipsec_in_wmm_mode(pcb)) {
1393 		for (uint8_t i = 0; i < IPSEC_IF_WMM_RING_COUNT; i++) {
1394 			kern_channel_ring_t nring = pcb->ipsec_netif_txring[i];
1395 			ret = ipsec_netif_tx_doorbell_one(nxprov, nexus, nring, flags, i);
1396 			if (ret) {
1397 				break;
1398 			}
1399 		}
1400 	} else {
1401 		ret = ipsec_netif_tx_doorbell_one(nxprov, nexus, ring, flags, 0);
1402 	}
1403 
1404 	ipsec_data_move_end(pcb);
1405 	return ret;
1406 }
1407 
1408 static errno_t
ipsec_netif_sync_rx(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t rx_ring,uint32_t flags)1409 ipsec_netif_sync_rx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
1410     kern_channel_ring_t rx_ring, uint32_t flags)
1411 {
1412 #pragma unused(nxprov)
1413 #pragma unused(flags)
1414 	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
1415 	struct kern_channel_ring_stat_increment rx_ring_stats;
1416 
1417 	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
1418 
1419 	if (!ipsec_data_move_begin(pcb)) {
1420 		os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
1421 		return 0;
1422 	}
1423 
1424 	lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
1425 
1426 	// Reclaim user-released slots
1427 	(void) kern_channel_reclaim(rx_ring);
1428 
1429 	STATS_INC(nifs, NETIF_STATS_RX_SYNC);
1430 
1431 	uint32_t avail = kern_channel_available_slot_count(rx_ring);
1432 	if (avail == 0) {
1433 		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
1434 		ipsec_data_move_end(pcb);
1435 		return 0;
1436 	}
1437 
1438 	struct kern_pbufpool *rx_pp = rx_ring->ckr_pp;
1439 	VERIFY(rx_pp != NULL);
1440 	bzero(&rx_ring_stats, sizeof(rx_ring_stats));
1441 	kern_channel_slot_t rx_pslot = NULL;
1442 	kern_channel_slot_t rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);
1443 
1444 	while (rx_slot != NULL) {
1445 		// Check for a waiting packet
1446 		lck_mtx_lock(&pcb->ipsec_input_chain_lock);
1447 		mbuf_t data = pcb->ipsec_input_chain;
1448 		if (data == NULL) {
1449 			lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
1450 			break;
1451 		}
1452 
1453 		// Allocate rx packet
1454 		kern_packet_t rx_ph = 0;
1455 		errno_t error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
1456 		if (__improbable(error != 0)) {
1457 			STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_PKT);
1458 			STATS_INC(nifs, NETIF_STATS_DROP);
1459 			lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
1460 			break;
1461 		}
1462 
1463 		// Advance waiting packets
1464 		if (pcb->ipsec_input_chain_count > 0) {
1465 			pcb->ipsec_input_chain_count--;
1466 		}
1467 		pcb->ipsec_input_chain = data->m_nextpkt;
1468 		data->m_nextpkt = NULL;
1469 		if (pcb->ipsec_input_chain == NULL) {
1470 			pcb->ipsec_input_chain_last = NULL;
1471 		}
1472 		lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
1473 
1474 		size_t length = mbuf_pkthdr_len(data);
1475 
1476 		if (length < sizeof(struct ip)) {
1477 			// Flush data
1478 			mbuf_freem(data);
1479 			kern_pbufpool_free(rx_pp, rx_ph);
1480 			STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
1481 			STATS_INC(nifs, NETIF_STATS_DROP);
1482 			os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: legacy decrypted packet length cannot hold IP %zu < %zu\n",
1483 			    pcb->ipsec_ifp->if_xname, length, sizeof(struct ip));
1484 			continue;
1485 		}
1486 
1487 		uint32_t af = 0;
1488 		struct ip *ip = mtod(data, struct ip *);
1489 		u_int ip_version = ip->ip_v;
1490 		switch (ip_version) {
1491 		case 4: {
1492 			af = AF_INET;
1493 			break;
1494 		}
1495 		case 6: {
1496 			af = AF_INET6;
1497 			break;
1498 		}
1499 		default: {
1500 			os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: legacy unknown ip version %u\n",
1501 			    pcb->ipsec_ifp->if_xname, ip_version);
1502 			break;
1503 		}
1504 		}
1505 
1506 		if (length > PP_BUF_SIZE_DEF(rx_pp) ||
1507 		    (pcb->ipsec_frag_size_set && length > pcb->ipsec_input_frag_size)) {
1508 			// We need to fragment to send up into the netif
1509 
1510 			u_int32_t fragment_mtu = PP_BUF_SIZE_DEF(rx_pp);
1511 			if (pcb->ipsec_frag_size_set &&
1512 			    pcb->ipsec_input_frag_size < PP_BUF_SIZE_DEF(rx_pp)) {
1513 				fragment_mtu = pcb->ipsec_input_frag_size;
1514 			}
1515 
1516 			mbuf_t fragment_chain = NULL;
1517 			switch (af) {
1518 			case AF_INET: {
1519 				// ip_fragment expects the length in host order
1520 				ip->ip_len = ntohs(ip->ip_len);
1521 
1522 				// ip_fragment will modify the original data, don't free
1523 				int fragment_error = ip_fragment(data, pcb->ipsec_ifp, fragment_mtu, TRUE);
1524 				if (fragment_error == 0 && data != NULL) {
1525 					fragment_chain = data;
1526 				} else {
1527 					STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
1528 					STATS_INC(nifs, NETIF_STATS_DROP);
1529 					os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: failed to fragment IPv4 packet of length %zu (%d)\n",
1530 					    pcb->ipsec_ifp->if_xname, length, fragment_error);
1531 				}
1532 				break;
1533 			}
1534 			case AF_INET6: {
1535 				if (length < sizeof(struct ip6_hdr)) {
1536 					mbuf_freem(data);
1537 					STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
1538 					STATS_INC(nifs, NETIF_STATS_DROP);
1539 					os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: failed to fragment IPv6 packet of length %zu < %zu\n",
1540 					    pcb->ipsec_ifp->if_xname, length, sizeof(struct ip6_hdr));
1541 				} else {
1542 					// ip6_do_fragmentation will free the original data on success only
1543 					struct ip6_hdr *ip6 = mtod(data, struct ip6_hdr *);
1544 
1545 					int fragment_error = ip6_do_fragmentation(&data, 0, pcb->ipsec_ifp, sizeof(struct ip6_hdr),
1546 					    ip6, NULL, fragment_mtu, ip6->ip6_nxt, htonl(ip6_randomid()));
1547 					if (fragment_error == 0 && data != NULL) {
1548 						fragment_chain = data;
1549 					} else {
1550 						mbuf_freem(data);
1551 						STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
1552 						STATS_INC(nifs, NETIF_STATS_DROP);
1553 						os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: failed to fragment IPv6 packet of length %zu (%d)\n",
1554 						    pcb->ipsec_ifp->if_xname, length, fragment_error);
1555 					}
1556 				}
1557 				break;
1558 			}
1559 			default: {
1560 				// Cannot fragment unknown families
1561 				mbuf_freem(data);
1562 				STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
1563 				STATS_INC(nifs, NETIF_STATS_DROP);
1564 				os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: uknown legacy decrypted packet length %zu > %u\n",
1565 				    pcb->ipsec_ifp->if_xname, length, PP_BUF_SIZE_DEF(rx_pp));
1566 				break;
1567 			}
1568 			}
1569 
1570 			if (fragment_chain != NULL) {
1571 				// Add fragments to chain before continuing
1572 				lck_mtx_lock(&pcb->ipsec_input_chain_lock);
1573 				if (pcb->ipsec_input_chain != NULL) {
1574 					pcb->ipsec_input_chain_last->m_nextpkt = fragment_chain;
1575 				} else {
1576 					pcb->ipsec_input_chain = fragment_chain;
1577 				}
1578 				pcb->ipsec_input_chain_count++;
1579 				while (fragment_chain->m_nextpkt) {
1580 					VERIFY(fragment_chain != fragment_chain->m_nextpkt);
1581 					fragment_chain = fragment_chain->m_nextpkt;
1582 					pcb->ipsec_input_chain_count++;
1583 				}
1584 				pcb->ipsec_input_chain_last = fragment_chain;
1585 				lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
1586 			}
1587 
1588 			// Make sure to free unused rx packet
1589 			kern_pbufpool_free(rx_pp, rx_ph);
1590 
1591 			continue;
1592 		}
1593 
1594 		mbuf_pkthdr_setrcvif(data, pcb->ipsec_ifp);
1595 
1596 		// Fillout rx packet
1597 		kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
1598 		VERIFY(rx_buf != NULL);
1599 		void *rx_baddr = kern_buflet_get_data_address(rx_buf);
1600 		VERIFY(rx_baddr != NULL);
1601 
1602 		// Copy-in data from mbuf to buflet
1603 		mbuf_copydata(data, 0, length, (void *)rx_baddr);
1604 		kern_packet_clear_flow_uuid(rx_ph);     // Zero flow id
1605 
1606 		// Finalize and attach the packet
1607 		error = kern_buflet_set_data_offset(rx_buf, 0);
1608 		VERIFY(error == 0);
1609 		error = kern_buflet_set_data_length(rx_buf, (uint16_t)length);
1610 		VERIFY(error == 0);
1611 		error = kern_packet_set_headroom(rx_ph, 0);
1612 		VERIFY(error == 0);
1613 		error = kern_packet_finalize(rx_ph);
1614 		VERIFY(error == 0);
1615 		error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
1616 		VERIFY(error == 0);
1617 
1618 		STATS_INC(nifs, NETIF_STATS_RX_PACKETS);
1619 		STATS_INC(nifs, NETIF_STATS_RX_COPY_MBUF);
1620 		bpf_tap_packet_in(pcb->ipsec_ifp, DLT_RAW, rx_ph, NULL, 0);
1621 
1622 		rx_ring_stats.kcrsi_slots_transferred++;
1623 		rx_ring_stats.kcrsi_bytes_transferred += length;
1624 
1625 		if (!pcb->ipsec_ext_ifdata_stats) {
1626 			ifnet_stat_increment_in(pcb->ipsec_ifp, 1, (uint16_t)length, 0);
1627 		}
1628 
1629 		mbuf_freem(data);
1630 
1631 		// Advance ring
1632 		rx_pslot = rx_slot;
1633 		rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
1634 	}
1635 
1636 	for (uint8_t ring_idx = 0; ring_idx < pcb->ipsec_kpipe_count; ring_idx++) {
1637 		struct kern_channel_ring_stat_increment tx_ring_stats;
1638 		bzero(&tx_ring_stats, sizeof(tx_ring_stats));
1639 		kern_channel_ring_t tx_ring = pcb->ipsec_kpipe_txring[ring_idx];
1640 		kern_channel_slot_t tx_pslot = NULL;
1641 		kern_channel_slot_t tx_slot = NULL;
1642 		if (tx_ring == NULL) {
1643 			// Net-If TX ring not set up yet, nothing to read
1644 			goto done;
1645 		}
1646 		// Unlock ipsec before entering ring
1647 		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
1648 
1649 		(void)kr_enter(tx_ring, TRUE);
1650 
1651 		// Lock again after entering and validate
1652 		lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
1653 
1654 		if (tx_ring != pcb->ipsec_kpipe_txring[ring_idx]) {
1655 			goto done;
1656 		}
1657 
1658 		tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
1659 		if (tx_slot == NULL) {
1660 			// Nothing to read, don't bother signalling
1661 			goto done;
1662 		}
1663 
1664 		while (rx_slot != NULL && tx_slot != NULL) {
1665 			size_t length = 0;
1666 			mbuf_t data = NULL;
1667 			errno_t error = 0;
1668 			uint32_t af;
1669 
1670 			// Allocate rx packet
1671 			kern_packet_t rx_ph = 0;
1672 			error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
1673 			if (__improbable(error != 0)) {
1674 				STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_PKT);
1675 				STATS_INC(nifs, NETIF_STATS_DROP);
1676 				break;
1677 			}
1678 
1679 			kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);
1680 
1681 			// Advance TX ring
1682 			tx_pslot = tx_slot;
1683 			tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
1684 
1685 			if (tx_ph == 0) {
1686 				kern_pbufpool_free(rx_pp, rx_ph);
1687 				continue;
1688 			}
1689 
1690 			kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
1691 			VERIFY(tx_buf != NULL);
1692 			uint8_t *tx_baddr = kern_buflet_get_data_address(tx_buf);
1693 			VERIFY(tx_baddr != 0);
1694 			tx_baddr += kern_buflet_get_data_offset(tx_buf);
1695 
1696 			length = MIN(kern_packet_get_data_length(tx_ph),
1697 			    pcb->ipsec_slot_size);
1698 
1699 			// Increment TX stats
1700 			tx_ring_stats.kcrsi_slots_transferred++;
1701 			tx_ring_stats.kcrsi_bytes_transferred += length;
1702 
1703 			if (length >= sizeof(struct ip)) {
1704 				error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, &data);
1705 				if (error == 0) {
1706 					error = mbuf_copyback(data, 0, length, tx_baddr, MBUF_DONTWAIT);
1707 					if (error == 0) {
1708 						// Check for wake packet flag
1709 						uuid_t flow_uuid;
1710 						kern_packet_get_flow_uuid(tx_ph, &flow_uuid);
1711 						u_int8_t *id_8 = (u_int8_t *)(uintptr_t)flow_uuid;
1712 						if ((id_8[0] & IPSEC_KPIPE_FLAG_WAKE_PKT) == IPSEC_KPIPE_FLAG_WAKE_PKT) {
1713 							os_log_info(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: wake packet flag is set\n",
1714 							    pcb->ipsec_ifp->if_xname);
1715 							data->m_pkthdr.pkt_flags |= PKTF_WAKE_PKT;
1716 						}
1717 
1718 						lck_mtx_lock(&pcb->ipsec_kpipe_decrypt_lock);
1719 						struct ip *ip = mtod(data, struct ip *);
1720 						u_int ip_version = ip->ip_v;
1721 						switch (ip_version) {
1722 						case 4: {
1723 							af = AF_INET;
1724 							ip->ip_len = ntohs(ip->ip_len) - sizeof(struct ip);
1725 							ip->ip_off = ntohs(ip->ip_off);
1726 
1727 							if (length < ip->ip_len) {
1728 								os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: IPv4 packet length too short (%zu < %u)\n",
1729 								    pcb->ipsec_ifp->if_xname, length, ip->ip_len);
1730 								STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
1731 								STATS_INC(nifs, NETIF_STATS_DROP);
1732 								mbuf_freem(data);
1733 								data = NULL;
1734 							} else {
1735 								data = esp4_input_extended(data, sizeof(struct ip), pcb->ipsec_ifp);
1736 							}
1737 							break;
1738 						}
1739 						case 6: {
1740 							if (length < sizeof(struct ip6_hdr)) {
1741 								os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: IPv6 packet length too short for header %zu\n",
1742 								    pcb->ipsec_ifp->if_xname, length);
1743 								STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
1744 								STATS_INC(nifs, NETIF_STATS_DROP);
1745 								mbuf_freem(data);
1746 								data = NULL;
1747 							} else {
1748 								af = AF_INET6;
1749 								struct ip6_hdr *ip6 = mtod(data, struct ip6_hdr *);
1750 								const size_t ip6_len = sizeof(*ip6) + ntohs(ip6->ip6_plen);
1751 								if (length < ip6_len) {
1752 									os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: IPv6 packet length too short (%zu < %zu)\n",
1753 									    pcb->ipsec_ifp->if_xname, length, ip6_len);
1754 									STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
1755 									STATS_INC(nifs, NETIF_STATS_DROP);
1756 									mbuf_freem(data);
1757 									data = NULL;
1758 								} else {
1759 									int offset = sizeof(struct ip6_hdr);
1760 									esp6_input_extended(&data, &offset, ip6->ip6_nxt, pcb->ipsec_ifp);
1761 								}
1762 							}
1763 							break;
1764 						}
1765 						default: {
1766 							os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: unknown ip version %u\n",
1767 							    pcb->ipsec_ifp->if_xname, ip_version);
1768 							STATS_INC(nifs, NETIF_STATS_DROP);
1769 							mbuf_freem(data);
1770 							data = NULL;
1771 							break;
1772 						}
1773 						}
1774 						lck_mtx_unlock(&pcb->ipsec_kpipe_decrypt_lock);
1775 					} else {
1776 						os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s - mbuf_copyback(%zu) error %d\n", pcb->ipsec_ifp->if_xname, length, error);
1777 						STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
1778 						STATS_INC(nifs, NETIF_STATS_DROP);
1779 						mbuf_freem(data);
1780 						data = NULL;
1781 					}
1782 				} else {
1783 					os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s - mbuf_gethdr error %d\n", pcb->ipsec_ifp->if_xname, error);
1784 					STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
1785 					STATS_INC(nifs, NETIF_STATS_DROP);
1786 				}
1787 			} else {
1788 				os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s - bad packet length %zu\n", pcb->ipsec_ifp->if_xname, length);
1789 				STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
1790 				STATS_INC(nifs, NETIF_STATS_DROP);
1791 			}
1792 
1793 			if (data == NULL) {
1794 				// Failed to get decrypted data data
1795 				kern_pbufpool_free(rx_pp, rx_ph);
1796 				continue;
1797 			}
1798 
1799 			length = mbuf_pkthdr_len(data);
1800 			if (length > PP_BUF_SIZE_DEF(rx_pp)) {
1801 				// Flush data
1802 				mbuf_freem(data);
1803 				kern_pbufpool_free(rx_pp, rx_ph);
1804 				STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
1805 				STATS_INC(nifs, NETIF_STATS_DROP);
1806 				os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: decrypted packet length %zu > %u\n",
1807 				    pcb->ipsec_ifp->if_xname, length, PP_BUF_SIZE_DEF(rx_pp));
1808 				continue;
1809 			}
1810 
1811 			mbuf_pkthdr_setrcvif(data, pcb->ipsec_ifp);
1812 
1813 			// Fillout rx packet
1814 			kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
1815 			VERIFY(rx_buf != NULL);
1816 			void *rx_baddr = kern_buflet_get_data_address(rx_buf);
1817 			VERIFY(rx_baddr != NULL);
1818 
1819 			// Copy-in data from mbuf to buflet
1820 			mbuf_copydata(data, 0, length, (void *)rx_baddr);
1821 			kern_packet_clear_flow_uuid(rx_ph);     // Zero flow id
1822 
1823 			// Finalize and attach the packet
1824 			error = kern_buflet_set_data_offset(rx_buf, 0);
1825 			VERIFY(error == 0);
1826 			error = kern_buflet_set_data_length(rx_buf, (uint16_t)length);
1827 			VERIFY(error == 0);
1828 			error = kern_packet_set_link_header_offset(rx_ph, 0);
1829 			VERIFY(error == 0);
1830 			error = kern_packet_set_network_header_offset(rx_ph, 0);
1831 			VERIFY(error == 0);
1832 			error = kern_packet_finalize(rx_ph);
1833 			VERIFY(error == 0);
1834 			error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
1835 			VERIFY(error == 0);
1836 
1837 			STATS_INC(nifs, NETIF_STATS_RX_PACKETS);
1838 			STATS_INC(nifs, NETIF_STATS_RX_COPY_DIRECT);
1839 			bpf_tap_packet_in(pcb->ipsec_ifp, DLT_RAW, rx_ph, NULL, 0);
1840 
1841 			rx_ring_stats.kcrsi_slots_transferred++;
1842 			rx_ring_stats.kcrsi_bytes_transferred += length;
1843 
1844 			if (!pcb->ipsec_ext_ifdata_stats) {
1845 				ifnet_stat_increment_in(pcb->ipsec_ifp, 1, (uint16_t)length, 0);
1846 			}
1847 
1848 			mbuf_freem(data);
1849 
1850 			rx_pslot = rx_slot;
1851 			rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
1852 		}
1853 
1854 done:
1855 		if (tx_pslot) {
1856 			kern_channel_advance_slot(tx_ring, tx_pslot);
1857 			kern_channel_increment_ring_net_stats(tx_ring, pcb->ipsec_ifp, &tx_ring_stats);
1858 			(void)kern_channel_reclaim(tx_ring);
1859 		}
1860 
1861 		// Unlock first, then exit ring
1862 		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
1863 		if (tx_ring != NULL) {
1864 			if (tx_pslot != NULL) {
1865 				kern_channel_notify(tx_ring, 0);
1866 			}
1867 			kr_exit(tx_ring);
1868 		}
1869 
1870 		lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
1871 	}
1872 
1873 	if (rx_pslot) {
1874 		kern_channel_advance_slot(rx_ring, rx_pslot);
1875 		kern_channel_increment_ring_net_stats(rx_ring, pcb->ipsec_ifp, &rx_ring_stats);
1876 	}
1877 
1878 
1879 	lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
1880 
1881 	ipsec_data_move_end(pcb);
1882 	return 0;
1883 }
1884 
1885 static errno_t
ipsec_nexus_ifattach(struct ipsec_pcb * pcb,struct ifnet_init_eparams * init_params,struct ifnet ** ifp)1886 ipsec_nexus_ifattach(struct ipsec_pcb *pcb,
1887     struct ifnet_init_eparams *init_params,
1888     struct ifnet **ifp)
1889 {
1890 	errno_t err;
1891 	nexus_controller_t controller = kern_nexus_shared_controller();
1892 	struct kern_nexus_net_init net_init;
1893 	struct kern_pbufpool_init pp_init;
1894 
1895 	nexus_name_t provider_name;
1896 	snprintf((char *)provider_name, sizeof(provider_name),
1897 	    "com.apple.netif.%s", pcb->ipsec_if_xname);
1898 
1899 	struct kern_nexus_provider_init prov_init = {
1900 		.nxpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
1901 		.nxpi_flags = NXPIF_VIRTUAL_DEVICE,
1902 		.nxpi_pre_connect = ipsec_nexus_pre_connect,
1903 		.nxpi_connected = ipsec_nexus_connected,
1904 		.nxpi_pre_disconnect = ipsec_netif_pre_disconnect,
1905 		.nxpi_disconnected = ipsec_nexus_disconnected,
1906 		.nxpi_ring_init = ipsec_netif_ring_init,
1907 		.nxpi_ring_fini = ipsec_netif_ring_fini,
1908 		.nxpi_slot_init = NULL,
1909 		.nxpi_slot_fini = NULL,
1910 		.nxpi_sync_tx = ipsec_netif_sync_tx,
1911 		.nxpi_sync_rx = ipsec_netif_sync_rx,
1912 		.nxpi_tx_doorbell = ipsec_netif_tx_doorbell,
1913 	};
1914 
1915 	nexus_attr_t nxa = NULL;
1916 	err = kern_nexus_attr_create(&nxa);
1917 	IPSEC_IF_VERIFY(err == 0);
1918 	if (err != 0) {
1919 		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_attr_create failed: %d\n",
1920 		    __func__, err);
1921 		goto failed;
1922 	}
1923 
1924 	uint64_t slot_buffer_size = pcb->ipsec_slot_size;
1925 	err = kern_nexus_attr_set(nxa, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
1926 	VERIFY(err == 0);
1927 
1928 	// Reset ring size for netif nexus to limit memory usage
1929 	uint64_t ring_size = pcb->ipsec_netif_ring_size;
1930 	err = kern_nexus_attr_set(nxa, NEXUS_ATTR_TX_SLOTS, ring_size);
1931 	VERIFY(err == 0);
1932 	err = kern_nexus_attr_set(nxa, NEXUS_ATTR_RX_SLOTS, ring_size);
1933 	VERIFY(err == 0);
1934 
1935 	assert(err == 0);
1936 
1937 	if (ipsec_in_wmm_mode(pcb)) {
1938 		os_log(OS_LOG_DEFAULT, "%s: %s enabling wmm mode\n",
1939 		    __func__, pcb->ipsec_if_xname);
1940 
1941 		init_params->output_sched_model = IFNET_SCHED_MODEL_DRIVER_MANAGED;
1942 
1943 		err = kern_nexus_attr_set(nxa, NEXUS_ATTR_TX_RINGS,
1944 		    IPSEC_NETIF_WMM_TX_RING_COUNT);
1945 		VERIFY(err == 0);
1946 		err = kern_nexus_attr_set(nxa, NEXUS_ATTR_RX_RINGS,
1947 		    IPSEC_NETIF_WMM_RX_RING_COUNT);
1948 		VERIFY(err == 0);
1949 
1950 		err = kern_nexus_attr_set(nxa, NEXUS_ATTR_QMAP, NEXUS_QMAP_TYPE_WMM);
1951 		VERIFY(err == 0);
1952 	}
1953 
1954 	pcb->ipsec_netif_txring_size = ring_size;
1955 
1956 	bzero(&pp_init, sizeof(pp_init));
1957 	pp_init.kbi_version = KERN_PBUFPOOL_CURRENT_VERSION;
1958 	pp_init.kbi_flags |= KBIF_VIRTUAL_DEVICE;
1959 	// Note: we need more packets than can be held in the tx and rx rings because
1960 	// packets can also be in the AQM queue(s)
1961 	pp_init.kbi_packets = pcb->ipsec_netif_ring_size * (2 * pcb->ipsec_kpipe_count + 1);
1962 	pp_init.kbi_bufsize = pcb->ipsec_slot_size;
1963 	pp_init.kbi_buf_seg_size = IPSEC_IF_DEFAULT_BUF_SEG_SIZE;
1964 	pp_init.kbi_max_frags = 1;
1965 	(void) snprintf((char *)pp_init.kbi_name, sizeof(pp_init.kbi_name),
1966 	    "%s", provider_name);
1967 	pp_init.kbi_ctx = NULL;
1968 	pp_init.kbi_ctx_retain = NULL;
1969 	pp_init.kbi_ctx_release = NULL;
1970 
1971 	err = kern_pbufpool_create(&pp_init, &pcb->ipsec_netif_pp, NULL);
1972 	if (err != 0) {
1973 		os_log_error(OS_LOG_DEFAULT, "%s pbufbool create failed, error %d\n", __func__, err);
1974 		goto failed;
1975 	}
1976 
1977 	err = kern_nexus_controller_register_provider(controller,
1978 	    ipsec_nx_dom_prov,
1979 	    provider_name,
1980 	    &prov_init,
1981 	    sizeof(prov_init),
1982 	    nxa,
1983 	    &pcb->ipsec_nx.if_provider);
1984 	IPSEC_IF_VERIFY(err == 0);
1985 	if (err != 0) {
1986 		os_log_error(OS_LOG_DEFAULT, "%s register provider failed, error %d\n",
1987 		    __func__, err);
1988 		goto failed;
1989 	}
1990 
1991 	bzero(&net_init, sizeof(net_init));
1992 	net_init.nxneti_version = KERN_NEXUS_NET_CURRENT_VERSION;
1993 	net_init.nxneti_flags = 0;
1994 	net_init.nxneti_eparams = init_params;
1995 	net_init.nxneti_lladdr = NULL;
1996 	net_init.nxneti_prepare = ipsec_netif_prepare;
1997 	net_init.nxneti_rx_pbufpool = pcb->ipsec_netif_pp;
1998 	net_init.nxneti_tx_pbufpool = pcb->ipsec_netif_pp;
1999 	err = kern_nexus_controller_alloc_net_provider_instance(controller,
2000 	    pcb->ipsec_nx.if_provider,
2001 	    pcb,
2002 	    NULL,
2003 	    &pcb->ipsec_nx.if_instance,
2004 	    &net_init,
2005 	    ifp);
2006 	IPSEC_IF_VERIFY(err == 0);
2007 	if (err != 0) {
2008 		os_log_error(OS_LOG_DEFAULT, "%s alloc_net_provider_instance failed, %d\n",
2009 		    __func__, err);
2010 		kern_nexus_controller_deregister_provider(controller,
2011 		    pcb->ipsec_nx.if_provider);
2012 		uuid_clear(pcb->ipsec_nx.if_provider);
2013 		goto failed;
2014 	}
2015 
2016 failed:
2017 	if (nxa) {
2018 		kern_nexus_attr_destroy(nxa);
2019 	}
2020 	if (err && pcb->ipsec_netif_pp != NULL) {
2021 		kern_pbufpool_destroy(pcb->ipsec_netif_pp);
2022 		pcb->ipsec_netif_pp = NULL;
2023 	}
2024 	return err;
2025 }
2026 
2027 static void
ipsec_detach_provider_and_instance(uuid_t provider,uuid_t instance)2028 ipsec_detach_provider_and_instance(uuid_t provider, uuid_t instance)
2029 {
2030 	nexus_controller_t controller = kern_nexus_shared_controller();
2031 	errno_t err;
2032 
2033 	if (!uuid_is_null(instance)) {
2034 		err = kern_nexus_controller_free_provider_instance(controller,
2035 		    instance);
2036 		if (err != 0) {
2037 			os_log_error(OS_LOG_DEFAULT, "%s free_provider_instance failed %d\n",
2038 			    __func__, err);
2039 		}
2040 		uuid_clear(instance);
2041 	}
2042 	if (!uuid_is_null(provider)) {
2043 		err = kern_nexus_controller_deregister_provider(controller,
2044 		    provider);
2045 		if (err != 0) {
2046 			os_log_error(OS_LOG_DEFAULT, "%s deregister_provider %d\n", __func__, err);
2047 		}
2048 		uuid_clear(provider);
2049 	}
2050 	return;
2051 }
2052 
2053 static void
ipsec_nexus_detach(struct ipsec_pcb * pcb)2054 ipsec_nexus_detach(struct ipsec_pcb *pcb)
2055 {
2056 	ipsec_nx_t nx = &pcb->ipsec_nx;
2057 	nexus_controller_t controller = kern_nexus_shared_controller();
2058 	errno_t err;
2059 
2060 	if (!uuid_is_null(nx->fsw_device)) {
2061 		err = kern_nexus_ifdetach(controller,
2062 		    nx->fsw_instance,
2063 		    nx->fsw_device);
2064 		if (err != 0) {
2065 			os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_ifdetach ms device failed %d\n",
2066 			    __func__, err);
2067 		}
2068 	}
2069 
2070 	ipsec_detach_provider_and_instance(nx->fsw_provider,
2071 	    nx->fsw_instance);
2072 	ipsec_detach_provider_and_instance(nx->if_provider,
2073 	    nx->if_instance);
2074 
2075 	if (pcb->ipsec_netif_pp != NULL) {
2076 		kern_pbufpool_destroy(pcb->ipsec_netif_pp);
2077 		pcb->ipsec_netif_pp = NULL;
2078 	}
2079 	memset(nx, 0, sizeof(*nx));
2080 }
2081 
/*
 * ipsec_create_fs_provider_and_instance - register a flowswitch nexus
 * provider named "com.apple.<type_name>.<ifname>" and allocate one
 * instance of it.
 *
 * Provider attributes (slot buffer size, TX/RX ring sizes, max frags)
 * come from the pcb configuration. On success *provider and *instance
 * receive the new UUIDs; on failure any partial registration is undone
 * and the failing errno is returned.
 */
static errno_t
ipsec_create_fs_provider_and_instance(struct ipsec_pcb *pcb,
    const char *type_name,
    const char *ifname,
    uuid_t *provider, uuid_t *instance)
{
	nexus_attr_t attr = NULL;
	nexus_controller_t controller = kern_nexus_shared_controller();
	uuid_t dom_prov;
	errno_t err;
	struct kern_nexus_init init;
	nexus_name_t    provider_name;

	err = kern_nexus_get_default_domain_provider(NEXUS_TYPE_FLOW_SWITCH,
	    &dom_prov);
	IPSEC_IF_VERIFY(err == 0);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s can't get %s provider, error %d\n",
		    __func__, type_name, err);
		goto failed;
	}

	err = kern_nexus_attr_create(&attr);
	IPSEC_IF_VERIFY(err == 0);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_attr_create failed: %d\n",
		    __func__, err);
		goto failed;
	}

	uint64_t slot_buffer_size = pcb->ipsec_slot_size;
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
	VERIFY(err == 0);

	// Reset ring size for flowswitch nexus to limit memory usage. Larger RX than netif.
	uint64_t tx_ring_size = pcb->ipsec_tx_fsw_ring_size;
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_TX_SLOTS, tx_ring_size);
	VERIFY(err == 0);
	uint64_t rx_ring_size = pcb->ipsec_rx_fsw_ring_size;
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_RX_SLOTS, rx_ring_size);
	VERIFY(err == 0);
	/*
	 * Configure flowswitch to use super-packet (multi-buflet).
	 * This allows flowswitch to perform intra-stack packet aggregation.
	 */
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_MAX_FRAGS,
	    NX_FSW_TCP_RX_AGG_ENABLED() ? NX_PBUF_FRAGS_MAX : 1);
	VERIFY(err == 0);

	snprintf((char *)provider_name, sizeof(provider_name),
	    "com.apple.%s.%s", type_name, ifname);
	err = kern_nexus_controller_register_provider(controller,
	    dom_prov,
	    provider_name,
	    NULL,
	    0,
	    attr,
	    provider);
	/* attr is no longer needed once registration has been attempted. */
	kern_nexus_attr_destroy(attr);
	attr = NULL;
	IPSEC_IF_VERIFY(err == 0);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s register %s provider failed, error %d\n",
		    __func__, type_name, err);
		goto failed;
	}
	bzero(&init, sizeof(init));
	init.nxi_version = KERN_NEXUS_CURRENT_VERSION;
	err = kern_nexus_controller_alloc_provider_instance(controller,
	    *provider,
	    NULL, NULL,
	    instance, &init);
	IPSEC_IF_VERIFY(err == 0);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s alloc_provider_instance %s failed, %d\n",
		    __func__, type_name, err);
		/* Undo the registration so the caller sees no partial state. */
		kern_nexus_controller_deregister_provider(controller,
		    *provider);
		uuid_clear(*provider);
	}
failed:
	return err;
}
2165 
2166 static errno_t
ipsec_flowswitch_attach(struct ipsec_pcb * pcb)2167 ipsec_flowswitch_attach(struct ipsec_pcb *pcb)
2168 {
2169 	nexus_controller_t controller = kern_nexus_shared_controller();
2170 	errno_t err = 0;
2171 	ipsec_nx_t nx = &pcb->ipsec_nx;
2172 
2173 	// Allocate flowswitch
2174 	err = ipsec_create_fs_provider_and_instance(pcb,
2175 	    "flowswitch",
2176 	    pcb->ipsec_ifp->if_xname,
2177 	    &nx->fsw_provider,
2178 	    &nx->fsw_instance);
2179 	if (err != 0) {
2180 		os_log_error(OS_LOG_DEFAULT, "%s: failed to create bridge provider and instance\n",
2181 		    __func__);
2182 		goto failed;
2183 	}
2184 
2185 	// Attach flowswitch to device port
2186 	err = kern_nexus_ifattach(controller, nx->fsw_instance,
2187 	    NULL, nx->if_instance,
2188 	    FALSE, &nx->fsw_device);
2189 	if (err != 0) {
2190 		os_log_error(OS_LOG_DEFAULT, "%s kern_nexus_ifattach ms device %d\n", __func__, err);
2191 		goto failed;
2192 	}
2193 
2194 	// Extract the agent UUID and save for later
2195 	struct kern_nexus *flowswitch_nx = nx_find(nx->fsw_instance, false);
2196 	if (flowswitch_nx != NULL) {
2197 		struct nx_flowswitch *flowswitch = NX_FSW_PRIVATE(flowswitch_nx);
2198 		if (flowswitch != NULL) {
2199 			FSW_RLOCK(flowswitch);
2200 			uuid_copy(nx->fsw_agent, flowswitch->fsw_agent_uuid);
2201 			FSW_UNLOCK(flowswitch);
2202 		} else {
2203 			os_log_error(OS_LOG_DEFAULT, "ipsec_flowswitch_attach - flowswitch is NULL\n");
2204 		}
2205 		nx_release(flowswitch_nx);
2206 	} else {
2207 		os_log_error(OS_LOG_DEFAULT, "ipsec_flowswitch_attach - unable to find flowswitch nexus\n");
2208 	}
2209 
2210 	return 0;
2211 
2212 failed:
2213 	ipsec_nexus_detach(pcb);
2214 
2215 	errno_t detach_error = 0;
2216 	if ((detach_error = ifnet_detach(pcb->ipsec_ifp)) != 0) {
2217 		panic("ipsec_flowswitch_attach - ifnet_detach failed: %d", detach_error);
2218 		/* NOT REACHED */
2219 	}
2220 
2221 	return err;
2222 }
2223 
2224 #pragma mark Kernel Pipe Nexus
2225 
/*
 * ipsec_register_kernel_pipe_nexus - lazily create the shared kernel-pipe
 * nexus controller and register the ipsec kpipe provider on it.
 *
 * The controller is refcounted across all ipsec interfaces: only the
 * first caller (refcount 0 -> 1) performs the creation and registration;
 * later callers just take a reference and return. The pcb supplies ring
 * size preferences only. On failure the controller is destroyed and the
 * refcount reset to 0 so a later attempt can retry from scratch.
 */
static errno_t
ipsec_register_kernel_pipe_nexus(struct ipsec_pcb *pcb)
{
	nexus_attr_t nxa = NULL;
	errno_t result;

	lck_mtx_lock(&ipsec_lock);
	if (ipsec_ncd_refcount++) {
		/* Already set up by a previous caller; just took a reference. */
		lck_mtx_unlock(&ipsec_lock);
		return 0;
	}

	result = kern_nexus_controller_create(&ipsec_ncd);
	if (result) {
		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_controller_create failed: %d\n",
		    __FUNCTION__, result);
		goto done;
	}

	uuid_t dom_prov;
	result = kern_nexus_get_default_domain_provider(
		NEXUS_TYPE_KERNEL_PIPE, &dom_prov);
	if (result) {
		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_get_default_domain_provider failed: %d\n",
		    __FUNCTION__, result);
		goto done;
	}

	/* Callbacks shared by every ipsec kpipe channel. */
	struct kern_nexus_provider_init prov_init = {
		.nxpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
		.nxpi_flags = NXPIF_VIRTUAL_DEVICE,
		.nxpi_pre_connect = ipsec_nexus_pre_connect,
		.nxpi_connected = ipsec_nexus_connected,
		.nxpi_pre_disconnect = ipsec_nexus_pre_disconnect,
		.nxpi_disconnected = ipsec_nexus_disconnected,
		.nxpi_ring_init = ipsec_kpipe_ring_init,
		.nxpi_ring_fini = ipsec_kpipe_ring_fini,
		.nxpi_slot_init = NULL,
		.nxpi_slot_fini = NULL,
		.nxpi_sync_tx = ipsec_kpipe_sync_tx,
		.nxpi_sync_rx = ipsec_kpipe_sync_rx,
		.nxpi_tx_doorbell = NULL,
	};

	result = kern_nexus_attr_create(&nxa);
	if (result) {
		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_attr_create failed: %d\n",
		    __FUNCTION__, result);
		goto done;
	}

	uint64_t slot_buffer_size = IPSEC_IF_DEFAULT_SLOT_SIZE;
	result = kern_nexus_attr_set(nxa, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
	VERIFY(result == 0);

	// Reset ring size for kernel pipe nexus to limit memory usage
	// Note: It's better to have less on slots on the kpipe TX ring than the netif
	// so back pressure is applied at the AQM layer
	uint64_t ring_size =
	    pcb->ipsec_kpipe_tx_ring_size != 0 ? pcb->ipsec_kpipe_tx_ring_size :
	    pcb->ipsec_netif_ring_size != 0 ? pcb->ipsec_netif_ring_size :
	    if_ipsec_ring_size;
	result = kern_nexus_attr_set(nxa, NEXUS_ATTR_TX_SLOTS, ring_size);
	VERIFY(result == 0);

	ring_size =
	    pcb->ipsec_kpipe_rx_ring_size != 0 ? pcb->ipsec_kpipe_rx_ring_size :
	    pcb->ipsec_netif_ring_size != 0 ? pcb->ipsec_netif_ring_size :
	    if_ipsec_ring_size;
	result = kern_nexus_attr_set(nxa, NEXUS_ATTR_RX_SLOTS, ring_size);
	VERIFY(result == 0);

	result = kern_nexus_controller_register_provider(ipsec_ncd,
	    dom_prov,
	    (const uint8_t *)"com.apple.nexus.ipsec.kpipe",
	    &prov_init,
	    sizeof(prov_init),
	    nxa,
	    &ipsec_kpipe_uuid);
	if (result) {
		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_controller_register_provider failed: %d\n",
		    __FUNCTION__, result);
		goto done;
	}

done:
	if (nxa) {
		kern_nexus_attr_destroy(nxa);
	}

	if (result) {
		/* Roll back: drop the controller and the reference taken above
		 * so a later caller can retry the full setup. */
		if (ipsec_ncd) {
			kern_nexus_controller_destroy(ipsec_ncd);
			ipsec_ncd = NULL;
		}
		ipsec_ncd_refcount = 0;
	}

	lck_mtx_unlock(&ipsec_lock);

	return result;
}
2328 
2329 static void
ipsec_unregister_kernel_pipe_nexus(void)2330 ipsec_unregister_kernel_pipe_nexus(void)
2331 {
2332 	lck_mtx_lock(&ipsec_lock);
2333 
2334 	VERIFY(ipsec_ncd_refcount > 0);
2335 
2336 	if (--ipsec_ncd_refcount == 0) {
2337 		kern_nexus_controller_destroy(ipsec_ncd);
2338 		ipsec_ncd = NULL;
2339 	}
2340 
2341 	lck_mtx_unlock(&ipsec_lock);
2342 }
2343 
/* This structure only holds onto kpipe channels that need to be
 * freed in the future, but are cleared from the pcb under lock.
 * It lets the actual teardown happen later, outside that lock.
 */
struct ipsec_detached_channels {
	int count;                             /* number of valid entries in uuids[] */
	kern_pbufpool_t pp;                    /* kpipe packet pool to destroy */
	uuid_t uuids[IPSEC_IF_MAX_RING_COUNT]; /* detached channel instance UUIDs */
};
2352 
2353 static void
ipsec_detach_channels(struct ipsec_pcb * pcb,struct ipsec_detached_channels * dc)2354 ipsec_detach_channels(struct ipsec_pcb *pcb, struct ipsec_detached_channels *dc)
2355 {
2356 	LCK_RW_ASSERT(&pcb->ipsec_pcb_lock, LCK_RW_TYPE_EXCLUSIVE);
2357 
2358 	if (!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
2359 		for (int i = 0; i < IPSEC_IF_MAX_RING_COUNT; i++) {
2360 			VERIFY(uuid_is_null(pcb->ipsec_kpipe_uuid[i]));
2361 		}
2362 		dc->count = 0;
2363 		return;
2364 	}
2365 
2366 	dc->count = pcb->ipsec_kpipe_count;
2367 
2368 	VERIFY(dc->count >= 0);
2369 	VERIFY(dc->count <= IPSEC_IF_MAX_RING_COUNT);
2370 
2371 	for (int i = 0; i < dc->count; i++) {
2372 		VERIFY(!uuid_is_null(pcb->ipsec_kpipe_uuid[i]));
2373 		uuid_copy(dc->uuids[i], pcb->ipsec_kpipe_uuid[i]);
2374 		uuid_clear(pcb->ipsec_kpipe_uuid[i]);
2375 	}
2376 	for (int i = dc->count; i < IPSEC_IF_MAX_RING_COUNT; i++) {
2377 		VERIFY(uuid_is_null(pcb->ipsec_kpipe_uuid[i]));
2378 	}
2379 
2380 	if (dc->count) {
2381 		VERIFY(pcb->ipsec_kpipe_pp);
2382 	} else {
2383 		VERIFY(!pcb->ipsec_kpipe_pp);
2384 	}
2385 
2386 	dc->pp = pcb->ipsec_kpipe_pp;
2387 
2388 	pcb->ipsec_kpipe_pp = NULL;
2389 
2390 	ipsec_flag_clr(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED);
2391 }
2392 
2393 static void
ipsec_free_channels(struct ipsec_detached_channels * dc)2394 ipsec_free_channels(struct ipsec_detached_channels *dc)
2395 {
2396 	if (!dc->count) {
2397 		return;
2398 	}
2399 
2400 	for (int i = 0; i < dc->count; i++) {
2401 		errno_t result;
2402 		result = kern_nexus_controller_free_provider_instance(ipsec_ncd, dc->uuids[i]);
2403 		VERIFY(!result);
2404 	}
2405 
2406 	VERIFY(dc->pp);
2407 	kern_pbufpool_destroy(dc->pp);
2408 
2409 	ipsec_unregister_kernel_pipe_nexus();
2410 
2411 	memset(dc, 0, sizeof(*dc));
2412 }
2413 
2414 static errno_t
ipsec_enable_channel(struct ipsec_pcb * pcb,struct proc * proc)2415 ipsec_enable_channel(struct ipsec_pcb *pcb, struct proc *proc)
2416 {
2417 	struct kern_nexus_init init;
2418 	struct kern_pbufpool_init pp_init;
2419 	errno_t result;
2420 
2421 	kauth_cred_t cred = kauth_cred_get();
2422 	result = priv_check_cred(cred, PRIV_SKYWALK_REGISTER_KERNEL_PIPE, 0);
2423 	if (result) {
2424 		return result;
2425 	}
2426 
2427 	VERIFY(pcb->ipsec_kpipe_count);
2428 	VERIFY(!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED));
2429 
2430 	result = ipsec_register_kernel_pipe_nexus(pcb);
2431 
2432 	lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
2433 
2434 	if (result) {
2435 		os_log_error(OS_LOG_DEFAULT, "%s: %s failed to register kernel pipe nexus\n",
2436 		    __func__, pcb->ipsec_if_xname);
2437 		goto done;
2438 	}
2439 
2440 	VERIFY(ipsec_ncd);
2441 
2442 	bzero(&pp_init, sizeof(pp_init));
2443 	pp_init.kbi_version = KERN_PBUFPOOL_CURRENT_VERSION;
2444 	pp_init.kbi_flags |= KBIF_VIRTUAL_DEVICE;
2445 	// Note: We only needs are many packets as can be held in the tx and rx rings
2446 	pp_init.kbi_packets = pcb->ipsec_netif_ring_size * 2 * pcb->ipsec_kpipe_count;
2447 	pp_init.kbi_bufsize = pcb->ipsec_slot_size;
2448 	pp_init.kbi_buf_seg_size = IPSEC_IF_DEFAULT_BUF_SEG_SIZE;
2449 	pp_init.kbi_max_frags = 1;
2450 	pp_init.kbi_flags |= KBIF_QUANTUM;
2451 	(void) snprintf((char *)pp_init.kbi_name, sizeof(pp_init.kbi_name),
2452 	    "com.apple.kpipe.%s", pcb->ipsec_if_xname);
2453 	pp_init.kbi_ctx = NULL;
2454 	pp_init.kbi_ctx_retain = NULL;
2455 	pp_init.kbi_ctx_release = NULL;
2456 
2457 	result = kern_pbufpool_create(&pp_init, &pcb->ipsec_kpipe_pp,
2458 	    NULL);
2459 	if (result != 0) {
2460 		os_log_error(OS_LOG_DEFAULT, "%s: %s pbufbool create failed, error %d\n",
2461 		    __func__, pcb->ipsec_if_xname, result);
2462 		goto done;
2463 	}
2464 
2465 	bzero(&init, sizeof(init));
2466 	init.nxi_version = KERN_NEXUS_CURRENT_VERSION;
2467 	init.nxi_tx_pbufpool = pcb->ipsec_kpipe_pp;
2468 
2469 	for (unsigned int i = 0; i < pcb->ipsec_kpipe_count; i++) {
2470 		VERIFY(uuid_is_null(pcb->ipsec_kpipe_uuid[i]));
2471 		result = kern_nexus_controller_alloc_provider_instance(ipsec_ncd,
2472 		    ipsec_kpipe_uuid, pcb, NULL, &pcb->ipsec_kpipe_uuid[i], &init);
2473 
2474 		if (result == 0) {
2475 			nexus_port_t port = NEXUS_PORT_KERNEL_PIPE_CLIENT;
2476 			const bool has_proc_uuid = !uuid_is_null(pcb->ipsec_kpipe_proc_uuid);
2477 			pid_t pid = pcb->ipsec_kpipe_pid;
2478 			if (!pid && !has_proc_uuid) {
2479 				pid = proc_pid(proc);
2480 			}
2481 			result = kern_nexus_controller_bind_provider_instance(ipsec_ncd,
2482 			    pcb->ipsec_kpipe_uuid[i], &port,
2483 			    pid, has_proc_uuid ? pcb->ipsec_kpipe_proc_uuid : NULL, NULL,
2484 			    0, has_proc_uuid ? NEXUS_BIND_EXEC_UUID:NEXUS_BIND_PID);
2485 		}
2486 
2487 		if (result) {
2488 			/* Unwind all of them on error */
2489 			for (int j = 0; j < IPSEC_IF_MAX_RING_COUNT; j++) {
2490 				if (!uuid_is_null(pcb->ipsec_kpipe_uuid[j])) {
2491 					kern_nexus_controller_free_provider_instance(ipsec_ncd,
2492 					    pcb->ipsec_kpipe_uuid[j]);
2493 					uuid_clear(pcb->ipsec_kpipe_uuid[j]);
2494 				}
2495 			}
2496 			goto done;
2497 		}
2498 	}
2499 
2500 done:
2501 	lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
2502 
2503 	if (result) {
2504 		if (pcb->ipsec_kpipe_pp != NULL) {
2505 			kern_pbufpool_destroy(pcb->ipsec_kpipe_pp);
2506 			pcb->ipsec_kpipe_pp = NULL;
2507 		}
2508 		ipsec_unregister_kernel_pipe_nexus();
2509 	} else {
2510 		ipsec_flag_set(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED);
2511 	}
2512 
2513 	return result;
2514 }
2515 
2516 #endif // IPSEC_NEXUS
2517 
2518 
2519 /* Kernel control functions */
2520 
2521 static inline int
ipsec_find_by_unit(u_int32_t unit)2522 ipsec_find_by_unit(u_int32_t unit)
2523 {
2524 	struct ipsec_pcb *next_pcb = NULL;
2525 	int found = 0;
2526 
2527 	TAILQ_FOREACH(next_pcb, &ipsec_head, ipsec_chain) {
2528 		if (next_pcb->ipsec_unit == unit) {
2529 			found = 1;
2530 			break;
2531 		}
2532 	}
2533 
2534 	return found;
2535 }
2536 
/*
 * ipsec_free_pcb - release all pcb-owned resources and free the pcb.
 *
 * Frees any queued input mbuf chain, destroys the pcb locks, unlinks the
 * pcb from the global ipsec_head list and returns it to the zone.
 * 'locked' indicates whether the caller already holds ipsec_lock, which
 * protects the list.
 */
static inline void
ipsec_free_pcb(struct ipsec_pcb *pcb, bool locked)
{
#if IPSEC_NEXUS
	mbuf_freem_list(pcb->ipsec_input_chain);
	pcb->ipsec_input_chain_count = 0;
	lck_mtx_destroy(&pcb->ipsec_input_chain_lock, &ipsec_lck_grp);
	lck_mtx_destroy(&pcb->ipsec_kpipe_encrypt_lock, &ipsec_lck_grp);
	lck_mtx_destroy(&pcb->ipsec_kpipe_decrypt_lock, &ipsec_lck_grp);
#endif // IPSEC_NEXUS
	lck_mtx_destroy(&pcb->ipsec_pcb_data_move_lock, &ipsec_lck_grp);
	lck_rw_destroy(&pcb->ipsec_pcb_lock, &ipsec_lck_grp);
	/* Take ipsec_lock only if the caller doesn't already hold it. */
	if (!locked) {
		lck_mtx_lock(&ipsec_lock);
	}
	TAILQ_REMOVE(&ipsec_head, pcb, ipsec_chain);
	if (!locked) {
		lck_mtx_unlock(&ipsec_lock);
	}
	zfree(ipsec_pcb_zone, pcb);
}
2558 
/*
 * ipsec_ctl_setup - reserve a control unit and allocate a pcb for it.
 *
 * If *unit is 0 the first free unit number is chosen and returned through
 * *unit; otherwise the requested unit must be unused (EBUSY if taken).
 * Also picks an unused unique interface id, allocates a zeroed pcb, and
 * inserts it into the global list ordered by unique id. *unitinfo
 * receives the new pcb.
 */
static errno_t
ipsec_ctl_setup(u_int32_t *unit, void **unitinfo)
{
	if (unit == NULL || unitinfo == NULL) {
		return EINVAL;
	}

	lck_mtx_lock(&ipsec_lock);

	/* Find next available unit */
	if (*unit == 0) {
		*unit = 1;
		while (*unit != ctl_maxunit) {
			if (ipsec_find_by_unit(*unit)) {
				(*unit)++;
			} else {
				break;
			}
		}
		if (*unit == ctl_maxunit) {
			lck_mtx_unlock(&ipsec_lock);
			return EBUSY;
		}
	} else if (ipsec_find_by_unit(*unit)) {
		lck_mtx_unlock(&ipsec_lock);
		return EBUSY;
	}

	/* Find some open interface id */
	u_int32_t chosen_unique_id = 1;
	struct ipsec_pcb *next_pcb = TAILQ_LAST(&ipsec_head, ipsec_list);
	if (next_pcb != NULL) {
		/* List was not empty, add one to the last item */
		chosen_unique_id = next_pcb->ipsec_unique_id + 1;
		next_pcb = NULL;

		/*
		 * If this wrapped the id number, start looking at
		 * the front of the list for an unused id.
		 */
		if (chosen_unique_id == 0) {
			/* Find the next unused ID */
			chosen_unique_id = 1;
			TAILQ_FOREACH(next_pcb, &ipsec_head, ipsec_chain) {
				if (next_pcb->ipsec_unique_id > chosen_unique_id) {
					/* We found a gap */
					break;
				}

				chosen_unique_id = next_pcb->ipsec_unique_id + 1;
			}
		}
	}

	struct ipsec_pcb *pcb = zalloc_flags(ipsec_pcb_zone, Z_WAITOK | Z_ZERO);

	*unitinfo = pcb;
	pcb->ipsec_unit = *unit;
	pcb->ipsec_unique_id = chosen_unique_id;

	/* next_pcb is non-NULL only if the gap search above broke out early;
	 * inserting before the gap keeps the list sorted by unique id. */
	if (next_pcb != NULL) {
		TAILQ_INSERT_BEFORE(next_pcb, pcb, ipsec_chain);
	} else {
		TAILQ_INSERT_TAIL(&ipsec_head, pcb, ipsec_chain);
	}

	lck_mtx_unlock(&ipsec_lock);

	return 0;
}
2629 
/*
 * ipsec_ctl_bind - initialize the pcb for a bound kernel-control socket.
 *
 * Allocates a pcb via ipsec_ctl_setup() if the socket has none yet, then
 * records the control reference and unit, applies default configuration
 * values, and initializes the pcb locks. Returns EINVAL if no pcb could
 * be obtained.
 */
static errno_t
ipsec_ctl_bind(kern_ctl_ref kctlref,
    struct sockaddr_ctl *sac,
    void **unitinfo)
{
	if (*unitinfo == NULL) {
		u_int32_t unit = 0;
		(void)ipsec_ctl_setup(&unit, unitinfo);
	}

	struct ipsec_pcb *pcb = (struct ipsec_pcb *)*unitinfo;
	if (pcb == NULL) {
		return EINVAL;
	}

	/* Setup the protocol control block */
	pcb->ipsec_ctlref = kctlref;
	pcb->ipsec_unit = sac->sc_unit;
	pcb->ipsec_output_service_class = MBUF_SC_OAM;

#if IPSEC_NEXUS
	/* Defaults; may be overridden later via control socket options. */
	pcb->ipsec_use_netif = false;
	pcb->ipsec_slot_size = IPSEC_IF_DEFAULT_SLOT_SIZE;
	pcb->ipsec_netif_ring_size = if_ipsec_ring_size;
	pcb->ipsec_tx_fsw_ring_size = if_ipsec_tx_fsw_ring_size;
	pcb->ipsec_rx_fsw_ring_size = if_ipsec_rx_fsw_ring_size;
#endif // IPSEC_NEXUS

	lck_rw_init(&pcb->ipsec_pcb_lock, &ipsec_lck_grp, &ipsec_lck_attr);
	lck_mtx_init(&pcb->ipsec_pcb_data_move_lock, &ipsec_lck_grp, &ipsec_lck_attr);
#if IPSEC_NEXUS
	pcb->ipsec_input_chain_count = 0;
	lck_mtx_init(&pcb->ipsec_input_chain_lock, &ipsec_lck_grp, &ipsec_lck_attr);
	lck_mtx_init(&pcb->ipsec_kpipe_encrypt_lock, &ipsec_lck_grp, &ipsec_lck_attr);
	lck_mtx_init(&pcb->ipsec_kpipe_decrypt_lock, &ipsec_lck_grp, &ipsec_lck_attr);
#endif // IPSEC_NEXUS

	return 0;
}
2669 
static errno_t
ipsec_ctl_connect(kern_ctl_ref kctlref,
    struct sockaddr_ctl *sac,
    void **unitinfo)
{
	struct ifnet_init_eparams ipsec_init = {};
	errno_t result = 0;

	/*
	 * Lazily bind if the client connected without an explicit bind; the
	 * bind call allocates the pcb and stores it in *unitinfo.
	 */
	if (*unitinfo == NULL) {
		(void)ipsec_ctl_bind(kctlref, sac, unitinfo);
	}

	struct ipsec_pcb *pcb = *unitinfo;
	if (pcb == NULL) {
		return EINVAL;
	}

	/* Handle case where ipsec_ctl_setup() was called, but ipsec_ctl_bind() was not */
	if (pcb->ipsec_ctlref == NULL) {
		(void)ipsec_ctl_bind(kctlref, sac, unitinfo);
	}

	/* Unit/unique-id are 1-based from kern_control; interface names are 0-based. */
	snprintf(pcb->ipsec_if_xname, sizeof(pcb->ipsec_if_xname), "ipsec%d", pcb->ipsec_unit - 1);
	snprintf(pcb->ipsec_unique_name, sizeof(pcb->ipsec_unique_name), "ipsecid%d", pcb->ipsec_unique_id - 1);
	os_log(OS_LOG_DEFAULT, "ipsec_ctl_connect: creating interface %s (id %s)\n", pcb->ipsec_if_xname, pcb->ipsec_unique_name);

	/* Create the interface */
	bzero(&ipsec_init, sizeof(ipsec_init));
	ipsec_init.ver = IFNET_INIT_CURRENT_VERSION;
	ipsec_init.len = sizeof(ipsec_init);

#if IPSEC_NEXUS
	if (pcb->ipsec_use_netif) {
		/* Skywalk-native interface: no legacy start callback needed. */
		ipsec_init.flags = (IFNET_INIT_SKYWALK_NATIVE | IFNET_INIT_NX_NOAUTO);
	} else
#endif // IPSEC_NEXUS
	{
		/* Classic mbuf path: packets are drained via ipsec_start(). */
		ipsec_init.flags = IFNET_INIT_NX_NOAUTO;
		ipsec_init.start = ipsec_start;
	}
	ipsec_init.name = "ipsec";
	ipsec_init.unit = pcb->ipsec_unit - 1;
	ipsec_init.uniqueid = pcb->ipsec_unique_name;
	ipsec_init.uniqueid_len = (uint32_t)strlen(pcb->ipsec_unique_name);
	ipsec_init.family = IFNET_FAMILY_IPSEC;
	ipsec_init.type = IFT_OTHER;
	ipsec_init.demux = ipsec_demux;
	ipsec_init.add_proto = ipsec_add_proto;
	ipsec_init.del_proto = ipsec_del_proto;
	ipsec_init.softc = pcb;
	ipsec_init.ioctl = ipsec_ioctl;
	ipsec_init.free = ipsec_detached;

#if IPSEC_NEXUS
	/* We don't support kpipes without a netif */
	if (pcb->ipsec_kpipe_count && !pcb->ipsec_use_netif) {
		result = ENOTSUP;
		os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - kpipe requires netif: failed %d\n", result);
		ipsec_free_pcb(pcb, false);
		*unitinfo = NULL;
		return result;
	}

	if (if_ipsec_debug != 0) {
		printf("%s: %s%d use_netif %d kpipe_count %d slot_size %u ring_size %u "
		    "kpipe_tx_ring_size %u kpipe_rx_ring_size %u\n",
		    __func__,
		    ipsec_init.name, ipsec_init.unit,
		    pcb->ipsec_use_netif,
		    pcb->ipsec_kpipe_count,
		    pcb->ipsec_slot_size,
		    pcb->ipsec_netif_ring_size,
		    pcb->ipsec_kpipe_tx_ring_size,
		    pcb->ipsec_kpipe_rx_ring_size);
	}
	if (pcb->ipsec_use_netif) {
		/* Bring up kpipe channels first; they must exist before ifattach. */
		if (pcb->ipsec_kpipe_count) {
			result = ipsec_enable_channel(pcb, current_proc());
			if (result) {
				os_log_error(OS_LOG_DEFAULT, "%s: %s failed to enable channels\n",
				    __func__, pcb->ipsec_if_xname);
				ipsec_free_pcb(pcb, false);
				*unitinfo = NULL;
				return result;
			}
		}

		result = ipsec_nexus_ifattach(pcb, &ipsec_init, &pcb->ipsec_ifp);
		if (result != 0) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - ipsec_nexus_ifattach failed: %d\n", result);
			ipsec_free_pcb(pcb, false);
			*unitinfo = NULL;
			return result;
		}

		result = ipsec_flowswitch_attach(pcb);
		if (result != 0) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - ipsec_flowswitch_attach failed: %d\n", result);
			// Do not call ipsec_free_pcb(). We will be attached already, and will be freed later
			// in ipsec_detached().
			*unitinfo = NULL;
			return result;
		}

		/* Attach to bpf */
		bpfattach(pcb->ipsec_ifp, DLT_RAW, 0);
	} else
#endif // IPSEC_NEXUS
	{
		/* Legacy (non-nexus) path: allocate and attach a plain ifnet. */
		result = ifnet_allocate_extended(&ipsec_init, &pcb->ipsec_ifp);
		if (result != 0) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - ifnet_allocate failed: %d\n", result);
			ipsec_free_pcb(pcb, false);
			*unitinfo = NULL;
			return result;
		}
		ipsec_ifnet_set_attrs(pcb->ipsec_ifp);

		/* Attach the interface */
		result = ifnet_attach(pcb->ipsec_ifp, NULL);
		if (result != 0) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - ifnet_attach failed: %d\n", result);
			ifnet_release(pcb->ipsec_ifp);
			ipsec_free_pcb(pcb, false);
			*unitinfo = NULL;
			return result;
		}

		/* Attach to bpf */
		bpfattach(pcb->ipsec_ifp, DLT_NULL, 0);
	}

#if IPSEC_NEXUS
	/*
	 * Mark the data path as ready.
	 * If kpipe nexus is being used then the data path is marked ready only when a kpipe channel is connected.
	 */
	if (pcb->ipsec_kpipe_count == 0) {
		lck_mtx_lock(&pcb->ipsec_pcb_data_move_lock);
		IPSEC_SET_DATA_PATH_READY(pcb);
		lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
	}
#endif

	/* The interface's resources are allocated, mark it as running */
	ifnet_set_flags(pcb->ipsec_ifp, IFF_RUNNING, IFF_RUNNING);

	return 0;
}
2819 
2820 static errno_t
ipsec_detach_ip(ifnet_t interface,protocol_family_t protocol,socket_t pf_socket)2821 ipsec_detach_ip(ifnet_t                         interface,
2822     protocol_family_t       protocol,
2823     socket_t                        pf_socket)
2824 {
2825 	errno_t result = EPROTONOSUPPORT;
2826 
2827 	/* Attempt a detach */
2828 	if (protocol == PF_INET) {
2829 		struct ifreq    ifr;
2830 
2831 		bzero(&ifr, sizeof(ifr));
2832 		snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s%d",
2833 		    ifnet_name(interface), ifnet_unit(interface));
2834 
2835 		result = sock_ioctl(pf_socket, SIOCPROTODETACH, &ifr);
2836 	} else if (protocol == PF_INET6) {
2837 		struct in6_ifreq        ifr6;
2838 
2839 		bzero(&ifr6, sizeof(ifr6));
2840 		snprintf(ifr6.ifr_name, sizeof(ifr6.ifr_name), "%s%d",
2841 		    ifnet_name(interface), ifnet_unit(interface));
2842 
2843 		result = sock_ioctl(pf_socket, SIOCPROTODETACH_IN6, &ifr6);
2844 	}
2845 
2846 	return result;
2847 }
2848 
2849 static void
ipsec_remove_address(ifnet_t interface,protocol_family_t protocol,ifaddr_t address,socket_t pf_socket)2850 ipsec_remove_address(ifnet_t                            interface,
2851     protocol_family_t      protocol,
2852     ifaddr_t                       address,
2853     socket_t                       pf_socket)
2854 {
2855 	errno_t result = 0;
2856 
2857 	/* Attempt a detach */
2858 	if (protocol == PF_INET) {
2859 		struct ifreq    ifr;
2860 
2861 		bzero(&ifr, sizeof(ifr));
2862 		snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s%d",
2863 		    ifnet_name(interface), ifnet_unit(interface));
2864 		result = ifaddr_address(address, &ifr.ifr_addr, sizeof(ifr.ifr_addr));
2865 		if (result != 0) {
2866 			os_log_error(OS_LOG_DEFAULT, "ipsec_remove_address - ifaddr_address failed: %d", result);
2867 		} else {
2868 			result = sock_ioctl(pf_socket, SIOCDIFADDR, &ifr);
2869 			if (result != 0) {
2870 				os_log_error(OS_LOG_DEFAULT, "ipsec_remove_address - SIOCDIFADDR failed: %d", result);
2871 			}
2872 		}
2873 	} else if (protocol == PF_INET6) {
2874 		struct in6_ifreq        ifr6;
2875 
2876 		bzero(&ifr6, sizeof(ifr6));
2877 		snprintf(ifr6.ifr_name, sizeof(ifr6.ifr_name), "%s%d",
2878 		    ifnet_name(interface), ifnet_unit(interface));
2879 		result = ifaddr_address(address, (struct sockaddr*)&ifr6.ifr_addr,
2880 		    sizeof(ifr6.ifr_addr));
2881 		if (result != 0) {
2882 			os_log_error(OS_LOG_DEFAULT, "ipsec_remove_address - ifaddr_address failed (v6): %d",
2883 			    result);
2884 		} else {
2885 			result = sock_ioctl(pf_socket, SIOCDIFADDR_IN6, &ifr6);
2886 			if (result != 0) {
2887 				os_log_error(OS_LOG_DEFAULT, "ipsec_remove_address - SIOCDIFADDR_IN6 failed: %d",
2888 				    result);
2889 			}
2890 		}
2891 	}
2892 }
2893 
2894 static void
ipsec_cleanup_family(ifnet_t interface,protocol_family_t protocol)2895 ipsec_cleanup_family(ifnet_t                            interface,
2896     protocol_family_t      protocol)
2897 {
2898 	errno_t         result = 0;
2899 	socket_t        pf_socket = NULL;
2900 	ifaddr_t        *addresses = NULL;
2901 	int                     i;
2902 
2903 	if (protocol != PF_INET && protocol != PF_INET6) {
2904 		os_log_error(OS_LOG_DEFAULT, "ipsec_cleanup_family - invalid protocol family %d\n", protocol);
2905 		return;
2906 	}
2907 
2908 	/* Create a socket for removing addresses and detaching the protocol */
2909 	result = sock_socket(protocol, SOCK_DGRAM, 0, NULL, NULL, &pf_socket);
2910 	if (result != 0) {
2911 		if (result != EAFNOSUPPORT) {
2912 			os_log_error(OS_LOG_DEFAULT, "ipsec_cleanup_family - failed to create %s socket: %d\n",
2913 			    protocol == PF_INET ? "IP" : "IPv6", result);
2914 		}
2915 		goto cleanup;
2916 	}
2917 
2918 	/* always set SS_PRIV, we want to close and detach regardless */
2919 	sock_setpriv(pf_socket, 1);
2920 
2921 	result = ipsec_detach_ip(interface, protocol, pf_socket);
2922 	if (result == 0 || result == ENXIO) {
2923 		/* We are done! We either detached or weren't attached. */
2924 		goto cleanup;
2925 	} else if (result != EBUSY) {
2926 		/* Uh, not really sure what happened here... */
2927 		os_log_error(OS_LOG_DEFAULT, "ipsec_cleanup_family - ipsec_detach_ip failed: %d\n", result);
2928 		goto cleanup;
2929 	}
2930 
2931 	/*
2932 	 * At this point, we received an EBUSY error. This means there are
2933 	 * addresses attached. We should detach them and then try again.
2934 	 */
2935 	result = ifnet_get_address_list_family(interface, &addresses, (sa_family_t)protocol);
2936 	if (result != 0) {
2937 		os_log_error(OS_LOG_DEFAULT, "fnet_get_address_list_family(%s%d, 0xblah, %s) - failed: %d\n",
2938 		    ifnet_name(interface), ifnet_unit(interface),
2939 		    protocol == PF_INET ? "PF_INET" : "PF_INET6", result);
2940 		goto cleanup;
2941 	}
2942 
2943 	for (i = 0; addresses[i] != 0; i++) {
2944 		ipsec_remove_address(interface, protocol, addresses[i], pf_socket);
2945 	}
2946 	ifnet_free_address_list(addresses);
2947 	addresses = NULL;
2948 
2949 	/*
2950 	 * The addresses should be gone, we should try the remove again.
2951 	 */
2952 	result = ipsec_detach_ip(interface, protocol, pf_socket);
2953 	if (result != 0 && result != ENXIO) {
2954 		os_log_error(OS_LOG_DEFAULT, "ipsec_cleanup_family - ipsec_detach_ip failed: %d\n", result);
2955 	}
2956 
2957 cleanup:
2958 	if (pf_socket != NULL) {
2959 		sock_close(pf_socket);
2960 	}
2961 
2962 	if (addresses != NULL) {
2963 		ifnet_free_address_list(addresses);
2964 	}
2965 }
2966 
static errno_t
ipsec_ctl_disconnect(__unused kern_ctl_ref      kctlref,
    __unused u_int32_t             unit,
    void                                   *unitinfo)
{
	struct ipsec_pcb *pcb = unitinfo;
	ifnet_t ifp = NULL;
	errno_t result = 0;

	if (pcb == NULL) {
		return EINVAL;
	}

	/* Wait until all threads in the data paths are done. */
	ipsec_wait_data_move_drain(pcb);

#if IPSEC_NEXUS
	// Tell the nexus to stop all rings
	if (pcb->ipsec_netif_nexus != NULL) {
		kern_nexus_stop(pcb->ipsec_netif_nexus);
	}
#endif // IPSEC_NEXUS

	lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);

#if IPSEC_NEXUS
	if (if_ipsec_debug != 0) {
		printf("ipsec_ctl_disconnect: detaching interface %s (id %s)\n",
		    pcb->ipsec_if_xname, pcb->ipsec_unique_name);
	}

	/* Snapshot the kpipe channels under the lock; freed after it is dropped. */
	struct ipsec_detached_channels dc;
	ipsec_detach_channels(pcb, &dc);
#endif // IPSEC_NEXUS

	/* The control ref is gone; the pcb no longer belongs to this socket. */
	pcb->ipsec_ctlref = NULL;

	ifp = pcb->ipsec_ifp;
	if (ifp != NULL) {
#if IPSEC_NEXUS
		if (pcb->ipsec_netif_nexus != NULL) {
			/*
			 * Quiesce the interface and flush any pending outbound packets.
			 */
			if_down(ifp);

			/*
			 * Suspend data movement and wait for IO threads to exit.
			 * We can't rely on the logic in dlil_quiesce_and_detach_nexuses() to
			 * do this because ipsec nexuses are attached/detached separately.
			 */
			ifnet_datamov_suspend_and_drain(ifp);
			if ((result = ifnet_detach(ifp)) != 0) {
				panic("ipsec_ctl_disconnect - ifnet_detach failed: %d", result);
				/* NOT REACHED */
			}

			/*
			 * We want to do everything in our power to ensure that the interface
			 * really goes away when the socket is closed. We must remove IP/IPv6
			 * addresses and detach the protocols. Finally, we can remove and
			 * release the interface.
			 */
			key_delsp_for_ipsec_if(ifp);

			ipsec_cleanup_family(ifp, AF_INET);
			ipsec_cleanup_family(ifp, AF_INET6);

			lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);

			ipsec_free_channels(&dc);

			ipsec_nexus_detach(pcb);

			/* Decrement refcnt added by ifnet_datamov_suspend_and_drain(). */
			ifnet_datamov_resume(ifp);
		} else
#endif // IPSEC_NEXUS
		{
			/* Non-nexus interface: drop the lock before the blocking cleanup. */
			lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);

#if IPSEC_NEXUS
			ipsec_free_channels(&dc);
#endif // IPSEC_NEXUS

			/*
			 * We want to do everything in our power to ensure that the interface
			 * really goes away when the socket is closed. We must remove IP/IPv6
			 * addresses and detach the protocols. Finally, we can remove and
			 * release the interface.
			 */
			key_delsp_for_ipsec_if(ifp);

			ipsec_cleanup_family(ifp, AF_INET);
			ipsec_cleanup_family(ifp, AF_INET6);

			/*
			 * Detach now; ipsec_detach() will be called asynchronously once
			 * the I/O reference count drops to 0.  There we will invoke
			 * ifnet_release().
			 */
			if ((result = ifnet_detach(ifp)) != 0) {
				os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_disconnect - ifnet_detach failed: %d\n", result);
			}
		}
	} else {
		// Bound, but not connected
		lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
		ipsec_free_pcb(pcb, false);
	}

	return 0;
}
3080 
3081 static errno_t
ipsec_ctl_send(__unused kern_ctl_ref kctlref,__unused u_int32_t unit,__unused void * unitinfo,mbuf_t m,__unused int flags)3082 ipsec_ctl_send(__unused kern_ctl_ref    kctlref,
3083     __unused u_int32_t           unit,
3084     __unused void                        *unitinfo,
3085     mbuf_t                  m,
3086     __unused int                 flags)
3087 {
3088 	/* Receive messages from the control socket. Currently unused. */
3089 	mbuf_freem(m);
3090 	return 0;
3091 }
3092 
3093 static errno_t
ipsec_ctl_setopt(__unused kern_ctl_ref kctlref,__unused u_int32_t unit,void * unitinfo,int opt,void * data,size_t len)3094 ipsec_ctl_setopt(__unused kern_ctl_ref  kctlref,
3095     __unused u_int32_t             unit,
3096     void                                   *unitinfo,
3097     int                                            opt,
3098     void                                   *data,
3099     size_t                                 len)
3100 {
3101 	errno_t                                 result = 0;
3102 	struct ipsec_pcb                        *pcb = unitinfo;
3103 	if (pcb == NULL) {
3104 		return EINVAL;
3105 	}
3106 
3107 	/* check for privileges for privileged options */
3108 	switch (opt) {
3109 	case IPSEC_OPT_FLAGS:
3110 	case IPSEC_OPT_EXT_IFDATA_STATS:
3111 	case IPSEC_OPT_SET_DELEGATE_INTERFACE:
3112 	case IPSEC_OPT_OUTPUT_TRAFFIC_CLASS:
3113 	case IPSEC_OPT_OUTPUT_DSCP_MAPPING:
3114 		if (kauth_cred_issuser(kauth_cred_get()) == 0) {
3115 			return EPERM;
3116 		}
3117 		break;
3118 	}
3119 
3120 	switch (opt) {
3121 	case IPSEC_OPT_FLAGS: {
3122 		if (len != sizeof(u_int32_t)) {
3123 			result = EMSGSIZE;
3124 		} else {
3125 			pcb->ipsec_external_flags = *(u_int32_t *)data;
3126 		}
3127 		break;
3128 	}
3129 
3130 	case IPSEC_OPT_EXT_IFDATA_STATS: {
3131 		if (len != sizeof(int)) {
3132 			result = EMSGSIZE;
3133 			break;
3134 		}
3135 		if (pcb->ipsec_ifp == NULL) {
3136 			// Only can set after connecting
3137 			result = EINVAL;
3138 			break;
3139 		}
3140 		pcb->ipsec_ext_ifdata_stats = (*(int *)data) ? 1 : 0;
3141 		break;
3142 	}
3143 
3144 	case IPSEC_OPT_INC_IFDATA_STATS_IN:
3145 	case IPSEC_OPT_INC_IFDATA_STATS_OUT: {
3146 		struct ipsec_stats_param *utsp = (struct ipsec_stats_param *)data;
3147 
3148 		if (utsp == NULL || len < sizeof(struct ipsec_stats_param)) {
3149 			result = EINVAL;
3150 			break;
3151 		}
3152 		if (pcb->ipsec_ifp == NULL) {
3153 			// Only can set after connecting
3154 			result = EINVAL;
3155 			break;
3156 		}
3157 		if (!pcb->ipsec_ext_ifdata_stats) {
3158 			result = EINVAL;
3159 			break;
3160 		}
3161 		if (opt == IPSEC_OPT_INC_IFDATA_STATS_IN) {
3162 			ifnet_stat_increment_in(pcb->ipsec_ifp, (uint32_t)utsp->utsp_packets,
3163 			    (uint32_t)utsp->utsp_bytes, (uint32_t)utsp->utsp_errors);
3164 		} else {
3165 			ifnet_stat_increment_out(pcb->ipsec_ifp, (uint32_t)utsp->utsp_packets,
3166 			    (uint32_t)utsp->utsp_bytes, (uint32_t)utsp->utsp_errors);
3167 		}
3168 		break;
3169 	}
3170 
3171 	case IPSEC_OPT_SET_DELEGATE_INTERFACE: {
3172 		ifnet_t del_ifp = NULL;
3173 		char name[IFNAMSIZ];
3174 
3175 		if (len > IFNAMSIZ - 1) {
3176 			result = EMSGSIZE;
3177 			break;
3178 		}
3179 		if (pcb->ipsec_ifp == NULL) {
3180 			// Only can set after connecting
3181 			result = EINVAL;
3182 			break;
3183 		}
3184 		if (len != 0) {                   /* if len==0, del_ifp will be NULL causing the delegate to be removed */
3185 			bcopy(data, name, len);
3186 			name[len] = 0;
3187 			result = ifnet_find_by_name(name, &del_ifp);
3188 		}
3189 		if (result == 0) {
3190 			os_log_error(OS_LOG_DEFAULT, "%s IPSEC_OPT_SET_DELEGATE_INTERFACE %s to %s\n",
3191 			    __func__, pcb->ipsec_ifp->if_xname,
3192 			    del_ifp ? del_ifp->if_xname : "NULL");
3193 
3194 			result = ifnet_set_delegate(pcb->ipsec_ifp, del_ifp);
3195 			if (del_ifp) {
3196 				ifnet_release(del_ifp);
3197 			}
3198 		}
3199 		break;
3200 	}
3201 
3202 	case IPSEC_OPT_OUTPUT_TRAFFIC_CLASS: {
3203 		if (len != sizeof(int)) {
3204 			result = EMSGSIZE;
3205 			break;
3206 		}
3207 		if (pcb->ipsec_ifp == NULL) {
3208 			// Only can set after connecting
3209 			result = EINVAL;
3210 			break;
3211 		}
3212 		mbuf_svc_class_t output_service_class = so_tc2msc(*(int *)data);
3213 		if (output_service_class == MBUF_SC_UNSPEC) {
3214 			pcb->ipsec_output_service_class = MBUF_SC_OAM;
3215 		} else {
3216 			pcb->ipsec_output_service_class = output_service_class;
3217 		}
3218 		os_log_error(OS_LOG_DEFAULT, "%s IPSEC_OPT_OUTPUT_TRAFFIC_CLASS %s svc %d\n",
3219 		    __func__, pcb->ipsec_ifp->if_xname,
3220 		    pcb->ipsec_output_service_class);
3221 		break;
3222 	}
3223 
3224 #if IPSEC_NEXUS
3225 	case IPSEC_OPT_ENABLE_CHANNEL: {
3226 		if (len != sizeof(int)) {
3227 			result = EMSGSIZE;
3228 			break;
3229 		}
3230 		if (pcb->ipsec_ifp != NULL) {
3231 			// Only can set before connecting
3232 			result = EINVAL;
3233 			break;
3234 		}
3235 		if ((*(int *)data) != 0 &&
3236 		    (*(int *)data) != 1 &&
3237 		    (*(int *)data) != IPSEC_IF_WMM_RING_COUNT) {
3238 			result = EINVAL;
3239 			break;
3240 		}
3241 		lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
3242 		pcb->ipsec_kpipe_count = *(int *)data;
3243 		lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
3244 		break;
3245 	}
3246 
3247 	case IPSEC_OPT_CHANNEL_BIND_PID: {
3248 		if (len != sizeof(pid_t)) {
3249 			result = EMSGSIZE;
3250 			break;
3251 		}
3252 		if (pcb->ipsec_ifp != NULL) {
3253 			// Only can set before connecting
3254 			result = EINVAL;
3255 			break;
3256 		}
3257 		lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
3258 		pcb->ipsec_kpipe_pid = *(pid_t *)data;
3259 		lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
3260 		break;
3261 	}
3262 
3263 	case IPSEC_OPT_CHANNEL_BIND_UUID: {
3264 		if (len != sizeof(uuid_t)) {
3265 			result = EMSGSIZE;
3266 			break;
3267 		}
3268 		if (pcb->ipsec_ifp != NULL) {
3269 			// Only can set before connecting
3270 			result = EINVAL;
3271 			break;
3272 		}
3273 		lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
3274 		uuid_copy(pcb->ipsec_kpipe_proc_uuid, *((uuid_t *)data));
3275 		lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
3276 		break;
3277 	}
3278 
3279 	case IPSEC_OPT_ENABLE_FLOWSWITCH: {
3280 		if (len != sizeof(int)) {
3281 			result = EMSGSIZE;
3282 			break;
3283 		}
3284 		if (pcb->ipsec_ifp == NULL) {
3285 			// Only can set after connecting
3286 			result = EINVAL;
3287 			break;
3288 		}
3289 		if (!if_is_fsw_transport_netagent_enabled()) {
3290 			result = ENOTSUP;
3291 			break;
3292 		}
3293 		if (uuid_is_null(pcb->ipsec_nx.fsw_agent)) {
3294 			result = ENOENT;
3295 			break;
3296 		}
3297 
3298 		uint32_t flags = netagent_get_flags(pcb->ipsec_nx.fsw_agent);
3299 
3300 		if (*(int *)data) {
3301 			flags |= (NETAGENT_FLAG_NEXUS_PROVIDER |
3302 			    NETAGENT_FLAG_NEXUS_LISTENER);
3303 			result = netagent_set_flags(pcb->ipsec_nx.fsw_agent, flags);
3304 			pcb->ipsec_needs_netagent = true;
3305 		} else {
3306 			pcb->ipsec_needs_netagent = false;
3307 			flags &= ~(NETAGENT_FLAG_NEXUS_PROVIDER |
3308 			    NETAGENT_FLAG_NEXUS_LISTENER);
3309 			result = netagent_set_flags(pcb->ipsec_nx.fsw_agent, flags);
3310 		}
3311 		break;
3312 	}
3313 
3314 	case IPSEC_OPT_INPUT_FRAG_SIZE: {
3315 		if (len != sizeof(u_int32_t)) {
3316 			result = EMSGSIZE;
3317 			break;
3318 		}
3319 		u_int32_t input_frag_size = *(u_int32_t *)data;
3320 		if (input_frag_size <= sizeof(struct ip6_hdr)) {
3321 			pcb->ipsec_frag_size_set = FALSE;
3322 			pcb->ipsec_input_frag_size = 0;
3323 		} else {
3324 			pcb->ipsec_frag_size_set = TRUE;
3325 			pcb->ipsec_input_frag_size = input_frag_size;
3326 		}
3327 		break;
3328 	}
3329 	case IPSEC_OPT_ENABLE_NETIF: {
3330 		if (len != sizeof(int)) {
3331 			result = EMSGSIZE;
3332 			break;
3333 		}
3334 		if (pcb->ipsec_ifp != NULL) {
3335 			// Only can set before connecting
3336 			result = EINVAL;
3337 			break;
3338 		}
3339 		lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
3340 		pcb->ipsec_use_netif = !!(*(int *)data);
3341 		lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
3342 		break;
3343 	}
3344 	case IPSEC_OPT_SLOT_SIZE: {
3345 		if (len != sizeof(u_int32_t)) {
3346 			result = EMSGSIZE;
3347 			break;
3348 		}
3349 		if (pcb->ipsec_ifp != NULL) {
3350 			// Only can set before connecting
3351 			result = EINVAL;
3352 			break;
3353 		}
3354 		u_int32_t slot_size = *(u_int32_t *)data;
3355 		if (slot_size < IPSEC_IF_MIN_SLOT_SIZE ||
3356 		    slot_size > IPSEC_IF_MAX_SLOT_SIZE) {
3357 			return EINVAL;
3358 		}
3359 		pcb->ipsec_slot_size = slot_size;
3360 		if (if_ipsec_debug != 0) {
3361 			printf("%s: IPSEC_OPT_SLOT_SIZE %u\n", __func__, slot_size);
3362 		}
3363 		break;
3364 	}
3365 	case IPSEC_OPT_NETIF_RING_SIZE: {
3366 		if (len != sizeof(u_int32_t)) {
3367 			result = EMSGSIZE;
3368 			break;
3369 		}
3370 		if (pcb->ipsec_ifp != NULL) {
3371 			// Only can set before connecting
3372 			result = EINVAL;
3373 			break;
3374 		}
3375 		u_int32_t ring_size = *(u_int32_t *)data;
3376 		if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
3377 		    ring_size > IPSEC_IF_MAX_RING_SIZE) {
3378 			return EINVAL;
3379 		}
3380 		pcb->ipsec_netif_ring_size = ring_size;
3381 		if (if_ipsec_debug != 0) {
3382 			printf("%s: IPSEC_OPT_NETIF_RING_SIZE %u\n", __func__, ring_size);
3383 		}
3384 		break;
3385 	}
3386 	case IPSEC_OPT_TX_FSW_RING_SIZE: {
3387 		if (len != sizeof(u_int32_t)) {
3388 			result = EMSGSIZE;
3389 			break;
3390 		}
3391 		if (pcb->ipsec_ifp != NULL) {
3392 			// Only can set before connecting
3393 			result = EINVAL;
3394 			break;
3395 		}
3396 		u_int32_t ring_size = *(u_int32_t *)data;
3397 		if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
3398 		    ring_size > IPSEC_IF_MAX_RING_SIZE) {
3399 			return EINVAL;
3400 		}
3401 		pcb->ipsec_tx_fsw_ring_size = ring_size;
3402 		if (if_ipsec_debug != 0) {
3403 			printf("%s: IPSEC_OPT_TX_FSW_RING_SIZE %u\n", __func__, ring_size);
3404 		}
3405 		break;
3406 	}
3407 	case IPSEC_OPT_RX_FSW_RING_SIZE: {
3408 		if (len != sizeof(u_int32_t)) {
3409 			result = EMSGSIZE;
3410 			break;
3411 		}
3412 		if (pcb->ipsec_ifp != NULL) {
3413 			// Only can set before connecting
3414 			result = EINVAL;
3415 			break;
3416 		}
3417 		u_int32_t ring_size = *(u_int32_t *)data;
3418 		if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
3419 		    ring_size > IPSEC_IF_MAX_RING_SIZE) {
3420 			return EINVAL;
3421 		}
3422 		pcb->ipsec_rx_fsw_ring_size = ring_size;
3423 		if (if_ipsec_debug != 0) {
3424 			printf("%s: IPSEC_OPT_TX_FSW_RING_SIZE %u\n", __func__, ring_size);
3425 		}
3426 		break;
3427 	}
3428 	case IPSEC_OPT_KPIPE_TX_RING_SIZE: {
3429 		if (len != sizeof(u_int32_t)) {
3430 			result = EMSGSIZE;
3431 			break;
3432 		}
3433 		if (pcb->ipsec_ifp != NULL) {
3434 			// Only can set before connecting
3435 			result = EINVAL;
3436 			break;
3437 		}
3438 		u_int32_t ring_size = *(u_int32_t *)data;
3439 		if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
3440 		    ring_size > IPSEC_IF_MAX_RING_SIZE) {
3441 			return EINVAL;
3442 		}
3443 		pcb->ipsec_kpipe_tx_ring_size = ring_size;
3444 		if (if_ipsec_debug != 0) {
3445 			printf("%s: IPSEC_OPT_KPIPE_TX_RING_SIZE %u\n", __func__, ring_size);
3446 		}
3447 		break;
3448 	}
3449 	case IPSEC_OPT_KPIPE_RX_RING_SIZE: {
3450 		if (len != sizeof(u_int32_t)) {
3451 			result = EMSGSIZE;
3452 			break;
3453 		}
3454 		if (pcb->ipsec_ifp != NULL) {
3455 			// Only can set before connecting
3456 			result = EINVAL;
3457 			break;
3458 		}
3459 		u_int32_t ring_size = *(u_int32_t *)data;
3460 		if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
3461 		    ring_size > IPSEC_IF_MAX_RING_SIZE) {
3462 			return EINVAL;
3463 		}
3464 		pcb->ipsec_kpipe_rx_ring_size = ring_size;
3465 		if (if_ipsec_debug != 0) {
3466 			printf("%s: IPSEC_OPT_KPIPE_RX_RING_SIZE %u\n", __func__, ring_size);
3467 		}
3468 		break;
3469 	}
3470 	case IPSEC_OPT_OUTPUT_DSCP_MAPPING: {
3471 		if (len != sizeof(int)) {
3472 			result = EMSGSIZE;
3473 			break;
3474 		}
3475 		if (pcb->ipsec_ifp == NULL) {
3476 			// Only can set after connecting
3477 			result = EINVAL;
3478 			break;
3479 		}
3480 
3481 		ipsec_dscp_mapping_t output_dscp_mapping = (ipsec_dscp_mapping_t)(*(int *)data);
3482 		if (output_dscp_mapping > IPSEC_DSCP_MAPPING_LEGACY) {
3483 			return EINVAL;
3484 		}
3485 
3486 		pcb->ipsec_output_dscp_mapping = output_dscp_mapping;
3487 
3488 		os_log(OS_LOG_DEFAULT, "%s IPSEC_OPT_OUTPUT_DSCP_MAPPING %s DSCP %d\n",
3489 		    __func__, pcb->ipsec_ifp->if_xname,
3490 		    pcb->ipsec_output_dscp_mapping);
3491 		break;
3492 	}
3493 
3494 #endif // IPSEC_NEXUS
3495 
3496 	default: {
3497 		result = ENOPROTOOPT;
3498 		break;
3499 	}
3500 	}
3501 
3502 	return result;
3503 }
3504 
/*
 * getsockopt handler for the ipsec kernel control: reports the current
 * pcb configuration. Length mismatches yield EMSGSIZE; unknown options
 * yield ENOPROTOOPT. Some options take the pcb rwlock shared, others
 * read without it — preserve that per-option discipline.
 */
static errno_t
ipsec_ctl_getopt(__unused kern_ctl_ref kctlref,
    __unused u_int32_t unit,
    void *unitinfo,
    int opt,
    void *data,
    size_t *len)
{
	errno_t result = 0;
	struct ipsec_pcb *pcb = unitinfo;
	if (pcb == NULL) {
		return EINVAL;
	}

	switch (opt) {
	case IPSEC_OPT_FLAGS: {
		if (*len != sizeof(u_int32_t)) {
			result = EMSGSIZE;
		} else {
			*(u_int32_t *)data = pcb->ipsec_external_flags;
		}
		break;
	}

	case IPSEC_OPT_EXT_IFDATA_STATS: {
		if (*len != sizeof(int)) {
			result = EMSGSIZE;
		} else {
			*(int *)data = (pcb->ipsec_ext_ifdata_stats) ? 1 : 0;
		}
		break;
	}

	case IPSEC_OPT_IFNAME: {
		/* Caller's buffer must fit the name plus a NUL terminator. */
		if (*len < MIN(strlen(pcb->ipsec_if_xname) + 1, sizeof(pcb->ipsec_if_xname))) {
			result = EMSGSIZE;
		} else {
			if (pcb->ipsec_ifp == NULL) {
				// Only can get after connecting
				result = EINVAL;
				break;
			}
			/* Returned length includes the NUL terminator. */
			*len = scnprintf(data, *len, "%s", pcb->ipsec_if_xname) + 1;
		}
		break;
	}

	case IPSEC_OPT_OUTPUT_TRAFFIC_CLASS: {
		if (*len != sizeof(int)) {
			result = EMSGSIZE;
		} else {
			*(int *)data = so_svc2tc(pcb->ipsec_output_service_class);
		}
		break;
	}

#if IPSEC_NEXUS

	case IPSEC_OPT_ENABLE_CHANNEL: {
		if (*len != sizeof(int)) {
			result = EMSGSIZE;
		} else {
			lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
			*(int *)data = pcb->ipsec_kpipe_count;
			lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		}
		break;
	}

	case IPSEC_OPT_CHANNEL_BIND_PID: {
		if (*len != sizeof(pid_t)) {
			result = EMSGSIZE;
		} else {
			lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
			*(pid_t *)data = pcb->ipsec_kpipe_pid;
			lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		}
		break;
	}

	case IPSEC_OPT_CHANNEL_BIND_UUID: {
		if (*len != sizeof(uuid_t)) {
			result = EMSGSIZE;
		} else {
			lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
			uuid_copy(*((uuid_t *)data), pcb->ipsec_kpipe_proc_uuid);
			lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		}
		break;
	}

	case IPSEC_OPT_ENABLE_FLOWSWITCH: {
		if (*len != sizeof(int)) {
			result = EMSGSIZE;
		} else {
			*(int *)data = if_check_netagent(pcb->ipsec_ifp, pcb->ipsec_nx.fsw_agent);
		}
		break;
	}

	case IPSEC_OPT_ENABLE_NETIF: {
		if (*len != sizeof(int)) {
			result = EMSGSIZE;
		} else {
			lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
			*(int *)data = !!pcb->ipsec_use_netif;
			lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		}
		break;
	}

	case IPSEC_OPT_GET_CHANNEL_UUID: {
		/* One UUID per kpipe; caller must size the buffer for all of them. */
		lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
		if (!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
			result = ENXIO;
		} else if (*len != sizeof(uuid_t) * pcb->ipsec_kpipe_count) {
			result = EMSGSIZE;
		} else {
			for (unsigned int i = 0; i < pcb->ipsec_kpipe_count; i++) {
				uuid_copy(((uuid_t *)data)[i], pcb->ipsec_kpipe_uuid[i]);
			}
		}
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		break;
	}

	case IPSEC_OPT_INPUT_FRAG_SIZE: {
		if (*len != sizeof(u_int32_t)) {
			result = EMSGSIZE;
		} else {
			*(u_int32_t *)data = pcb->ipsec_input_frag_size;
		}
		break;
	}
	case IPSEC_OPT_SLOT_SIZE: {
		if (*len != sizeof(u_int32_t)) {
			result = EMSGSIZE;
		} else {
			*(u_int32_t *)data = pcb->ipsec_slot_size;
		}
		break;
	}
	case IPSEC_OPT_NETIF_RING_SIZE: {
		if (*len != sizeof(u_int32_t)) {
			result = EMSGSIZE;
		} else {
			*(u_int32_t *)data = pcb->ipsec_netif_ring_size;
		}
		break;
	}
	case IPSEC_OPT_TX_FSW_RING_SIZE: {
		if (*len != sizeof(u_int32_t)) {
			result = EMSGSIZE;
		} else {
			*(u_int32_t *)data = pcb->ipsec_tx_fsw_ring_size;
		}
		break;
	}
	case IPSEC_OPT_RX_FSW_RING_SIZE: {
		if (*len != sizeof(u_int32_t)) {
			result = EMSGSIZE;
		} else {
			*(u_int32_t *)data = pcb->ipsec_rx_fsw_ring_size;
		}
		break;
	}
	case IPSEC_OPT_KPIPE_TX_RING_SIZE: {
		if (*len != sizeof(u_int32_t)) {
			result = EMSGSIZE;
		} else {
			*(u_int32_t *)data = pcb->ipsec_kpipe_tx_ring_size;
		}
		break;
	}
	case IPSEC_OPT_KPIPE_RX_RING_SIZE: {
		if (*len != sizeof(u_int32_t)) {
			result = EMSGSIZE;
		} else {
			*(u_int32_t *)data = pcb->ipsec_kpipe_rx_ring_size;
		}
		break;
	}

#endif // IPSEC_NEXUS

	default: {
		result = ENOPROTOOPT;
		break;
	}
	}

	return result;
}
3698 
3699 /* Network Interface functions */
/*
 * ipsec_output - if_output handler for the ipsec tunnel interface.
 *
 * Takes ownership of one outbound packet ("data" is always consumed:
 * either handed off to ip_output()/ip6_output() or freed on error),
 * applies IPsec encapsulation via ipsec4/ipsec6_interface_output(),
 * and submits the result to the IP output path with flow-advisory
 * args so the interface can be flow-controlled.
 *
 * Returns 0 on success, EINVAL on looped or malformed packets,
 * ENOBUFS when the flow advisory reports flow control / suspension
 * (in which case output on the interface is disabled).
 */
static errno_t
ipsec_output(ifnet_t interface,
    mbuf_t data)
{
	struct ipsec_pcb *pcb = ifnet_softc(interface);
	struct ipsec_output_state ipsec_state;
	struct route ro;
	struct route_in6 ro6;
	size_t length;
	struct ip *ip = NULL;
	struct ip6_hdr *ip6 = NULL;
	struct ip_out_args ipoa;
	struct ip6_out_args ip6oa;
	int error = 0;
	u_int ip_version = 0;
	int flags = 0;
	struct flowadv *adv = NULL;

	// Make sure this packet isn't looping through the interface
	if (necp_get_last_interface_index_from_packet(data) == interface->if_index) {
		error = EINVAL;
		goto ipsec_output_err;
	}

	// Mark the interface so NECP can evaluate tunnel policy
	necp_mark_packet_from_interface(data, interface);

	/* The version nibble is read below; require at least a full IPv4
	 * header in the first mbuf before dereferencing it. */
	if (data->m_len < sizeof(*ip)) {
		os_log_error(OS_LOG_DEFAULT, "ipsec_output: first mbuf length shorter than IP header length: %d.\n", data->m_len);
		IPSEC_STAT_INCREMENT(ipsecstat.out_inval);
		error = EINVAL;
		goto ipsec_output_err;
	}

	ip = mtod(data, struct ip *);
	ip_version = ip->ip_v;

	switch (ip_version) {
	case 4: {
		/* Validate the claimed IPv4 header length before encryption. */
		u_int8_t ip_hlen = 0;
#ifdef _IP_VHL
		ip_hlen = _IP_VHL_HL(ip->ip_vhl) << 2;
#else
		ip_hlen = (uint8_t)(ip->ip_hl << 2);
#endif
		if (ip_hlen < sizeof(*ip)) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_output: Bad ip header length %d.\n", ip_hlen);
			IPSEC_STAT_INCREMENT(ipsecstat.out_inval);
			error = EINVAL;
			goto ipsec_output_err;
		}
#if IPSEC_NEXUS
		/* With the netif nexus, BPF taps are handled on the channel
		 * path; only tap here on the legacy (non-netif) path. */
		if (!pcb->ipsec_use_netif)
#endif // IPSEC_NEXUS
		{
			int af = AF_INET;
			bpf_tap_out(pcb->ipsec_ifp, DLT_NULL, data, &af, sizeof(af));
		}

		/* Apply encryption */
		memset(&ipsec_state, 0, sizeof(ipsec_state));
		ipsec_state.m = data;
		ipsec_state.dst = (struct sockaddr *)&ip->ip_dst;
		memset(&ipsec_state.ro, 0, sizeof(ipsec_state.ro));
		ipsec_state.dscp_mapping = pcb->ipsec_output_dscp_mapping;

		error = ipsec4_interface_output(&ipsec_state, interface);
		/* Tunneled in IPv6 - packet is gone */
		if (error == 0 && ipsec_state.tunneled == 6) {
			goto done;
		}

		/* ipsec4_interface_output may have replaced the mbuf chain. */
		data = ipsec_state.m;
		if (error || data == NULL) {
			if (error) {
				os_log_error(OS_LOG_DEFAULT, "ipsec_output: ipsec4_output error %d.\n", error);
			}
			goto ipsec_output_err;
		}

		/* Set traffic class, set flow */
		m_set_service_class(data, pcb->ipsec_output_service_class);
		data->m_pkthdr.pkt_flowsrc = FLOWSRC_IFNET;
#if SKYWALK
		data->m_pkthdr.pkt_mpriv_srcid = interface->if_flowhash;
#else /* !SKYWALK */
		data->m_pkthdr.pkt_flowid = interface->if_flowhash;
#endif /* !SKYWALK */
		data->m_pkthdr.pkt_proto = ip->ip_p;
		data->m_pkthdr.pkt_flags = (PKTF_FLOW_ID | PKTF_FLOW_ADV | PKTF_FLOW_LOCALSRC);

		/* Flip endian-ness for ip_output */
		ip = mtod(data, struct ip *);
		NTOHS(ip->ip_len);
		NTOHS(ip->ip_off);

		/* Increment statistics */
		length = mbuf_pkthdr_len(data);
		ifnet_stat_increment_out(interface, 1, (uint16_t)length, 0);

		/* Send to ip_output */
		memset(&ro, 0, sizeof(ro));

		flags = (IP_OUTARGS |   /* Passing out args to specify interface */
		    IP_NOIPSEC);                        /* To ensure the packet doesn't go through ipsec twice */

		memset(&ipoa, 0, sizeof(ipoa));
		ipoa.ipoa_flowadv.code = 0;
		ipoa.ipoa_flags = IPOAF_SELECT_SRCIF | IPOAF_BOUND_SRCADDR;
		if (ipsec_state.outgoing_if) {
			ipoa.ipoa_boundif = ipsec_state.outgoing_if;
			ipoa.ipoa_flags |= IPOAF_BOUND_IF;
		}
		ipsec_set_ipoa_for_interface(pcb->ipsec_ifp, &ipoa);

		adv = &ipoa.ipoa_flowadv;

		/* ip_output consumes the mbuf regardless of its return value. */
		(void)ip_output(data, NULL, &ro, flags, NULL, &ipoa);
		data = NULL;

		/* Propagate flow control back to the interface send queue. */
		if (adv->code == FADV_FLOW_CONTROLLED || adv->code == FADV_SUSPENDED) {
			error = ENOBUFS;
			ifnet_disable_output(interface);
		}

		goto done;
	}
	case 6: {
		if (data->m_len < sizeof(*ip6)) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_output: first mbuf length shorter than IPv6 header length: %d.\n", data->m_len);
			IPSEC_STAT_INCREMENT(ipsec6stat.out_inval);
			error = EINVAL;
			goto ipsec_output_err;
		}
#if IPSEC_NEXUS
		if (!pcb->ipsec_use_netif)
#endif // IPSEC_NEXUS
		{
			int af = AF_INET6;
			bpf_tap_out(pcb->ipsec_ifp, DLT_NULL, data, &af, sizeof(af));
		}

		/* Isolate the IPv6 header in its own mbuf; frees the chain on
		 * failure.  NOTE(review): on the NULL path below, "error" is
		 * still 0, so this failure reports success to the caller —
		 * presumably intentional so ipsec_start keeps draining; confirm. */
		data = ipsec6_splithdr(data);
		if (data == NULL) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_output: ipsec6_splithdr returned NULL\n");
			goto ipsec_output_err;
		}

		ip6 = mtod(data, struct ip6_hdr *);

		memset(&ipsec_state, 0, sizeof(ipsec_state));
		ipsec_state.m = data;
		ipsec_state.dst = (struct sockaddr *)&ip6->ip6_dst;
		memset(&ipsec_state.ro, 0, sizeof(ipsec_state.ro));
		ipsec_state.dscp_mapping = pcb->ipsec_output_dscp_mapping;

		error = ipsec6_interface_output(&ipsec_state, interface, &ip6->ip6_nxt, ipsec_state.m);
		if (error == 0 && ipsec_state.tunneled == 4) {          /* tunneled in IPv4 - packet is gone */
			goto done;
		}
		data = ipsec_state.m;
		if (error || data == NULL) {
			if (error) {
				os_log_error(OS_LOG_DEFAULT, "ipsec_output: ipsec6_output error %d\n", error);
			}
			goto ipsec_output_err;
		}

		/* Set traffic class, set flow */
		m_set_service_class(data, pcb->ipsec_output_service_class);
		data->m_pkthdr.pkt_flowsrc = FLOWSRC_IFNET;
#if SKYWALK
		data->m_pkthdr.pkt_mpriv_srcid = interface->if_flowhash;
#else /* !SKYWALK */
		data->m_pkthdr.pkt_flowid = interface->if_flowhash;
#endif /* !SKYWALK */
		data->m_pkthdr.pkt_proto = ip6->ip6_nxt;
		data->m_pkthdr.pkt_flags = (PKTF_FLOW_ID | PKTF_FLOW_ADV | PKTF_FLOW_LOCALSRC);

		/* Increment statistics */
		length = mbuf_pkthdr_len(data);
		ifnet_stat_increment_out(interface, 1, (uint16_t)length, 0);

		/* Send to ip6_output */
		memset(&ro6, 0, sizeof(ro6));

		flags = IPV6_OUTARGS;

		memset(&ip6oa, 0, sizeof(ip6oa));
		ip6oa.ip6oa_flowadv.code = 0;
		ip6oa.ip6oa_flags = IP6OAF_SELECT_SRCIF | IP6OAF_BOUND_SRCADDR;
		if (ipsec_state.outgoing_if) {
			ip6oa.ip6oa_boundif = ipsec_state.outgoing_if;
			ip6oa.ip6oa_flags |= IP6OAF_BOUND_IF;
			ip6_output_setsrcifscope(data, ipsec_state.outgoing_if, NULL);
			ip6_output_setdstifscope(data, ipsec_state.outgoing_if, NULL);
		} else {
			ip6_output_setsrcifscope(data, IFSCOPE_UNKNOWN, NULL);
			ip6_output_setdstifscope(data, IFSCOPE_UNKNOWN, NULL);
		}
		ipsec_set_ip6oa_for_interface(pcb->ipsec_ifp, &ip6oa);

		adv = &ip6oa.ip6oa_flowadv;

		/* ip6_output consumes the mbuf regardless of its return value. */
		(void) ip6_output(data, NULL, &ro6, flags, NULL, NULL, &ip6oa);
		data = NULL;

		if (adv->code == FADV_FLOW_CONTROLLED || adv->code == FADV_SUSPENDED) {
			error = ENOBUFS;
			ifnet_disable_output(interface);
		}

		goto done;
	}
	default: {
		os_log_error(OS_LOG_DEFAULT, "ipsec_output: Received unknown packet version %d.\n", ip_version);
		error = EINVAL;
		goto ipsec_output_err;
	}
	}

done:
	return error;

ipsec_output_err:
	/* Error exit: we still own "data" (if non-NULL), so free it here. */
	if (data) {
		mbuf_freem(data);
	}
	goto done;
}
3930 
3931 static void
ipsec_start(ifnet_t interface)3932 ipsec_start(ifnet_t     interface)
3933 {
3934 	mbuf_t data;
3935 	struct ipsec_pcb *pcb = ifnet_softc(interface);
3936 
3937 	VERIFY(pcb != NULL);
3938 	for (;;) {
3939 		if (ifnet_dequeue(interface, &data) != 0) {
3940 			break;
3941 		}
3942 		if (ipsec_output(interface, data) != 0) {
3943 			break;
3944 		}
3945 	}
3946 }
3947 
3948 /* Network Interface functions */
3949 static errno_t
ipsec_demux(__unused ifnet_t interface,mbuf_t data,__unused char * frame_header,protocol_family_t * protocol)3950 ipsec_demux(__unused ifnet_t    interface,
3951     mbuf_t                          data,
3952     __unused char           *frame_header,
3953     protocol_family_t       *protocol)
3954 {
3955 	struct ip *ip;
3956 	u_int ip_version;
3957 
3958 	while (data != NULL && mbuf_len(data) < 1) {
3959 		data = mbuf_next(data);
3960 	}
3961 
3962 	if (data == NULL) {
3963 		return ENOENT;
3964 	}
3965 
3966 	ip = mtod(data, struct ip *);
3967 	ip_version = ip->ip_v;
3968 
3969 	switch (ip_version) {
3970 	case 4:
3971 		*protocol = PF_INET;
3972 		return 0;
3973 	case 6:
3974 		*protocol = PF_INET6;
3975 		return 0;
3976 	default:
3977 		*protocol = PF_UNSPEC;
3978 		break;
3979 	}
3980 
3981 	return 0;
3982 }
3983 
3984 static errno_t
ipsec_add_proto(__unused ifnet_t interface,protocol_family_t protocol,__unused const struct ifnet_demux_desc * demux_array,__unused u_int32_t demux_count)3985 ipsec_add_proto(__unused ifnet_t                                                interface,
3986     protocol_family_t                                               protocol,
3987     __unused const struct ifnet_demux_desc  *demux_array,
3988     __unused u_int32_t                                              demux_count)
3989 {
3990 	switch (protocol) {
3991 	case PF_INET:
3992 		return 0;
3993 	case PF_INET6:
3994 		return 0;
3995 	default:
3996 		break;
3997 	}
3998 
3999 	return ENOPROTOOPT;
4000 }
4001 
4002 static errno_t
ipsec_del_proto(__unused ifnet_t interface,__unused protocol_family_t protocol)4003 ipsec_del_proto(__unused ifnet_t                        interface,
4004     __unused protocol_family_t      protocol)
4005 {
4006 	return 0;
4007 }
4008 
/*
 * ipsec_ioctl - interface ioctl handler.
 *
 * Supports SIOCSIFMTU (MTU change, capped at the channel slot size
 * when the netif nexus is active), SIOCSIFFLAGS (handled entirely by
 * ifioctl(), no-op here), and SIOCSIFSUBFAMILY (interface subfamily
 * selection).  All other commands return EOPNOTSUPP.
 */
static errno_t
ipsec_ioctl(ifnet_t interface,
    u_long command,
    void *data)
{
#if IPSEC_NEXUS
	struct ipsec_pcb *pcb = ifnet_softc(interface);
#endif
	errno_t result = 0;

	switch (command) {
	case SIOCSIFMTU: {
#if IPSEC_NEXUS
		if (pcb->ipsec_use_netif) {
			// Make sure we can fit packets in the channel buffers
			if (((uint64_t)((struct ifreq*)data)->ifr_mtu) > pcb->ipsec_slot_size) {
				result = EINVAL;
			} else {
				ifnet_set_mtu(interface, (uint32_t)((struct ifreq*)data)->ifr_mtu);
			}
		} else
#endif // IPSEC_NEXUS
		{
			/* Legacy path: no channel buffers, accept the MTU as-is. */
			ifnet_set_mtu(interface, ((struct ifreq*)data)->ifr_mtu);
		}
		break;
	}

	case SIOCSIFFLAGS:
		/* ifioctl() takes care of it */
		break;

	case SIOCSIFSUBFAMILY: {
		/* Map the caller-supplied IFRTYPE_* subfamily onto the
		 * corresponding IFNET_SUBFAMILY_* value. */
		uint32_t subfamily;

		subfamily = ((struct ifreq*)data)->ifr_type.ift_subfamily;
		switch (subfamily) {
		case IFRTYPE_SUBFAMILY_BLUETOOTH:
			interface->if_subfamily = IFNET_SUBFAMILY_BLUETOOTH;
			break;
		case IFRTYPE_SUBFAMILY_WIFI:
			interface->if_subfamily = IFNET_SUBFAMILY_WIFI;
			break;
		case IFRTYPE_SUBFAMILY_QUICKRELAY:
			interface->if_subfamily = IFNET_SUBFAMILY_QUICKRELAY;
			break;
		case IFRTYPE_SUBFAMILY_DEFAULT:
			interface->if_subfamily = IFNET_SUBFAMILY_DEFAULT;
			break;
		default:
			result = EINVAL;
			break;
		}
		break;
	}

	default:
		result = EOPNOTSUPP;
	}

	return result;
}
4071 
/*
 * ipsec_detached - ifnet detach completion callback.
 *
 * Called once the interface is fully detached from the stack: drops
 * an ifnet reference (presumably the one taken at attach — confirm),
 * then frees the pcb and disposes of the ifnet under ipsec_lock to
 * serialize against control-connection teardown.
 */
static void
ipsec_detached(ifnet_t interface)
{
	struct ipsec_pcb *pcb = ifnet_softc(interface);

	(void)ifnet_release(interface);
	lck_mtx_lock(&ipsec_lock);
	ipsec_free_pcb(pcb, true);
	(void)ifnet_dispose(interface);
	lck_mtx_unlock(&ipsec_lock);
}
4083 
4084 /* Protocol Handlers */
4085 
/*
 * ipsec_proto_input - deliver a decapsulated packet up the stack.
 *
 * Sets the receive interface on the packet, taps BPF/pktap on the
 * legacy (non-netif) path, and hands the mbuf to proto_input().
 * On delivery failure the packet is freed and counted as an input
 * error.  Always returns 0 (the mbuf is consumed either way).
 */
static errno_t
ipsec_proto_input(ifnet_t interface,
    protocol_family_t     protocol,
    mbuf_t m,
    __unused char *frame_header)
{
	mbuf_pkthdr_setrcvif(m, interface);

#if IPSEC_NEXUS
	/* With the netif nexus, taps happen on the channel path instead. */
	struct ipsec_pcb *pcb = ifnet_softc(interface);
	if (!pcb->ipsec_use_netif)
#endif // IPSEC_NEXUS
	{
		uint32_t af = 0;
		struct ip *ip = mtod(m, struct ip *);
		if (ip->ip_v == 4) {
			af = AF_INET;
		} else if (ip->ip_v == 6) {
			af = AF_INET6;
		}
		bpf_tap_in(interface, DLT_NULL, m, &af, sizeof(af));
		pktap_input(interface, protocol, m, NULL);
	}

	/* Capture the length before proto_input() may consume the mbuf. */
	int32_t pktlen = m->m_pkthdr.len;
	if (proto_input(protocol, m) != 0) {
		ifnet_stat_increment_in(interface, 0, 0, 1);
		m_freem(m);
	} else {
		ifnet_stat_increment_in(interface, 1, pktlen, 0);
	}

	return 0;
}
4120 
4121 static errno_t
ipsec_proto_pre_output(__unused ifnet_t interface,protocol_family_t protocol,__unused mbuf_t * packet,__unused const struct sockaddr * dest,__unused void * route,__unused char * frame_type,__unused char * link_layer_dest)4122 ipsec_proto_pre_output(__unused ifnet_t interface,
4123     protocol_family_t    protocol,
4124     __unused mbuf_t              *packet,
4125     __unused const struct sockaddr *dest,
4126     __unused void *route,
4127     __unused char *frame_type,
4128     __unused char *link_layer_dest)
4129 {
4130 	*(protocol_family_t *)(void *)frame_type = protocol;
4131 	return 0;
4132 }
4133 
4134 static errno_t
ipsec_attach_proto(ifnet_t interface,protocol_family_t protocol)4135 ipsec_attach_proto(ifnet_t                              interface,
4136     protocol_family_t    protocol)
4137 {
4138 	struct ifnet_attach_proto_param proto;
4139 	errno_t                                                 result;
4140 
4141 	bzero(&proto, sizeof(proto));
4142 	proto.input = ipsec_proto_input;
4143 	proto.pre_output = ipsec_proto_pre_output;
4144 
4145 	result = ifnet_attach_protocol(interface, protocol, &proto);
4146 	if (result != 0 && result != EEXIST) {
4147 		os_log_error(OS_LOG_DEFAULT, "ipsec_attach_inet - ifnet_attach_protocol %d failed: %d\n",
4148 		    protocol, result);
4149 	}
4150 
4151 	return result;
4152 }
4153 
/*
 * ipsec_inject_inbound_packet - inject a decrypted packet as input.
 *
 * Netif-nexus path: appends the packet (possibly a chain) to the
 * pcb's pending input chain under ipsec_input_chain_lock and kicks
 * the RX ring so the channel picks it up.  Returns ENXIO if the data
 * path is stopped, ENOSPC if the pending chain is already over
 * if_ipsec_max_pending_input.
 *
 * Legacy path: classifies the packet with ipsec_demux() and delivers
 * it directly via ipsec_proto_input().
 */
errno_t
ipsec_inject_inbound_packet(ifnet_t     interface,
    mbuf_t      packet)
{
#if IPSEC_NEXUS
	struct ipsec_pcb *pcb = ifnet_softc(interface);

	if (pcb->ipsec_use_netif) {
		if (!ipsec_data_move_begin(pcb)) {
			os_log_info(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__,
			    if_name(pcb->ipsec_ifp));
			return ENXIO;
		}

		/* pcb lock (shared) protects the rxring pointer; the chain
		 * lock protects the pending input list itself. */
		lck_rw_lock_shared(&pcb->ipsec_pcb_lock);

		lck_mtx_lock(&pcb->ipsec_input_chain_lock);

		/* Drop (by reporting ENOSPC; caller owns the mbuf) when the
		 * backlog exceeds the configured pending-input limit. */
		if (pcb->ipsec_input_chain_count > (u_int32_t)if_ipsec_max_pending_input) {
			lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
			lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
			ipsec_data_move_end(pcb);
			return ENOSPC;
		}

		/* Splice the packet chain onto the tail of the pending list,
		 * counting every packet in the injected chain. */
		if (pcb->ipsec_input_chain != NULL) {
			pcb->ipsec_input_chain_last->m_nextpkt = packet;
		} else {
			pcb->ipsec_input_chain = packet;
		}
		pcb->ipsec_input_chain_count++;
		while (packet->m_nextpkt) {
			VERIFY(packet != packet->m_nextpkt);
			packet = packet->m_nextpkt;
			pcb->ipsec_input_chain_count++;
		}
		pcb->ipsec_input_chain_last = packet;
		lck_mtx_unlock(&pcb->ipsec_input_chain_lock);

		/* Read the ring pointer before dropping the pcb lock; notify
		 * outside the lock to avoid holding it across the wakeup. */
		kern_channel_ring_t rx_ring = pcb->ipsec_netif_rxring[0];
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);

		if (rx_ring != NULL) {
			kern_channel_notify(rx_ring, 0);
		}

		ipsec_data_move_end(pcb);
		return 0;
	} else
#endif // IPSEC_NEXUS
	{
		errno_t error;
		protocol_family_t protocol;
		if ((error = ipsec_demux(interface, packet, NULL, &protocol)) != 0) {
			return error;
		}

		return ipsec_proto_input(interface, protocol, packet, NULL);
	}
}
4214 
4215 void
ipsec_set_pkthdr_for_interface(ifnet_t interface,mbuf_t packet,int family,uint32_t flowid)4216 ipsec_set_pkthdr_for_interface(ifnet_t interface, mbuf_t packet, int family,
4217     uint32_t flowid)
4218 {
4219 #pragma unused (flowid)
4220 	if (packet != NULL && interface != NULL) {
4221 		struct ipsec_pcb *pcb = ifnet_softc(interface);
4222 		if (pcb != NULL) {
4223 			/* Set traffic class, set flow */
4224 			m_set_service_class(packet, pcb->ipsec_output_service_class);
4225 			packet->m_pkthdr.pkt_flowsrc = FLOWSRC_IFNET;
4226 #if SKYWALK
4227 			packet->m_pkthdr.pkt_mpriv_srcid = interface->if_flowhash;
4228 			packet->m_pkthdr.pkt_flowid = flowid;
4229 #else /* !SKYWALK */
4230 			packet->m_pkthdr.pkt_flowid = interface->if_flowhash;
4231 #endif /* !SKYWALK */
4232 			if (family == AF_INET) {
4233 				struct ip *ip = mtod(packet, struct ip *);
4234 				packet->m_pkthdr.pkt_proto = ip->ip_p;
4235 			} else if (family == AF_INET6) {
4236 				struct ip6_hdr *ip6 = mtod(packet, struct ip6_hdr *);
4237 				packet->m_pkthdr.pkt_proto = ip6->ip6_nxt;
4238 			}
4239 			packet->m_pkthdr.pkt_flags = (PKTF_FLOW_ID | PKTF_FLOW_ADV | PKTF_FLOW_LOCALSRC);
4240 		}
4241 	}
4242 }
4243 
4244 void
ipsec_set_ipoa_for_interface(ifnet_t interface,struct ip_out_args * ipoa)4245 ipsec_set_ipoa_for_interface(ifnet_t interface, struct ip_out_args *ipoa)
4246 {
4247 	struct ipsec_pcb *pcb;
4248 
4249 	if (interface == NULL || ipoa == NULL) {
4250 		return;
4251 	}
4252 	pcb = ifnet_softc(interface);
4253 
4254 	if (net_qos_policy_restricted == 0) {
4255 		ipoa->ipoa_flags |= IPOAF_QOSMARKING_ALLOWED;
4256 		ipoa->ipoa_sotc = so_svc2tc(pcb->ipsec_output_service_class);
4257 	} else if (pcb->ipsec_output_service_class != MBUF_SC_VO ||
4258 	    net_qos_policy_restrict_avapps != 0) {
4259 		ipoa->ipoa_flags &= ~IPOAF_QOSMARKING_ALLOWED;
4260 	} else {
4261 		ipoa->ipoa_flags |= IP6OAF_QOSMARKING_ALLOWED;
4262 		ipoa->ipoa_sotc = SO_TC_VO;
4263 	}
4264 }
4265 
4266 void
ipsec_set_ip6oa_for_interface(ifnet_t interface,struct ip6_out_args * ip6oa)4267 ipsec_set_ip6oa_for_interface(ifnet_t interface, struct ip6_out_args *ip6oa)
4268 {
4269 	struct ipsec_pcb *pcb;
4270 
4271 	if (interface == NULL || ip6oa == NULL) {
4272 		return;
4273 	}
4274 	pcb = ifnet_softc(interface);
4275 
4276 	if (net_qos_policy_restricted == 0) {
4277 		ip6oa->ip6oa_flags |= IPOAF_QOSMARKING_ALLOWED;
4278 		ip6oa->ip6oa_sotc = so_svc2tc(pcb->ipsec_output_service_class);
4279 	} else if (pcb->ipsec_output_service_class != MBUF_SC_VO ||
4280 	    net_qos_policy_restrict_avapps != 0) {
4281 		ip6oa->ip6oa_flags &= ~IPOAF_QOSMARKING_ALLOWED;
4282 	} else {
4283 		ip6oa->ip6oa_flags |= IP6OAF_QOSMARKING_ALLOWED;
4284 		ip6oa->ip6oa_sotc = SO_TC_VO;
4285 	}
4286 }
4287 
4288 static boolean_t
ipsec_data_move_begin(struct ipsec_pcb * pcb)4289 ipsec_data_move_begin(struct ipsec_pcb *pcb)
4290 {
4291 	boolean_t ret = 0;
4292 
4293 	lck_mtx_lock_spin(&pcb->ipsec_pcb_data_move_lock);
4294 	if ((ret = IPSEC_IS_DATA_PATH_READY(pcb))) {
4295 		pcb->ipsec_pcb_data_move++;
4296 	}
4297 	lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
4298 
4299 	return ret;
4300 }
4301 
4302 static void
ipsec_data_move_end(struct ipsec_pcb * pcb)4303 ipsec_data_move_end(struct ipsec_pcb *pcb)
4304 {
4305 	lck_mtx_lock_spin(&pcb->ipsec_pcb_data_move_lock);
4306 	VERIFY(pcb->ipsec_pcb_data_move > 0);
4307 	/*
4308 	 * if there's no more thread moving data, wakeup any
4309 	 * drainers that's blocked waiting for this.
4310 	 */
4311 	if (--pcb->ipsec_pcb_data_move == 0 && pcb->ipsec_pcb_drainers > 0) {
4312 		wakeup(&(pcb->ipsec_pcb_data_move));
4313 	}
4314 	lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
4315 }
4316 
/*
 * ipsec_data_move_drain - wait for all data-path threads to leave.
 *
 * Must be called only after the data path has been marked not ready
 * (see ipsec_wait_data_move_drain), so no new thread can enter via
 * ipsec_data_move_begin().  Sleeps on the in-flight counter until it
 * reaches zero; ipsec_data_move_end() issues the wakeup.
 */
static void
ipsec_data_move_drain(struct ipsec_pcb *pcb)
{
	lck_mtx_lock(&pcb->ipsec_pcb_data_move_lock);
	/* data path must already be marked as not ready */
	VERIFY(!IPSEC_IS_DATA_PATH_READY(pcb));
	pcb->ipsec_pcb_drainers++;
	while (pcb->ipsec_pcb_data_move != 0) {
		/* msleep drops the lock while blocked and retakes it on wake. */
		(void)msleep(&(pcb->ipsec_pcb_data_move), &pcb->ipsec_pcb_data_move_lock,
		    (PZERO - 1), __func__, NULL);
	}
	VERIFY(!IPSEC_IS_DATA_PATH_READY(pcb));
	VERIFY(pcb->ipsec_pcb_drainers > 0);
	pcb->ipsec_pcb_drainers--;
	lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
}
4333 
/*
 * ipsec_wait_data_move_drain - stop and quiesce the data path.
 *
 * Marks the data path as not ready (so ipsec_data_move_begin() will
 * refuse new entrants), then blocks until every thread already inside
 * the data path has called ipsec_data_move_end().
 */
static void
ipsec_wait_data_move_drain(struct ipsec_pcb *pcb)
{
	/*
	 * Mark the data path as not usable.
	 */
	lck_mtx_lock(&pcb->ipsec_pcb_data_move_lock);
	IPSEC_CLR_DATA_PATH_READY(pcb);
	lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);

	/* Wait until all threads in the data paths are done. */
	ipsec_data_move_drain(pcb);
}
4347