xref: /xnu-12377.41.6/bsd/net/if_ipsec.c (revision bbb1b6f9e71b8cdde6e5cd6f4841f207dee3d828)
1 /*
2  * Copyright (c) 2012-2024 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 
30 #include <sys/systm.h>
31 #include <sys/kern_control.h>
32 #include <net/kpi_protocol.h>
33 #include <net/kpi_interface.h>
34 #include <sys/socket.h>
35 #include <sys/socketvar.h>
36 #include <net/if.h>
37 #include <net/if_types.h>
38 #include <net/bpf.h>
39 #include <net/if_ipsec.h>
40 #include <sys/mbuf.h>
41 #include <sys/sockio.h>
42 #include <netinet/in.h>
43 #include <netinet/ip6.h>
44 #include <netinet6/in6_var.h>
45 #include <netinet6/ip6_var.h>
46 #include <sys/kauth.h>
47 #include <netinet6/ipsec.h>
48 #include <netinet6/ipsec6.h>
49 #include <netinet6/esp.h>
50 #include <netinet6/esp6.h>
51 #include <netinet/ip.h>
52 #include <net/flowadv.h>
53 #include <net/necp.h>
54 #include <netkey/key.h>
55 #include <net/pktap.h>
56 #include <kern/zalloc.h>
57 #include <os/log.h>
58 
59 #if SKYWALK
60 #include <skywalk/os_skywalk_private.h>
61 #include <skywalk/nexus/flowswitch/nx_flowswitch.h>
62 #include <skywalk/nexus/netif/nx_netif.h>
63 #define IPSEC_NEXUS 1
64 #else // SKYWALK
65 #define IPSEC_NEXUS 0
66 #endif // SKYWALK
67 
68 extern int net_qos_policy_restricted;
69 extern int net_qos_policy_restrict_avapps;
70 
71 /* Kernel Control functions */
72 static errno_t  ipsec_ctl_setup(u_int32_t *unit, void **unitinfo);
73 static errno_t  ipsec_ctl_bind(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
74     void **unitinfo);
75 static errno_t  ipsec_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
76     void **unitinfo);
77 static errno_t  ipsec_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t unit,
78     void *unitinfo);
79 static errno_t  ipsec_ctl_send(kern_ctl_ref kctlref, u_int32_t unit,
80     void *unitinfo, mbuf_t m, int flags);
81 static errno_t  ipsec_ctl_getopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo,
82     int opt, void *__sized_by(*len)data, size_t *len);
83 static errno_t  ipsec_ctl_setopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo,
84     int opt, void *__sized_by(len)data, size_t len);
85 
86 /* Network Interface functions */
87 static void     ipsec_start(ifnet_t     interface);
88 static errno_t  ipsec_output(ifnet_t interface, mbuf_t data);
89 static errno_t  ipsec_demux(ifnet_t interface, mbuf_t data, char *frame_header,
90     protocol_family_t *protocol);
91 static errno_t  ipsec_add_proto(ifnet_t interface, protocol_family_t protocol,
92     const struct ifnet_demux_desc *demux_array,
93     u_int32_t demux_count);
94 static errno_t  ipsec_del_proto(ifnet_t interface, protocol_family_t protocol);
95 static errno_t  ipsec_ioctl(ifnet_t interface, u_long cmd, void *data);
96 static void             ipsec_detached(ifnet_t interface);
97 
98 /* Protocol handlers */
99 static errno_t  ipsec_attach_proto(ifnet_t interface, protocol_family_t proto);
100 static errno_t  ipsec_proto_input(ifnet_t interface, protocol_family_t protocol,
101     mbuf_t m, char *frame_header);
102 static errno_t ipsec_proto_pre_output(ifnet_t interface, protocol_family_t protocol,
103     mbuf_t *packet, const struct sockaddr *dest, void *route,
104     char *frame_type, char *link_layer_dest);
105 
106 static kern_ctl_ref     ipsec_kctlref;
107 static LCK_ATTR_DECLARE(ipsec_lck_attr, 0, 0);
108 static LCK_GRP_DECLARE(ipsec_lck_grp, "ipsec");
109 static LCK_MTX_DECLARE_ATTR(ipsec_lock, &ipsec_lck_grp, &ipsec_lck_attr);
110 
111 #if IPSEC_NEXUS
112 
113 SYSCTL_DECL(_net_ipsec);
114 SYSCTL_NODE(_net, OID_AUTO, ipsec, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "IPsec");
115 static int if_ipsec_verify_interface_creation = 0;
116 SYSCTL_INT(_net_ipsec, OID_AUTO, verify_interface_creation, CTLFLAG_RW | CTLFLAG_LOCKED, &if_ipsec_verify_interface_creation, 0, "");
117 
118 #define IPSEC_IF_VERIFY(_e)             if (__improbable(if_ipsec_verify_interface_creation)) { VERIFY(_e); }
119 
120 #define IPSEC_IF_DEFAULT_SLOT_SIZE 2048
121 #define IPSEC_IF_DEFAULT_RING_SIZE 64
122 #define IPSEC_IF_DEFAULT_TX_FSW_RING_SIZE 64
123 #define IPSEC_IF_DEFAULT_RX_FSW_RING_SIZE 128
124 #define IPSEC_IF_DEFAULT_BUF_SEG_SIZE   skmem_usr_buf_seg_size
125 
126 #define IPSEC_IF_WMM_RING_COUNT NEXUS_NUM_WMM_QUEUES
127 #define IPSEC_IF_MAX_RING_COUNT IPSEC_IF_WMM_RING_COUNT
128 #define IPSEC_NETIF_WMM_TX_RING_COUNT IPSEC_IF_WMM_RING_COUNT
129 #define IPSEC_NETIF_WMM_RX_RING_COUNT 1
130 #define IPSEC_NETIF_MAX_TX_RING_COUNT IPSEC_NETIF_WMM_TX_RING_COUNT
131 #define IPSEC_NETIF_MAX_RX_RING_COUNT IPSEC_NETIF_WMM_RX_RING_COUNT
132 
133 #define IPSEC_IF_MIN_RING_SIZE 8
134 #define IPSEC_IF_MAX_RING_SIZE 1024
135 
136 #define IPSEC_IF_MIN_SLOT_SIZE 1024
137 #define IPSEC_IF_MAX_SLOT_SIZE (16 * 1024)
138 
139 #define IPSEC_DEFAULT_MAX_PENDING_INPUT_COUNT 512
140 
141 #define IPSEC_KPIPE_FLAG_WAKE_PKT 0x01
142 
143 static int if_ipsec_max_pending_input = IPSEC_DEFAULT_MAX_PENDING_INPUT_COUNT;
144 
145 static int sysctl_if_ipsec_ring_size SYSCTL_HANDLER_ARGS;
146 static int sysctl_if_ipsec_tx_fsw_ring_size SYSCTL_HANDLER_ARGS;
147 static int sysctl_if_ipsec_rx_fsw_ring_size SYSCTL_HANDLER_ARGS;
148 
149 static int if_ipsec_ring_size = IPSEC_IF_DEFAULT_RING_SIZE;
150 static int if_ipsec_tx_fsw_ring_size = IPSEC_IF_DEFAULT_TX_FSW_RING_SIZE;
151 static int if_ipsec_rx_fsw_ring_size = IPSEC_IF_DEFAULT_RX_FSW_RING_SIZE;
152 
153 SYSCTL_INT(_net_ipsec, OID_AUTO, max_pending_input, CTLFLAG_LOCKED | CTLFLAG_RW, &if_ipsec_max_pending_input, 0, "");
154 SYSCTL_PROC(_net_ipsec, OID_AUTO, ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
155     &if_ipsec_ring_size, IPSEC_IF_DEFAULT_RING_SIZE, &sysctl_if_ipsec_ring_size, "I", "");
156 SYSCTL_PROC(_net_ipsec, OID_AUTO, tx_fsw_ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
157     &if_ipsec_tx_fsw_ring_size, IPSEC_IF_DEFAULT_TX_FSW_RING_SIZE, &sysctl_if_ipsec_tx_fsw_ring_size, "I", "");
158 SYSCTL_PROC(_net_ipsec, OID_AUTO, rx_fsw_ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
159     &if_ipsec_rx_fsw_ring_size, IPSEC_IF_DEFAULT_RX_FSW_RING_SIZE, &sysctl_if_ipsec_rx_fsw_ring_size, "I", "");
160 
161 static int if_ipsec_debug = 0;
162 SYSCTL_INT(_net_ipsec, OID_AUTO, debug, CTLFLAG_LOCKED | CTLFLAG_RW, &if_ipsec_debug, 0, "");
163 
164 static errno_t
165 ipsec_register_nexus(void);
166 
/*
 * UUIDs identifying the nexus objects backing an ipsec interface.
 * Field names suggest netif provider/instance and flowswitch
 * provider/instance/device/agent roles — usage is outside this view;
 * confirm against the nexus setup code.
 */
typedef struct ipsec_nx {
	uuid_t if_provider;
	uuid_t if_instance;
	uuid_t fsw_provider;
	uuid_t fsw_instance;
	uuid_t fsw_device;
	uuid_t fsw_agent;
} *ipsec_nx_t;
175 
176 static nexus_controller_t ipsec_ncd;
177 static int ipsec_ncd_refcount;
178 static uuid_t ipsec_kpipe_uuid;
179 
180 #endif // IPSEC_NEXUS
181 
182 /* Control block allocated for each kernel control connection */
struct ipsec_pcb {
	TAILQ_ENTRY(ipsec_pcb)  ipsec_chain;            // linkage on the global ipsec_head list
	kern_ctl_ref            ipsec_ctlref;           // kernel control this pcb belongs to
	ifnet_t                 ipsec_ifp;              // attached network interface
	u_int32_t               ipsec_unit;             // control unit number; 0 after ctl disconnect (see ipsec_interface_isvalid)
	u_int32_t               ipsec_unique_id;
	// These external flags can be set with IPSEC_OPT_FLAGS
	u_int32_t               ipsec_external_flags;
	// These internal flags are only used within this driver
	u_int32_t               ipsec_internal_flags;   // e.g. IPSEC_FLAGS_KPIPE_ALLOCATED
	u_int32_t               ipsec_input_frag_size;
	bool                    ipsec_frag_size_set;    // true once ipsec_input_frag_size has been configured
	int                     ipsec_ext_ifdata_stats; // nonzero: stats kept externally, skip ifnet_stat_increment_out
	mbuf_svc_class_t        ipsec_output_service_class;
	char                    ipsec_if_xname[IFXNAMSIZ];      // external interface name (used in logging)
	char                    ipsec_unique_name[IFXNAMSIZ];
	// PCB lock protects state fields, like ipsec_kpipe_count
	decl_lck_rw_data(, ipsec_pcb_lock);
	// lock to protect ipsec_pcb_data_move & ipsec_pcb_drainers
	decl_lck_mtx_data(, ipsec_pcb_data_move_lock);
	u_int32_t               ipsec_pcb_data_move; /* number of data moving contexts */
	u_int32_t               ipsec_pcb_drainers; /* number of threads waiting to drain */
	u_int32_t               ipsec_pcb_data_path_state; /* internal state of interface data path (IPSEC_PCB_DATA_PATH_READY) */
	ipsec_dscp_mapping_t    ipsec_output_dscp_mapping;

#if IPSEC_NEXUS
	lck_mtx_t               ipsec_input_chain_lock;
	lck_mtx_t               ipsec_kpipe_encrypt_lock;   // serializes kpipe encrypt calls (held in kpipe sync_rx)
	lck_mtx_t               ipsec_kpipe_decrypt_lock;   // presumably serializes kpipe decrypt calls — usage outside this view
	struct mbuf *           ipsec_input_chain;          // head of pending input mbuf list
	struct mbuf *           ipsec_input_chain_last;     // tail of pending input mbuf list
	u_int32_t               ipsec_input_chain_count;
	// Input chain lock protects the list of input mbufs
	// The input chain lock must be taken AFTER the PCB lock if both are held
	struct ipsec_nx         ipsec_nx;                   // nexus instance/provider UUIDs
	u_int32_t               ipsec_kpipe_count;          // number of kernel pipes; equals IPSEC_IF_WMM_RING_COUNT in WMM mode
	pid_t                   ipsec_kpipe_pid;
	uuid_t                  ipsec_kpipe_proc_uuid;
	uuid_t                  ipsec_kpipe_uuid[IPSEC_IF_MAX_RING_COUNT];  // per-ring channel UUIDs, matched in kpipe_ring_init
	void *                  ipsec_kpipe_rxring[IPSEC_IF_MAX_RING_COUNT];
	void *                  ipsec_kpipe_txring[IPSEC_IF_MAX_RING_COUNT];
	kern_pbufpool_t         ipsec_kpipe_pp;
	u_int32_t               ipsec_kpipe_tx_ring_size;
	u_int32_t               ipsec_kpipe_rx_ring_size;

	kern_nexus_t            ipsec_netif_nexus;          // set in netif_prepare, cleared in nexus_disconnected
	kern_pbufpool_t         ipsec_netif_pp;
	void *                  ipsec_netif_rxring[IPSEC_NETIF_MAX_RX_RING_COUNT];
	void *                  ipsec_netif_txring[IPSEC_NETIF_MAX_TX_RING_COUNT];
	uint64_t                ipsec_netif_txring_size;

	u_int32_t               ipsec_slot_size;            // buffer slot size; upper bound on valid packet length
	u_int32_t               ipsec_netif_ring_size;
	u_int32_t               ipsec_tx_fsw_ring_size;
	u_int32_t               ipsec_rx_fsw_ring_size;
	bool                    ipsec_use_netif;
	bool                    ipsec_needs_netagent;       // reported via ipsec_interface_needs_netagent()
#endif // IPSEC_NEXUS
};
242 
243 /* These are internal flags not exposed outside this file */
244 #define IPSEC_FLAGS_KPIPE_ALLOCATED 1
245 
246 /* data movement refcounting functions */
247 static boolean_t ipsec_data_move_begin(struct ipsec_pcb *pcb);
248 static void ipsec_data_move_end(struct ipsec_pcb *pcb);
249 static void ipsec_wait_data_move_drain(struct ipsec_pcb *pcb);
250 
251 /* Data path states */
252 #define IPSEC_PCB_DATA_PATH_READY    0x1
253 
254 /* Macros to set/clear/test data path states */
255 #define IPSEC_SET_DATA_PATH_READY(_pcb) ((_pcb)->ipsec_pcb_data_path_state |= IPSEC_PCB_DATA_PATH_READY)
256 #define IPSEC_CLR_DATA_PATH_READY(_pcb) ((_pcb)->ipsec_pcb_data_path_state &= ~IPSEC_PCB_DATA_PATH_READY)
257 #define IPSEC_IS_DATA_PATH_READY(_pcb) (((_pcb)->ipsec_pcb_data_path_state & IPSEC_PCB_DATA_PATH_READY) != 0)
258 
259 #if IPSEC_NEXUS
260 /* Macros to clear/set/test flags. */
261 static inline void
ipsec_flag_set(struct ipsec_pcb * pcb,uint32_t flag)262 ipsec_flag_set(struct ipsec_pcb *pcb, uint32_t flag)
263 {
264 	pcb->ipsec_internal_flags |= flag;
265 }
266 static inline void
ipsec_flag_clr(struct ipsec_pcb * pcb,uint32_t flag)267 ipsec_flag_clr(struct ipsec_pcb *pcb, uint32_t flag)
268 {
269 	pcb->ipsec_internal_flags &= ~flag;
270 }
271 
272 static inline bool
ipsec_flag_isset(struct ipsec_pcb * pcb,uint32_t flag)273 ipsec_flag_isset(struct ipsec_pcb *pcb, uint32_t flag)
274 {
275 	return !!(pcb->ipsec_internal_flags & flag);
276 }
277 #endif // IPSEC_NEXUS
278 
279 TAILQ_HEAD(ipsec_list, ipsec_pcb) ipsec_head;
280 
281 static KALLOC_TYPE_DEFINE(ipsec_pcb_zone, struct ipsec_pcb, NET_KT_DEFAULT);
282 
283 #define IPSECQ_MAXLEN 256
284 
285 #if IPSEC_NEXUS
286 static int
287 sysctl_if_ipsec_ring_size SYSCTL_HANDLER_ARGS
288 {
289 #pragma unused(arg1, arg2)
290 	int value = if_ipsec_ring_size;
291 
292 	int error = sysctl_handle_int(oidp, &value, 0, req);
293 	if (error || !req->newptr) {
294 		return error;
295 	}
296 
297 	if (value < IPSEC_IF_MIN_RING_SIZE ||
298 	    value > IPSEC_IF_MAX_RING_SIZE) {
299 		return EINVAL;
300 	}
301 
302 	if_ipsec_ring_size = value;
303 
304 	return 0;
305 }
306 
307 static int
308 sysctl_if_ipsec_tx_fsw_ring_size SYSCTL_HANDLER_ARGS
309 {
310 #pragma unused(arg1, arg2)
311 	int value = if_ipsec_tx_fsw_ring_size;
312 
313 	int error = sysctl_handle_int(oidp, &value, 0, req);
314 	if (error || !req->newptr) {
315 		return error;
316 	}
317 
318 	if (value < IPSEC_IF_MIN_RING_SIZE ||
319 	    value > IPSEC_IF_MAX_RING_SIZE) {
320 		return EINVAL;
321 	}
322 
323 	if_ipsec_tx_fsw_ring_size = value;
324 
325 	return 0;
326 }
327 
328 static int
329 sysctl_if_ipsec_rx_fsw_ring_size SYSCTL_HANDLER_ARGS
330 {
331 #pragma unused(arg1, arg2)
332 	int value = if_ipsec_rx_fsw_ring_size;
333 
334 	int error = sysctl_handle_int(oidp, &value, 0, req);
335 	if (error || !req->newptr) {
336 		return error;
337 	}
338 
339 	if (value < IPSEC_IF_MIN_RING_SIZE ||
340 	    value > IPSEC_IF_MAX_RING_SIZE) {
341 		return EINVAL;
342 	}
343 
344 	if_ipsec_rx_fsw_ring_size = value;
345 
346 	return 0;
347 }
348 
349 
350 static inline bool
ipsec_in_wmm_mode(struct ipsec_pcb * pcb)351 ipsec_in_wmm_mode(struct ipsec_pcb *pcb)
352 {
353 	return pcb->ipsec_kpipe_count == IPSEC_IF_WMM_RING_COUNT;
354 }
355 
356 #endif // IPSEC_NEXUS
357 
/*
 * Register the ipsec kernel control and the PF_INET / PF_INET6 protocol
 * plumbers. Returns 0 on success or the first registration error; partial
 * registrations are rolled back before returning an error.
 */
errno_t
ipsec_register_control(void)
{
	struct kern_ctl_reg     kern_ctl;
	errno_t                 result = 0;

#if IPSEC_NEXUS
	/* NOTE(review): the return value of ipsec_register_nexus() is ignored,
	 * so control registration proceeds even if the nexus domain provider
	 * failed to register — confirm this is intentional. */
	ipsec_register_nexus();
#endif // IPSEC_NEXUS

	TAILQ_INIT(&ipsec_head);

	bzero(&kern_ctl, sizeof(kern_ctl));
	strlcpy(kern_ctl.ctl_name, IPSEC_CONTROL_NAME, sizeof(kern_ctl.ctl_name));
	/* strlcpy already NUL-terminates; this is defensive. */
	kern_ctl.ctl_name[sizeof(kern_ctl.ctl_name) - 1] = 0;
	kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED | CTL_FLAG_REG_SETUP; /* Require root */
	kern_ctl.ctl_sendsize = 64 * 1024;
	kern_ctl.ctl_recvsize = 64 * 1024;
	kern_ctl.ctl_setup = ipsec_ctl_setup;
	kern_ctl.ctl_bind = ipsec_ctl_bind;
	kern_ctl.ctl_connect = ipsec_ctl_connect;
	kern_ctl.ctl_disconnect = ipsec_ctl_disconnect;
	kern_ctl.ctl_send = ipsec_ctl_send;
	kern_ctl.ctl_setopt = ipsec_ctl_setopt;
	kern_ctl.ctl_getopt = ipsec_ctl_getopt;

	result = ctl_register(&kern_ctl, &ipsec_kctlref);
	if (result != 0) {
		os_log_error(OS_LOG_DEFAULT, "ipsec_register_control - ctl_register failed: %d\n", result);
		return result;
	}

	/* Register the protocol plumbers */
	if ((result = proto_register_plumber(PF_INET, IFNET_FAMILY_IPSEC,
	    ipsec_attach_proto, NULL)) != 0) {
		os_log_error(OS_LOG_DEFAULT, "ipsec_register_control - proto_register_plumber(PF_INET, IFNET_FAMILY_IPSEC) failed: %d\n",
		    result);
		/* Roll back the control registration on failure. */
		ctl_deregister(ipsec_kctlref);
		return result;
	}

	/* Register the protocol plumbers */
	if ((result = proto_register_plumber(PF_INET6, IFNET_FAMILY_IPSEC,
	    ipsec_attach_proto, NULL)) != 0) {
		/* Roll back both earlier registrations on failure. */
		proto_unregister_plumber(PF_INET, IFNET_FAMILY_IPSEC);
		ctl_deregister(ipsec_kctlref);
		os_log_error(OS_LOG_DEFAULT, "ipsec_register_control - proto_register_plumber(PF_INET6, IFNET_FAMILY_IPSEC) failed: %d\n",
		    result);
		return result;
	}

	return 0;
}
411 
412 /* Helpers */
413 int
ipsec_interface_isvalid(ifnet_t interface)414 ipsec_interface_isvalid(ifnet_t interface)
415 {
416 	struct ipsec_pcb *__single pcb = NULL;
417 
418 	if (interface == NULL) {
419 		return 0;
420 	}
421 
422 	pcb = ifnet_softc(interface);
423 
424 	if (pcb == NULL) {
425 		return 0;
426 	}
427 
428 	/* When ctl disconnects, ipsec_unit is set to 0 */
429 	if (pcb->ipsec_unit == 0) {
430 		return 0;
431 	}
432 
433 	return 1;
434 }
435 
436 #if IPSEC_NEXUS
437 boolean_t
ipsec_interface_needs_netagent(ifnet_t interface)438 ipsec_interface_needs_netagent(ifnet_t interface)
439 {
440 	struct ipsec_pcb *__single pcb = NULL;
441 
442 	if (interface == NULL) {
443 		return FALSE;
444 	}
445 
446 	pcb = ifnet_softc(interface);
447 
448 	if (pcb == NULL) {
449 		return FALSE;
450 	}
451 
452 	return pcb->ipsec_needs_netagent == true;
453 }
454 #endif // IPSEC_NEXUS
455 
456 static errno_t
ipsec_ifnet_set_attrs(ifnet_t ifp)457 ipsec_ifnet_set_attrs(ifnet_t ifp)
458 {
459 	/* Set flags and additional information. */
460 	ifnet_set_mtu(ifp, 1500);
461 	ifnet_set_flags(ifp, IFF_UP | IFF_MULTICAST | IFF_POINTOPOINT, 0xffff);
462 
463 	/* The interface must generate its own IPv6 LinkLocal address,
464 	 * if possible following the recommendation of RFC2472 to the 64bit interface ID
465 	 */
466 	ifnet_set_eflags(ifp, IFEF_NOAUTOIPV6LL, IFEF_NOAUTOIPV6LL);
467 
468 #if !IPSEC_NEXUS
469 	/* Reset the stats in case as the interface may have been recycled */
470 	struct ifnet_stats_param stats;
471 	bzero(&stats, sizeof(struct ifnet_stats_param));
472 	ifnet_set_stat(ifp, &stats);
473 #endif // !IPSEC_NEXUS
474 
475 	return 0;
476 }
477 
478 #if IPSEC_NEXUS
479 
480 static uuid_t ipsec_nx_dom_prov;
481 
482 static errno_t
ipsec_nxdp_init(__unused kern_nexus_domain_provider_t domprov)483 ipsec_nxdp_init(__unused kern_nexus_domain_provider_t domprov)
484 {
485 	return 0;
486 }
487 
488 static void
ipsec_nxdp_fini(__unused kern_nexus_domain_provider_t domprov)489 ipsec_nxdp_fini(__unused kern_nexus_domain_provider_t domprov)
490 {
491 	// Ignore
492 }
493 
494 static errno_t
ipsec_register_nexus(void)495 ipsec_register_nexus(void)
496 {
497 	const struct kern_nexus_domain_provider_init dp_init = {
498 		.nxdpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
499 		.nxdpi_flags = 0,
500 		.nxdpi_init = ipsec_nxdp_init,
501 		.nxdpi_fini = ipsec_nxdp_fini
502 	};
503 	nexus_domain_provider_name_t domain_provider_name = "com.apple.ipsec";
504 	errno_t err = 0;
505 
506 	/* ipsec_nxdp_init() is called before this function returns */
507 	err = kern_nexus_register_domain_provider(NEXUS_TYPE_NET_IF,
508 	    domain_provider_name,
509 	    &dp_init, sizeof(dp_init),
510 	    &ipsec_nx_dom_prov);
511 	if (err != 0) {
512 		os_log_error(OS_LOG_DEFAULT, "%s: failed to register domain provider\n", __func__);
513 		return err;
514 	}
515 	return 0;
516 }
517 
518 static errno_t
ipsec_netif_prepare(kern_nexus_t nexus,ifnet_t ifp)519 ipsec_netif_prepare(kern_nexus_t nexus, ifnet_t ifp)
520 {
521 	struct ipsec_pcb *__single pcb = kern_nexus_get_context(nexus);
522 	pcb->ipsec_netif_nexus = nexus;
523 	return ipsec_ifnet_set_attrs(ifp);
524 }
525 
526 static errno_t
ipsec_nexus_pre_connect(kern_nexus_provider_t nxprov,proc_t p,kern_nexus_t nexus,nexus_port_t nexus_port,kern_channel_t channel,void ** ch_ctx)527 ipsec_nexus_pre_connect(kern_nexus_provider_t nxprov,
528     proc_t p, kern_nexus_t nexus,
529     nexus_port_t nexus_port, kern_channel_t channel, void **ch_ctx)
530 {
531 #pragma unused(nxprov, p)
532 #pragma unused(nexus, nexus_port, channel, ch_ctx)
533 	return 0;
534 }
535 
536 static errno_t
ipsec_nexus_connected(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel)537 ipsec_nexus_connected(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
538     kern_channel_t channel)
539 {
540 #pragma unused(nxprov, channel)
541 	struct ipsec_pcb *__single pcb = kern_nexus_get_context(nexus);
542 	boolean_t ok = ifnet_get_ioref(pcb->ipsec_ifp);
543 	/* Mark the data path as ready */
544 	if (ok) {
545 		lck_mtx_lock(&pcb->ipsec_pcb_data_move_lock);
546 		IPSEC_SET_DATA_PATH_READY(pcb);
547 		lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
548 	}
549 	return ok ? 0 : ENXIO;
550 }
551 
552 static void
ipsec_nexus_pre_disconnect(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel)553 ipsec_nexus_pre_disconnect(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
554     kern_channel_t channel)
555 {
556 #pragma unused(nxprov, channel)
557 	struct ipsec_pcb *__single pcb = kern_nexus_get_context(nexus);
558 
559 	VERIFY(pcb->ipsec_kpipe_count != 0);
560 
561 	/* Wait until all threads in the data paths are done. */
562 	ipsec_wait_data_move_drain(pcb);
563 }
564 
565 static void
ipsec_netif_pre_disconnect(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel)566 ipsec_netif_pre_disconnect(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
567     kern_channel_t channel)
568 {
569 #pragma unused(nxprov, channel)
570 	struct ipsec_pcb *__single pcb = kern_nexus_get_context(nexus);
571 
572 	/* Wait until all threads in the data paths are done. */
573 	ipsec_wait_data_move_drain(pcb);
574 }
575 
576 static void
ipsec_nexus_disconnected(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel)577 ipsec_nexus_disconnected(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
578     kern_channel_t channel)
579 {
580 #pragma unused(nxprov, channel)
581 	struct ipsec_pcb *__single pcb = kern_nexus_get_context(nexus);
582 	if (pcb->ipsec_netif_nexus == nexus) {
583 		pcb->ipsec_netif_nexus = NULL;
584 	}
585 	ifnet_decr_iorefcnt(pcb->ipsec_ifp);
586 }
587 
/*
 * Ring-init callback for a kernel pipe channel: map the channel's nexus
 * UUID to a kpipe index, stash that index as the ring context, and record
 * the ring pointer in the matching rx/tx slot. Returns ENOENT when the
 * channel does not correspond to any configured kpipe.
 */
static errno_t
ipsec_kpipe_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_t channel, kern_channel_ring_t ring, boolean_t is_tx_ring,
    void **ring_ctx)
{
#pragma unused(nxprov)
#pragma unused(channel)
	struct ipsec_pcb *__single pcb = kern_nexus_get_context(nexus);
	uint8_t ring_idx;

	/* Find the kpipe slot whose UUID matches this channel's nexus UUID. */
	for (ring_idx = 0; ring_idx < pcb->ipsec_kpipe_count; ring_idx++) {
		if (!uuid_compare(channel->ch_info->cinfo_nx_uuid, pcb->ipsec_kpipe_uuid[ring_idx])) {
			break;
		}
	}

	if (ring_idx == pcb->ipsec_kpipe_count) {
		/* No configured kpipe matches this channel. */
		uuid_string_t uuidstr;
		uuid_unparse(channel->ch_info->cinfo_nx_uuid, uuidstr);
		os_log_error(OS_LOG_DEFAULT, "%s: %s cannot find channel %s\n", __func__, pcb->ipsec_if_xname, uuidstr);
		return ENOENT;
	}

	/* Store the small index directly in the context pointer; it is read
	 * back in the sync callbacks via kern_channel_ring_get_context(). */
	*ring_ctx = __unsafe_forge_single(void *, (uintptr_t)ring_idx);

	/* Each slot may be populated at most once per init/fini cycle. */
	if (!is_tx_ring) {
		VERIFY(pcb->ipsec_kpipe_rxring[ring_idx] == NULL);
		pcb->ipsec_kpipe_rxring[ring_idx] = ring;
	} else {
		VERIFY(pcb->ipsec_kpipe_txring[ring_idx] == NULL);
		pcb->ipsec_kpipe_txring[ring_idx] = ring;
	}
	return 0;
}
622 
623 static void
ipsec_kpipe_ring_fini(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring)624 ipsec_kpipe_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
625     kern_channel_ring_t ring)
626 {
627 #pragma unused(nxprov)
628 	bool found = false;
629 	struct ipsec_pcb *__single pcb = kern_nexus_get_context(nexus);
630 
631 	for (unsigned int i = 0; i < pcb->ipsec_kpipe_count; i++) {
632 		if (pcb->ipsec_kpipe_rxring[i] == ring) {
633 			pcb->ipsec_kpipe_rxring[i] = NULL;
634 			found = true;
635 		} else if (pcb->ipsec_kpipe_txring[i] == ring) {
636 			pcb->ipsec_kpipe_txring[i] = NULL;
637 			found = true;
638 		}
639 	}
640 	VERIFY(found);
641 }
642 
/*
 * Sync-TX callback for the kernel pipe: there is nothing to move here —
 * if the TX ring holds at least one slot, kick the netif RX ring, whose
 * sync path performs the actual transfer. Always returns 0.
 */
static errno_t
ipsec_kpipe_sync_tx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t tx_ring, uint32_t flags)
{
#pragma unused(nxprov)
#pragma unused(flags)
	struct ipsec_pcb *__single pcb = kern_nexus_get_context(nexus);

	/* Take a data-move reference; fails once the data path is stopped. */
	if (!ipsec_data_move_begin(pcb)) {
		os_log_info(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
		return 0;
	}

	lck_rw_lock_shared(&pcb->ipsec_pcb_lock);

	/* Bail if the kernel pipe has already been torn down. */
	if (!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		ipsec_data_move_end(pcb);
		return 0;
	}

	VERIFY(pcb->ipsec_kpipe_count);

	kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
	if (tx_slot == NULL) {
		// Nothing to write, bail
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		ipsec_data_move_end(pcb);
		return 0;
	}

	// Signal the netif ring to read
	kern_channel_ring_t __single rx_ring = pcb->ipsec_netif_rxring[0];
	lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);

	/* Notify after dropping the PCB lock. */
	if (rx_ring != NULL) {
		kern_channel_notify(rx_ring, 0);
	}

	ipsec_data_move_end(pcb);
	return 0;
}
685 
/*
 * Encrypt the packet held in the source kpipe packet (sph) into the
 * destination packet (dph), dispatching on the IP version nibble of the
 * source header. Returns 0 on success or an errno (EINVAL for short or
 * unrecognized headers, otherwise the ipsec output error).
 */
static errno_t
ipsec_encrypt_kpipe_pkt(ifnet_t interface, kern_packet_t sph,
    kern_packet_t dph)
{
	uint8_t *sbaddr = NULL;
	int err = 0;
	uint32_t slen = 0;

	VERIFY(interface != NULL);
	VERIFY(sph != 0);
	VERIFY(dph != 0);

	/* The first buflet must hold at least an IPv4 header so the version
	 * nibble can be read safely. */
	kern_buflet_t __single sbuf = __packet_get_next_buflet(sph, NULL);
	VERIFY(sbuf != NULL);
	slen = __buflet_get_data_length(sbuf);

	if (__improbable(slen < sizeof(struct ip))) {
		os_log_error(OS_LOG_DEFAULT, "ipsec encrypt kpipe pkt: source "
		    "buffer shorter than ip header, %u\n", slen);
		return EINVAL;
	}

	sbaddr = ipsec_kern_buflet_to_buffer(sbuf);
	struct ip *ip = (struct ip *)(void *)sbaddr;
	ASSERT(IP_HDR_ALIGNED_P(ip));

	/* The version nibble occupies the same position in IPv4 and IPv6
	 * headers, so reading it through struct ip is valid for both. */
	u_int ip_vers = ip->ip_v;
	switch (ip_vers) {
	case IPVERSION: {
		/* IPv4 */
		err = ipsec4_interface_kpipe_output(interface, sph, dph);
		if (__improbable(err != 0)) {
			os_log_error(OS_LOG_DEFAULT, "ipsec4 interface kpipe "
			    "output error %d\n", err);
			return err;
		}
		break;
	}
	case 6: {
		/* IPv6 (version nibble 6; literal used where IPv4 uses the
		 * IPVERSION macro). */
		err = ipsec6_interface_kpipe_output(interface, sph, dph);
		if (__improbable(err != 0)) {
			os_log_error(OS_LOG_DEFAULT, "ipsec6 interface kpipe "
			    "output error %d\n", err);
			return err;
		}
		break;
	}
	default: {
		os_log_error(OS_LOG_DEFAULT, "received unknown packet version: %d\n",
		    ip_vers);
		return EINVAL;
	}
	}

	return err;
}
741 
742 static errno_t
ipsec_kpipe_sync_rx(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t rx_ring,uint32_t flags)743 ipsec_kpipe_sync_rx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
744     kern_channel_ring_t rx_ring, uint32_t flags)
745 {
746 #pragma unused(nxprov)
747 #pragma unused(flags)
748 	struct ipsec_pcb *__single pcb = kern_nexus_get_context(nexus);
749 	struct kern_channel_ring_stat_increment rx_ring_stats;
750 	uint8_t ring_idx = (uint8_t)(uintptr_t)kern_channel_ring_get_context(rx_ring);
751 
752 	if (!ipsec_data_move_begin(pcb)) {
753 		os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
754 		return 0;
755 	}
756 
757 	lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
758 
759 	if (!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
760 		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
761 		ipsec_data_move_end(pcb);
762 		return 0;
763 	}
764 
765 	VERIFY(pcb->ipsec_kpipe_count);
766 	VERIFY(ring_idx <= pcb->ipsec_kpipe_count);
767 
768 	// Reclaim user-released slots
769 	(void) kern_channel_reclaim(rx_ring);
770 
771 	uint32_t avail = kern_channel_available_slot_count(rx_ring);
772 	if (avail == 0) {
773 		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
774 		os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d no room in rx_ring\n", __func__,
775 		    pcb->ipsec_if_xname, rx_ring->ckr_name, ring_idx);
776 		ipsec_data_move_end(pcb);
777 		return 0;
778 	}
779 
780 	kern_channel_ring_t __single tx_ring = pcb->ipsec_netif_txring[ring_idx];
781 	if (tx_ring == NULL) {
782 		// Net-If TX ring not set up yet, nothing to read
783 		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
784 		os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d bad netif_txring 1\n", __func__,
785 		    pcb->ipsec_if_xname, rx_ring->ckr_name, ring_idx);
786 		ipsec_data_move_end(pcb);
787 		return 0;
788 	}
789 
790 	struct netif_stats *nifs = &NX_NETIF_PRIVATE(pcb->ipsec_netif_nexus)->nif_stats;
791 
792 	// Unlock ipsec before entering ring
793 	lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
794 
795 	(void)kr_enter(tx_ring, TRUE);
796 
797 	// Lock again after entering and validate
798 	lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
799 	if (tx_ring != pcb->ipsec_netif_txring[ring_idx]) {
800 		// Ring no longer valid
801 		// Unlock first, then exit ring
802 		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
803 		kr_exit(tx_ring);
804 		os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d bad netif_txring 2\n", __func__,
805 		    pcb->ipsec_if_xname, rx_ring->ckr_name, ring_idx);
806 		ipsec_data_move_end(pcb);
807 		return 0;
808 	}
809 
810 	struct kern_channel_ring_stat_increment tx_ring_stats;
811 	bzero(&tx_ring_stats, sizeof(tx_ring_stats));
812 	kern_channel_slot_t tx_pslot = NULL;
813 	kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
814 	if (tx_slot == NULL) {
815 		// Nothing to read, don't bother signalling
816 		// Unlock first, then exit ring
817 		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
818 		kr_exit(tx_ring);
819 		ipsec_data_move_end(pcb);
820 		return 0;
821 	}
822 
823 	struct kern_pbufpool *rx_pp = rx_ring->ckr_pp;
824 	VERIFY(rx_pp != NULL);
825 	struct kern_pbufpool *tx_pp = tx_ring->ckr_pp;
826 	VERIFY(tx_pp != NULL);
827 	bzero(&rx_ring_stats, sizeof(rx_ring_stats));
828 	kern_channel_slot_t rx_pslot = NULL;
829 	kern_channel_slot_t rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);
830 	kern_packet_t tx_chain_ph = 0;
831 
832 	while (rx_slot != NULL && tx_slot != NULL) {
833 		size_t tx_pkt_length = 0;
834 		errno_t error = 0;
835 
836 		// Allocate rx packet
837 		kern_packet_t rx_ph = 0;
838 		error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
839 		if (__improbable(error != 0)) {
840 			os_log_info(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s: "
841 			    "failed to allocate packet\n", pcb->ipsec_ifp->if_xname);
842 			STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
843 			STATS_INC(nifs, NETIF_STATS_DROP);
844 			break;
845 		}
846 
847 		kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);
848 		if (__improbable(tx_ph == 0)) {
849 			// Advance TX ring
850 			tx_pslot = tx_slot;
851 			tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
852 			kern_pbufpool_free(rx_pp, rx_ph);
853 			continue;
854 		}
855 
856 		(void) kern_channel_slot_detach_packet(tx_ring, tx_slot, tx_ph);
857 		if (tx_chain_ph != 0) {
858 			kern_packet_append(tx_ph, tx_chain_ph);
859 		}
860 		tx_chain_ph = tx_ph;
861 
862 		// Advance TX ring
863 		tx_pslot = tx_slot;
864 		tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
865 
866 		bpf_tap_packet_out(pcb->ipsec_ifp, DLT_RAW, tx_ph, NULL, 0);
867 
868 		tx_pkt_length = kern_packet_get_data_length(tx_ph);
869 		if (tx_pkt_length == 0 || tx_pkt_length > pcb->ipsec_slot_size) {
870 			os_log_info(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s: "
871 			    "packet length %zu", pcb->ipsec_ifp->if_xname,
872 			    tx_pkt_length);
873 			kern_pbufpool_free(rx_pp, rx_ph);
874 			STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
875 			STATS_INC(nifs, NETIF_STATS_DROP);
876 			continue;
877 		}
878 
879 		// Increment TX stats
880 		tx_ring_stats.kcrsi_slots_transferred++;
881 		tx_ring_stats.kcrsi_bytes_transferred += tx_pkt_length;
882 
883 		// Encrypt packet
884 		lck_mtx_lock(&pcb->ipsec_kpipe_encrypt_lock);
885 		error = ipsec_encrypt_kpipe_pkt(pcb->ipsec_ifp, tx_ph, rx_ph);
886 		lck_mtx_unlock(&pcb->ipsec_kpipe_encrypt_lock);
887 		if (__improbable(error != 0)) {
888 			os_log_info(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s: "
889 			    "failed to encrypt packet", pcb->ipsec_ifp->if_xname);
890 			kern_pbufpool_free(rx_pp, rx_ph);
891 			STATS_INC(nifs, NETIF_STATS_DROP);
892 			continue;
893 		}
894 
895 		kern_packet_clear_flow_uuid(rx_ph);         // Zero flow id
896 		// Finalize and attach the packet
897 		kern_buflet_t __single rx_buf = __packet_get_next_buflet(rx_ph, NULL);
898 		error = kern_buflet_set_data_offset(rx_buf, 0);
899 		VERIFY(error == 0);
900 		error = kern_packet_finalize(rx_ph);
901 		VERIFY(error == 0);
902 		error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
903 		VERIFY(error == 0);
904 
905 		STATS_INC(nifs, NETIF_STATS_TX_PACKETS);
906 		STATS_INC(nifs, NETIF_STATS_TX_COPY_DIRECT);
907 
908 		rx_ring_stats.kcrsi_slots_transferred++;
909 		rx_ring_stats.kcrsi_bytes_transferred += kern_packet_get_data_length(rx_ph);
910 
911 		if (!pcb->ipsec_ext_ifdata_stats) {
912 			ifnet_stat_increment_out(pcb->ipsec_ifp, 1,
913 			    kern_packet_get_data_length(rx_ph), 0);
914 		}
915 
916 		rx_pslot = rx_slot;
917 		rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
918 	}
919 
920 	if (rx_pslot) {
921 		kern_channel_advance_slot(rx_ring, rx_pslot);
922 		kern_channel_increment_ring_net_stats(rx_ring, pcb->ipsec_ifp, &rx_ring_stats);
923 	}
924 
925 	if (tx_chain_ph != 0) {
926 		kern_pbufpool_free_chain(tx_pp, tx_chain_ph);
927 	}
928 
929 	if (tx_pslot) {
930 		kern_channel_advance_slot(tx_ring, tx_pslot);
931 		kern_channel_increment_ring_net_stats(tx_ring, pcb->ipsec_ifp, &tx_ring_stats);
932 		(void)kern_channel_reclaim(tx_ring);
933 	}
934 
935 	/* always reenable output */
936 	errno_t error = ifnet_enable_output(pcb->ipsec_ifp);
937 	if (error != 0) {
938 		os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx: ifnet_enable_output returned error %d\n", error);
939 	}
940 
941 	// Unlock first, then exit ring
942 	lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
943 
944 	if (tx_pslot != NULL) {
945 		kern_channel_notify(tx_ring, 0);
946 	}
947 	kr_exit(tx_ring);
948 
949 	ipsec_data_move_end(pcb);
950 	return 0;
951 }
952 
953 static uint8_t
ipsec_find_tx_ring_by_svc(kern_packet_svc_class_t svc_class)954 ipsec_find_tx_ring_by_svc(kern_packet_svc_class_t svc_class)
955 {
956 	switch (svc_class) {
957 	case KPKT_SC_VO: {
958 		return 0;
959 	}
960 	case KPKT_SC_VI: {
961 		return 1;
962 	}
963 	case KPKT_SC_BE: {
964 		return 2;
965 	}
966 	case KPKT_SC_BK: {
967 		return 3;
968 	}
969 	default: {
970 		VERIFY(0);
971 		return 0;
972 	}
973 	}
974 }
975 
976 static errno_t
ipsec_netif_ring_init(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel,kern_channel_ring_t ring,boolean_t is_tx_ring,void ** ring_ctx)977 ipsec_netif_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
978     kern_channel_t channel, kern_channel_ring_t ring, boolean_t is_tx_ring,
979     void **ring_ctx)
980 {
981 #pragma unused(nxprov)
982 #pragma unused(channel)
983 	struct ipsec_pcb *__single pcb = kern_nexus_get_context(nexus);
984 
985 	if (!is_tx_ring) {
986 		VERIFY(pcb->ipsec_netif_rxring[0] == NULL);
987 		pcb->ipsec_netif_rxring[0] = ring;
988 	} else {
989 		uint8_t ring_idx = 0;
990 		if (ipsec_in_wmm_mode(pcb)) {
991 			int err;
992 			kern_packet_svc_class_t svc_class;
993 			err = kern_channel_get_service_class(ring, &svc_class);
994 			VERIFY(err == 0);
995 			ring_idx = ipsec_find_tx_ring_by_svc(svc_class);
996 			VERIFY(ring_idx < IPSEC_IF_WMM_RING_COUNT);
997 		}
998 
999 		*ring_ctx = __unsafe_forge_single(void *, (uintptr_t)ring_idx);
1000 
1001 		VERIFY(pcb->ipsec_netif_txring[ring_idx] == NULL);
1002 		pcb->ipsec_netif_txring[ring_idx] = ring;
1003 	}
1004 	return 0;
1005 }
1006 
1007 static void
ipsec_netif_ring_fini(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring)1008 ipsec_netif_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
1009     kern_channel_ring_t ring)
1010 {
1011 #pragma unused(nxprov)
1012 	struct ipsec_pcb *__single pcb = kern_nexus_get_context(nexus);
1013 	bool found = false;
1014 
1015 	for (int i = 0; i < IPSEC_NETIF_MAX_RX_RING_COUNT; i++) {
1016 		if (pcb->ipsec_netif_rxring[i] == ring) {
1017 			pcb->ipsec_netif_rxring[i] = NULL;
1018 			VERIFY(!found);
1019 			found = true;
1020 		}
1021 	}
1022 	for (int i = 0; i < IPSEC_NETIF_MAX_TX_RING_COUNT; i++) {
1023 		if (pcb->ipsec_netif_txring[i] == ring) {
1024 			pcb->ipsec_netif_txring[i] = NULL;
1025 			VERIFY(!found);
1026 			found = true;
1027 		}
1028 	}
1029 	VERIFY(found);
1030 }
1031 
1032 static bool
ipsec_netif_check_policy(ifnet_t interface,mbuf_t data)1033 ipsec_netif_check_policy(ifnet_t interface, mbuf_t data)
1034 {
1035 	necp_kernel_policy_result necp_result = 0;
1036 	necp_kernel_policy_result_parameter necp_result_parameter = {};
1037 	uint32_t necp_matched_policy_id = 0;
1038 	struct ip_out_args args4 = { };
1039 	struct ip6_out_args args6 = { };
1040 
1041 	// This packet has been marked with IP level policy, do not mark again.
1042 	if (data && data->m_pkthdr.necp_mtag.necp_policy_id >= NECP_KERNEL_POLICY_ID_FIRST_VALID_IP) {
1043 		return true;
1044 	}
1045 
1046 	size_t length = mbuf_pkthdr_len(data);
1047 	if (length < sizeof(struct ip)) {
1048 		return false;
1049 	}
1050 
1051 	struct ip *ip = mtod(data, struct ip *);
1052 	u_int ip_version = ip->ip_v;
1053 	switch (ip_version) {
1054 	case 4: {
1055 		if (interface != NULL) {
1056 			args4.ipoa_flags |= IPOAF_BOUND_IF;
1057 			args4.ipoa_boundif = interface->if_index;
1058 		}
1059 		necp_matched_policy_id = necp_ip_output_find_policy_match(data, IP_OUTARGS, &args4, NULL,
1060 		    &necp_result, &necp_result_parameter);
1061 		break;
1062 	}
1063 	case 6: {
1064 		if (interface != NULL) {
1065 			args6.ip6oa_flags |= IP6OAF_BOUND_IF;
1066 			args6.ip6oa_boundif = interface->if_index;
1067 		}
1068 		necp_matched_policy_id = necp_ip6_output_find_policy_match(data, IPV6_OUTARGS, &args6, NULL,
1069 		    &necp_result, &necp_result_parameter);
1070 		break;
1071 	}
1072 	default: {
1073 		return false;
1074 	}
1075 	}
1076 
1077 	if (necp_result == NECP_KERNEL_POLICY_RESULT_DROP ||
1078 	    necp_result == NECP_KERNEL_POLICY_RESULT_SOCKET_DIVERT) {
1079 		/* Drop and flow divert packets should be blocked at the IP layer */
1080 		return false;
1081 	}
1082 
1083 	necp_mark_packet_from_ip(data, necp_matched_policy_id);
1084 	return true;
1085 }
1086 
/*
 * Netif TX sync callback.  Drains the netif TX ring on behalf of the
 * outbound data path.  When a kernel pipe is allocated, packets are not
 * processed here; instead the matching kpipe RX ring is notified so the
 * userspace side can pull them.  Otherwise each TX packet is copied into
 * an mbuf, policy-checked with NECP, and injected into the BSD stack via
 * ipsec_output() for legacy encryption.
 *
 * Detached TX packets are accumulated on tx_chain_ph and freed in one
 * call once the ring walk is finished.  Returns 0 in all cases.
 */
static errno_t
ipsec_netif_sync_tx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t tx_ring, uint32_t flags)
{
#pragma unused(nxprov)
#pragma unused(flags)
	struct ipsec_pcb *__single pcb = kern_nexus_get_context(nexus);

	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;

	// Bail out early if the data path has been stopped (e.g. teardown)
	if (!ipsec_data_move_begin(pcb)) {
		os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
		return 0;
	}

	lck_rw_lock_shared(&pcb->ipsec_pcb_lock);

	struct kern_channel_ring_stat_increment tx_ring_stats;
	bzero(&tx_ring_stats, sizeof(tx_ring_stats));
	kern_channel_slot_t tx_pslot = NULL;
	kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
	kern_packet_t tx_chain_ph = 0;

	STATS_INC(nifs, NETIF_STATS_TX_SYNC);

	if (tx_slot == NULL) {
		// Nothing to write, don't bother signalling
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		ipsec_data_move_end(pcb);
		return 0;
	}

	// Kernel-pipe path: hand the work off to the kpipe reader instead
	if (pcb->ipsec_kpipe_count &&
	    ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
		// Select the corresponding kpipe rx ring
		uint8_t ring_idx = (uint8_t)(uintptr_t)kern_channel_ring_get_context(tx_ring);
		VERIFY(ring_idx < IPSEC_IF_MAX_RING_COUNT);
		kern_channel_ring_t __single rx_ring = pcb->ipsec_kpipe_rxring[ring_idx];

		// Unlock while calling notify
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);

		// Signal the kernel pipe ring to read
		if (rx_ring != NULL) {
			kern_channel_notify(rx_ring, 0);
		}

		ipsec_data_move_end(pcb);
		return 0;
	}

	// If we're here, we're injecting into the BSD stack
	while (tx_slot != NULL) {
		size_t length = 0;
		mbuf_t __single data = NULL;

		kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);

		if (tx_ph == 0) {
			// Advance TX ring
			tx_pslot = tx_slot;
			tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
			continue;
		}
		// Detach the packet and chain it for a single bulk free later
		(void) kern_channel_slot_detach_packet(tx_ring, tx_slot, tx_ph);
		if (tx_chain_ph != 0) {
			kern_packet_append(tx_ph, tx_chain_ph);
		}
		tx_chain_ph = tx_ph;

		// Advance TX ring
		tx_pslot = tx_slot;
		tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);

		kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
		VERIFY(tx_buf != NULL);

		uint8_t *tx_baddr = ipsec_kern_buflet_to_buffer(tx_buf);
		VERIFY(tx_baddr != 0);
		tx_baddr += kern_buflet_get_data_offset(tx_buf);

		bpf_tap_packet_out(pcb->ipsec_ifp, DLT_RAW, tx_ph, NULL, 0);

		// Cap the copy at the slot size; larger packets are truncated
		length = MIN(kern_packet_get_data_length(tx_ph),
		    pcb->ipsec_slot_size);

		if (length > 0) {
			errno_t error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, &data);
			if (error == 0) {
				// Copy packet payload from the buflet into the mbuf
				error = mbuf_copyback(data, 0, length, tx_baddr, MBUF_DONTWAIT);
				if (error == 0) {
					// Mark packet from policy
					uint32_t policy_id = kern_packet_get_policy_id(tx_ph);
					uint32_t skip_policy_id = kern_packet_get_skip_policy_id(tx_ph);
					necp_mark_packet_from_ip_with_skip(data, policy_id, skip_policy_id);

					// Check policy with NECP
					if (!ipsec_netif_check_policy(pcb->ipsec_ifp, data)) {
						os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - failed policy check\n", pcb->ipsec_ifp->if_xname);
						STATS_INC(nifs, NETIF_STATS_DROP);
						mbuf_freem(data);
						data = NULL;
					} else {
						// Send through encryption
						// NOTE(review): ipsec_output appears to consume
						// the mbuf on both success and failure paths —
						// data is not freed here; confirm against callee.
						error = ipsec_output(pcb->ipsec_ifp, data);
						if (error != 0) {
							os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - ipsec_output error %d\n", pcb->ipsec_ifp->if_xname, error);
						}
					}
				} else {
					os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - mbuf_copyback(%zu) error %d\n", pcb->ipsec_ifp->if_xname, length, error);
					STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
					STATS_INC(nifs, NETIF_STATS_DROP);
					mbuf_freem(data);
					data = NULL;
				}
			} else {
				os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - mbuf_gethdr error %d\n", pcb->ipsec_ifp->if_xname, error);
				STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
				STATS_INC(nifs, NETIF_STATS_DROP);
			}
		} else {
			os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - 0 length packet\n", pcb->ipsec_ifp->if_xname);
			STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
			STATS_INC(nifs, NETIF_STATS_DROP);
		}

		// Any failure above leaves data NULL: stop draining the ring
		if (data == NULL) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s: no encrypted packet to send\n", pcb->ipsec_ifp->if_xname);
			break;
		}

		STATS_INC(nifs, NETIF_STATS_TX_PACKETS);
		STATS_INC(nifs, NETIF_STATS_TX_COPY_MBUF);

		tx_ring_stats.kcrsi_slots_transferred++;
		tx_ring_stats.kcrsi_bytes_transferred += length;
	}

	// Free all detached TX packets in one shot
	if (tx_chain_ph != 0) {
		kern_pbufpool_free_chain(tx_ring->ckr_pp, tx_chain_ph);
	}

	// Publish ring progress and stats for the slots we consumed
	if (tx_pslot) {
		kern_channel_advance_slot(tx_ring, tx_pslot);
		kern_channel_increment_ring_net_stats(tx_ring, pcb->ipsec_ifp, &tx_ring_stats);
		(void)kern_channel_reclaim(tx_ring);
	}

	lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
	ipsec_data_move_end(pcb);

	return 0;
}
1241 
/*
 * Ring the doorbell for a single netif TX ring (ring_idx selects the WMM
 * ring).  Refills and syncs the ring, applies flow control by disabling
 * interface output when the TX ring is (nearly) full in kpipe mode, and
 * notifies the matching kpipe RX ring so the reader can drain it.
 *
 * Returns ENXIO if the ring was torn down underneath us, 0 otherwise.
 */
static errno_t
ipsec_netif_tx_doorbell_one(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t ring, uint32_t flags, uint8_t ring_idx)
{
#pragma unused(nxprov)
	struct ipsec_pcb *__single pcb = kern_nexus_get_context(nexus);
	boolean_t more = false;
	errno_t rc = 0;

	VERIFY((flags & KERN_NEXUS_TXDOORBELLF_ASYNC_REFILL) == 0);

	/*
	 * Refill and sync the ring; we may be racing against another thread doing
	 * an RX sync that also wants to do kr_enter(), and so use the blocking
	 * variant here.
	 */
	rc = kern_channel_tx_refill_canblock(ring, UINT32_MAX, UINT32_MAX, true, &more);
	if (rc != 0 && rc != EAGAIN && rc != EBUSY) {
		os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s tx refill failed %d\n", __func__,
		    pcb->ipsec_if_xname, ring->ckr_name, rc);
	}

	// Enter the ring before taking the pcb lock, then re-validate the ring
	(void) kr_enter(ring, TRUE);
	lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
	if (ring != pcb->ipsec_netif_txring[ring_idx]) {
		// ring no longer valid
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		kr_exit(ring);
		os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d bad netif_txring 3\n", __func__,
		    pcb->ipsec_if_xname, ring->ckr_name, ring_idx);
		return ENXIO;
	}

	// Flow control: stall the interface when the TX ring has no room left
	if (pcb->ipsec_kpipe_count) {
		uint32_t tx_available = kern_channel_available_slot_count(ring);
		if (pcb->ipsec_netif_txring_size > 0 &&
		    tx_available >= pcb->ipsec_netif_txring_size - 1) {
			// No room left in tx ring, disable output for now
			errno_t error = ifnet_disable_output(pcb->ipsec_ifp);
			if (error != 0) {
				os_log_error(OS_LOG_DEFAULT, "ipsec_netif_tx_doorbell: ifnet_disable_output returned error %d\n", error);
			}
		}
	}

	if (pcb->ipsec_kpipe_count) {
		kern_channel_ring_t __single rx_ring = pcb->ipsec_kpipe_rxring[ring_idx];

		// Unlock while calling notify
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		// Signal the kernel pipe ring to read
		if (rx_ring != NULL) {
			kern_channel_notify(rx_ring, 0);
		}
	} else {
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
	}

	kr_exit(ring);

	return 0;
}
1304 
1305 static errno_t
ipsec_netif_tx_doorbell(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring,__unused uint32_t flags)1306 ipsec_netif_tx_doorbell(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
1307     kern_channel_ring_t ring, __unused uint32_t flags)
1308 {
1309 	errno_t ret = 0;
1310 	struct ipsec_pcb *__single pcb = kern_nexus_get_context(nexus);
1311 
1312 	if (!ipsec_data_move_begin(pcb)) {
1313 		os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
1314 		return 0;
1315 	}
1316 
1317 	if (ipsec_in_wmm_mode(pcb)) {
1318 		for (uint8_t i = 0; i < IPSEC_IF_WMM_RING_COUNT; i++) {
1319 			kern_channel_ring_t __single nring = pcb->ipsec_netif_txring[i];
1320 			ret = ipsec_netif_tx_doorbell_one(nxprov, nexus, nring, flags, i);
1321 			if (ret) {
1322 				break;
1323 			}
1324 		}
1325 	} else {
1326 		ret = ipsec_netif_tx_doorbell_one(nxprov, nexus, ring, flags, 0);
1327 	}
1328 
1329 	ipsec_data_move_end(pcb);
1330 	return ret;
1331 }
1332 
/*
 * Convert one encrypted kpipe packet into a decrypted netif packet.
 *
 * Copies the IP header from the kpipe buffer into the netif buffer, then
 * calls esp_kpipe_input() (under the decrypt lock) to do in-place ESP
 * decryption into the netif packet.  Propagates the wake-packet flag that
 * the kpipe side encodes in the first byte of the packet's flow UUID,
 * and finalizes the netif packet with zeroed header offsets.
 *
 * Returns 0 on success; EMSGSIZE/EBADMSG (or an esp_kpipe_input error)
 * on validation or decryption failure, after bumping the drop stats.
 */
static errno_t
ipsec_transform_kpipe_pkt_to_netif_pkt(struct ipsec_pcb *pcb,
    struct kern_channel_ring_stat_increment *tx_ring_stats,
    struct netif_stats *nifs, kern_packet_t kpipe_ph, kern_packet_t netif_ph)
{
	kern_buflet_t kpipe_buf = NULL, netif_buf = NULL;
	uint8_t *kpipe_baddr = NULL, *netif_baddr = NULL;
	uuid_t flow_uuid;
	size_t iphlen = 0;
	uint32_t kpipe_buf_len = 0, netif_buf_lim = 0;
	int err = 0;

	VERIFY(kpipe_ph != 0);
	VERIFY(netif_ph != 0);
	VERIFY(pcb != NULL);
	VERIFY(tx_ring_stats != NULL);
	VERIFY(nifs != NULL);

	// Resolve the source (kpipe) buffer and its valid data length
	kpipe_buf = kern_packet_get_next_buflet(kpipe_ph, NULL);
	VERIFY(kpipe_buf != NULL);
	kpipe_baddr = ipsec_kern_buflet_to_buffer(kpipe_buf);
	VERIFY(kpipe_baddr != NULL);
	kpipe_baddr += kern_buflet_get_data_offset(kpipe_buf);
	kpipe_buf_len = kern_buflet_get_data_length(kpipe_buf);

	// Resolve the destination (netif) buffer and its remaining capacity
	netif_buf = kern_packet_get_next_buflet(netif_ph, NULL);
	VERIFY(netif_buf != NULL);
	netif_baddr = ipsec_kern_buflet_to_buffer(netif_buf);
	VERIFY(netif_baddr != NULL);
	netif_baddr += kern_buflet_get_data_offset(netif_buf);
	netif_buf_lim = __buflet_get_data_limit(netif_buf);
	netif_buf_lim -= __buflet_get_data_offset(netif_buf);

	// Reject packets larger than the configured slot size
	if (kpipe_buf_len > pcb->ipsec_slot_size) {
		os_log_info(OS_LOG_DEFAULT,
		    "ipsec_transform_kpipe_pkt_to_netif_pkt %s: kpipe buffer length "
		    "%u > pcb ipsec slot size %u", pcb->ipsec_ifp->if_xname,
		    kpipe_buf_len, pcb->ipsec_slot_size);
		STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
		err = EMSGSIZE;
		goto bad;
	}

	tx_ring_stats->kcrsi_slots_transferred++;
	tx_ring_stats->kcrsi_bytes_transferred += kpipe_buf_len;

	// Must at least hold an IPv4 header to read the version field
	if (__improbable(kpipe_buf_len < sizeof(struct ip))) {
		os_log_info(OS_LOG_DEFAULT, "ipsec_transform_kpipe_pkt_to_netif_pkt %s - bad "
		    "packet length %u\n", pcb->ipsec_ifp->if_xname, kpipe_buf_len);
		STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
		err = EBADMSG;
		goto bad;
	}

	struct ip *ip = (struct ip *)(void *)kpipe_baddr;
	ASSERT(IP_HDR_ALIGNED_P(ip));

	// Determine the IP header length from the version field
	u_int ip_vers = ip->ip_v;
	switch (ip_vers) {
	case IPVERSION: {
#ifdef _IP_VHL
		iphlen = IP_VHL_HL(ip->ip_vhl) << 2;
#else /* _IP_VHL */
		iphlen = ip->ip_hl << 2;
#endif /* _IP_VHL */
		break;
	}
	case 6: {
		// IPv6: fixed header; extension headers are not parsed here
		iphlen = sizeof(struct ip6_hdr);
		break;
	}
	default: {
		os_log_info(OS_LOG_DEFAULT, "ipsec_transform_kpipe_pkt_to_netif_pkt %s - bad "
		    "ip version %u\n", pcb->ipsec_ifp->if_xname, ip_vers);
		err = EBADMSG;
		goto bad;
	}
	}

	// The source must actually contain the full IP header
	if (__improbable(kpipe_buf_len < iphlen)) {
		os_log_info(OS_LOG_DEFAULT, "ipsec_transform_kpipe_pkt_to_netif_pkt %s - bad "
		    "packet length %u\n", pcb->ipsec_ifp->if_xname, kpipe_buf_len);
		STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
		err = EBADMSG;
		goto bad;
	}

	// ... and the destination must have room for it
	if (__improbable(netif_buf_lim < iphlen)) {
		os_log_info(OS_LOG_DEFAULT, "ipsec_transform_kpipe_pkt_to_netif_pkt %s - netif "
		    "buffer length %u too short\n", pcb->ipsec_ifp->if_xname, netif_buf_lim);
		STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
		err = EBADMSG;
		goto bad;
	}

	// Seed the netif packet with the (outer) IP header before decryption
	memcpy(netif_baddr, kpipe_baddr, iphlen);
	__buflet_set_data_length(netif_buf, (uint16_t)iphlen);

	// Decrypt under the kpipe decrypt lock (serializes ESP state access)
	lck_mtx_lock(&pcb->ipsec_kpipe_decrypt_lock);
	err = esp_kpipe_input(pcb->ipsec_ifp, kpipe_ph, netif_ph);
	lck_mtx_unlock(&pcb->ipsec_kpipe_decrypt_lock);

	if (__improbable((err != 0))) {
		goto bad;
	}

	// The wake-packet indication is carried in byte 0 of the flow UUID
	kern_packet_get_flow_uuid(kpipe_ph, &flow_uuid);
	uint8_t *id_8 = (uint8_t *)flow_uuid;
	if (__improbable((id_8[0] & IPSEC_KPIPE_FLAG_WAKE_PKT) == IPSEC_KPIPE_FLAG_WAKE_PKT)) {
		os_log_info(OS_LOG_DEFAULT, "ipsec_transform_kpipe_pkt_to_netif_pkt %s: wake packet "
		    "flag is set\n", pcb->ipsec_ifp->if_xname);
		__packet_set_wake_flag(netif_ph);
	}

	// Finalize: clear the flow id and normalize all offsets to zero
	kern_packet_clear_flow_uuid(netif_ph);
	err = kern_buflet_set_data_offset(netif_buf, 0);
	VERIFY(err == 0);
	err = kern_packet_set_link_header_offset(netif_ph, 0);
	VERIFY(err == 0);
	err = kern_packet_set_network_header_offset(netif_ph, 0);
	VERIFY(err == 0);
	err = kern_packet_finalize(netif_ph);
	VERIFY(err == 0);

	return 0;
bad:
	STATS_INC(nifs, NETIF_STATS_DROP);
	return err;
}
1462 
1463 
/*
 * Netif RX sync callback.  Fills the netif RX ring from two sources:
 *
 *  1. The legacy mbuf input chain (pcb->ipsec_input_chain): packets already
 *     decrypted by the BSD stack are copied mbuf -> buflet, fragmenting
 *     first when they exceed the pool buffer size or the configured
 *     input fragment size.
 *  2. The kpipe TX rings: encrypted packets from userspace are decrypted
 *     directly into freshly allocated RX packets via
 *     ipsec_transform_kpipe_pkt_to_netif_pkt().
 *
 * Locking: holds ipsec_pcb_lock shared throughout, except that it is
 * dropped around kr_enter()/kern_channel_notify() on each kpipe TX ring
 * (and the ring pointer is re-validated after re-acquiring the lock).
 * Returns 0 in all cases.
 */
static errno_t
ipsec_netif_sync_rx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t rx_ring, uint32_t flags)
{
#pragma unused(nxprov)
#pragma unused(flags)
	struct ipsec_pcb *__single pcb = kern_nexus_get_context(nexus);
	struct kern_channel_ring_stat_increment rx_ring_stats;

	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;

	// Bail out early if the data path has been stopped (e.g. teardown)
	if (!ipsec_data_move_begin(pcb)) {
		os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
		return 0;
	}

	lck_rw_lock_shared(&pcb->ipsec_pcb_lock);

	// Reclaim user-released slots
	(void) kern_channel_reclaim(rx_ring);

	STATS_INC(nifs, NETIF_STATS_RX_SYNC);

	uint32_t avail = kern_channel_available_slot_count(rx_ring);
	if (avail == 0) {
		// No room in the RX ring, nothing we can deliver
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		ipsec_data_move_end(pcb);
		return 0;
	}

	struct kern_pbufpool *rx_pp = rx_ring->ckr_pp;
	VERIFY(rx_pp != NULL);
	bzero(&rx_ring_stats, sizeof(rx_ring_stats));
	kern_channel_slot_t rx_pslot = NULL;
	kern_channel_slot_t rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);

	// Phase 1: drain the legacy mbuf input chain into the RX ring
	while (rx_slot != NULL) {
		// Check for a waiting packet
		lck_mtx_lock(&pcb->ipsec_input_chain_lock);
		mbuf_t __single data = pcb->ipsec_input_chain;
		if (data == NULL) {
			lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
			break;
		}

		// Allocate rx packet
		kern_packet_t rx_ph = 0;
		errno_t error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
		if (__improbable(error != 0)) {
			STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_PKT);
			STATS_INC(nifs, NETIF_STATS_DROP);
			lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
			break;
		}

		// Advance waiting packets
		if (pcb->ipsec_input_chain_count > 0) {
			pcb->ipsec_input_chain_count--;
		}
		pcb->ipsec_input_chain = data->m_nextpkt;
		data->m_nextpkt = NULL;
		if (pcb->ipsec_input_chain == NULL) {
			pcb->ipsec_input_chain_last = NULL;
		}
		lck_mtx_unlock(&pcb->ipsec_input_chain_lock);

		size_t length = mbuf_pkthdr_len(data);

		// Too short to even hold an IPv4 header: drop
		if (length < sizeof(struct ip)) {
			// Flush data
			mbuf_freem(data);
			kern_pbufpool_free(rx_pp, rx_ph);
			STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
			STATS_INC(nifs, NETIF_STATS_DROP);
			os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: legacy decrypted packet length cannot hold IP %zu < %zu\n",
			    pcb->ipsec_ifp->if_xname, length, sizeof(struct ip));
			continue;
		}

		// Classify address family from the IP version nibble
		uint32_t af = 0;
		struct ip *ip = mtod(data, struct ip *);
		u_int ip_version = ip->ip_v;
		switch (ip_version) {
		case 4: {
			af = AF_INET;
			break;
		}
		case 6: {
			af = AF_INET6;
			break;
		}
		default: {
			// Unknown version: logged, af stays 0 and the packet
			// falls into the default (drop) branch if fragmenting
			os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: legacy unknown ip version %u\n",
			    pcb->ipsec_ifp->if_xname, ip_version);
			break;
		}
		}

		if (length > PP_BUF_SIZE_DEF(rx_pp) ||
		    (pcb->ipsec_frag_size_set && length > pcb->ipsec_input_frag_size)) {
			// We need to fragment to send up into the netif

			u_int32_t fragment_mtu = PP_BUF_SIZE_DEF(rx_pp);
			if (pcb->ipsec_frag_size_set &&
			    pcb->ipsec_input_frag_size < PP_BUF_SIZE_DEF(rx_pp)) {
				fragment_mtu = pcb->ipsec_input_frag_size;
			}

			mbuf_t fragment_chain = NULL;
			switch (af) {
			case AF_INET: {
				// ip_fragment expects the length in host order
				ip->ip_len = ntohs(ip->ip_len);

				// ip_fragment will modify the original data, don't free
				int fragment_error = ip_fragment(data, pcb->ipsec_ifp, fragment_mtu, TRUE);
				if (fragment_error == 0 && data != NULL) {
					fragment_chain = data;
				} else {
					STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
					STATS_INC(nifs, NETIF_STATS_DROP);
					os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: failed to fragment IPv4 packet of length %zu (%d)\n",
					    pcb->ipsec_ifp->if_xname, length, fragment_error);
				}
				break;
			}
			case AF_INET6: {
				if (length < sizeof(struct ip6_hdr)) {
					mbuf_freem(data);
					STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
					STATS_INC(nifs, NETIF_STATS_DROP);
					os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: failed to fragment IPv6 packet of length %zu < %zu\n",
					    pcb->ipsec_ifp->if_xname, length, sizeof(struct ip6_hdr));
				} else {
					// ip6_do_fragmentation will free the original data on success only
					struct ip6_hdr *ip6 = mtod(data, struct ip6_hdr *);

					int fragment_error = ip6_do_fragmentation(&data, 0, pcb->ipsec_ifp, sizeof(struct ip6_hdr),
					    ip6, NULL, fragment_mtu, ip6->ip6_nxt, htonl(ip6_randomid((uint64_t)data)));
					if (fragment_error == 0 && data != NULL) {
						fragment_chain = data;
					} else {
						mbuf_freem(data);
						STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
						STATS_INC(nifs, NETIF_STATS_DROP);
						os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: failed to fragment IPv6 packet of length %zu (%d)\n",
						    pcb->ipsec_ifp->if_xname, length, fragment_error);
					}
				}
				break;
			}
			default: {
				// Cannot fragment unknown families
				mbuf_freem(data);
				STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
				STATS_INC(nifs, NETIF_STATS_DROP);
				os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: uknown legacy decrypted packet length %zu > %u\n",
				    pcb->ipsec_ifp->if_xname, length, PP_BUF_SIZE_DEF(rx_pp));
				break;
			}
			}

			if (fragment_chain != NULL) {
				// Add fragments to chain before continuing
				lck_mtx_lock(&pcb->ipsec_input_chain_lock);
				if (pcb->ipsec_input_chain != NULL) {
					pcb->ipsec_input_chain_last->m_nextpkt = fragment_chain;
				} else {
					pcb->ipsec_input_chain = fragment_chain;
				}
				pcb->ipsec_input_chain_count++;
				while (fragment_chain->m_nextpkt) {
					VERIFY(fragment_chain != fragment_chain->m_nextpkt);
					fragment_chain = fragment_chain->m_nextpkt;
					pcb->ipsec_input_chain_count++;
				}
				pcb->ipsec_input_chain_last = fragment_chain;
				lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
			}

			// Make sure to free unused rx packet
			kern_pbufpool_free(rx_pp, rx_ph);

			continue;
		}

		mbuf_pkthdr_setrcvif(data, pcb->ipsec_ifp);

		// Fillout rx packet
		kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
		VERIFY(rx_buf != NULL);
		uint8_t *rx_baddr = ipsec_kern_buflet_to_buffer(rx_buf);
		VERIFY(rx_baddr != NULL);

		// Copy-in data from mbuf to buflet
		mbuf_copydata(data, 0, length, (void *)rx_baddr);
		kern_packet_clear_flow_uuid(rx_ph);         // Zero flow id

		// Finalize and attach the packet
		error = kern_buflet_set_data_offset(rx_buf, 0);
		VERIFY(error == 0);
		error = kern_buflet_set_data_length(rx_buf, (uint16_t)length);
		VERIFY(error == 0);
		error = kern_packet_set_headroom(rx_ph, 0);
		VERIFY(error == 0);
		error = kern_packet_finalize(rx_ph);
		VERIFY(error == 0);
		error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
		VERIFY(error == 0);

		STATS_INC(nifs, NETIF_STATS_RX_PACKETS);
		STATS_INC(nifs, NETIF_STATS_RX_COPY_MBUF);
		bpf_tap_packet_in(pcb->ipsec_ifp, DLT_RAW, rx_ph, NULL, 0);

		rx_ring_stats.kcrsi_slots_transferred++;
		rx_ring_stats.kcrsi_bytes_transferred += length;

		if (!pcb->ipsec_ext_ifdata_stats) {
			ifnet_stat_increment_in(pcb->ipsec_ifp, 1, (uint16_t)length, 0);
		}

		// mbuf contents are copied out; release it
		mbuf_freem(data);

		// Advance ring
		rx_pslot = rx_slot;
		rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
	}

	// Phase 2: pull encrypted packets off each kpipe TX ring and decrypt
	for (uint8_t ring_idx = 0; ring_idx < pcb->ipsec_kpipe_count; ring_idx++) {
		struct kern_channel_ring_stat_increment tx_ring_stats = {};
		kern_channel_slot_t tx_pslot = NULL;
		kern_channel_slot_t tx_slot = NULL;

		kern_channel_ring_t __single tx_ring = pcb->ipsec_kpipe_txring[ring_idx];
		if (tx_ring == NULL) {
			// Net-If TX ring not set up yet, nothing to read
			goto done;
		}

		// Unlock ipsec before entering ring
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);

		(void)kr_enter(tx_ring, TRUE);

		// Lock again after entering and validate
		lck_rw_lock_shared(&pcb->ipsec_pcb_lock);

		// The ring may have been torn down while the lock was dropped
		if (tx_ring != pcb->ipsec_kpipe_txring[ring_idx]) {
			goto done;
		}

		tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
		if (tx_slot == NULL) {
			// Nothing to read, don't bother signalling
			goto done;
		}

		while (rx_slot != NULL && tx_slot != NULL) {
			errno_t error = 0;

			// Allocate rx packet
			kern_packet_t rx_ph = 0;
			error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
			if (__improbable(error != 0)) {
				STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_PKT);
				STATS_INC(nifs, NETIF_STATS_DROP);
				break;
			}

			kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);
			tx_pslot = tx_slot;
			tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
			if (tx_ph == 0) {
				// Empty slot: release the unused RX packet
				kern_pbufpool_free(rx_pp, rx_ph);
				continue;
			}

			// Decrypt kpipe packet directly into the RX packet
			error = ipsec_transform_kpipe_pkt_to_netif_pkt(pcb,
			    &tx_ring_stats, nifs, tx_ph, rx_ph);
			if (error != 0) {
				// Failed to get decrypted packet
				kern_pbufpool_free(rx_pp, rx_ph);
				continue;
			}

			error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
			VERIFY(error == 0);

			STATS_INC(nifs, NETIF_STATS_RX_PACKETS);
			STATS_INC(nifs, NETIF_STATS_RX_COPY_DIRECT);

			bpf_tap_packet_in(pcb->ipsec_ifp, DLT_RAW, rx_ph, NULL, 0);

			rx_ring_stats.kcrsi_slots_transferred++;
			rx_ring_stats.kcrsi_bytes_transferred += kern_packet_get_data_length(rx_ph);

			if (!pcb->ipsec_ext_ifdata_stats) {
				ifnet_stat_increment_in(pcb->ipsec_ifp, 1,
				    kern_packet_get_data_length(rx_ph), 0);
			}

			rx_pslot = rx_slot;
			rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
		}

done:
		// Publish progress on this kpipe TX ring, if any
		if (tx_pslot) {
			kern_channel_advance_slot(tx_ring, tx_pslot);
			kern_channel_increment_ring_net_stats(tx_ring, pcb->ipsec_ifp, &tx_ring_stats);
			(void)kern_channel_reclaim(tx_ring);
		}

		// Unlock first, then exit ring
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		if (tx_ring != NULL) {
			if (tx_pslot != NULL) {
				kern_channel_notify(tx_ring, 0);
			}
			kr_exit(tx_ring);
		}

		// Re-acquire for the next loop iteration / final RX advance
		lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
	}

	// Publish RX ring progress for everything delivered above
	if (rx_pslot) {
		kern_channel_advance_slot(rx_ring, rx_pslot);
		kern_channel_increment_ring_net_stats(rx_ring, pcb->ipsec_ifp, &rx_ring_stats);
	}


	lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);

	ipsec_data_move_end(pcb);
	return 0;
}
1799 
/*
 * Create and register the netif nexus provider for this ipsec interface,
 * then allocate a net provider instance, which also allocates and attaches
 * the backing ifnet (returned through *ifp).  On success the provider and
 * instance UUIDs are recorded in pcb->ipsec_nx; on failure any partially
 * created state (nexus attributes, packet pool, provider registration) is
 * torn down and a non-zero errno is returned.
 */
static errno_t
ipsec_nexus_ifattach(struct ipsec_pcb *pcb,
    struct ifnet_init_eparams *init_params,
    struct ifnet **ifp)
{
	errno_t err;
	nexus_controller_t controller = kern_nexus_shared_controller();
	struct kern_nexus_net_init net_init;
	struct kern_pbufpool_init pp_init;

	/* Per-interface provider name, e.g. "com.apple.netif.ipsec0". */
	nexus_name_t provider_name;
	snprintf((char *)provider_name, sizeof(provider_name),
	    "com.apple.netif.%s", pcb->ipsec_if_xname);

	/* Callbacks the netif nexus invokes on this driver. */
	struct kern_nexus_provider_init prov_init = {
		.nxpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
		.nxpi_flags = NXPIF_VIRTUAL_DEVICE,
		.nxpi_pre_connect = ipsec_nexus_pre_connect,
		.nxpi_connected = ipsec_nexus_connected,
		.nxpi_pre_disconnect = ipsec_netif_pre_disconnect,
		.nxpi_disconnected = ipsec_nexus_disconnected,
		.nxpi_ring_init = ipsec_netif_ring_init,
		.nxpi_ring_fini = ipsec_netif_ring_fini,
		.nxpi_slot_init = NULL,
		.nxpi_slot_fini = NULL,
		.nxpi_sync_tx = ipsec_netif_sync_tx,
		.nxpi_sync_rx = ipsec_netif_sync_rx,
		.nxpi_tx_doorbell = ipsec_netif_tx_doorbell,
	};

	nexus_attr_t __single nxa = NULL;
	err = kern_nexus_attr_create(&nxa);
	IPSEC_IF_VERIFY(err == 0);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_attr_create failed: %d\n",
		    __func__, err);
		goto failed;
	}

	uint64_t slot_buffer_size = pcb->ipsec_slot_size;
	err = kern_nexus_attr_set(nxa, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
	VERIFY(err == 0);

	// Reset ring size for netif nexus to limit memory usage
	uint64_t ring_size = pcb->ipsec_netif_ring_size;
	err = kern_nexus_attr_set(nxa, NEXUS_ATTR_TX_SLOTS, ring_size);
	VERIFY(err == 0);
	err = kern_nexus_attr_set(nxa, NEXUS_ATTR_RX_SLOTS, ring_size);
	VERIFY(err == 0);

	assert(err == 0);

	if (ipsec_in_wmm_mode(pcb)) {
		os_log(OS_LOG_DEFAULT, "%s: %s enabling wmm mode\n",
		    __func__, pcb->ipsec_if_xname);

		/* Driver-managed scheduling: one ring per WMM access category. */
		init_params->output_sched_model = IFNET_SCHED_MODEL_DRIVER_MANAGED;

		err = kern_nexus_attr_set(nxa, NEXUS_ATTR_TX_RINGS,
		    IPSEC_NETIF_WMM_TX_RING_COUNT);
		VERIFY(err == 0);
		err = kern_nexus_attr_set(nxa, NEXUS_ATTR_RX_RINGS,
		    IPSEC_NETIF_WMM_RX_RING_COUNT);
		VERIFY(err == 0);

		err = kern_nexus_attr_set(nxa, NEXUS_ATTR_QMAP, NEXUS_QMAP_TYPE_WMM);
		VERIFY(err == 0);
	}

	pcb->ipsec_netif_txring_size = ring_size;

	bzero(&pp_init, sizeof(pp_init));
	pp_init.kbi_version = KERN_PBUFPOOL_CURRENT_VERSION;
	pp_init.kbi_flags |= (KBIF_VIRTUAL_DEVICE | KBIF_USER_ACCESS);
	// Note: we need more packets than can be held in the tx and rx rings because
	// packets can also be in the AQM queue(s)
	pp_init.kbi_packets = pcb->ipsec_netif_ring_size * (2 * pcb->ipsec_kpipe_count + 1);
	pp_init.kbi_bufsize = pcb->ipsec_slot_size;
	pp_init.kbi_buf_seg_size = IPSEC_IF_DEFAULT_BUF_SEG_SIZE;
	pp_init.kbi_max_frags = 1;
	(void) snprintf((char *)pp_init.kbi_name, sizeof(pp_init.kbi_name),
	    "%s", provider_name);
	pp_init.kbi_ctx = NULL;
	pp_init.kbi_ctx_retain = NULL;
	pp_init.kbi_ctx_release = NULL;

	err = kern_pbufpool_create(&pp_init, &pcb->ipsec_netif_pp, NULL);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s pbufbool create failed, error %d\n", __func__, err);
		goto failed;
	}

	err = kern_nexus_controller_register_provider(controller,
	    ipsec_nx_dom_prov,
	    provider_name,
	    &prov_init,
	    sizeof(prov_init),
	    nxa,
	    &pcb->ipsec_nx.if_provider);
	IPSEC_IF_VERIFY(err == 0);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s register provider failed, error %d\n",
		    __func__, err);
		goto failed;
	}

	/* The same pool backs both directions of the netif. */
	bzero(&net_init, sizeof(net_init));
	net_init.nxneti_version = KERN_NEXUS_NET_CURRENT_VERSION;
	net_init.nxneti_flags = 0;
	net_init.nxneti_eparams = init_params;
	net_init.nxneti_lladdr = NULL;
	net_init.nxneti_prepare = ipsec_netif_prepare;
	net_init.nxneti_rx_pbufpool = pcb->ipsec_netif_pp;
	net_init.nxneti_tx_pbufpool = pcb->ipsec_netif_pp;
	err = kern_nexus_controller_alloc_net_provider_instance(controller,
	    pcb->ipsec_nx.if_provider,
	    pcb,
	    NULL,
	    &pcb->ipsec_nx.if_instance,
	    &net_init,
	    ifp);
	IPSEC_IF_VERIFY(err == 0);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s alloc_net_provider_instance failed, %d\n",
		    __func__, err);
		kern_nexus_controller_deregister_provider(controller,
		    pcb->ipsec_nx.if_provider);
		uuid_clear(pcb->ipsec_nx.if_provider);
		goto failed;
	}

	/*
	 * Reached on success as well: with err == 0 only the attribute object
	 * is destroyed below and the pool is left in place.
	 */
failed:
	if (nxa) {
		kern_nexus_attr_destroy(nxa);
	}
	if (err && pcb->ipsec_netif_pp != NULL) {
		kern_pbufpool_destroy(pcb->ipsec_netif_pp);
		pcb->ipsec_netif_pp = NULL;
	}
	return err;
}
1941 
1942 static void
ipsec_detach_provider_and_instance(uuid_t provider,uuid_t instance)1943 ipsec_detach_provider_and_instance(uuid_t provider, uuid_t instance)
1944 {
1945 	nexus_controller_t controller = kern_nexus_shared_controller();
1946 	errno_t err;
1947 
1948 	if (!uuid_is_null(instance)) {
1949 		err = kern_nexus_controller_free_provider_instance(controller,
1950 		    instance);
1951 		if (err != 0) {
1952 			os_log_error(OS_LOG_DEFAULT, "%s free_provider_instance failed %d\n",
1953 			    __func__, err);
1954 		}
1955 		uuid_clear(instance);
1956 	}
1957 	if (!uuid_is_null(provider)) {
1958 		err = kern_nexus_controller_deregister_provider(controller,
1959 		    provider);
1960 		if (err != 0) {
1961 			os_log_error(OS_LOG_DEFAULT, "%s deregister_provider %d\n", __func__, err);
1962 		}
1963 		uuid_clear(provider);
1964 	}
1965 	return;
1966 }
1967 
/*
 * Undo ipsec_nexus_ifattach()/ipsec_flowswitch_attach(): detach the
 * flowswitch from the netif device port, free/deregister the flowswitch
 * and netif provider/instance pairs, destroy the netif packet pool, and
 * zero the per-pcb nexus bookkeeping.  Ordering matters: the flowswitch
 * must be detached from the device port before either side is freed.
 */
static void
ipsec_nexus_detach(struct ipsec_pcb *pcb)
{
	ipsec_nx_t nx = &pcb->ipsec_nx;
	nexus_controller_t controller = kern_nexus_shared_controller();
	errno_t err;

	if (!uuid_is_null(nx->fsw_device)) {
		err = kern_nexus_ifdetach(controller,
		    nx->fsw_instance,
		    nx->fsw_device);
		if (err != 0) {
			os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_ifdetach ms device failed %d\n",
			    __func__, err);
		}
	}

	/* Flowswitch first, then the netif it was attached to. */
	ipsec_detach_provider_and_instance(nx->fsw_provider,
	    nx->fsw_instance);
	ipsec_detach_provider_and_instance(nx->if_provider,
	    nx->if_instance);

	if (pcb->ipsec_netif_pp != NULL) {
		kern_pbufpool_destroy(pcb->ipsec_netif_pp);
		pcb->ipsec_netif_pp = NULL;
	}
	/* Clear all recorded UUIDs so a second detach is harmless. */
	memset(nx, 0, sizeof(*nx));
}
1996 
/*
 * Register a flowswitch nexus provider named "com.apple.<type_name>.<ifname>"
 * and allocate one instance of it.  On success *provider and *instance hold
 * the new UUIDs; on failure the provider registration (if any) is undone
 * and a non-zero errno is returned.  Ring sizes and slot buffer size come
 * from the pcb's configured values.
 */
static errno_t
ipsec_create_fs_provider_and_instance(struct ipsec_pcb *pcb,
    const char *type_name,
    const char *ifname,
    uuid_t *provider, uuid_t *instance)
{
	nexus_attr_t __single attr = NULL;
	nexus_controller_t controller = kern_nexus_shared_controller();
	uuid_t dom_prov;
	errno_t err;
	struct kern_nexus_init init;
	nexus_name_t    provider_name;

	err = kern_nexus_get_default_domain_provider(NEXUS_TYPE_FLOW_SWITCH,
	    &dom_prov);
	IPSEC_IF_VERIFY(err == 0);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s can't get %s provider, error %d\n",
		    __func__, type_name, err);
		goto failed;
	}

	err = kern_nexus_attr_create(&attr);
	IPSEC_IF_VERIFY(err == 0);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_attr_create failed: %d\n",
		    __func__, err);
		goto failed;
	}

	uint64_t slot_buffer_size = pcb->ipsec_slot_size;
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
	VERIFY(err == 0);

	// Reset ring size for flowswitch nexus to limit memory usage. Larger RX than netif.
	uint64_t tx_ring_size = pcb->ipsec_tx_fsw_ring_size;
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_TX_SLOTS, tx_ring_size);
	VERIFY(err == 0);
	uint64_t rx_ring_size = pcb->ipsec_rx_fsw_ring_size;
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_RX_SLOTS, rx_ring_size);
	VERIFY(err == 0);
	/*
	 * Configure flowswitch to use super-packet (multi-buflet).
	 * This allows flowswitch to perform intra-stack packet aggregation.
	 */
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_MAX_FRAGS,
	    NX_FSW_TCP_RX_AGG_ENABLED() ? NX_PBUF_FRAGS_MAX : 1);
	VERIFY(err == 0);

	snprintf((char *)provider_name, sizeof(provider_name),
	    "com.apple.%s.%s", type_name, ifname);
	err = kern_nexus_controller_register_provider(controller,
	    dom_prov,
	    provider_name,
	    NULL,
	    0,
	    attr,
	    provider);
	/* Attributes are copied by registration; destroy ours either way. */
	kern_nexus_attr_destroy(attr);
	attr = NULL;
	IPSEC_IF_VERIFY(err == 0);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s register %s provider failed, error %d\n",
		    __func__, type_name, err);
		goto failed;
	}
	bzero(&init, sizeof(init));
	init.nxi_version = KERN_NEXUS_CURRENT_VERSION;
	err = kern_nexus_controller_alloc_provider_instance(controller,
	    *provider,
	    NULL, NULL,
	    instance, &init);
	IPSEC_IF_VERIFY(err == 0);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s alloc_provider_instance %s failed, %d\n",
		    __func__, type_name, err);
		/* Unwind the registration so the caller sees all-or-nothing. */
		kern_nexus_controller_deregister_provider(controller,
		    *provider);
		uuid_clear(*provider);
	}
failed:
	return err;
}
2080 
/*
 * Create a flowswitch nexus for this interface and attach it to the netif
 * device port created by ipsec_nexus_ifattach().  Also captures the
 * flowswitch's netagent UUID into pcb->ipsec_nx for later use.  On failure
 * the whole nexus stack is torn down and the ifnet is detached (a detach
 * failure at that point is fatal).
 */
static errno_t
ipsec_flowswitch_attach(struct ipsec_pcb *pcb)
{
	nexus_controller_t controller = kern_nexus_shared_controller();
	errno_t err = 0;
	ipsec_nx_t nx = &pcb->ipsec_nx;

	// Allocate flowswitch
	err = ipsec_create_fs_provider_and_instance(pcb,
	    "flowswitch",
	    pcb->ipsec_ifp->if_xname,
	    &nx->fsw_provider,
	    &nx->fsw_instance);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s: failed to create bridge provider and instance\n",
		    __func__);
		goto failed;
	}

	// Attach flowswitch to device port
	err = kern_nexus_ifattach(controller, nx->fsw_instance,
	    NULL, nx->if_instance,
	    FALSE, &nx->fsw_device);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s kern_nexus_ifattach ms device %d\n", __func__, err);
		goto failed;
	}

	// Extract the agent UUID and save for later
	struct kern_nexus *flowswitch_nx = nx_find(nx->fsw_instance, false);
	if (flowswitch_nx != NULL) {
		struct nx_flowswitch *flowswitch = NX_FSW_PRIVATE(flowswitch_nx);
		if (flowswitch != NULL) {
			/* Copy under the flowswitch read lock. */
			FSW_RLOCK(flowswitch);
			uuid_copy(nx->fsw_agent, flowswitch->fsw_agent_uuid);
			FSW_UNLOCK(flowswitch);
		} else {
			os_log_error(OS_LOG_DEFAULT, "ipsec_flowswitch_attach - flowswitch is NULL\n");
		}
		/* Drop the reference taken by nx_find(). */
		nx_release(flowswitch_nx);
	} else {
		os_log_error(OS_LOG_DEFAULT, "ipsec_flowswitch_attach - unable to find flowswitch nexus\n");
	}

	return 0;

failed:
	ipsec_nexus_detach(pcb);

	errno_t detach_error = 0;
	if ((detach_error = ifnet_detach(pcb->ipsec_ifp)) != 0) {
		panic("ipsec_flowswitch_attach - ifnet_detach failed: %d", detach_error);
		/* NOT REACHED */
	}

	return err;
}
2138 
2139 #pragma mark Kernel Pipe Nexus
2140 
/*
 * Register the shared (global) kernel-pipe nexus provider used by all ipsec
 * interfaces.  Reference-counted under ipsec_lock: only the first caller
 * creates the controller and registers the provider; later callers just
 * bump ipsec_ncd_refcount.  The pcb is used only to size the kpipe rings.
 * On any failure the controller is destroyed and the refcount reset to 0.
 */
static errno_t
ipsec_register_kernel_pipe_nexus(struct ipsec_pcb *pcb)
{
	nexus_attr_t __single nxa = NULL;
	nexus_name_t provider_name = "com.apple.nexus.ipsec.kpipe";
	errno_t result;

	lck_mtx_lock(&ipsec_lock);
	if (ipsec_ncd_refcount++) {
		/* Already registered by a previous caller. */
		lck_mtx_unlock(&ipsec_lock);
		return 0;
	}

	result = kern_nexus_controller_create(&ipsec_ncd);
	if (result) {
		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_controller_create failed: %d\n",
		    __FUNCTION__, result);
		goto done;
	}

	uuid_t dom_prov;
	result = kern_nexus_get_default_domain_provider(
		NEXUS_TYPE_KERNEL_PIPE, &dom_prov);
	if (result) {
		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_get_default_domain_provider failed: %d\n",
		    __FUNCTION__, result);
		goto done;
	}

	/* Callbacks for the kpipe nexus; no TX doorbell for kpipes. */
	struct kern_nexus_provider_init prov_init = {
		.nxpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
		.nxpi_flags = NXPIF_VIRTUAL_DEVICE,
		.nxpi_pre_connect = ipsec_nexus_pre_connect,
		.nxpi_connected = ipsec_nexus_connected,
		.nxpi_pre_disconnect = ipsec_nexus_pre_disconnect,
		.nxpi_disconnected = ipsec_nexus_disconnected,
		.nxpi_ring_init = ipsec_kpipe_ring_init,
		.nxpi_ring_fini = ipsec_kpipe_ring_fini,
		.nxpi_slot_init = NULL,
		.nxpi_slot_fini = NULL,
		.nxpi_sync_tx = ipsec_kpipe_sync_tx,
		.nxpi_sync_rx = ipsec_kpipe_sync_rx,
		.nxpi_tx_doorbell = NULL,
	};

	result = kern_nexus_attr_create(&nxa);
	if (result) {
		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_attr_create failed: %d\n",
		    __FUNCTION__, result);
		goto done;
	}

	uint64_t slot_buffer_size = IPSEC_IF_DEFAULT_SLOT_SIZE;
	result = kern_nexus_attr_set(nxa, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
	VERIFY(result == 0);

	// Reset ring size for kernel pipe nexus to limit memory usage
	// Note: It's better to have less on slots on the kpipe TX ring than the netif
	// so back pressure is applied at the AQM layer
	uint64_t ring_size =
	    pcb->ipsec_kpipe_tx_ring_size != 0 ? pcb->ipsec_kpipe_tx_ring_size :
	    pcb->ipsec_netif_ring_size != 0 ? pcb->ipsec_netif_ring_size :
	    if_ipsec_ring_size;
	result = kern_nexus_attr_set(nxa, NEXUS_ATTR_TX_SLOTS, ring_size);
	VERIFY(result == 0);

	ring_size =
	    pcb->ipsec_kpipe_rx_ring_size != 0 ? pcb->ipsec_kpipe_rx_ring_size :
	    pcb->ipsec_netif_ring_size != 0 ? pcb->ipsec_netif_ring_size :
	    if_ipsec_ring_size;
	result = kern_nexus_attr_set(nxa, NEXUS_ATTR_RX_SLOTS, ring_size);
	VERIFY(result == 0);

	result = kern_nexus_controller_register_provider(ipsec_ncd,
	    dom_prov,
	    provider_name,
	    &prov_init,
	    sizeof(prov_init),
	    nxa,
	    &ipsec_kpipe_uuid);
	if (result) {
		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_controller_register_provider failed: %d\n",
		    __FUNCTION__, result);
		goto done;
	}

done:
	if (nxa) {
		kern_nexus_attr_destroy(nxa);
	}

	if (result) {
		/* Roll back the controller and the refcount taken above. */
		if (ipsec_ncd) {
			kern_nexus_controller_destroy(ipsec_ncd);
			ipsec_ncd = NULL;
		}
		ipsec_ncd_refcount = 0;
	}

	lck_mtx_unlock(&ipsec_lock);

	return result;
}
2244 
/*
 * Drop one reference on the shared kernel-pipe nexus controller taken by
 * ipsec_register_kernel_pipe_nexus(); the last reference destroys the
 * controller.  Serialized by ipsec_lock.
 */
static void
ipsec_unregister_kernel_pipe_nexus(void)
{
	lck_mtx_lock(&ipsec_lock);

	VERIFY(ipsec_ncd_refcount > 0);

	if (--ipsec_ncd_refcount == 0) {
		kern_nexus_controller_destroy(ipsec_ncd);
		ipsec_ncd = NULL;
	}

	lck_mtx_unlock(&ipsec_lock);
}
2259 
/* This structure only holds onto kpipe channels that need to be
 * freed in the future, but are cleared from the pcb under lock
 */
struct ipsec_detached_channels {
	int count;                              /* number of valid entries in uuids[] */
	kern_pbufpool_t pp;                     /* kpipe packet pool taken from the pcb */
	uuid_t uuids[IPSEC_IF_MAX_RING_COUNT];  /* detached kpipe provider instances */
};
2268 
/*
 * Move all kpipe channel state (instance UUIDs and the packet pool) out of
 * the pcb and into *dc so it can be freed later, outside the pcb lock, by
 * ipsec_free_channels().  Must be called with the pcb lock held exclusive.
 * If no kpipes were ever allocated, dc->count is set to 0.
 */
static void
ipsec_detach_channels(struct ipsec_pcb *pcb, struct ipsec_detached_channels *dc)
{
	LCK_RW_ASSERT(&pcb->ipsec_pcb_lock, LCK_RW_TYPE_EXCLUSIVE);

	if (!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
		/* Nothing allocated: every slot must already be empty. */
		for (int i = 0; i < IPSEC_IF_MAX_RING_COUNT; i++) {
			VERIFY(uuid_is_null(pcb->ipsec_kpipe_uuid[i]));
		}
		dc->count = 0;
		return;
	}

	dc->count = pcb->ipsec_kpipe_count;

	VERIFY(dc->count >= 0);
	VERIFY(dc->count <= IPSEC_IF_MAX_RING_COUNT);

	/* Transfer the first dc->count UUIDs; the rest must be null. */
	for (int i = 0; i < dc->count; i++) {
		VERIFY(!uuid_is_null(pcb->ipsec_kpipe_uuid[i]));
		uuid_copy(dc->uuids[i], pcb->ipsec_kpipe_uuid[i]);
		uuid_clear(pcb->ipsec_kpipe_uuid[i]);
	}
	for (int i = dc->count; i < IPSEC_IF_MAX_RING_COUNT; i++) {
		VERIFY(uuid_is_null(pcb->ipsec_kpipe_uuid[i]));
	}

	/* The pool exists iff at least one channel exists. */
	if (dc->count) {
		VERIFY(pcb->ipsec_kpipe_pp);
	} else {
		VERIFY(!pcb->ipsec_kpipe_pp);
	}

	dc->pp = pcb->ipsec_kpipe_pp;

	pcb->ipsec_kpipe_pp = NULL;

	ipsec_flag_clr(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED);
}
2308 
2309 static void
ipsec_free_channels(struct ipsec_detached_channels * dc)2310 ipsec_free_channels(struct ipsec_detached_channels *dc)
2311 {
2312 	if (!dc->count) {
2313 		return;
2314 	}
2315 
2316 	for (int i = 0; i < dc->count; i++) {
2317 		errno_t result;
2318 		result = kern_nexus_controller_free_provider_instance(ipsec_ncd,
2319 		    dc->uuids[i]);
2320 		VERIFY(!result);
2321 	}
2322 
2323 	VERIFY(dc->pp);
2324 	kern_pbufpool_destroy(dc->pp);
2325 
2326 	ipsec_unregister_kernel_pipe_nexus();
2327 
2328 	memset(dc, 0, sizeof(*dc));
2329 }
2330 
/*
 * Allocate and bind the kernel-pipe channels for this pcb: register the
 * shared kpipe nexus, create the kpipe packet pool, then allocate and bind
 * one provider instance per configured kpipe ring.  Binding is restricted
 * either to a process UUID (if ipsec_kpipe_proc_uuid is set) or to a pid
 * (the configured ipsec_kpipe_pid, falling back to the calling process).
 * Requires PRIV_SKYWALK_REGISTER_KERNEL_PIPE.  On failure all partially
 * created instances and the pool are unwound.
 */
static errno_t
ipsec_enable_channel(struct ipsec_pcb *pcb, struct proc *proc)
{
	struct kern_nexus_init init;
	struct kern_pbufpool_init pp_init;
	uuid_t uuid_null = {0};
	errno_t result;

	/* Privilege check before touching any state. */
	kauth_cred_t cred = kauth_cred_get();
	result = priv_check_cred(cred, PRIV_SKYWALK_REGISTER_KERNEL_PIPE, 0);
	if (result) {
		return result;
	}

	VERIFY(pcb->ipsec_kpipe_count);
	VERIFY(!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED));

	result = ipsec_register_kernel_pipe_nexus(pcb);

	lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);

	if (result) {
		os_log_error(OS_LOG_DEFAULT, "%s: %s failed to register kernel pipe nexus\n",
		    __func__, pcb->ipsec_if_xname);
		goto done;
	}

	VERIFY(ipsec_ncd);

	bzero(&pp_init, sizeof(pp_init));
	pp_init.kbi_version = KERN_PBUFPOOL_CURRENT_VERSION;
	pp_init.kbi_flags |= (KBIF_VIRTUAL_DEVICE | KBIF_USER_ACCESS);
	// Note: we only need as many packets as can be held in the tx and rx rings
	pp_init.kbi_packets = pcb->ipsec_netif_ring_size * 2 * pcb->ipsec_kpipe_count;
	pp_init.kbi_bufsize = pcb->ipsec_slot_size;
	pp_init.kbi_buf_seg_size = IPSEC_IF_DEFAULT_BUF_SEG_SIZE;
	pp_init.kbi_max_frags = 1;
	(void) snprintf((char *)pp_init.kbi_name, sizeof(pp_init.kbi_name),
	    "com.apple.kpipe.%s", pcb->ipsec_if_xname);
	pp_init.kbi_ctx = NULL;
	pp_init.kbi_ctx_retain = NULL;
	pp_init.kbi_ctx_release = NULL;

	result = kern_pbufpool_create(&pp_init, &pcb->ipsec_kpipe_pp,
	    NULL);
	if (result != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s: %s pbufbool create failed, error %d\n",
		    __func__, pcb->ipsec_if_xname, result);
		goto done;
	}

	bzero(&init, sizeof(init));
	init.nxi_version = KERN_NEXUS_CURRENT_VERSION;
	init.nxi_tx_pbufpool = pcb->ipsec_kpipe_pp;

	/* One kpipe instance per configured ring. */
	for (unsigned int i = 0; i < pcb->ipsec_kpipe_count; i++) {
		VERIFY(uuid_is_null(pcb->ipsec_kpipe_uuid[i]));
		result = kern_nexus_controller_alloc_provider_instance(ipsec_ncd,
		    ipsec_kpipe_uuid, pcb, NULL, &pcb->ipsec_kpipe_uuid[i], &init);

		if (result == 0) {
			nexus_port_t port = NEXUS_PORT_KERNEL_PIPE_CLIENT;
			const bool has_proc_uuid = !uuid_is_null(pcb->ipsec_kpipe_proc_uuid);
			pid_t pid = pcb->ipsec_kpipe_pid;
			if (!pid && !has_proc_uuid) {
				/* No explicit binding configured: bind to the caller. */
				pid = proc_pid(proc);
			}
			result = kern_nexus_controller_bind_provider_instance(ipsec_ncd,
			    (const uint8_t *)pcb->ipsec_kpipe_uuid[i], &port,
			    pid, has_proc_uuid ? pcb->ipsec_kpipe_proc_uuid : uuid_null, NULL,
			    0, has_proc_uuid ? NEXUS_BIND_EXEC_UUID:NEXUS_BIND_PID);
		}

		if (result) {
			/* Unwind all of them on error */
			for (int j = 0; j < IPSEC_IF_MAX_RING_COUNT; j++) {
				if (!uuid_is_null(pcb->ipsec_kpipe_uuid[j])) {
					kern_nexus_controller_free_provider_instance(ipsec_ncd,
					    pcb->ipsec_kpipe_uuid[j]);
					uuid_clear(pcb->ipsec_kpipe_uuid[j]);
				}
			}
			goto done;
		}
	}

done:
	lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);

	if (result) {
		/* Drop the pool and the nexus reference taken above. */
		if (pcb->ipsec_kpipe_pp != NULL) {
			kern_pbufpool_destroy(pcb->ipsec_kpipe_pp);
			pcb->ipsec_kpipe_pp = NULL;
		}
		ipsec_unregister_kernel_pipe_nexus();
	} else {
		ipsec_flag_set(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED);
	}

	return result;
}
2432 
2433 #endif // IPSEC_NEXUS
2434 
2435 
2436 /* Kernel control functions */
2437 
2438 static inline int
ipsec_find_by_unit(u_int32_t unit)2439 ipsec_find_by_unit(u_int32_t unit)
2440 {
2441 	struct ipsec_pcb *next_pcb = NULL;
2442 	int found = 0;
2443 
2444 	TAILQ_FOREACH(next_pcb, &ipsec_head, ipsec_chain) {
2445 		if (next_pcb->ipsec_unit == unit) {
2446 			found = 1;
2447 			break;
2448 		}
2449 	}
2450 
2451 	return found;
2452 }
2453 
/*
 * Destroy a pcb: drop any queued input mbufs, destroy its locks, unlink it
 * from the global ipsec_head list, and free it.  'locked' indicates whether
 * the caller already holds ipsec_lock (needed for the list removal).
 */
static inline void
ipsec_free_pcb(struct ipsec_pcb *pcb, bool locked)
{
#if IPSEC_NEXUS
	mbuf_freem_list(pcb->ipsec_input_chain);
	pcb->ipsec_input_chain_count = 0;
	lck_mtx_destroy(&pcb->ipsec_input_chain_lock, &ipsec_lck_grp);
	lck_mtx_destroy(&pcb->ipsec_kpipe_encrypt_lock, &ipsec_lck_grp);
	lck_mtx_destroy(&pcb->ipsec_kpipe_decrypt_lock, &ipsec_lck_grp);
#endif // IPSEC_NEXUS
	lck_mtx_destroy(&pcb->ipsec_pcb_data_move_lock, &ipsec_lck_grp);
	lck_rw_destroy(&pcb->ipsec_pcb_lock, &ipsec_lck_grp);
	if (!locked) {
		lck_mtx_lock(&ipsec_lock);
	}
	TAILQ_REMOVE(&ipsec_head, pcb, ipsec_chain);
	if (!locked) {
		lck_mtx_unlock(&ipsec_lock);
	}
	zfree(ipsec_pcb_zone, pcb);
}
2475 
/*
 * Allocate a new pcb for a kernel-control client.  Picks (or validates) a
 * control unit number, chooses an unused unique interface id, and inserts
 * the pcb into the global ipsec_head list, which is kept sorted by unique
 * id.  On success *unitinfo points at the new pcb and *unit holds the
 * chosen unit.  Returns EBUSY if no unit is available or the requested one
 * is taken.
 */
static errno_t
ipsec_ctl_setup(u_int32_t *unit, void **unitinfo)
{
	if (unit == NULL || unitinfo == NULL) {
		return EINVAL;
	}

	lck_mtx_lock(&ipsec_lock);

	/* Find next available unit */
	if (*unit == 0) {
		*unit = 1;
		while (*unit != ctl_maxunit) {
			if (ipsec_find_by_unit(*unit)) {
				(*unit)++;
			} else {
				break;
			}
		}
		if (*unit == ctl_maxunit) {
			lck_mtx_unlock(&ipsec_lock);
			return EBUSY;
		}
	} else if (ipsec_find_by_unit(*unit)) {
		lck_mtx_unlock(&ipsec_lock);
		return EBUSY;
	}

	/* Find some open interface id */
	u_int32_t chosen_unique_id = 1;
	struct ipsec_pcb *next_pcb = TAILQ_LAST(&ipsec_head, ipsec_list);
	if (next_pcb != NULL) {
		/* List was not empty, add one to the last item */
		chosen_unique_id = next_pcb->ipsec_unique_id + 1;
		next_pcb = NULL;

		/*
		 * If this wrapped the id number, start looking at
		 * the front of the list for an unused id.
		 */
		if (chosen_unique_id == 0) {
			/* Find the next unused ID */
			chosen_unique_id = 1;
			TAILQ_FOREACH(next_pcb, &ipsec_head, ipsec_chain) {
				if (next_pcb->ipsec_unique_id > chosen_unique_id) {
					/* We found a gap */
					break;
				}

				chosen_unique_id = next_pcb->ipsec_unique_id + 1;
			}
		}
	}

	struct ipsec_pcb *pcb = zalloc_flags(ipsec_pcb_zone, Z_WAITOK | Z_ZERO);

	*unitinfo = pcb;
	pcb->ipsec_unit = *unit;
	pcb->ipsec_unique_id = chosen_unique_id;

	/*
	 * next_pcb is non-NULL only when the gap scan above broke out early;
	 * inserting before it keeps the list sorted by unique id.
	 */
	if (next_pcb != NULL) {
		TAILQ_INSERT_BEFORE(next_pcb, pcb, ipsec_chain);
	} else {
		TAILQ_INSERT_TAIL(&ipsec_head, pcb, ipsec_chain);
	}

	lck_mtx_unlock(&ipsec_lock);

	return 0;
}
2546 
/*
 * Kernel-control bind callback.  Creates the pcb if ipsec_ctl_setup() has
 * not already run, then initializes its defaults and locks.  Returns EINVAL
 * if no pcb could be obtained or if bind was already called on this pcb.
 */
static errno_t
ipsec_ctl_bind(kern_ctl_ref kctlref,
    struct sockaddr_ctl *sac,
    void **unitinfo)
{
	if (*unitinfo == NULL) {
		u_int32_t unit = 0;
		(void)ipsec_ctl_setup(&unit, unitinfo);
	}

	struct ipsec_pcb *pcb = (struct ipsec_pcb *)*unitinfo;
	if (pcb == NULL) {
		return EINVAL;
	}

	if (pcb->ipsec_ctlref != NULL) {
		// Return if bind was already called
		return EINVAL;
	}

	/* Setup the protocol control block */
	pcb->ipsec_ctlref = kctlref;
	pcb->ipsec_unit = sac->sc_unit;
	pcb->ipsec_output_service_class = MBUF_SC_OAM;

#if IPSEC_NEXUS
	/* Nexus defaults; may be overridden by setsockopt before connect. */
	pcb->ipsec_use_netif = false;
	pcb->ipsec_slot_size = IPSEC_IF_DEFAULT_SLOT_SIZE;
	pcb->ipsec_netif_ring_size = if_ipsec_ring_size;
	pcb->ipsec_tx_fsw_ring_size = if_ipsec_tx_fsw_ring_size;
	pcb->ipsec_rx_fsw_ring_size = if_ipsec_rx_fsw_ring_size;
#endif // IPSEC_NEXUS

	lck_rw_init(&pcb->ipsec_pcb_lock, &ipsec_lck_grp, &ipsec_lck_attr);
	lck_mtx_init(&pcb->ipsec_pcb_data_move_lock, &ipsec_lck_grp, &ipsec_lck_attr);
#if IPSEC_NEXUS
	pcb->ipsec_input_chain_count = 0;
	lck_mtx_init(&pcb->ipsec_input_chain_lock, &ipsec_lck_grp, &ipsec_lck_attr);
	lck_mtx_init(&pcb->ipsec_kpipe_encrypt_lock, &ipsec_lck_grp, &ipsec_lck_attr);
	lck_mtx_init(&pcb->ipsec_kpipe_decrypt_lock, &ipsec_lck_grp, &ipsec_lck_attr);
#endif // IPSEC_NEXUS

	return 0;
}
2591 
/*
 * Kernel-control connect callback: create and attach the ipsec interface.
 * Ensures setup/bind have run, then either builds the Skywalk-native path
 * (optional kpipe channels + netif nexus + flowswitch, bpf DLT_RAW) or the
 * classic mbuf path (ifnet_allocate/attach with ipsec_start, bpf DLT_NULL).
 * On success the interface is marked IFF_RUNNING; on most failures the pcb
 * is freed and *unitinfo cleared.
 */
static errno_t
ipsec_ctl_connect(kern_ctl_ref kctlref,
    struct sockaddr_ctl *sac,
    void **unitinfo)
{
	struct ifnet_init_eparams ipsec_init = {};
	errno_t result = 0;

	if (*unitinfo == NULL) {
		(void)ipsec_ctl_bind(kctlref, sac, unitinfo);
	}

	struct ipsec_pcb *__single pcb = *unitinfo;
	if (pcb == NULL) {
		return EINVAL;
	}

	/* Handle case where ipsec_ctl_setup() was called, but ipsec_ctl_bind() was not */
	if (pcb->ipsec_ctlref == NULL) {
		(void)ipsec_ctl_bind(kctlref, sac, unitinfo);
	}

	/* Unit and unique id are 1-based; interface names are 0-based. */
	snprintf(pcb->ipsec_if_xname, sizeof(pcb->ipsec_if_xname), "ipsec%d", pcb->ipsec_unit - 1);
	snprintf(pcb->ipsec_unique_name, sizeof(pcb->ipsec_unique_name), "ipsecid%d", pcb->ipsec_unique_id - 1);
	os_log(OS_LOG_DEFAULT, "ipsec_ctl_connect: creating interface %s (id %s)\n", pcb->ipsec_if_xname, pcb->ipsec_unique_name);

	/* Create the interface */
	bzero(&ipsec_init, sizeof(ipsec_init));
	ipsec_init.ver = IFNET_INIT_CURRENT_VERSION;
	ipsec_init.len = sizeof(ipsec_init);

#if IPSEC_NEXUS
	if (pcb->ipsec_use_netif) {
		ipsec_init.flags = (IFNET_INIT_SKYWALK_NATIVE | IFNET_INIT_NX_NOAUTO);
	} else
#endif // IPSEC_NEXUS
	{
		/* Legacy path uses the mbuf start routine. */
		ipsec_init.flags = IFNET_INIT_NX_NOAUTO;
		ipsec_init.start = ipsec_start;
	}
	ipsec_init.name = "ipsec";
	ipsec_init.unit = pcb->ipsec_unit - 1;
	ipsec_init.uniqueid = pcb->ipsec_unique_name;
	ipsec_init.uniqueid_len = (uint32_t)strbuflen(pcb->ipsec_unique_name,
	    sizeof(pcb->ipsec_unique_name));
	ipsec_init.family = IFNET_FAMILY_IPSEC;
	ipsec_init.type = IFT_OTHER;
	ipsec_init.demux = ipsec_demux;
	ipsec_init.add_proto = ipsec_add_proto;
	ipsec_init.del_proto = ipsec_del_proto;
	ipsec_init.softc = pcb;
	ipsec_init.ioctl = ipsec_ioctl;
	ipsec_init.free = ipsec_detached;

#if IPSEC_NEXUS
	/* We don't support kpipes without a netif */
	if (pcb->ipsec_kpipe_count && !pcb->ipsec_use_netif) {
		result = ENOTSUP;
		os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - kpipe requires netif: failed %d\n", result);
		ipsec_free_pcb(pcb, false);
		*unitinfo = NULL;
		return result;
	}

	if (if_ipsec_debug != 0) {
		printf("%s: %s%d use_netif %d kpipe_count %d slot_size %u ring_size %u "
		    "kpipe_tx_ring_size %u kpipe_rx_ring_size %u\n",
		    __func__,
		    ipsec_init.name, ipsec_init.unit,
		    pcb->ipsec_use_netif,
		    pcb->ipsec_kpipe_count,
		    pcb->ipsec_slot_size,
		    pcb->ipsec_netif_ring_size,
		    pcb->ipsec_kpipe_tx_ring_size,
		    pcb->ipsec_kpipe_rx_ring_size);
	}
	if (pcb->ipsec_use_netif) {
		/* Kpipe channels must exist before the netif is attached. */
		if (pcb->ipsec_kpipe_count) {
			result = ipsec_enable_channel(pcb, current_proc());
			if (result) {
				os_log_error(OS_LOG_DEFAULT, "%s: %s failed to enable channels\n",
				    __func__, pcb->ipsec_if_xname);
				ipsec_free_pcb(pcb, false);
				*unitinfo = NULL;
				return result;
			}
		}

		result = ipsec_nexus_ifattach(pcb, &ipsec_init, &pcb->ipsec_ifp);
		if (result != 0) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - ipsec_nexus_ifattach failed: %d\n", result);
			ipsec_free_pcb(pcb, false);
			*unitinfo = NULL;
			return result;
		}

		result = ipsec_flowswitch_attach(pcb);
		if (result != 0) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - ipsec_flowswitch_attach failed: %d\n", result);
			// Do not call ipsec_free_pcb(). We will be attached already, and will be freed later
			// in ipsec_detached().
			*unitinfo = NULL;
			return result;
		}

		/* Attach to bpf */
		bpfattach(pcb->ipsec_ifp, DLT_RAW, 0);
	} else
#endif // IPSEC_NEXUS
	{
		result = ifnet_allocate_extended(&ipsec_init, &pcb->ipsec_ifp);
		if (result != 0) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - ifnet_allocate failed: %d\n", result);
			ipsec_free_pcb(pcb, false);
			*unitinfo = NULL;
			return result;
		}
		ipsec_ifnet_set_attrs(pcb->ipsec_ifp);

		/* Attach the interface */
		result = ifnet_attach(pcb->ipsec_ifp, NULL);
		if (result != 0) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - ifnet_attach failed: %d\n", result);
			ifnet_release(pcb->ipsec_ifp);
			ipsec_free_pcb(pcb, false);
			*unitinfo = NULL;
			return result;
		}

		/* Attach to bpf */
		bpfattach(pcb->ipsec_ifp, DLT_NULL, 0);
	}

#if IPSEC_NEXUS
	/*
	 * Mark the data path as ready.
	 * If kpipe nexus is being used then the data path is marked ready only when a kpipe channel is connected.
	 */
	if (pcb->ipsec_kpipe_count == 0) {
		lck_mtx_lock(&pcb->ipsec_pcb_data_move_lock);
		IPSEC_SET_DATA_PATH_READY(pcb);
		lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
	}
#endif

	/* The interface's resources are allocated; mark it as running */
	ifnet_set_flags(pcb->ipsec_ifp, IFF_RUNNING, IFF_RUNNING);

	return 0;
}
2742 
2743 static errno_t
ipsec_detach_ip(ifnet_t interface,protocol_family_t protocol,socket_t pf_socket)2744 ipsec_detach_ip(ifnet_t                         interface,
2745     protocol_family_t       protocol,
2746     socket_t                        pf_socket)
2747 {
2748 	errno_t result = EPROTONOSUPPORT;
2749 
2750 	/* Attempt a detach */
2751 	if (protocol == PF_INET) {
2752 		struct ifreq    ifr;
2753 
2754 		bzero(&ifr, sizeof(ifr));
2755 		snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s%d",
2756 		    ifnet_name(interface), ifnet_unit(interface));
2757 
2758 		result = sock_ioctl(pf_socket, SIOCPROTODETACH, &ifr);
2759 	} else if (protocol == PF_INET6) {
2760 		struct in6_ifreq        ifr6;
2761 
2762 		bzero(&ifr6, sizeof(ifr6));
2763 		snprintf(ifr6.ifr_name, sizeof(ifr6.ifr_name), "%s%d",
2764 		    ifnet_name(interface), ifnet_unit(interface));
2765 
2766 		result = sock_ioctl(pf_socket, SIOCPROTODETACH_IN6, &ifr6);
2767 	}
2768 
2769 	return result;
2770 }
2771 
2772 static void
ipsec_remove_address(ifnet_t interface,protocol_family_t protocol,ifaddr_t address,socket_t pf_socket)2773 ipsec_remove_address(ifnet_t                            interface,
2774     protocol_family_t      protocol,
2775     ifaddr_t                       address,
2776     socket_t                       pf_socket)
2777 {
2778 	errno_t result = 0;
2779 
2780 	/* Attempt a detach */
2781 	if (protocol == PF_INET) {
2782 		struct ifreq    ifr;
2783 
2784 		bzero(&ifr, sizeof(ifr));
2785 		snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s%d",
2786 		    ifnet_name(interface), ifnet_unit(interface));
2787 		result = ifaddr_address(address, &ifr.ifr_addr, sizeof(ifr.ifr_addr));
2788 		if (result != 0) {
2789 			os_log_error(OS_LOG_DEFAULT, "ipsec_remove_address - ifaddr_address failed: %d", result);
2790 		} else {
2791 			result = sock_ioctl(pf_socket, SIOCDIFADDR, &ifr);
2792 			if (result != 0) {
2793 				os_log_error(OS_LOG_DEFAULT, "ipsec_remove_address - SIOCDIFADDR failed: %d", result);
2794 			}
2795 		}
2796 	} else if (protocol == PF_INET6) {
2797 		struct in6_ifreq        ifr6;
2798 
2799 		bzero(&ifr6, sizeof(ifr6));
2800 		snprintf(ifr6.ifr_name, sizeof(ifr6.ifr_name), "%s%d",
2801 		    ifnet_name(interface), ifnet_unit(interface));
2802 		result = ifaddr_address(address, (struct sockaddr*)&ifr6.ifr_addr,
2803 		    sizeof(ifr6.ifr_addr));
2804 		if (result != 0) {
2805 			os_log_error(OS_LOG_DEFAULT, "ipsec_remove_address - ifaddr_address failed (v6): %d",
2806 			    result);
2807 		} else {
2808 			result = sock_ioctl(pf_socket, SIOCDIFADDR_IN6, &ifr6);
2809 			if (result != 0) {
2810 				os_log_error(OS_LOG_DEFAULT, "ipsec_remove_address - SIOCDIFADDR_IN6 failed: %d",
2811 				    result);
2812 			}
2813 		}
2814 	}
2815 }
2816 
2817 static void
ipsec_cleanup_family(ifnet_t interface,protocol_family_t protocol)2818 ipsec_cleanup_family(ifnet_t                            interface,
2819     protocol_family_t      protocol)
2820 {
2821 	errno_t result = 0;
2822 	socket_t __single pf_socket = NULL;
2823 	uint16_t addresses_count = 0;
2824 	ifaddr_t *__counted_by(addresses_count) addresses = NULL;
2825 	int i;
2826 
2827 	if (protocol != PF_INET && protocol != PF_INET6) {
2828 		os_log_error(OS_LOG_DEFAULT, "ipsec_cleanup_family - invalid protocol family %d\n", protocol);
2829 		return;
2830 	}
2831 
2832 	/* Create a socket for removing addresses and detaching the protocol */
2833 	result = sock_socket(protocol, SOCK_DGRAM, 0, NULL, NULL, &pf_socket);
2834 	if (result != 0) {
2835 		if (result != EAFNOSUPPORT) {
2836 			os_log_error(OS_LOG_DEFAULT, "ipsec_cleanup_family - failed to create %s socket: %d\n",
2837 			    protocol == PF_INET ? "IP" : "IPv6", result);
2838 		}
2839 		goto cleanup;
2840 	}
2841 
2842 	/* always set SS_PRIV, we want to close and detach regardless */
2843 	sock_setpriv(pf_socket, 1);
2844 
2845 	result = ipsec_detach_ip(interface, protocol, pf_socket);
2846 	if (result == 0 || result == ENXIO) {
2847 		/* We are done! We either detached or weren't attached. */
2848 		goto cleanup;
2849 	} else if (result != EBUSY) {
2850 		/* Uh, not really sure what happened here... */
2851 		os_log_error(OS_LOG_DEFAULT, "ipsec_cleanup_family - ipsec_detach_ip failed: %d\n", result);
2852 		goto cleanup;
2853 	}
2854 
2855 	/*
2856 	 * At this point, we received an EBUSY error. This means there are
2857 	 * addresses attached. We should detach them and then try again.
2858 	 */
2859 	result = ifnet_get_address_list_family_with_count(interface, &addresses,
2860 	    &addresses_count, (sa_family_t)protocol);
2861 	if (result != 0) {
2862 		os_log_error(OS_LOG_DEFAULT, "fnet_get_address_list_family(%s%d, 0xblah, %s) - failed: %d\n",
2863 		    ifnet_name(interface), ifnet_unit(interface),
2864 		    protocol == PF_INET ? "PF_INET" : "PF_INET6", result);
2865 		goto cleanup;
2866 	}
2867 
2868 	for (i = 0; addresses[i] != 0; i++) {
2869 		ipsec_remove_address(interface, protocol, addresses[i], pf_socket);
2870 	}
2871 	ifnet_address_list_free_counted_by(addresses, addresses_count);
2872 
2873 	/*
2874 	 * The addresses should be gone, we should try the remove again.
2875 	 */
2876 	result = ipsec_detach_ip(interface, protocol, pf_socket);
2877 	if (result != 0 && result != ENXIO) {
2878 		os_log_error(OS_LOG_DEFAULT, "ipsec_cleanup_family - ipsec_detach_ip failed: %d\n", result);
2879 	}
2880 
2881 cleanup:
2882 	if (pf_socket != NULL) {
2883 		sock_close(pf_socket);
2884 	}
2885 
2886 	if (addresses != NULL) {
2887 		ifnet_address_list_free_counted_by(addresses, addresses_count);
2888 	}
2889 }
2890 
/*
 * Kernel-control disconnect handler for an ipsec interface.
 *
 * Tears down the state associated with this control socket: drains
 * in-flight data-path threads, stops nexus rings and detaches kpipe
 * channels (IPSEC_NEXUS), removes IPv4/IPv6 addresses and protocols,
 * and detaches the ifnet.  If the pcb was bound but never connected
 * (no ifnet), the pcb is freed directly.
 *
 * Returns EINVAL when unitinfo is NULL, 0 otherwise.
 */
static errno_t
ipsec_ctl_disconnect(__unused kern_ctl_ref      kctlref,
    __unused u_int32_t             unit,
    void                                   *unitinfo)
{
	struct ipsec_pcb *__single pcb = unitinfo;
	ifnet_t ifp = NULL;
	errno_t result = 0;

	if (pcb == NULL) {
		return EINVAL;
	}

	/* Wait until all threads in the data paths are done. */
	ipsec_wait_data_move_drain(pcb);

#if IPSEC_NEXUS
	// Tell the nexus to stop all rings
	if (pcb->ipsec_netif_nexus != NULL) {
		kern_nexus_stop(pcb->ipsec_netif_nexus);
	}
#endif // IPSEC_NEXUS

	lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);

#if IPSEC_NEXUS
	if (if_ipsec_debug != 0) {
		printf("ipsec_ctl_disconnect: detaching interface %s (id %s)\n",
		    pcb->ipsec_if_xname, pcb->ipsec_unique_name);
	}

	/* Detach kpipe channels under the lock; they are freed after it is dropped. */
	struct ipsec_detached_channels dc;
	ipsec_detach_channels(pcb, &dc);
#endif // IPSEC_NEXUS

	/* The control socket is going away; sever the back-reference. */
	pcb->ipsec_ctlref = NULL;

	ifp = pcb->ipsec_ifp;
	if (ifp != NULL) {
#if IPSEC_NEXUS
		if (pcb->ipsec_netif_nexus != NULL) {
			/*
			 * Quiesce the interface and flush any pending outbound packets.
			 */
			if_down(ifp);

			/*
			 * Suspend data movement and wait for IO threads to exit.
			 * We can't rely on the logic in dlil_quiesce_and_detach_nexuses() to
			 * do this because ipsec nexuses are attached/detached separately.
			 */
			ifnet_datamov_suspend_and_drain(ifp);
			if ((result = ifnet_detach(ifp)) != 0) {
				panic("ipsec_ctl_disconnect - ifnet_detach failed: %d", result);
				/* NOT REACHED */
			}

			/*
			 * We want to do everything in our power to ensure that the interface
			 * really goes away when the socket is closed. We must remove IP/IPv6
			 * addresses and detach the protocols. Finally, we can remove and
			 * release the interface.
			 */
			key_delsp_for_ipsec_if(ifp);

			ipsec_cleanup_family(ifp, AF_INET);
			ipsec_cleanup_family(ifp, AF_INET6);

			lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);

			ipsec_free_channels(&dc);

			ipsec_nexus_detach(pcb);

			/* Decrement refcnt added by ifnet_datamov_suspend_and_drain(). */
			ifnet_datamov_resume(ifp);
		} else
#endif // IPSEC_NEXUS
		{
			lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);

#if IPSEC_NEXUS
			ipsec_free_channels(&dc);
#endif // IPSEC_NEXUS

			/*
			 * We want to do everything in our power to ensure that the interface
			 * really goes away when the socket is closed. We must remove IP/IPv6
			 * addresses and detach the protocols. Finally, we can remove and
			 * release the interface.
			 */
			key_delsp_for_ipsec_if(ifp);

			ipsec_cleanup_family(ifp, AF_INET);
			ipsec_cleanup_family(ifp, AF_INET6);

			/*
			 * Detach now; ipsec_detach() will be called asynchronously once
			 * the I/O reference count drops to 0.  There we will invoke
			 * ifnet_release().
			 */
			if ((result = ifnet_detach(ifp)) != 0) {
				os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_disconnect - ifnet_detach failed: %d\n", result);
			}
		}
	} else {
		// Bound, but not connected
		lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
		ipsec_free_pcb(pcb, false);
	}

	return 0;
}
3004 
3005 static errno_t
ipsec_ctl_send(__unused kern_ctl_ref kctlref,__unused u_int32_t unit,__unused void * unitinfo,mbuf_t m,__unused int flags)3006 ipsec_ctl_send(__unused kern_ctl_ref    kctlref,
3007     __unused u_int32_t           unit,
3008     __unused void                        *unitinfo,
3009     mbuf_t                  m,
3010     __unused int                 flags)
3011 {
3012 	/* Receive messages from the control socket. Currently unused. */
3013 	mbuf_freem(m);
3014 	return 0;
3015 }
3016 
3017 static errno_t
ipsec_ctl_setopt(__unused kern_ctl_ref kctlref,__unused u_int32_t unit,void * unitinfo,int opt,void * __sized_by (len)data,size_t len)3018 ipsec_ctl_setopt(__unused kern_ctl_ref  kctlref, __unused u_int32_t unit,
3019     void *unitinfo, int opt, void *__sized_by(len)data, size_t len)
3020 {
3021 	errno_t result = 0;
3022 	struct ipsec_pcb *__single pcb = unitinfo;
3023 	if (pcb == NULL) {
3024 		return EINVAL;
3025 	}
3026 
3027 	/* check for privileges for privileged options */
3028 	switch (opt) {
3029 	case IPSEC_OPT_FLAGS:
3030 	case IPSEC_OPT_EXT_IFDATA_STATS:
3031 	case IPSEC_OPT_SET_DELEGATE_INTERFACE:
3032 	case IPSEC_OPT_OUTPUT_TRAFFIC_CLASS:
3033 	case IPSEC_OPT_OUTPUT_DSCP_MAPPING:
3034 		if (kauth_cred_issuser(kauth_cred_get()) == 0) {
3035 			return EPERM;
3036 		}
3037 		break;
3038 	}
3039 
3040 	switch (opt) {
3041 	case IPSEC_OPT_FLAGS: {
3042 		if (len != sizeof(u_int32_t)) {
3043 			result = EMSGSIZE;
3044 		} else {
3045 			pcb->ipsec_external_flags = *(u_int32_t *)data;
3046 		}
3047 		break;
3048 	}
3049 
3050 	case IPSEC_OPT_EXT_IFDATA_STATS: {
3051 		if (len != sizeof(int)) {
3052 			result = EMSGSIZE;
3053 			break;
3054 		}
3055 		if (pcb->ipsec_ifp == NULL) {
3056 			// Only can set after connecting
3057 			result = EINVAL;
3058 			break;
3059 		}
3060 		pcb->ipsec_ext_ifdata_stats = (*(int *)data) ? 1 : 0;
3061 		break;
3062 	}
3063 
3064 	case IPSEC_OPT_INC_IFDATA_STATS_IN:
3065 	case IPSEC_OPT_INC_IFDATA_STATS_OUT: {
3066 		struct ipsec_stats_param *utsp = (struct ipsec_stats_param *)data;
3067 
3068 		if (utsp == NULL || len < sizeof(struct ipsec_stats_param)) {
3069 			result = EINVAL;
3070 			break;
3071 		}
3072 		if (pcb->ipsec_ifp == NULL) {
3073 			// Only can set after connecting
3074 			result = EINVAL;
3075 			break;
3076 		}
3077 		if (!pcb->ipsec_ext_ifdata_stats) {
3078 			result = EINVAL;
3079 			break;
3080 		}
3081 		if (opt == IPSEC_OPT_INC_IFDATA_STATS_IN) {
3082 			ifnet_stat_increment_in(pcb->ipsec_ifp, (uint32_t)utsp->utsp_packets,
3083 			    (uint32_t)utsp->utsp_bytes, (uint32_t)utsp->utsp_errors);
3084 		} else {
3085 			ifnet_stat_increment_out(pcb->ipsec_ifp, (uint32_t)utsp->utsp_packets,
3086 			    (uint32_t)utsp->utsp_bytes, (uint32_t)utsp->utsp_errors);
3087 		}
3088 		break;
3089 	}
3090 
3091 	case IPSEC_OPT_SET_DELEGATE_INTERFACE: {
3092 		ifnet_t __single del_ifp = NULL;
3093 		char name[IFNAMSIZ];
3094 
3095 		if (len > IFNAMSIZ - 1) {
3096 			result = EMSGSIZE;
3097 			break;
3098 		}
3099 		if (pcb->ipsec_ifp == NULL) {
3100 			// Only can set after connecting
3101 			result = EINVAL;
3102 			break;
3103 		}
3104 		if (len != 0) {                   /* if len==0, del_ifp will be NULL causing the delegate to be removed */
3105 			bcopy(data, name, len);
3106 			name[len] = 0;
3107 			result = ifnet_find_by_name(__unsafe_null_terminated_from_indexable(name), &del_ifp);
3108 		}
3109 		if (result == 0) {
3110 			os_log_error(OS_LOG_DEFAULT, "%s IPSEC_OPT_SET_DELEGATE_INTERFACE %s to %s\n",
3111 			    __func__, pcb->ipsec_ifp->if_xname,
3112 			    del_ifp ? del_ifp->if_xname : "NULL");
3113 
3114 			result = ifnet_set_delegate(pcb->ipsec_ifp, del_ifp);
3115 			if (del_ifp) {
3116 				ifnet_release(del_ifp);
3117 			}
3118 		}
3119 		break;
3120 	}
3121 
3122 	case IPSEC_OPT_OUTPUT_TRAFFIC_CLASS: {
3123 		if (len != sizeof(int)) {
3124 			result = EMSGSIZE;
3125 			break;
3126 		}
3127 		if (pcb->ipsec_ifp == NULL) {
3128 			// Only can set after connecting
3129 			result = EINVAL;
3130 			break;
3131 		}
3132 		mbuf_svc_class_t output_service_class = so_tc2msc(*(int *)data);
3133 		if (output_service_class == MBUF_SC_UNSPEC) {
3134 			pcb->ipsec_output_service_class = MBUF_SC_OAM;
3135 		} else {
3136 			pcb->ipsec_output_service_class = output_service_class;
3137 		}
3138 		os_log_error(OS_LOG_DEFAULT, "%s IPSEC_OPT_OUTPUT_TRAFFIC_CLASS %s svc %d\n",
3139 		    __func__, pcb->ipsec_ifp->if_xname,
3140 		    pcb->ipsec_output_service_class);
3141 		break;
3142 	}
3143 
3144 #if IPSEC_NEXUS
3145 	case IPSEC_OPT_ENABLE_CHANNEL: {
3146 		if (len != sizeof(int)) {
3147 			result = EMSGSIZE;
3148 			break;
3149 		}
3150 		if (pcb->ipsec_ifp != NULL) {
3151 			// Only can set before connecting
3152 			result = EINVAL;
3153 			break;
3154 		}
3155 		if ((*(int *)data) != 0 &&
3156 		    (*(int *)data) != 1 &&
3157 		    (*(int *)data) != IPSEC_IF_WMM_RING_COUNT) {
3158 			result = EINVAL;
3159 			break;
3160 		}
3161 		lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
3162 		pcb->ipsec_kpipe_count = *(int *)data;
3163 		lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
3164 		break;
3165 	}
3166 
3167 	case IPSEC_OPT_CHANNEL_BIND_PID: {
3168 		if (len != sizeof(pid_t)) {
3169 			result = EMSGSIZE;
3170 			break;
3171 		}
3172 		if (pcb->ipsec_ifp != NULL) {
3173 			// Only can set before connecting
3174 			result = EINVAL;
3175 			break;
3176 		}
3177 		lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
3178 		pcb->ipsec_kpipe_pid = *(pid_t *)data;
3179 		lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
3180 		break;
3181 	}
3182 
3183 	case IPSEC_OPT_CHANNEL_BIND_UUID: {
3184 		if (len != sizeof(uuid_t)) {
3185 			result = EMSGSIZE;
3186 			break;
3187 		}
3188 		if (pcb->ipsec_ifp != NULL) {
3189 			// Only can set before connecting
3190 			result = EINVAL;
3191 			break;
3192 		}
3193 		lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
3194 		uuid_copy(pcb->ipsec_kpipe_proc_uuid, *((uuid_t *)data));
3195 		lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
3196 		break;
3197 	}
3198 
3199 	case IPSEC_OPT_ENABLE_FLOWSWITCH: {
3200 		if (len != sizeof(int)) {
3201 			result = EMSGSIZE;
3202 			break;
3203 		}
3204 		if (pcb->ipsec_ifp == NULL) {
3205 			// Only can set after connecting
3206 			result = EINVAL;
3207 			break;
3208 		}
3209 		if (!if_is_fsw_transport_netagent_enabled()) {
3210 			result = ENOTSUP;
3211 			break;
3212 		}
3213 		if (uuid_is_null(pcb->ipsec_nx.fsw_agent)) {
3214 			result = ENOENT;
3215 			break;
3216 		}
3217 
3218 		uint32_t flags = netagent_get_flags(pcb->ipsec_nx.fsw_agent);
3219 
3220 		if (*(int *)data) {
3221 			flags |= (NETAGENT_FLAG_NEXUS_PROVIDER |
3222 			    NETAGENT_FLAG_NEXUS_LISTENER);
3223 			result = netagent_set_flags(pcb->ipsec_nx.fsw_agent, flags);
3224 			pcb->ipsec_needs_netagent = true;
3225 		} else {
3226 			pcb->ipsec_needs_netagent = false;
3227 			flags &= ~(NETAGENT_FLAG_NEXUS_PROVIDER |
3228 			    NETAGENT_FLAG_NEXUS_LISTENER);
3229 			result = netagent_set_flags(pcb->ipsec_nx.fsw_agent, flags);
3230 		}
3231 		break;
3232 	}
3233 
3234 	case IPSEC_OPT_INPUT_FRAG_SIZE: {
3235 		if (len != sizeof(u_int32_t)) {
3236 			result = EMSGSIZE;
3237 			break;
3238 		}
3239 		u_int32_t input_frag_size = *(u_int32_t *)data;
3240 		if (input_frag_size <= sizeof(struct ip6_hdr)) {
3241 			pcb->ipsec_frag_size_set = FALSE;
3242 			pcb->ipsec_input_frag_size = 0;
3243 		} else {
3244 			pcb->ipsec_frag_size_set = TRUE;
3245 			pcb->ipsec_input_frag_size = input_frag_size;
3246 		}
3247 		break;
3248 	}
3249 	case IPSEC_OPT_ENABLE_NETIF: {
3250 		if (len != sizeof(int)) {
3251 			result = EMSGSIZE;
3252 			break;
3253 		}
3254 		if (pcb->ipsec_ifp != NULL) {
3255 			// Only can set before connecting
3256 			result = EINVAL;
3257 			break;
3258 		}
3259 		lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
3260 		pcb->ipsec_use_netif = !!(*(int *)data);
3261 		lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
3262 		break;
3263 	}
3264 	case IPSEC_OPT_SLOT_SIZE: {
3265 		if (len != sizeof(u_int32_t)) {
3266 			result = EMSGSIZE;
3267 			break;
3268 		}
3269 		if (pcb->ipsec_ifp != NULL) {
3270 			// Only can set before connecting
3271 			result = EINVAL;
3272 			break;
3273 		}
3274 		u_int32_t slot_size = *(u_int32_t *)data;
3275 		if (slot_size < IPSEC_IF_MIN_SLOT_SIZE ||
3276 		    slot_size > IPSEC_IF_MAX_SLOT_SIZE) {
3277 			return EINVAL;
3278 		}
3279 		pcb->ipsec_slot_size = slot_size;
3280 		if (if_ipsec_debug != 0) {
3281 			printf("%s: IPSEC_OPT_SLOT_SIZE %u\n", __func__, slot_size);
3282 		}
3283 		break;
3284 	}
3285 	case IPSEC_OPT_NETIF_RING_SIZE: {
3286 		if (len != sizeof(u_int32_t)) {
3287 			result = EMSGSIZE;
3288 			break;
3289 		}
3290 		if (pcb->ipsec_ifp != NULL) {
3291 			// Only can set before connecting
3292 			result = EINVAL;
3293 			break;
3294 		}
3295 		u_int32_t ring_size = *(u_int32_t *)data;
3296 		if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
3297 		    ring_size > IPSEC_IF_MAX_RING_SIZE) {
3298 			return EINVAL;
3299 		}
3300 		pcb->ipsec_netif_ring_size = ring_size;
3301 		if (if_ipsec_debug != 0) {
3302 			printf("%s: IPSEC_OPT_NETIF_RING_SIZE %u\n", __func__, ring_size);
3303 		}
3304 		break;
3305 	}
3306 	case IPSEC_OPT_TX_FSW_RING_SIZE: {
3307 		if (len != sizeof(u_int32_t)) {
3308 			result = EMSGSIZE;
3309 			break;
3310 		}
3311 		if (pcb->ipsec_ifp != NULL) {
3312 			// Only can set before connecting
3313 			result = EINVAL;
3314 			break;
3315 		}
3316 		u_int32_t ring_size = *(u_int32_t *)data;
3317 		if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
3318 		    ring_size > IPSEC_IF_MAX_RING_SIZE) {
3319 			return EINVAL;
3320 		}
3321 		pcb->ipsec_tx_fsw_ring_size = ring_size;
3322 		if (if_ipsec_debug != 0) {
3323 			printf("%s: IPSEC_OPT_TX_FSW_RING_SIZE %u\n", __func__, ring_size);
3324 		}
3325 		break;
3326 	}
3327 	case IPSEC_OPT_RX_FSW_RING_SIZE: {
3328 		if (len != sizeof(u_int32_t)) {
3329 			result = EMSGSIZE;
3330 			break;
3331 		}
3332 		if (pcb->ipsec_ifp != NULL) {
3333 			// Only can set before connecting
3334 			result = EINVAL;
3335 			break;
3336 		}
3337 		u_int32_t ring_size = *(u_int32_t *)data;
3338 		if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
3339 		    ring_size > IPSEC_IF_MAX_RING_SIZE) {
3340 			return EINVAL;
3341 		}
3342 		pcb->ipsec_rx_fsw_ring_size = ring_size;
3343 		if (if_ipsec_debug != 0) {
3344 			printf("%s: IPSEC_OPT_TX_FSW_RING_SIZE %u\n", __func__, ring_size);
3345 		}
3346 		break;
3347 	}
3348 	case IPSEC_OPT_KPIPE_TX_RING_SIZE: {
3349 		if (len != sizeof(u_int32_t)) {
3350 			result = EMSGSIZE;
3351 			break;
3352 		}
3353 		if (pcb->ipsec_ifp != NULL) {
3354 			// Only can set before connecting
3355 			result = EINVAL;
3356 			break;
3357 		}
3358 		u_int32_t ring_size = *(u_int32_t *)data;
3359 		if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
3360 		    ring_size > IPSEC_IF_MAX_RING_SIZE) {
3361 			return EINVAL;
3362 		}
3363 		pcb->ipsec_kpipe_tx_ring_size = ring_size;
3364 		if (if_ipsec_debug != 0) {
3365 			printf("%s: IPSEC_OPT_KPIPE_TX_RING_SIZE %u\n", __func__, ring_size);
3366 		}
3367 		break;
3368 	}
3369 	case IPSEC_OPT_KPIPE_RX_RING_SIZE: {
3370 		if (len != sizeof(u_int32_t)) {
3371 			result = EMSGSIZE;
3372 			break;
3373 		}
3374 		if (pcb->ipsec_ifp != NULL) {
3375 			// Only can set before connecting
3376 			result = EINVAL;
3377 			break;
3378 		}
3379 		u_int32_t ring_size = *(u_int32_t *)data;
3380 		if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
3381 		    ring_size > IPSEC_IF_MAX_RING_SIZE) {
3382 			return EINVAL;
3383 		}
3384 		pcb->ipsec_kpipe_rx_ring_size = ring_size;
3385 		if (if_ipsec_debug != 0) {
3386 			printf("%s: IPSEC_OPT_KPIPE_RX_RING_SIZE %u\n", __func__, ring_size);
3387 		}
3388 		break;
3389 	}
3390 	case IPSEC_OPT_OUTPUT_DSCP_MAPPING: {
3391 		if (len != sizeof(int)) {
3392 			result = EMSGSIZE;
3393 			break;
3394 		}
3395 		if (pcb->ipsec_ifp == NULL) {
3396 			// Only can set after connecting
3397 			result = EINVAL;
3398 			break;
3399 		}
3400 
3401 		ipsec_dscp_mapping_t output_dscp_mapping = (ipsec_dscp_mapping_t)(*(int *)data);
3402 		if (output_dscp_mapping > IPSEC_DSCP_MAPPING_LEGACY) {
3403 			return EINVAL;
3404 		}
3405 
3406 		pcb->ipsec_output_dscp_mapping = output_dscp_mapping;
3407 
3408 		os_log(OS_LOG_DEFAULT, "%s IPSEC_OPT_OUTPUT_DSCP_MAPPING %s DSCP %d\n",
3409 		    __func__, pcb->ipsec_ifp->if_xname,
3410 		    pcb->ipsec_output_dscp_mapping);
3411 		break;
3412 	}
3413 
3414 #endif // IPSEC_NEXUS
3415 
3416 	default: {
3417 		result = ENOPROTOOPT;
3418 		break;
3419 	}
3420 	}
3421 
3422 	return result;
3423 }
3424 
/*
 * Handle getsockopt() on the ipsec kernel control socket.
 *
 * Copies the requested pcb setting into 'data'.  For most options *len
 * must match the option's exact size (EMSGSIZE otherwise);
 * IPSEC_OPT_IFNAME is variable-length and updates *len to the number of
 * bytes written including the NUL terminator.  Returns EINVAL for a
 * NULL pcb or bad state, ENXIO/ENOENT as noted per case, and
 * ENOPROTOOPT for unknown options.
 */
static errno_t
ipsec_ctl_getopt(__unused kern_ctl_ref kctlref,
    __unused u_int32_t unit,
    void *unitinfo,
    int opt,
    void *__sized_by(*len)data,
    size_t *len)
{
	errno_t result = 0;
	struct ipsec_pcb *__single pcb = unitinfo;
	if (pcb == NULL) {
		return EINVAL;
	}

	switch (opt) {
	case IPSEC_OPT_FLAGS: {
		if (*len != sizeof(u_int32_t)) {
			result = EMSGSIZE;
		} else {
			*(u_int32_t *)data = pcb->ipsec_external_flags;
		}
		break;
	}

	case IPSEC_OPT_EXT_IFDATA_STATS: {
		if (*len != sizeof(int)) {
			result = EMSGSIZE;
		} else {
			*(int *)data = (pcb->ipsec_ext_ifdata_stats) ? 1 : 0;
		}
		break;
	}

	case IPSEC_OPT_IFNAME: {
		/* Buffer must fit the name plus its NUL terminator. */
		if (*len < MIN(strbuflen(pcb->ipsec_if_xname,
		    sizeof(pcb->ipsec_if_xname)) + 1, sizeof(pcb->ipsec_if_xname))) {
			result = EMSGSIZE;
		} else {
			if (pcb->ipsec_ifp == NULL) {
				// Only can get after connecting
				result = EINVAL;
				break;
			}
			/* scnprintf returns chars written (excl. NUL); +1 covers the NUL. */
			*len = scnprintf(data, *len, "%s", pcb->ipsec_if_xname) + 1;
		}
		break;
	}

	case IPSEC_OPT_OUTPUT_TRAFFIC_CLASS: {
		if (*len != sizeof(int)) {
			result = EMSGSIZE;
		} else {
			*(int *)data = so_svc2tc(pcb->ipsec_output_service_class);
		}
		break;
	}

#if IPSEC_NEXUS

	case IPSEC_OPT_ENABLE_CHANNEL: {
		if (*len != sizeof(int)) {
			result = EMSGSIZE;
		} else {
			/* kpipe state is read under the pcb lock. */
			lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
			*(int *)data = pcb->ipsec_kpipe_count;
			lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		}
		break;
	}

	case IPSEC_OPT_CHANNEL_BIND_PID: {
		if (*len != sizeof(pid_t)) {
			result = EMSGSIZE;
		} else {
			lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
			*(pid_t *)data = pcb->ipsec_kpipe_pid;
			lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		}
		break;
	}

	case IPSEC_OPT_CHANNEL_BIND_UUID: {
		if (*len != sizeof(uuid_t)) {
			result = EMSGSIZE;
		} else {
			lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
			uuid_copy(*((uuid_t *)data), pcb->ipsec_kpipe_proc_uuid);
			lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		}
		break;
	}

	case IPSEC_OPT_ENABLE_FLOWSWITCH: {
		if (*len != sizeof(int)) {
			result = EMSGSIZE;
		} else {
			*(int *)data = if_check_netagent(pcb->ipsec_ifp, pcb->ipsec_nx.fsw_agent);
		}
		break;
	}

	case IPSEC_OPT_ENABLE_NETIF: {
		if (*len != sizeof(int)) {
			result = EMSGSIZE;
		} else {
			lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
			*(int *)data = !!pcb->ipsec_use_netif;
			lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		}
		break;
	}

	case IPSEC_OPT_GET_CHANNEL_UUID: {
		/* Caller must supply room for one uuid per allocated kpipe. */
		lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
		if (!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
			result = ENXIO;
		} else if (*len != sizeof(uuid_t) * pcb->ipsec_kpipe_count) {
			result = EMSGSIZE;
		} else {
			for (unsigned int i = 0; i < pcb->ipsec_kpipe_count; i++) {
				uuid_copy(((uuid_t *)data)[i], pcb->ipsec_kpipe_uuid[i]);
			}
		}
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		break;
	}

	case IPSEC_OPT_INPUT_FRAG_SIZE: {
		if (*len != sizeof(u_int32_t)) {
			result = EMSGSIZE;
		} else {
			*(u_int32_t *)data = pcb->ipsec_input_frag_size;
		}
		break;
	}
	case IPSEC_OPT_SLOT_SIZE: {
		if (*len != sizeof(u_int32_t)) {
			result = EMSGSIZE;
		} else {
			*(u_int32_t *)data = pcb->ipsec_slot_size;
		}
		break;
	}
	case IPSEC_OPT_NETIF_RING_SIZE: {
		if (*len != sizeof(u_int32_t)) {
			result = EMSGSIZE;
		} else {
			*(u_int32_t *)data = pcb->ipsec_netif_ring_size;
		}
		break;
	}
	case IPSEC_OPT_TX_FSW_RING_SIZE: {
		if (*len != sizeof(u_int32_t)) {
			result = EMSGSIZE;
		} else {
			*(u_int32_t *)data = pcb->ipsec_tx_fsw_ring_size;
		}
		break;
	}
	case IPSEC_OPT_RX_FSW_RING_SIZE: {
		if (*len != sizeof(u_int32_t)) {
			result = EMSGSIZE;
		} else {
			*(u_int32_t *)data = pcb->ipsec_rx_fsw_ring_size;
		}
		break;
	}
	case IPSEC_OPT_KPIPE_TX_RING_SIZE: {
		if (*len != sizeof(u_int32_t)) {
			result = EMSGSIZE;
		} else {
			*(u_int32_t *)data = pcb->ipsec_kpipe_tx_ring_size;
		}
		break;
	}
	case IPSEC_OPT_KPIPE_RX_RING_SIZE: {
		if (*len != sizeof(u_int32_t)) {
			result = EMSGSIZE;
		} else {
			*(u_int32_t *)data = pcb->ipsec_kpipe_rx_ring_size;
		}
		break;
	}

#endif // IPSEC_NEXUS

	default: {
		result = ENOPROTOOPT;
		break;
	}
	}

	return result;
}
3619 
3620 /* Network Interface functions */
3621 static errno_t
ipsec_output(ifnet_t interface,mbuf_t data)3622 ipsec_output(ifnet_t interface,
3623     mbuf_t data)
3624 {
3625 	struct ipsec_pcb *__single pcb = ifnet_softc(interface);
3626 	struct ipsec_output_state ipsec_state;
3627 	struct route ro;
3628 	struct route_in6 ro6;
3629 	size_t length;
3630 	struct ip *ip = NULL;
3631 	struct ip6_hdr *ip6 = NULL;
3632 	struct ip_out_args ipoa;
3633 	struct ip6_out_args ip6oa;
3634 	int error = 0;
3635 	u_int ip_version = 0;
3636 	int flags = 0;
3637 	struct flowadv *adv = NULL;
3638 
3639 	// Make sure this packet isn't looping through the interface
3640 	if (necp_get_last_interface_index_from_packet(data) == interface->if_index) {
3641 		error = EINVAL;
3642 		goto ipsec_output_err;
3643 	}
3644 
3645 	// Mark the interface so NECP can evaluate tunnel policy
3646 	necp_mark_packet_from_interface(data, interface);
3647 
3648 	if (data->m_len < sizeof(*ip)) {
3649 		os_log_error(OS_LOG_DEFAULT, "ipsec_output: first mbuf length shorter than IP header length: %d.\n", data->m_len);
3650 		IPSEC_STAT_INCREMENT(ipsecstat.out_inval);
3651 		error = EINVAL;
3652 		goto ipsec_output_err;
3653 	}
3654 
3655 	ip = mtod(data, struct ip *);
3656 	ip_version = ip->ip_v;
3657 
3658 	switch (ip_version) {
3659 	case 4: {
3660 		u_int8_t ip_hlen = 0;
3661 #ifdef _IP_VHL
3662 		ip_hlen = _IP_VHL_HL(ip->ip_vhl) << 2;
3663 #else
3664 		ip_hlen = (uint8_t)(ip->ip_hl << 2);
3665 #endif
3666 		if (ip_hlen < sizeof(*ip)) {
3667 			os_log_error(OS_LOG_DEFAULT, "ipsec_output: Bad ip header length %d.\n", ip_hlen);
3668 			IPSEC_STAT_INCREMENT(ipsecstat.out_inval);
3669 			error = EINVAL;
3670 			goto ipsec_output_err;
3671 		}
3672 #if IPSEC_NEXUS
3673 		if (!pcb->ipsec_use_netif)
3674 #endif // IPSEC_NEXUS
3675 		{
3676 			int af = AF_INET;
3677 			bpf_tap_out(pcb->ipsec_ifp, DLT_NULL, data, &af, sizeof(af));
3678 		}
3679 
3680 		/* Apply encryption */
3681 		memset(&ipsec_state, 0, sizeof(ipsec_state));
3682 		ipsec_state.m = data;
3683 		ipsec_state.dst = (struct sockaddr *)&ip->ip_dst;
3684 		memset(&ipsec_state.ro, 0, sizeof(ipsec_state.ro));
3685 		ipsec_state.dscp_mapping = pcb->ipsec_output_dscp_mapping;
3686 
3687 		error = ipsec4_interface_output(&ipsec_state, interface);
3688 		/* Tunneled in IPv6 - packet is gone */
3689 		if (error == 0 && ipsec_state.tunneled == 6) {
3690 			goto done;
3691 		}
3692 
3693 		data = ipsec_state.m;
3694 		if (error || data == NULL) {
3695 			if (error) {
3696 				os_log_error(OS_LOG_DEFAULT, "ipsec_output: ipsec4_output error %d.\n", error);
3697 			}
3698 			goto ipsec_output_err;
3699 		}
3700 
3701 		/* Set traffic class, set flow */
3702 		m_set_service_class(data, pcb->ipsec_output_service_class);
3703 		data->m_pkthdr.pkt_flowsrc = FLOWSRC_IFNET;
3704 #if SKYWALK
3705 		data->m_pkthdr.pkt_mpriv_srcid = interface->if_flowhash;
3706 #else /* !SKYWALK */
3707 		data->m_pkthdr.pkt_flowid = interface->if_flowhash;
3708 #endif /* !SKYWALK */
3709 		data->m_pkthdr.pkt_proto = ip->ip_p;
3710 		data->m_pkthdr.pkt_flags = (PKTF_FLOW_ID | PKTF_FLOW_ADV | PKTF_FLOW_LOCALSRC);
3711 
3712 		/* Flip endian-ness for ip_output */
3713 		ip = mtod(data, struct ip *);
3714 		NTOHS(ip->ip_len);
3715 		NTOHS(ip->ip_off);
3716 
3717 		/* Increment statistics */
3718 		length = mbuf_pkthdr_len(data);
3719 		ifnet_stat_increment_out(interface, 1, (uint16_t)length, 0);
3720 
3721 		/* Send to ip_output */
3722 		memset(&ro, 0, sizeof(ro));
3723 
3724 		flags = (IP_OUTARGS |   /* Passing out args to specify interface */
3725 		    IP_NOIPSEC);                        /* To ensure the packet doesn't go through ipsec twice */
3726 
3727 		memset(&ipoa, 0, sizeof(ipoa));
3728 		ipoa.ipoa_flowadv.code = 0;
3729 		ipoa.ipoa_flags = IPOAF_SELECT_SRCIF | IPOAF_BOUND_SRCADDR;
3730 		if (ipsec_state.outgoing_if) {
3731 			ipoa.ipoa_boundif = ipsec_state.outgoing_if;
3732 			ipoa.ipoa_flags |= IPOAF_BOUND_IF;
3733 		}
3734 		ipsec_set_ipoa_for_interface(pcb->ipsec_ifp, &ipoa);
3735 
3736 		adv = &ipoa.ipoa_flowadv;
3737 
3738 		(void)ip_output(data, NULL, &ro, flags, NULL, &ipoa);
3739 		data = NULL;
3740 
3741 		if (adv->code == FADV_FLOW_CONTROLLED || adv->code == FADV_SUSPENDED) {
3742 			error = ENOBUFS;
3743 			ifnet_disable_output(interface);
3744 		}
3745 
3746 		goto done;
3747 	}
3748 	case 6: {
3749 		if (data->m_len < sizeof(*ip6)) {
3750 			os_log_error(OS_LOG_DEFAULT, "ipsec_output: first mbuf length shorter than IPv6 header length: %d.\n", data->m_len);
3751 			IPSEC_STAT_INCREMENT(ipsec6stat.out_inval);
3752 			error = EINVAL;
3753 			goto ipsec_output_err;
3754 		}
3755 #if IPSEC_NEXUS
3756 		if (!pcb->ipsec_use_netif)
3757 #endif // IPSEC_NEXUS
3758 		{
3759 			int af = AF_INET6;
3760 			bpf_tap_out(pcb->ipsec_ifp, DLT_NULL, data, &af, sizeof(af));
3761 		}
3762 
3763 		data = ipsec6_splithdr(data);
3764 		if (data == NULL) {
3765 			os_log_error(OS_LOG_DEFAULT, "ipsec_output: ipsec6_splithdr returned NULL\n");
3766 			goto ipsec_output_err;
3767 		}
3768 
3769 		ip6 = mtod(data, struct ip6_hdr *);
3770 
3771 		memset(&ipsec_state, 0, sizeof(ipsec_state));
3772 		ipsec_state.m = data;
3773 		ipsec_state.dst = (struct sockaddr *)&ip6->ip6_dst;
3774 		memset(&ipsec_state.ro, 0, sizeof(ipsec_state.ro));
3775 		ipsec_state.dscp_mapping = pcb->ipsec_output_dscp_mapping;
3776 
3777 		error = ipsec6_interface_output(&ipsec_state, interface, &ip6->ip6_nxt, ipsec_state.m);
3778 		if (error == 0 && ipsec_state.tunneled == 4) {          /* tunneled in IPv4 - packet is gone */
3779 			goto done;
3780 		}
3781 		data = ipsec_state.m;
3782 		if (error || data == NULL) {
3783 			if (error) {
3784 				os_log_error(OS_LOG_DEFAULT, "ipsec_output: ipsec6_output error %d\n", error);
3785 			}
3786 			goto ipsec_output_err;
3787 		}
3788 
3789 		/* Set traffic class, set flow */
3790 		m_set_service_class(data, pcb->ipsec_output_service_class);
3791 		data->m_pkthdr.pkt_flowsrc = FLOWSRC_IFNET;
3792 #if SKYWALK
3793 		data->m_pkthdr.pkt_mpriv_srcid = interface->if_flowhash;
3794 #else /* !SKYWALK */
3795 		data->m_pkthdr.pkt_flowid = interface->if_flowhash;
3796 #endif /* !SKYWALK */
3797 		data->m_pkthdr.pkt_proto = ip6->ip6_nxt;
3798 		data->m_pkthdr.pkt_flags = (PKTF_FLOW_ID | PKTF_FLOW_ADV | PKTF_FLOW_LOCALSRC);
3799 
3800 		/* Increment statistics */
3801 		length = mbuf_pkthdr_len(data);
3802 		ifnet_stat_increment_out(interface, 1, (uint16_t)length, 0);
3803 
3804 		/* Send to ip6_output */
3805 		memset(&ro6, 0, sizeof(ro6));
3806 
3807 		flags = IPV6_OUTARGS;
3808 
3809 		memset(&ip6oa, 0, sizeof(ip6oa));
3810 		ip6oa.ip6oa_flowadv.code = 0;
3811 		ip6oa.ip6oa_flags = IP6OAF_SELECT_SRCIF | IP6OAF_BOUND_SRCADDR;
3812 		if (ipsec_state.outgoing_if) {
3813 			ip6oa.ip6oa_boundif = ipsec_state.outgoing_if;
3814 			ip6oa.ip6oa_flags |= IP6OAF_BOUND_IF;
3815 			ip6_output_setsrcifscope(data, ipsec_state.outgoing_if, NULL);
3816 			ip6_output_setdstifscope(data, ipsec_state.outgoing_if, NULL);
3817 		} else {
3818 			ip6_output_setsrcifscope(data, IFSCOPE_UNKNOWN, NULL);
3819 			ip6_output_setdstifscope(data, IFSCOPE_UNKNOWN, NULL);
3820 		}
3821 		ipsec_set_ip6oa_for_interface(pcb->ipsec_ifp, &ip6oa);
3822 
3823 		adv = &ip6oa.ip6oa_flowadv;
3824 
3825 		(void) ip6_output(data, NULL, &ro6, flags, NULL, NULL, &ip6oa);
3826 		data = NULL;
3827 
3828 		if (adv->code == FADV_FLOW_CONTROLLED || adv->code == FADV_SUSPENDED) {
3829 			error = ENOBUFS;
3830 			ifnet_disable_output(interface);
3831 		}
3832 
3833 		goto done;
3834 	}
3835 	default: {
3836 		os_log_error(OS_LOG_DEFAULT, "ipsec_output: Received unknown packet version %d.\n", ip_version);
3837 		error = EINVAL;
3838 		goto ipsec_output_err;
3839 	}
3840 	}
3841 
3842 done:
3843 	return error;
3844 
3845 ipsec_output_err:
3846 	if (data) {
3847 		mbuf_freem(data);
3848 	}
3849 	goto done;
3850 }
3851 
3852 static void
ipsec_start(ifnet_t interface)3853 ipsec_start(ifnet_t     interface)
3854 {
3855 	mbuf_t __single data;
3856 	struct ipsec_pcb *__single pcb = ifnet_softc(interface);
3857 
3858 	VERIFY(pcb != NULL);
3859 	for (;;) {
3860 		if (ifnet_dequeue(interface, &data) != 0) {
3861 			break;
3862 		}
3863 		if (ipsec_output(interface, data) != 0) {
3864 			break;
3865 		}
3866 	}
3867 }
3868 
3869 /* Network Interface functions */
3870 static errno_t
ipsec_demux(__unused ifnet_t interface,mbuf_t data,__unused char * frame_header,protocol_family_t * protocol)3871 ipsec_demux(__unused ifnet_t    interface,
3872     mbuf_t                          data,
3873     __unused char           *frame_header,
3874     protocol_family_t       *protocol)
3875 {
3876 	struct ip *ip;
3877 	u_int ip_version;
3878 
3879 	while (data != NULL && mbuf_len(data) < 1) {
3880 		data = mbuf_next(data);
3881 	}
3882 
3883 	if (data == NULL) {
3884 		return ENOENT;
3885 	}
3886 
3887 	ip = mtod(data, struct ip *);
3888 	ip_version = ip->ip_v;
3889 
3890 	switch (ip_version) {
3891 	case 4:
3892 		*protocol = PF_INET;
3893 		return 0;
3894 	case 6:
3895 		*protocol = PF_INET6;
3896 		return 0;
3897 	default:
3898 		*protocol = PF_UNSPEC;
3899 		break;
3900 	}
3901 
3902 	return 0;
3903 }
3904 
3905 static errno_t
ipsec_add_proto(__unused ifnet_t interface,protocol_family_t protocol,__unused const struct ifnet_demux_desc * demux_array,__unused u_int32_t demux_count)3906 ipsec_add_proto(__unused ifnet_t                                                interface,
3907     protocol_family_t                                               protocol,
3908     __unused const struct ifnet_demux_desc  *demux_array,
3909     __unused u_int32_t                                              demux_count)
3910 {
3911 	switch (protocol) {
3912 	case PF_INET:
3913 		return 0;
3914 	case PF_INET6:
3915 		return 0;
3916 	default:
3917 		break;
3918 	}
3919 
3920 	return ENOPROTOOPT;
3921 }
3922 
3923 static errno_t
ipsec_del_proto(__unused ifnet_t interface,__unused protocol_family_t protocol)3924 ipsec_del_proto(__unused ifnet_t                        interface,
3925     __unused protocol_family_t      protocol)
3926 {
3927 	return 0;
3928 }
3929 
3930 static errno_t
ipsec_ioctl(ifnet_t interface,u_long command,void * data)3931 ipsec_ioctl(ifnet_t interface,
3932     u_long command,
3933     void *data)
3934 {
3935 #if IPSEC_NEXUS
3936 	struct ipsec_pcb *__single pcb = ifnet_softc(interface);
3937 #endif
3938 	errno_t result = 0;
3939 
3940 	switch (command) {
3941 	case SIOCSIFMTU: {
3942 #if IPSEC_NEXUS
3943 		if (pcb->ipsec_use_netif) {
3944 			// Make sure we can fit packets in the channel buffers
3945 			if (((uint64_t)((struct ifreq*)data)->ifr_mtu) > pcb->ipsec_slot_size) {
3946 				result = EINVAL;
3947 			} else {
3948 				ifnet_set_mtu(interface, (uint32_t)((struct ifreq*)data)->ifr_mtu);
3949 			}
3950 		} else
3951 #endif // IPSEC_NEXUS
3952 		{
3953 			ifnet_set_mtu(interface, ((struct ifreq*)data)->ifr_mtu);
3954 		}
3955 		break;
3956 	}
3957 
3958 	case SIOCSIFFLAGS:
3959 		/* ifioctl() takes care of it */
3960 		break;
3961 
3962 	case SIOCSIFSUBFAMILY: {
3963 		uint32_t subfamily;
3964 
3965 		subfamily = ((struct ifreq*)data)->ifr_type.ift_subfamily;
3966 		switch (subfamily) {
3967 		case IFRTYPE_SUBFAMILY_BLUETOOTH:
3968 			interface->if_subfamily = IFNET_SUBFAMILY_BLUETOOTH;
3969 			break;
3970 		case IFRTYPE_SUBFAMILY_WIFI:
3971 			interface->if_subfamily = IFNET_SUBFAMILY_WIFI;
3972 			break;
3973 		case IFRTYPE_SUBFAMILY_QUICKRELAY:
3974 			interface->if_subfamily = IFNET_SUBFAMILY_QUICKRELAY;
3975 			break;
3976 		case IFRTYPE_SUBFAMILY_DEFAULT:
3977 			interface->if_subfamily = IFNET_SUBFAMILY_DEFAULT;
3978 			break;
3979 		default:
3980 			result = EINVAL;
3981 			break;
3982 		}
3983 		break;
3984 	}
3985 
3986 	case SIOCSIFPEEREGRESSFUNCTIONALTYPE: {
3987 		uint32_t peeregressinterfacetype;
3988 		peeregressinterfacetype = ((struct ifreq*)data)->ifr_ifru.ifru_peer_egress_functional_type;
3989 		switch (peeregressinterfacetype) {
3990 		case IFRTYPE_FUNCTIONAL_WIFI_INFRA:
3991 		case IFRTYPE_FUNCTIONAL_CELLULAR:
3992 		case IFRTYPE_FUNCTIONAL_WIRED:
3993 		case IFRTYPE_FUNCTIONAL_UNKNOWN:
3994 			interface->peer_egress_functional_type = peeregressinterfacetype;
3995 			break;
3996 		default:
3997 			result = EINVAL;
3998 			break;
3999 		}
4000 		break;
4001 	}
4002 
4003 	default:
4004 		result = EOPNOTSUPP;
4005 	}
4006 
4007 	return result;
4008 }
4009 
/*
 * ifnet detach callback: releases the interface reference and frees the
 * pcb.  The ifnet is disposed while ipsec_lock is still held, so the pcb
 * teardown and ifnet disposal appear atomic to other pcb lookups.
 */
static void
ipsec_detached(ifnet_t interface)
{
	struct ipsec_pcb *__single pcb = ifnet_softc(interface);

	/* Drop the ifnet reference balancing one taken earlier (attach path). */
	(void)ifnet_release(interface);
	lck_mtx_lock(&ipsec_lock);
	/* NOTE(review): second arg 'true' — semantics defined by ipsec_free_pcb; confirm. */
	ipsec_free_pcb(pcb, true);
	(void)ifnet_dispose(interface);
	lck_mtx_unlock(&ipsec_lock);
}
4021 
4022 /* Protocol Handlers */
4023 
4024 static errno_t
ipsec_proto_input(ifnet_t interface,protocol_family_t protocol,mbuf_t m,__unused char * frame_header)4025 ipsec_proto_input(ifnet_t interface,
4026     protocol_family_t     protocol,
4027     mbuf_t m,
4028     __unused char *frame_header)
4029 {
4030 	mbuf_pkthdr_setrcvif(m, interface);
4031 
4032 #if IPSEC_NEXUS
4033 	struct ipsec_pcb *__single pcb = ifnet_softc(interface);
4034 	if (!pcb->ipsec_use_netif)
4035 #endif // IPSEC_NEXUS
4036 	{
4037 		uint32_t af = 0;
4038 		struct ip *ip = mtod(m, struct ip *);
4039 		if (ip->ip_v == 4) {
4040 			af = AF_INET;
4041 		} else if (ip->ip_v == 6) {
4042 			af = AF_INET6;
4043 		}
4044 		bpf_tap_in(interface, DLT_NULL, m, &af, sizeof(af));
4045 		pktap_input(interface, protocol, m, NULL);
4046 	}
4047 
4048 	int32_t pktlen = m->m_pkthdr.len;
4049 	if (proto_input(protocol, m) != 0) {
4050 		ifnet_stat_increment_in(interface, 0, 0, 1);
4051 		m_freem(m);
4052 	} else {
4053 		ifnet_stat_increment_in(interface, 1, pktlen, 0);
4054 	}
4055 
4056 	return 0;
4057 }
4058 
4059 static errno_t
ipsec_proto_pre_output(__unused ifnet_t interface,protocol_family_t protocol,__unused mbuf_t * packet,__unused const struct sockaddr * dest,__unused void * route,__unused char * frame_type,__unused char * link_layer_dest)4060 ipsec_proto_pre_output(__unused ifnet_t interface,
4061     protocol_family_t    protocol,
4062     __unused mbuf_t              *packet,
4063     __unused const struct sockaddr *dest,
4064     __unused void *route,
4065     __unused char *frame_type,
4066     __unused char *link_layer_dest)
4067 {
4068 	*(protocol_family_t *)(void *)frame_type = protocol;
4069 	return 0;
4070 }
4071 
4072 static errno_t
ipsec_attach_proto(ifnet_t interface,protocol_family_t protocol)4073 ipsec_attach_proto(ifnet_t                              interface,
4074     protocol_family_t    protocol)
4075 {
4076 	struct ifnet_attach_proto_param proto;
4077 	errno_t                                                 result;
4078 
4079 	bzero(&proto, sizeof(proto));
4080 	proto.input = ipsec_proto_input;
4081 	proto.pre_output = ipsec_proto_pre_output;
4082 
4083 	result = ifnet_attach_protocol(interface, protocol, &proto);
4084 	if (result != 0 && result != EEXIST) {
4085 		os_log_error(OS_LOG_DEFAULT, "ipsec_attach_inet - ifnet_attach_protocol %d failed: %d\n",
4086 		    protocol, result);
4087 	}
4088 
4089 	return result;
4090 }
4091 
/*
 * Inject a packet (or m_nextpkt-linked chain) into the ipsec interface's
 * input path.  In netif mode the chain is queued on the pcb and the rx
 * ring is notified; in legacy mode the packet goes straight to the
 * protocol input handler.
 */
errno_t
ipsec_inject_inbound_packet(ifnet_t     interface,
    mbuf_t      packet)
{
#if IPSEC_NEXUS
	struct ipsec_pcb *__single pcb = ifnet_softc(interface);

	if (pcb->ipsec_use_netif) {
		/* Enter the data path; fails once teardown has begun. */
		if (!ipsec_data_move_begin(pcb)) {
			os_log_info(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__,
			    if_name(pcb->ipsec_ifp));
			return ENXIO;
		}

		lck_rw_lock_shared(&pcb->ipsec_pcb_lock);

		lck_mtx_lock(&pcb->ipsec_input_chain_lock);

		/* Refuse when the pending-input backlog exceeds the configured cap. */
		if (pcb->ipsec_input_chain_count > (u_int32_t)if_ipsec_max_pending_input) {
			lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
			lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
			ipsec_data_move_end(pcb);
			return ENOSPC;
		}

		/* Append the whole chain; count each packet and track the new tail. */
		if (pcb->ipsec_input_chain != NULL) {
			pcb->ipsec_input_chain_last->m_nextpkt = packet;
		} else {
			pcb->ipsec_input_chain = packet;
		}
		pcb->ipsec_input_chain_count++;
		while (packet->m_nextpkt) {
			VERIFY(packet != packet->m_nextpkt);
			packet = packet->m_nextpkt;
			pcb->ipsec_input_chain_count++;
		}
		pcb->ipsec_input_chain_last = packet;
		lck_mtx_unlock(&pcb->ipsec_input_chain_lock);

		/* Snapshot the ring under the pcb lock; notify after dropping it. */
		kern_channel_ring_t __single rx_ring = pcb->ipsec_netif_rxring[0];
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);

		if (rx_ring != NULL) {
			kern_channel_notify(rx_ring, 0);
		}

		ipsec_data_move_end(pcb);
		return 0;
	} else
#endif // IPSEC_NEXUS
	{
		/* Legacy mode: classify by IP version, then deliver directly. */
		errno_t error;
		protocol_family_t protocol;
		if ((error = ipsec_demux(interface, packet, NULL, &protocol)) != 0) {
			return error;
		}

		return ipsec_proto_input(interface, protocol, packet, NULL);
	}
}
4152 
4153 void
ipsec_set_pkthdr_for_interface(ifnet_t interface,mbuf_t packet,int family,uint32_t flowid)4154 ipsec_set_pkthdr_for_interface(ifnet_t interface, mbuf_t packet, int family,
4155     uint32_t flowid)
4156 {
4157 #pragma unused (flowid)
4158 	if (packet != NULL && interface != NULL) {
4159 		struct ipsec_pcb *__single pcb = ifnet_softc(interface);
4160 		if (pcb != NULL) {
4161 			/* Set traffic class, set flow */
4162 			m_set_service_class(packet, pcb->ipsec_output_service_class);
4163 			packet->m_pkthdr.pkt_flowsrc = FLOWSRC_IFNET;
4164 #if SKYWALK
4165 			packet->m_pkthdr.pkt_mpriv_srcid = interface->if_flowhash;
4166 			packet->m_pkthdr.pkt_flowid = flowid;
4167 #else /* !SKYWALK */
4168 			packet->m_pkthdr.pkt_flowid = interface->if_flowhash;
4169 #endif /* !SKYWALK */
4170 			if (family == AF_INET) {
4171 				struct ip *ip = mtod(packet, struct ip *);
4172 				packet->m_pkthdr.pkt_proto = ip->ip_p;
4173 			} else if (family == AF_INET6) {
4174 				struct ip6_hdr *ip6 = mtod(packet, struct ip6_hdr *);
4175 				packet->m_pkthdr.pkt_proto = ip6->ip6_nxt;
4176 			}
4177 			packet->m_pkthdr.pkt_flags = (PKTF_FLOW_ID | PKTF_FLOW_ADV | PKTF_FLOW_LOCALSRC);
4178 		}
4179 	}
4180 }
4181 
4182 void
ipsec_set_ipoa_for_interface(ifnet_t interface,struct ip_out_args * ipoa)4183 ipsec_set_ipoa_for_interface(ifnet_t interface, struct ip_out_args *ipoa)
4184 {
4185 	struct ipsec_pcb *__single pcb;
4186 
4187 	if (interface == NULL || ipoa == NULL) {
4188 		return;
4189 	}
4190 	pcb = ifnet_softc(interface);
4191 
4192 	if (net_qos_policy_restricted == 0) {
4193 		ipoa->ipoa_flags |= IPOAF_QOSMARKING_ALLOWED;
4194 		ipoa->ipoa_sotc = so_svc2tc(pcb->ipsec_output_service_class);
4195 	} else if (pcb->ipsec_output_service_class != MBUF_SC_VO ||
4196 	    net_qos_policy_restrict_avapps != 0) {
4197 		ipoa->ipoa_flags &= ~IPOAF_QOSMARKING_ALLOWED;
4198 	} else {
4199 		ipoa->ipoa_flags |= IP6OAF_QOSMARKING_ALLOWED;
4200 		ipoa->ipoa_sotc = SO_TC_VO;
4201 	}
4202 }
4203 
4204 void
ipsec_set_ip6oa_for_interface(ifnet_t interface,struct ip6_out_args * ip6oa)4205 ipsec_set_ip6oa_for_interface(ifnet_t interface, struct ip6_out_args *ip6oa)
4206 {
4207 	struct ipsec_pcb *__single pcb;
4208 
4209 	if (interface == NULL || ip6oa == NULL) {
4210 		return;
4211 	}
4212 	pcb = ifnet_softc(interface);
4213 
4214 	if (net_qos_policy_restricted == 0) {
4215 		ip6oa->ip6oa_flags |= IPOAF_QOSMARKING_ALLOWED;
4216 		ip6oa->ip6oa_sotc = so_svc2tc(pcb->ipsec_output_service_class);
4217 	} else if (pcb->ipsec_output_service_class != MBUF_SC_VO ||
4218 	    net_qos_policy_restrict_avapps != 0) {
4219 		ip6oa->ip6oa_flags &= ~IPOAF_QOSMARKING_ALLOWED;
4220 	} else {
4221 		ip6oa->ip6oa_flags |= IP6OAF_QOSMARKING_ALLOWED;
4222 		ip6oa->ip6oa_sotc = SO_TC_VO;
4223 	}
4224 }
4225 
4226 static boolean_t
ipsec_data_move_begin(struct ipsec_pcb * pcb)4227 ipsec_data_move_begin(struct ipsec_pcb *pcb)
4228 {
4229 	boolean_t ret = 0;
4230 
4231 	lck_mtx_lock_spin(&pcb->ipsec_pcb_data_move_lock);
4232 	if ((ret = IPSEC_IS_DATA_PATH_READY(pcb))) {
4233 		pcb->ipsec_pcb_data_move++;
4234 	}
4235 	lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
4236 
4237 	return ret;
4238 }
4239 
/*
 * Leave the data path: decrement the in-flight mover count and, when this
 * was the last mover, wake any drainer blocked in ipsec_data_move_drain().
 */
static void
ipsec_data_move_end(struct ipsec_pcb *pcb)
{
	lck_mtx_lock_spin(&pcb->ipsec_pcb_data_move_lock);
	VERIFY(pcb->ipsec_pcb_data_move > 0);
	/*
	 * if there's no more thread moving data, wakeup any
	 * drainers that's blocked waiting for this.
	 */
	if (--pcb->ipsec_pcb_data_move == 0 && pcb->ipsec_pcb_drainers > 0) {
		wakeup(&(pcb->ipsec_pcb_data_move));
	}
	lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
}
4254 
/*
 * Block until no thread remains in the data path.  The caller must have
 * already cleared the data-path-ready flag so no new movers can enter.
 */
static void
ipsec_data_move_drain(struct ipsec_pcb *pcb)
{
	lck_mtx_lock(&pcb->ipsec_pcb_data_move_lock);
	/* data path must already be marked as not ready */
	VERIFY(!IPSEC_IS_DATA_PATH_READY(pcb));
	pcb->ipsec_pcb_drainers++;
	/* Sleep until the last mover's ipsec_data_move_end() wakes us. */
	while (pcb->ipsec_pcb_data_move != 0) {
		(void)msleep(&(pcb->ipsec_pcb_data_move), &pcb->ipsec_pcb_data_move_lock,
		    (PZERO - 1), __func__, NULL);
	}
	VERIFY(!IPSEC_IS_DATA_PATH_READY(pcb));
	VERIFY(pcb->ipsec_pcb_drainers > 0);
	pcb->ipsec_pcb_drainers--;
	lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
}
4271 
/*
 * Shut down the data path: mark it not ready (so ipsec_data_move_begin()
 * starts failing), then wait for every in-flight mover to drain out.
 */
static void
ipsec_wait_data_move_drain(struct ipsec_pcb *pcb)
{
	/*
	 * Mark the data path as not usable.
	 */
	lck_mtx_lock(&pcb->ipsec_pcb_data_move_lock);
	IPSEC_CLR_DATA_PATH_READY(pcb);
	lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);

	/* Wait until all threads in the data paths are done. */
	ipsec_data_move_drain(pcb);
}
4285