xref: /xnu-11215.41.3/bsd/net/if_ipsec.c (revision 33de042d024d46de5ff4e89f2471de6608e37fa4)
1 /*
2  * Copyright (c) 2012-2024 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 
30 #include <sys/systm.h>
31 #include <sys/kern_control.h>
32 #include <net/kpi_protocol.h>
33 #include <net/kpi_interface.h>
34 #include <sys/socket.h>
35 #include <sys/socketvar.h>
36 #include <net/if.h>
37 #include <net/if_types.h>
38 #include <net/bpf.h>
39 #include <net/if_ipsec.h>
40 #include <sys/mbuf.h>
41 #include <sys/sockio.h>
42 #include <netinet/in.h>
43 #include <netinet/ip6.h>
44 #include <netinet6/in6_var.h>
45 #include <netinet6/ip6_var.h>
46 #include <sys/kauth.h>
47 #include <netinet6/ipsec.h>
48 #include <netinet6/ipsec6.h>
49 #include <netinet6/esp.h>
50 #include <netinet6/esp6.h>
51 #include <netinet/ip.h>
52 #include <net/flowadv.h>
53 #include <net/necp.h>
54 #include <netkey/key.h>
55 #include <net/pktap.h>
56 #include <kern/zalloc.h>
57 #include <os/log.h>
58 
59 #if SKYWALK
60 #include <skywalk/os_skywalk_private.h>
61 #include <skywalk/nexus/flowswitch/nx_flowswitch.h>
62 #include <skywalk/nexus/netif/nx_netif.h>
63 #define IPSEC_NEXUS 1
64 #else // SKYWALK
65 #define IPSEC_NEXUS 0
66 #endif // SKYWALK
67 
68 extern int net_qos_policy_restricted;
69 extern int net_qos_policy_restrict_avapps;
70 
71 /* Kernel Control functions */
72 static errno_t  ipsec_ctl_setup(u_int32_t *unit, void **unitinfo);
73 static errno_t  ipsec_ctl_bind(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
74     void **unitinfo);
75 static errno_t  ipsec_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
76     void **unitinfo);
77 static errno_t  ipsec_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t unit,
78     void *unitinfo);
79 static errno_t  ipsec_ctl_send(kern_ctl_ref kctlref, u_int32_t unit,
80     void *unitinfo, mbuf_t m, int flags);
81 static errno_t  ipsec_ctl_getopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo,
82     int opt, void *__sized_by(*len)data, size_t *len);
83 static errno_t  ipsec_ctl_setopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo,
84     int opt, void *__sized_by(len)data, size_t len);
85 
86 /* Network Interface functions */
87 static void     ipsec_start(ifnet_t     interface);
88 static errno_t  ipsec_output(ifnet_t interface, mbuf_t data);
89 static errno_t  ipsec_demux(ifnet_t interface, mbuf_t data, char *frame_header,
90     protocol_family_t *protocol);
91 static errno_t  ipsec_add_proto(ifnet_t interface, protocol_family_t protocol,
92     const struct ifnet_demux_desc *demux_array,
93     u_int32_t demux_count);
94 static errno_t  ipsec_del_proto(ifnet_t interface, protocol_family_t protocol);
95 static errno_t  ipsec_ioctl(ifnet_t interface, u_long cmd, void *data);
96 static void             ipsec_detached(ifnet_t interface);
97 
98 /* Protocol handlers */
99 static errno_t  ipsec_attach_proto(ifnet_t interface, protocol_family_t proto);
100 static errno_t  ipsec_proto_input(ifnet_t interface, protocol_family_t protocol,
101     mbuf_t m, char *frame_header);
102 static errno_t ipsec_proto_pre_output(ifnet_t interface, protocol_family_t protocol,
103     mbuf_t *packet, const struct sockaddr *dest, void *route,
104     char *frame_type, char *link_layer_dest);
105 
106 static kern_ctl_ref     ipsec_kctlref;
107 static LCK_ATTR_DECLARE(ipsec_lck_attr, 0, 0);
108 static LCK_GRP_DECLARE(ipsec_lck_grp, "ipsec");
109 static LCK_MTX_DECLARE_ATTR(ipsec_lock, &ipsec_lck_grp, &ipsec_lck_attr);
110 
111 #if IPSEC_NEXUS
112 
113 SYSCTL_DECL(_net_ipsec);
114 SYSCTL_NODE(_net, OID_AUTO, ipsec, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "IPsec");
115 static int if_ipsec_verify_interface_creation = 0;
116 SYSCTL_INT(_net_ipsec, OID_AUTO, verify_interface_creation, CTLFLAG_RW | CTLFLAG_LOCKED, &if_ipsec_verify_interface_creation, 0, "");
117 
118 #define IPSEC_IF_VERIFY(_e)             if (__improbable(if_ipsec_verify_interface_creation)) { VERIFY(_e); }
119 
120 #define IPSEC_IF_DEFAULT_SLOT_SIZE 2048
121 #define IPSEC_IF_DEFAULT_RING_SIZE 64
122 #define IPSEC_IF_DEFAULT_TX_FSW_RING_SIZE 64
123 #define IPSEC_IF_DEFAULT_RX_FSW_RING_SIZE 128
124 #define IPSEC_IF_DEFAULT_BUF_SEG_SIZE   skmem_usr_buf_seg_size
125 
126 #define IPSEC_IF_WMM_RING_COUNT NEXUS_NUM_WMM_QUEUES
127 #define IPSEC_IF_MAX_RING_COUNT IPSEC_IF_WMM_RING_COUNT
128 #define IPSEC_NETIF_WMM_TX_RING_COUNT IPSEC_IF_WMM_RING_COUNT
129 #define IPSEC_NETIF_WMM_RX_RING_COUNT 1
130 #define IPSEC_NETIF_MAX_TX_RING_COUNT IPSEC_NETIF_WMM_TX_RING_COUNT
131 #define IPSEC_NETIF_MAX_RX_RING_COUNT IPSEC_NETIF_WMM_RX_RING_COUNT
132 
133 #define IPSEC_IF_MIN_RING_SIZE 8
134 #define IPSEC_IF_MAX_RING_SIZE 1024
135 
136 #define IPSEC_IF_MIN_SLOT_SIZE 1024
137 #define IPSEC_IF_MAX_SLOT_SIZE (16 * 1024)
138 
139 #define IPSEC_DEFAULT_MAX_PENDING_INPUT_COUNT 512
140 
141 #define IPSEC_KPIPE_FLAG_WAKE_PKT 0x01
142 
143 static int if_ipsec_max_pending_input = IPSEC_DEFAULT_MAX_PENDING_INPUT_COUNT;
144 
145 static int sysctl_if_ipsec_ring_size SYSCTL_HANDLER_ARGS;
146 static int sysctl_if_ipsec_tx_fsw_ring_size SYSCTL_HANDLER_ARGS;
147 static int sysctl_if_ipsec_rx_fsw_ring_size SYSCTL_HANDLER_ARGS;
148 
149 static int if_ipsec_ring_size = IPSEC_IF_DEFAULT_RING_SIZE;
150 static int if_ipsec_tx_fsw_ring_size = IPSEC_IF_DEFAULT_TX_FSW_RING_SIZE;
151 static int if_ipsec_rx_fsw_ring_size = IPSEC_IF_DEFAULT_RX_FSW_RING_SIZE;
152 
153 SYSCTL_INT(_net_ipsec, OID_AUTO, max_pending_input, CTLFLAG_LOCKED | CTLFLAG_RW, &if_ipsec_max_pending_input, 0, "");
154 SYSCTL_PROC(_net_ipsec, OID_AUTO, ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
155     &if_ipsec_ring_size, IPSEC_IF_DEFAULT_RING_SIZE, &sysctl_if_ipsec_ring_size, "I", "");
156 SYSCTL_PROC(_net_ipsec, OID_AUTO, tx_fsw_ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
157     &if_ipsec_tx_fsw_ring_size, IPSEC_IF_DEFAULT_TX_FSW_RING_SIZE, &sysctl_if_ipsec_tx_fsw_ring_size, "I", "");
158 SYSCTL_PROC(_net_ipsec, OID_AUTO, rx_fsw_ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
159     &if_ipsec_rx_fsw_ring_size, IPSEC_IF_DEFAULT_RX_FSW_RING_SIZE, &sysctl_if_ipsec_rx_fsw_ring_size, "I", "");
160 
161 static int if_ipsec_debug = 0;
162 SYSCTL_INT(_net_ipsec, OID_AUTO, debug, CTLFLAG_LOCKED | CTLFLAG_RW, &if_ipsec_debug, 0, "");
163 
164 static errno_t
165 ipsec_register_nexus(void);
166 
/*
 * Per-interface Skywalk nexus identifiers.  Names suggest the netif
 * ("if_") provider/instance pair and the flowswitch ("fsw_")
 * provider/instance, device attachment, and netagent UUIDs.
 */
typedef struct ipsec_nx {
	uuid_t if_provider;     /* netif nexus provider */
	uuid_t if_instance;     /* netif nexus instance */
	uuid_t fsw_provider;    /* flowswitch nexus provider */
	uuid_t fsw_instance;    /* flowswitch nexus instance */
	uuid_t fsw_device;      /* flowswitch device attachment */
	uuid_t fsw_agent;       /* flowswitch netagent */
} *ipsec_nx_t;
175 
176 static nexus_controller_t ipsec_ncd;
177 static int ipsec_ncd_refcount;
178 static uuid_t ipsec_kpipe_uuid;
179 
180 #endif // IPSEC_NEXUS
181 
182 /* Control block allocated for each kernel control connection */
struct ipsec_pcb {
	TAILQ_ENTRY(ipsec_pcb)  ipsec_chain;        /* entry on the global ipsec_head list */
	kern_ctl_ref            ipsec_ctlref;       /* kernel control this PCB belongs to */
	ifnet_t                 ipsec_ifp;          /* attached network interface */
	u_int32_t               ipsec_unit;         /* control unit; set to 0 when the ctl disconnects */
	u_int32_t               ipsec_unique_id;
	// These external flags can be set with IPSEC_OPT_FLAGS
	u_int32_t               ipsec_external_flags;
	// These internal flags are only used within this driver
	u_int32_t               ipsec_internal_flags;
	u_int32_t               ipsec_input_frag_size;  /* meaningful only when ipsec_frag_size_set */
	bool                    ipsec_frag_size_set;
	int                     ipsec_ext_ifdata_stats; /* nonzero: ifnet stats maintained externally */
	mbuf_svc_class_t        ipsec_output_service_class;
	char                    ipsec_if_xname[IFXNAMSIZ];    /* external interface name */
	char                    ipsec_unique_name[IFXNAMSIZ];
	// PCB lock protects state fields, like ipsec_kpipe_count
	decl_lck_rw_data(, ipsec_pcb_lock);
	// lock to protect ipsec_pcb_data_move & ipsec_pcb_drainers
	decl_lck_mtx_data(, ipsec_pcb_data_move_lock);
	u_int32_t               ipsec_pcb_data_move; /* number of data moving contexts */
	u_int32_t               ipsec_pcb_drainers; /* number of threads waiting to drain */
	u_int32_t               ipsec_pcb_data_path_state; /* internal state of interface data path */
	ipsec_dscp_mapping_t    ipsec_output_dscp_mapping;

#if IPSEC_NEXUS
	lck_mtx_t               ipsec_input_chain_lock;
	lck_mtx_t               ipsec_kpipe_encrypt_lock;   /* serializes ipsec_encrypt_kpipe_pkt() */
	lck_mtx_t               ipsec_kpipe_decrypt_lock;   /* serializes the kpipe decrypt path */
	struct mbuf *           ipsec_input_chain;          /* first pending input mbuf */
	struct mbuf *           ipsec_input_chain_last;     /* last pending input mbuf */
	u_int32_t               ipsec_input_chain_count;
	// Input chain lock protects the list of input mbufs
	// The input chain lock must be taken AFTER the PCB lock if both are held
	struct ipsec_nx         ipsec_nx;                   /* nexus UUIDs for this interface */
	u_int32_t               ipsec_kpipe_count;          /* number of allocated kernel pipes */
	pid_t                   ipsec_kpipe_pid;
	uuid_t                  ipsec_kpipe_proc_uuid;
	uuid_t                  ipsec_kpipe_uuid[IPSEC_IF_MAX_RING_COUNT];
	void *                  ipsec_kpipe_rxring[IPSEC_IF_MAX_RING_COUNT];  /* per-index kpipe RX rings */
	void *                  ipsec_kpipe_txring[IPSEC_IF_MAX_RING_COUNT];  /* per-index kpipe TX rings */
	kern_pbufpool_t         ipsec_kpipe_pp;
	u_int32_t               ipsec_kpipe_tx_ring_size;
	u_int32_t               ipsec_kpipe_rx_ring_size;

	kern_nexus_t            ipsec_netif_nexus;          /* set in ipsec_netif_prepare() */
	kern_pbufpool_t         ipsec_netif_pp;
	void *                  ipsec_netif_rxring[IPSEC_NETIF_MAX_RX_RING_COUNT];
	void *                  ipsec_netif_txring[IPSEC_NETIF_MAX_TX_RING_COUNT];
	uint64_t                ipsec_netif_txring_size;

	u_int32_t               ipsec_slot_size;            /* max packet length accepted on rings */
	u_int32_t               ipsec_netif_ring_size;
	u_int32_t               ipsec_tx_fsw_ring_size;
	u_int32_t               ipsec_rx_fsw_ring_size;
	bool                    ipsec_use_netif;
	bool                    ipsec_needs_netagent;
#endif // IPSEC_NEXUS
};
242 
243 /* These are internal flags not exposed outside this file */
244 #define IPSEC_FLAGS_KPIPE_ALLOCATED 1
245 
246 /* data movement refcounting functions */
247 static boolean_t ipsec_data_move_begin(struct ipsec_pcb *pcb);
248 static void ipsec_data_move_end(struct ipsec_pcb *pcb);
249 static void ipsec_wait_data_move_drain(struct ipsec_pcb *pcb);
250 
251 /* Data path states */
252 #define IPSEC_PCB_DATA_PATH_READY    0x1
253 
254 /* Macros to set/clear/test data path states */
255 #define IPSEC_SET_DATA_PATH_READY(_pcb) ((_pcb)->ipsec_pcb_data_path_state |= IPSEC_PCB_DATA_PATH_READY)
256 #define IPSEC_CLR_DATA_PATH_READY(_pcb) ((_pcb)->ipsec_pcb_data_path_state &= ~IPSEC_PCB_DATA_PATH_READY)
257 #define IPSEC_IS_DATA_PATH_READY(_pcb) (((_pcb)->ipsec_pcb_data_path_state & IPSEC_PCB_DATA_PATH_READY) != 0)
258 
259 #if IPSEC_NEXUS
260 /* Macros to clear/set/test flags. */
261 static inline void
ipsec_flag_set(struct ipsec_pcb * pcb,uint32_t flag)262 ipsec_flag_set(struct ipsec_pcb *pcb, uint32_t flag)
263 {
264 	pcb->ipsec_internal_flags |= flag;
265 }
266 static inline void
ipsec_flag_clr(struct ipsec_pcb * pcb,uint32_t flag)267 ipsec_flag_clr(struct ipsec_pcb *pcb, uint32_t flag)
268 {
269 	pcb->ipsec_internal_flags &= ~flag;
270 }
271 
272 static inline bool
ipsec_flag_isset(struct ipsec_pcb * pcb,uint32_t flag)273 ipsec_flag_isset(struct ipsec_pcb *pcb, uint32_t flag)
274 {
275 	return !!(pcb->ipsec_internal_flags & flag);
276 }
277 #endif // IPSEC_NEXUS
278 
279 TAILQ_HEAD(ipsec_list, ipsec_pcb) ipsec_head;
280 
281 static KALLOC_TYPE_DEFINE(ipsec_pcb_zone, struct ipsec_pcb, NET_KT_DEFAULT);
282 
283 #define IPSECQ_MAXLEN 256
284 
285 #if IPSEC_NEXUS
286 static int
287 sysctl_if_ipsec_ring_size SYSCTL_HANDLER_ARGS
288 {
289 #pragma unused(arg1, arg2)
290 	int value = if_ipsec_ring_size;
291 
292 	int error = sysctl_handle_int(oidp, &value, 0, req);
293 	if (error || !req->newptr) {
294 		return error;
295 	}
296 
297 	if (value < IPSEC_IF_MIN_RING_SIZE ||
298 	    value > IPSEC_IF_MAX_RING_SIZE) {
299 		return EINVAL;
300 	}
301 
302 	if_ipsec_ring_size = value;
303 
304 	return 0;
305 }
306 
307 static int
308 sysctl_if_ipsec_tx_fsw_ring_size SYSCTL_HANDLER_ARGS
309 {
310 #pragma unused(arg1, arg2)
311 	int value = if_ipsec_tx_fsw_ring_size;
312 
313 	int error = sysctl_handle_int(oidp, &value, 0, req);
314 	if (error || !req->newptr) {
315 		return error;
316 	}
317 
318 	if (value < IPSEC_IF_MIN_RING_SIZE ||
319 	    value > IPSEC_IF_MAX_RING_SIZE) {
320 		return EINVAL;
321 	}
322 
323 	if_ipsec_tx_fsw_ring_size = value;
324 
325 	return 0;
326 }
327 
328 static int
329 sysctl_if_ipsec_rx_fsw_ring_size SYSCTL_HANDLER_ARGS
330 {
331 #pragma unused(arg1, arg2)
332 	int value = if_ipsec_rx_fsw_ring_size;
333 
334 	int error = sysctl_handle_int(oidp, &value, 0, req);
335 	if (error || !req->newptr) {
336 		return error;
337 	}
338 
339 	if (value < IPSEC_IF_MIN_RING_SIZE ||
340 	    value > IPSEC_IF_MAX_RING_SIZE) {
341 		return EINVAL;
342 	}
343 
344 	if_ipsec_rx_fsw_ring_size = value;
345 
346 	return 0;
347 }
348 
349 
350 static inline bool
ipsec_in_wmm_mode(struct ipsec_pcb * pcb)351 ipsec_in_wmm_mode(struct ipsec_pcb *pcb)
352 {
353 	return pcb->ipsec_kpipe_count == IPSEC_IF_WMM_RING_COUNT;
354 }
355 
356 #endif // IPSEC_NEXUS
357 
/*
 * One-time driver initialization: register the ipsec kernel control and
 * the PF_INET/PF_INET6 protocol plumbers for IFNET_FAMILY_IPSEC.
 *
 * Returns 0 on success, or the errno from ctl_register() /
 * proto_register_plumber(); any partial registration is unwound before
 * returning an error.
 */
errno_t
ipsec_register_control(void)
{
	struct kern_ctl_reg     kern_ctl;
	errno_t                 result = 0;

#if IPSEC_NEXUS
	/* Return value deliberately ignored; failure is logged inside. */
	ipsec_register_nexus();
#endif // IPSEC_NEXUS

	TAILQ_INIT(&ipsec_head);

	bzero(&kern_ctl, sizeof(kern_ctl));
	strlcpy(kern_ctl.ctl_name, IPSEC_CONTROL_NAME, sizeof(kern_ctl.ctl_name));
	kern_ctl.ctl_name[sizeof(kern_ctl.ctl_name) - 1] = 0;
	kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED | CTL_FLAG_REG_SETUP; /* Require root */
	kern_ctl.ctl_sendsize = 64 * 1024;
	kern_ctl.ctl_recvsize = 64 * 1024;
	kern_ctl.ctl_setup = ipsec_ctl_setup;
	kern_ctl.ctl_bind = ipsec_ctl_bind;
	kern_ctl.ctl_connect = ipsec_ctl_connect;
	kern_ctl.ctl_disconnect = ipsec_ctl_disconnect;
	kern_ctl.ctl_send = ipsec_ctl_send;
	kern_ctl.ctl_setopt = ipsec_ctl_setopt;
	kern_ctl.ctl_getopt = ipsec_ctl_getopt;

	result = ctl_register(&kern_ctl, &ipsec_kctlref);
	if (result != 0) {
		os_log_error(OS_LOG_DEFAULT, "ipsec_register_control - ctl_register failed: %d\n", result);
		return result;
	}

	/* Register the protocol plumbers */
	if ((result = proto_register_plumber(PF_INET, IFNET_FAMILY_IPSEC,
	    ipsec_attach_proto, NULL)) != 0) {
		os_log_error(OS_LOG_DEFAULT, "ipsec_register_control - proto_register_plumber(PF_INET, IFNET_FAMILY_IPSEC) failed: %d\n",
		    result);
		/* Unwind the kernel control registration. */
		ctl_deregister(ipsec_kctlref);
		return result;
	}

	/* Register the protocol plumbers */
	if ((result = proto_register_plumber(PF_INET6, IFNET_FAMILY_IPSEC,
	    ipsec_attach_proto, NULL)) != 0) {
		/* Unwind both the IPv4 plumber and the kernel control. */
		proto_unregister_plumber(PF_INET, IFNET_FAMILY_IPSEC);
		ctl_deregister(ipsec_kctlref);
		os_log_error(OS_LOG_DEFAULT, "ipsec_register_control - proto_register_plumber(PF_INET6, IFNET_FAMILY_IPSEC) failed: %d\n",
		    result);
		return result;
	}

	return 0;
}
411 
412 /* Helpers */
413 int
ipsec_interface_isvalid(ifnet_t interface)414 ipsec_interface_isvalid(ifnet_t interface)
415 {
416 	struct ipsec_pcb *__single pcb = NULL;
417 
418 	if (interface == NULL) {
419 		return 0;
420 	}
421 
422 	pcb = ifnet_softc(interface);
423 
424 	if (pcb == NULL) {
425 		return 0;
426 	}
427 
428 	/* When ctl disconnects, ipsec_unit is set to 0 */
429 	if (pcb->ipsec_unit == 0) {
430 		return 0;
431 	}
432 
433 	return 1;
434 }
435 
436 #if IPSEC_NEXUS
437 boolean_t
ipsec_interface_needs_netagent(ifnet_t interface)438 ipsec_interface_needs_netagent(ifnet_t interface)
439 {
440 	struct ipsec_pcb *__single pcb = NULL;
441 
442 	if (interface == NULL) {
443 		return FALSE;
444 	}
445 
446 	pcb = ifnet_softc(interface);
447 
448 	if (pcb == NULL) {
449 		return FALSE;
450 	}
451 
452 	return pcb->ipsec_needs_netagent == true;
453 }
454 #endif // IPSEC_NEXUS
455 
/*
 * Apply the default attributes to a freshly prepared ipsec interface:
 * MTU, interface flags, and IPv6 link-local address behavior.  In the
 * non-nexus (mbuf) build, also zero the interface stats in case the
 * ifnet was recycled.  Always returns 0.
 */
static errno_t
ipsec_ifnet_set_attrs(ifnet_t ifp)
{
	/* Set flags and additional information. */
	ifnet_set_mtu(ifp, 1500);
	ifnet_set_flags(ifp, IFF_UP | IFF_MULTICAST | IFF_POINTOPOINT, 0xffff);

	/* The interface must generate its own IPv6 LinkLocal address,
	 * if possible following the recommendation of RFC2472 to the 64bit interface ID
	 */
	ifnet_set_eflags(ifp, IFEF_NOAUTOIPV6LL, IFEF_NOAUTOIPV6LL);

#if !IPSEC_NEXUS
	/* Reset the stats in case as the interface may have been recycled */
	struct ifnet_stats_param stats;
	bzero(&stats, sizeof(struct ifnet_stats_param));
	ifnet_set_stat(ifp, &stats);
#endif // !IPSEC_NEXUS

	return 0;
}
477 
478 #if IPSEC_NEXUS
479 
480 static uuid_t ipsec_nx_dom_prov;
481 
/* Nexus domain provider init callback: nothing to set up for ipsec. */
static errno_t
ipsec_nxdp_init(__unused kern_nexus_domain_provider_t domprov)
{
	return 0;
}
487 
/* Nexus domain provider teardown callback: nothing to release. */
static void
ipsec_nxdp_fini(__unused kern_nexus_domain_provider_t domprov)
{
	// Ignore
}
493 
/*
 * Register the "com.apple.ipsec" NET_IF nexus domain provider.  On
 * success the provider UUID is saved in ipsec_nx_dom_prov.  Returns 0
 * or the errno from kern_nexus_register_domain_provider().
 */
static errno_t
ipsec_register_nexus(void)
{
	const struct kern_nexus_domain_provider_init dp_init = {
		.nxdpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
		.nxdpi_flags = 0,
		.nxdpi_init = ipsec_nxdp_init,
		.nxdpi_fini = ipsec_nxdp_fini
	};
	nexus_domain_provider_name_t domain_provider_name = "com.apple.ipsec";
	errno_t err = 0;

	/* ipsec_nxdp_init() is called before this function returns */
	err = kern_nexus_register_domain_provider(NEXUS_TYPE_NET_IF,
	    domain_provider_name,
	    &dp_init, sizeof(dp_init),
	    &ipsec_nx_dom_prov);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s: failed to register domain provider\n", __func__);
		return err;
	}
	return 0;
}
517 
/*
 * netif nexus prepare callback: cache the netif nexus on the PCB
 * (cleared again in ipsec_nexus_disconnected) and apply the default
 * interface attributes before the ifnet attaches.
 */
static errno_t
ipsec_netif_prepare(kern_nexus_t nexus, ifnet_t ifp)
{
	struct ipsec_pcb *__single pcb = kern_nexus_get_context(nexus);
	pcb->ipsec_netif_nexus = nexus;
	return ipsec_ifnet_set_attrs(ifp);
}
525 
/* Channel pre-connect callback: no per-channel context is needed. */
static errno_t
ipsec_nexus_pre_connect(kern_nexus_provider_t nxprov,
    proc_t p, kern_nexus_t nexus,
    nexus_port_t nexus_port, kern_channel_t channel, void **ch_ctx)
{
#pragma unused(nxprov, p)
#pragma unused(nexus, nexus_port, channel, ch_ctx)
	return 0;
}
535 
/*
 * Channel connected callback: take an ifnet io refcount (paired with
 * the ifnet_decr_iorefcnt() in ipsec_nexus_disconnected) and mark the
 * data path ready so data-move contexts may enter.  Returns ENXIO if
 * the interface is not attached.
 */
static errno_t
ipsec_nexus_connected(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_t channel)
{
#pragma unused(nxprov, channel)
	struct ipsec_pcb *__single pcb = kern_nexus_get_context(nexus);
	/* The second argument (1) asks for an io ref to be taken. */
	boolean_t ok = ifnet_is_attached(pcb->ipsec_ifp, 1);
	/* Mark the data path as ready */
	if (ok) {
		lck_mtx_lock(&pcb->ipsec_pcb_data_move_lock);
		IPSEC_SET_DATA_PATH_READY(pcb);
		lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
	}
	return ok ? 0 : ENXIO;
}
551 
/*
 * kpipe channel pre-disconnect: block until every in-flight data-move
 * context has drained so teardown can proceed safely.  A kpipe must
 * actually be allocated when this is called.
 */
static void
ipsec_nexus_pre_disconnect(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_t channel)
{
#pragma unused(nxprov, channel)
	struct ipsec_pcb *__single pcb = kern_nexus_get_context(nexus);

	VERIFY(pcb->ipsec_kpipe_count != 0);

	/* Wait until all threads in the data paths are done. */
	ipsec_wait_data_move_drain(pcb);
}
564 
/*
 * netif channel pre-disconnect: block until every in-flight data-move
 * context has drained before the netif channel is torn down.
 */
static void
ipsec_netif_pre_disconnect(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_t channel)
{
#pragma unused(nxprov, channel)
	struct ipsec_pcb *__single pcb = kern_nexus_get_context(nexus);

	/* Wait until all threads in the data paths are done. */
	ipsec_wait_data_move_drain(pcb);
}
575 
/*
 * Channel disconnected callback: clear the cached netif nexus pointer
 * (if this is the netif nexus) and drop the io ref taken in
 * ipsec_nexus_connected().
 */
static void
ipsec_nexus_disconnected(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_t channel)
{
#pragma unused(nxprov, channel)
	struct ipsec_pcb *__single pcb = kern_nexus_get_context(nexus);
	if (pcb->ipsec_netif_nexus == nexus) {
		pcb->ipsec_netif_nexus = NULL;
	}
	ifnet_decr_iorefcnt(pcb->ipsec_ifp);
}
587 
/*
 * kpipe ring init: resolve the channel's nexus UUID to its kpipe index,
 * stash that index (not a pointer) as the ring context, and record the
 * ring in the PCB's rx/tx ring table for that index.  Returns ENOENT if
 * the channel's UUID matches none of the allocated kpipes.
 */
static errno_t
ipsec_kpipe_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_t channel, kern_channel_ring_t ring, boolean_t is_tx_ring,
    void **ring_ctx)
{
#pragma unused(nxprov)
#pragma unused(channel)
	struct ipsec_pcb *__single pcb = kern_nexus_get_context(nexus);
	uint8_t ring_idx;

	/* Find which kpipe this channel belongs to (uuid_compare == 0 on match). */
	for (ring_idx = 0; ring_idx < pcb->ipsec_kpipe_count; ring_idx++) {
		if (!uuid_compare(channel->ch_info->cinfo_nx_uuid, pcb->ipsec_kpipe_uuid[ring_idx])) {
			break;
		}
	}

	if (ring_idx == pcb->ipsec_kpipe_count) {
		uuid_string_t uuidstr;
		uuid_unparse(channel->ch_info->cinfo_nx_uuid, uuidstr);
		os_log_error(OS_LOG_DEFAULT, "%s: %s cannot find channel %s\n", __func__, pcb->ipsec_if_xname, uuidstr);
		return ENOENT;
	}

	/* The ring context carries the small integer index, forged as a pointer. */
	*ring_ctx = __unsafe_forge_single(void *, (uintptr_t)ring_idx);

	if (!is_tx_ring) {
		VERIFY(pcb->ipsec_kpipe_rxring[ring_idx] == NULL);
		pcb->ipsec_kpipe_rxring[ring_idx] = ring;
	} else {
		VERIFY(pcb->ipsec_kpipe_txring[ring_idx] == NULL);
		pcb->ipsec_kpipe_txring[ring_idx] = ring;
	}
	return 0;
}
622 
623 static void
ipsec_kpipe_ring_fini(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring)624 ipsec_kpipe_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
625     kern_channel_ring_t ring)
626 {
627 #pragma unused(nxprov)
628 	bool found = false;
629 	struct ipsec_pcb *__single pcb = kern_nexus_get_context(nexus);
630 
631 	for (unsigned int i = 0; i < pcb->ipsec_kpipe_count; i++) {
632 		if (pcb->ipsec_kpipe_rxring[i] == ring) {
633 			pcb->ipsec_kpipe_rxring[i] = NULL;
634 			found = true;
635 		} else if (pcb->ipsec_kpipe_txring[i] == ring) {
636 			pcb->ipsec_kpipe_txring[i] = NULL;
637 			found = true;
638 		}
639 	}
640 	VERIFY(found);
641 }
642 
/*
 * kpipe TX sync callback: this path does not consume the TX slots
 * itself.  If the kpipe TX ring has anything pending, it kicks netif RX
 * ring 0 so that ring's sync routine pulls the packets through.  Always
 * returns 0; a stopped data path or missing kpipe is a silent no-op.
 */
static errno_t
ipsec_kpipe_sync_tx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t tx_ring, uint32_t flags)
{
#pragma unused(nxprov)
#pragma unused(flags)
	struct ipsec_pcb *__single pcb = kern_nexus_get_context(nexus);

	/* Enter a data-move context; bail out if the data path is stopped. */
	if (!ipsec_data_move_begin(pcb)) {
		os_log_info(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
		return 0;
	}

	lck_rw_lock_shared(&pcb->ipsec_pcb_lock);

	if (!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		ipsec_data_move_end(pcb);
		return 0;
	}

	VERIFY(pcb->ipsec_kpipe_count);

	kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
	if (tx_slot == NULL) {
		// Nothing to write, bail
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		ipsec_data_move_end(pcb);
		return 0;
	}

	// Signal the netif ring to read
	kern_channel_ring_t __single rx_ring = pcb->ipsec_netif_rxring[0];
	lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);

	if (rx_ring != NULL) {
		kern_channel_notify(rx_ring, 0);
	}

	ipsec_data_move_end(pcb);
	return 0;
}
685 
/*
 * Encrypt one kpipe packet: read the IP version nibble from the source
 * packet (sph) and dispatch to the v4 or v6 interface kpipe output
 * routine, which writes the encrypted result into the destination
 * packet (dph).
 *
 * Returns 0 on success, EINVAL for a too-short buffer or an unknown IP
 * version, or the errno from the ipsec output routine.
 */
static errno_t
ipsec_encrypt_kpipe_pkt(ifnet_t interface, kern_packet_t sph,
    kern_packet_t dph)
{
	uint8_t *sbaddr = NULL;
	int err = 0;
	uint32_t slen = 0;

	VERIFY(interface != NULL);
	VERIFY(sph != 0);
	VERIFY(dph != 0);

	kern_buflet_t __single sbuf = __packet_get_next_buflet(sph, NULL);
	VERIFY(sbuf != NULL);
	slen = __buflet_get_data_length(sbuf);

	/* Need at least a v4 header before touching the version nibble. */
	if (__improbable(slen < sizeof(struct ip))) {
		os_log_error(OS_LOG_DEFAULT, "ipsec encrypt kpipe pkt: source "
		    "buffer shorter than ip header, %u\n", slen);
		return EINVAL;
	}

	sbaddr = ipsec_kern_buflet_to_buffer(sbuf);
	struct ip *ip = (struct ip *)(void *)sbaddr;
	ASSERT(IP_HDR_ALIGNED_P(ip));

	/* ip_v occupies the same position in v4 and v6 headers. */
	u_int ip_vers = ip->ip_v;
	switch (ip_vers) {
	case IPVERSION: {
		err = ipsec4_interface_kpipe_output(interface, sph, dph);
		if (__improbable(err != 0)) {
			os_log_error(OS_LOG_DEFAULT, "ipsec4 interface kpipe "
			    "output error %d\n", err);
			return err;
		}
		break;
	}
	case 6: { /* IPv6 version nibble */
		err = ipsec6_interface_kpipe_output(interface, sph, dph);
		if (__improbable(err != 0)) {
			os_log_error(OS_LOG_DEFAULT, "ipsec6 interface kpipe "
			    "output error %d\n", err);
			return err;
		}
		break;
	}
	default: {
		os_log_error(OS_LOG_DEFAULT, "received unknown packet version: %d\n",
		    ip_vers);
		return EINVAL;
	}
	}

	return err;
}
741 
742 static errno_t
ipsec_kpipe_sync_rx(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t rx_ring,uint32_t flags)743 ipsec_kpipe_sync_rx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
744     kern_channel_ring_t rx_ring, uint32_t flags)
745 {
746 #pragma unused(nxprov)
747 #pragma unused(flags)
748 	struct ipsec_pcb *__single pcb = kern_nexus_get_context(nexus);
749 	struct kern_channel_ring_stat_increment rx_ring_stats;
750 	uint8_t ring_idx = (uint8_t)(uintptr_t)kern_channel_ring_get_context(rx_ring);
751 
752 	if (!ipsec_data_move_begin(pcb)) {
753 		os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
754 		return 0;
755 	}
756 
757 	lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
758 
759 	if (!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
760 		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
761 		ipsec_data_move_end(pcb);
762 		return 0;
763 	}
764 
765 	VERIFY(pcb->ipsec_kpipe_count);
766 	VERIFY(ring_idx <= pcb->ipsec_kpipe_count);
767 
768 	// Reclaim user-released slots
769 	(void) kern_channel_reclaim(rx_ring);
770 
771 	uint32_t avail = kern_channel_available_slot_count(rx_ring);
772 	if (avail == 0) {
773 		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
774 		os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d no room in rx_ring\n", __func__,
775 		    pcb->ipsec_if_xname, rx_ring->ckr_name, ring_idx);
776 		ipsec_data_move_end(pcb);
777 		return 0;
778 	}
779 
780 	kern_channel_ring_t __single tx_ring = pcb->ipsec_netif_txring[ring_idx];
781 	if (tx_ring == NULL) {
782 		// Net-If TX ring not set up yet, nothing to read
783 		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
784 		os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d bad netif_txring 1\n", __func__,
785 		    pcb->ipsec_if_xname, rx_ring->ckr_name, ring_idx);
786 		ipsec_data_move_end(pcb);
787 		return 0;
788 	}
789 
790 	struct netif_stats *nifs = &NX_NETIF_PRIVATE(pcb->ipsec_netif_nexus)->nif_stats;
791 
792 	// Unlock ipsec before entering ring
793 	lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
794 
795 	(void)kr_enter(tx_ring, TRUE);
796 
797 	// Lock again after entering and validate
798 	lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
799 	if (tx_ring != pcb->ipsec_netif_txring[ring_idx]) {
800 		// Ring no longer valid
801 		// Unlock first, then exit ring
802 		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
803 		kr_exit(tx_ring);
804 		os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d bad netif_txring 2\n", __func__,
805 		    pcb->ipsec_if_xname, rx_ring->ckr_name, ring_idx);
806 		ipsec_data_move_end(pcb);
807 		return 0;
808 	}
809 
810 	struct kern_channel_ring_stat_increment tx_ring_stats;
811 	bzero(&tx_ring_stats, sizeof(tx_ring_stats));
812 	kern_channel_slot_t tx_pslot = NULL;
813 	kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
814 	if (tx_slot == NULL) {
815 		// Nothing to read, don't bother signalling
816 		// Unlock first, then exit ring
817 		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
818 		kr_exit(tx_ring);
819 		ipsec_data_move_end(pcb);
820 		return 0;
821 	}
822 
823 	struct kern_pbufpool *rx_pp = rx_ring->ckr_pp;
824 	VERIFY(rx_pp != NULL);
825 	struct kern_pbufpool *tx_pp = tx_ring->ckr_pp;
826 	VERIFY(tx_pp != NULL);
827 	bzero(&rx_ring_stats, sizeof(rx_ring_stats));
828 	kern_channel_slot_t rx_pslot = NULL;
829 	kern_channel_slot_t rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);
830 	kern_packet_t tx_chain_ph = 0;
831 
832 	while (rx_slot != NULL && tx_slot != NULL) {
833 		size_t tx_pkt_length = 0;
834 		errno_t error = 0;
835 
836 		// Allocate rx packet
837 		kern_packet_t rx_ph = 0;
838 		error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
839 		if (__improbable(error != 0)) {
840 			os_log_info(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s: "
841 			    "failed to allocate packet\n", pcb->ipsec_ifp->if_xname);
842 			STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
843 			STATS_INC(nifs, NETIF_STATS_DROP);
844 			break;
845 		}
846 
847 		kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);
848 		if (__improbable(tx_ph == 0)) {
849 			// Advance TX ring
850 			tx_pslot = tx_slot;
851 			tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
852 			kern_pbufpool_free(rx_pp, rx_ph);
853 			continue;
854 		}
855 
856 		(void) kern_channel_slot_detach_packet(tx_ring, tx_slot, tx_ph);
857 		if (tx_chain_ph != 0) {
858 			kern_packet_append(tx_ph, tx_chain_ph);
859 		}
860 		tx_chain_ph = tx_ph;
861 
862 		// Advance TX ring
863 		tx_pslot = tx_slot;
864 		tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
865 
866 		bpf_tap_packet_out(pcb->ipsec_ifp, DLT_RAW, tx_ph, NULL, 0);
867 
868 		tx_pkt_length = kern_packet_get_data_length(tx_ph);
869 		if (tx_pkt_length == 0 || tx_pkt_length > pcb->ipsec_slot_size) {
870 			os_log_info(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s: "
871 			    "packet length %zu", pcb->ipsec_ifp->if_xname,
872 			    tx_pkt_length);
873 			kern_pbufpool_free(rx_pp, rx_ph);
874 			STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
875 			STATS_INC(nifs, NETIF_STATS_DROP);
876 			continue;
877 		}
878 
879 		// Increment TX stats
880 		tx_ring_stats.kcrsi_slots_transferred++;
881 		tx_ring_stats.kcrsi_bytes_transferred += tx_pkt_length;
882 
883 		// Encrypt packet
884 		lck_mtx_lock(&pcb->ipsec_kpipe_encrypt_lock);
885 		error = ipsec_encrypt_kpipe_pkt(pcb->ipsec_ifp, tx_ph, rx_ph);
886 		lck_mtx_unlock(&pcb->ipsec_kpipe_encrypt_lock);
887 		if (__improbable(error != 0)) {
888 			os_log_info(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s: "
889 			    "failed to encrypt packet", pcb->ipsec_ifp->if_xname);
890 			kern_pbufpool_free(rx_pp, rx_ph);
891 			STATS_INC(nifs, NETIF_STATS_DROP);
892 			continue;
893 		}
894 
895 		kern_packet_clear_flow_uuid(rx_ph);         // Zero flow id
896 		// Finalize and attach the packet
897 		kern_buflet_t __single rx_buf = __packet_get_next_buflet(rx_ph, NULL);
898 		error = kern_buflet_set_data_offset(rx_buf, 0);
899 		VERIFY(error == 0);
900 		error = kern_packet_finalize(rx_ph);
901 		VERIFY(error == 0);
902 		error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
903 		VERIFY(error == 0);
904 
905 		STATS_INC(nifs, NETIF_STATS_TX_PACKETS);
906 		STATS_INC(nifs, NETIF_STATS_TX_COPY_DIRECT);
907 
908 		rx_ring_stats.kcrsi_slots_transferred++;
909 		rx_ring_stats.kcrsi_bytes_transferred += kern_packet_get_data_length(rx_ph);
910 
911 		if (!pcb->ipsec_ext_ifdata_stats) {
912 			ifnet_stat_increment_out(pcb->ipsec_ifp, 1,
913 			    kern_packet_get_data_length(rx_ph), 0);
914 		}
915 
916 		rx_pslot = rx_slot;
917 		rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
918 	}
919 
920 	if (rx_pslot) {
921 		kern_channel_advance_slot(rx_ring, rx_pslot);
922 		kern_channel_increment_ring_net_stats(rx_ring, pcb->ipsec_ifp, &rx_ring_stats);
923 	}
924 
925 	if (tx_chain_ph != 0) {
926 		kern_pbufpool_free_chain(tx_pp, tx_chain_ph);
927 	}
928 
929 	if (tx_pslot) {
930 		kern_channel_advance_slot(tx_ring, tx_pslot);
931 		kern_channel_increment_ring_net_stats(tx_ring, pcb->ipsec_ifp, &tx_ring_stats);
932 		(void)kern_channel_reclaim(tx_ring);
933 	}
934 
935 	/* always reenable output */
936 	errno_t error = ifnet_enable_output(pcb->ipsec_ifp);
937 	if (error != 0) {
938 		os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx: ifnet_enable_output returned error %d\n", error);
939 	}
940 
941 	// Unlock first, then exit ring
942 	lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
943 
944 	if (tx_pslot != NULL) {
945 		kern_channel_notify(tx_ring, 0);
946 	}
947 	kr_exit(tx_ring);
948 
949 	ipsec_data_move_end(pcb);
950 	return 0;
951 }
952 
953 static uint8_t
ipsec_find_tx_ring_by_svc(kern_packet_svc_class_t svc_class)954 ipsec_find_tx_ring_by_svc(kern_packet_svc_class_t svc_class)
955 {
956 	switch (svc_class) {
957 	case KPKT_SC_VO: {
958 		return 0;
959 	}
960 	case KPKT_SC_VI: {
961 		return 1;
962 	}
963 	case KPKT_SC_BE: {
964 		return 2;
965 	}
966 	case KPKT_SC_BK: {
967 		return 3;
968 	}
969 	default: {
970 		VERIFY(0);
971 		return 0;
972 	}
973 	}
974 }
975 
976 static errno_t
ipsec_netif_ring_init(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel,kern_channel_ring_t ring,boolean_t is_tx_ring,void ** ring_ctx)977 ipsec_netif_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
978     kern_channel_t channel, kern_channel_ring_t ring, boolean_t is_tx_ring,
979     void **ring_ctx)
980 {
981 #pragma unused(nxprov)
982 #pragma unused(channel)
983 	struct ipsec_pcb *__single pcb = kern_nexus_get_context(nexus);
984 
985 	if (!is_tx_ring) {
986 		VERIFY(pcb->ipsec_netif_rxring[0] == NULL);
987 		pcb->ipsec_netif_rxring[0] = ring;
988 	} else {
989 		uint8_t ring_idx = 0;
990 		if (ipsec_in_wmm_mode(pcb)) {
991 			int err;
992 			kern_packet_svc_class_t svc_class;
993 			err = kern_channel_get_service_class(ring, &svc_class);
994 			VERIFY(err == 0);
995 			ring_idx = ipsec_find_tx_ring_by_svc(svc_class);
996 			VERIFY(ring_idx < IPSEC_IF_WMM_RING_COUNT);
997 		}
998 
999 		*ring_ctx = __unsafe_forge_single(void *, (uintptr_t)ring_idx);
1000 
1001 		VERIFY(pcb->ipsec_netif_txring[ring_idx] == NULL);
1002 		pcb->ipsec_netif_txring[ring_idx] = ring;
1003 	}
1004 	return 0;
1005 }
1006 
1007 static void
ipsec_netif_ring_fini(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring)1008 ipsec_netif_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
1009     kern_channel_ring_t ring)
1010 {
1011 #pragma unused(nxprov)
1012 	struct ipsec_pcb *__single pcb = kern_nexus_get_context(nexus);
1013 	bool found = false;
1014 
1015 	for (int i = 0; i < IPSEC_NETIF_MAX_RX_RING_COUNT; i++) {
1016 		if (pcb->ipsec_netif_rxring[i] == ring) {
1017 			pcb->ipsec_netif_rxring[i] = NULL;
1018 			VERIFY(!found);
1019 			found = true;
1020 		}
1021 	}
1022 	for (int i = 0; i < IPSEC_NETIF_MAX_TX_RING_COUNT; i++) {
1023 		if (pcb->ipsec_netif_txring[i] == ring) {
1024 			pcb->ipsec_netif_txring[i] = NULL;
1025 			VERIFY(!found);
1026 			found = true;
1027 		}
1028 	}
1029 	VERIFY(found);
1030 }
1031 
/*
 * Evaluate NECP output policy for a packet being sent through the
 * ipsec interface on the BSD-stack injection path.
 *
 * Returns true when the packet may proceed (marking it with the
 * matched policy id), false when it must be dropped (non-IP payload,
 * or a DROP / SOCKET_DIVERT policy verdict).
 */
static bool
ipsec_netif_check_policy(ifnet_t interface, mbuf_t data)
{
	necp_kernel_policy_result necp_result = 0;
	necp_kernel_policy_result_parameter necp_result_parameter = {};
	uint32_t necp_matched_policy_id = 0;
	struct ip_out_args args4 = { };
	struct ip6_out_args args6 = { };

	// This packet has been marked with IP level policy, do not mark again.
	if (data && data->m_pkthdr.necp_mtag.necp_policy_id >= NECP_KERNEL_POLICY_ID_FIRST_VALID_IP) {
		return true;
	}

	// NOTE(review): 'data' is NULL-checked above but dereferenced
	// unconditionally from here on; callers presumably always pass a
	// valid mbuf -- confirm before relying on NULL being tolerated.
	size_t length = mbuf_pkthdr_len(data);
	if (length < sizeof(struct ip)) {
		return false;
	}

	struct ip *ip = mtod(data, struct ip *);
	u_int ip_version = ip->ip_v;
	switch (ip_version) {
	case 4: {
		// Scope the policy lookup to this interface when it is known.
		if (interface != NULL) {
			args4.ipoa_flags |= IPOAF_BOUND_IF;
			args4.ipoa_boundif = interface->if_index;
		}
		necp_matched_policy_id = necp_ip_output_find_policy_match(data, IP_OUTARGS, &args4, NULL,
		    &necp_result, &necp_result_parameter);
		break;
	}
	case 6: {
		if (interface != NULL) {
			args6.ip6oa_flags |= IP6OAF_BOUND_IF;
			args6.ip6oa_boundif = interface->if_index;
		}
		necp_matched_policy_id = necp_ip6_output_find_policy_match(data, IPV6_OUTARGS, &args6, NULL,
		    &necp_result, &necp_result_parameter);
		break;
	}
	default: {
		// Not an IPv4/IPv6 packet: reject.
		return false;
	}
	}

	if (necp_result == NECP_KERNEL_POLICY_RESULT_DROP ||
	    necp_result == NECP_KERNEL_POLICY_RESULT_SOCKET_DIVERT) {
		/* Drop and flow divert packets should be blocked at the IP layer */
		return false;
	}

	necp_mark_packet_from_ip(data, necp_matched_policy_id);
	return true;
}
1086 
/*
 * Netif nexus TX sync callback: drain the netif TX ring.
 *
 * If a kernel pipe is allocated, packets are not consumed here at all;
 * the matching kpipe RX ring is notified so the kpipe client can pull
 * and encrypt them. Otherwise each TX packet is copied into an mbuf,
 * checked against NECP policy, and handed to ipsec_output() for
 * encryption and injection into the BSD stack.
 *
 * Always returns 0; drops are accounted in the netif stats.
 */
static errno_t
ipsec_netif_sync_tx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t tx_ring, uint32_t flags)
{
#pragma unused(nxprov)
#pragma unused(flags)
	struct ipsec_pcb *__single pcb = kern_nexus_get_context(nexus);

	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;

	// Bail out early if the data path is stopped (e.g. during teardown).
	if (!ipsec_data_move_begin(pcb)) {
		os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
		return 0;
	}

	lck_rw_lock_shared(&pcb->ipsec_pcb_lock);

	struct kern_channel_ring_stat_increment tx_ring_stats;
	bzero(&tx_ring_stats, sizeof(tx_ring_stats));
	kern_channel_slot_t tx_pslot = NULL;
	kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
	kern_packet_t tx_chain_ph = 0;

	STATS_INC(nifs, NETIF_STATS_TX_SYNC);

	if (tx_slot == NULL) {
		// Nothing to write, don't bother signalling
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		ipsec_data_move_end(pcb);
		return 0;
	}

	if (pcb->ipsec_kpipe_count &&
	    ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
		// Select the corresponding kpipe rx ring
		uint8_t ring_idx = (uint8_t)(uintptr_t)kern_channel_ring_get_context(tx_ring);
		VERIFY(ring_idx < IPSEC_IF_MAX_RING_COUNT);
		kern_channel_ring_t __single rx_ring = pcb->ipsec_kpipe_rxring[ring_idx];

		// Unlock while calling notify
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);

		// Signal the kernel pipe ring to read
		if (rx_ring != NULL) {
			kern_channel_notify(rx_ring, 0);
		}

		ipsec_data_move_end(pcb);
		return 0;
	}

	// If we're here, we're injecting into the BSD stack
	while (tx_slot != NULL) {
		size_t length = 0;
		mbuf_t __single data = NULL;

		kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);

		if (tx_ph == 0) {
			// Advance TX ring
			tx_pslot = tx_slot;
			tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
			continue;
		}
		// Detach the packet and accumulate it on a chain that is freed
		// back to the pool in bulk after the loop.
		(void) kern_channel_slot_detach_packet(tx_ring, tx_slot, tx_ph);
		if (tx_chain_ph != 0) {
			kern_packet_append(tx_ph, tx_chain_ph);
		}
		tx_chain_ph = tx_ph;

		// Advance TX ring
		tx_pslot = tx_slot;
		tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);

		kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
		VERIFY(tx_buf != NULL);

		uint8_t *tx_baddr = ipsec_kern_buflet_to_buffer(tx_buf);
		VERIFY(tx_baddr != 0);
		tx_baddr += kern_buflet_get_data_offset(tx_buf);

		bpf_tap_packet_out(pcb->ipsec_ifp, DLT_RAW, tx_ph, NULL, 0);

		// Clamp the copied length to the configured slot size.
		length = MIN(kern_packet_get_data_length(tx_ph),
		    pcb->ipsec_slot_size);

		if (length > 0) {
			errno_t error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, &data);
			if (error == 0) {
				error = mbuf_copyback(data, 0, length, tx_baddr, MBUF_DONTWAIT);
				if (error == 0) {
					// Mark packet from policy
					uint32_t policy_id = kern_packet_get_policy_id(tx_ph);
					uint32_t skip_policy_id = kern_packet_get_skip_policy_id(tx_ph);
					necp_mark_packet_from_ip_with_skip(data, policy_id, skip_policy_id);

					// Check policy with NECP
					if (!ipsec_netif_check_policy(pcb->ipsec_ifp, data)) {
						os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - failed policy check\n", pcb->ipsec_ifp->if_xname);
						STATS_INC(nifs, NETIF_STATS_DROP);
						mbuf_freem(data);
						data = NULL;
					} else {
						// Send through encryption
						error = ipsec_output(pcb->ipsec_ifp, data);
						if (error != 0) {
							os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - ipsec_output error %d\n", pcb->ipsec_ifp->if_xname, error);
						}
					}
				} else {
					os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - mbuf_copyback(%zu) error %d\n", pcb->ipsec_ifp->if_xname, length, error);
					STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
					STATS_INC(nifs, NETIF_STATS_DROP);
					mbuf_freem(data);
					data = NULL;
				}
			} else {
				os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - mbuf_gethdr error %d\n", pcb->ipsec_ifp->if_xname, error);
				STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
				STATS_INC(nifs, NETIF_STATS_DROP);
			}
		} else {
			os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - 0 length packet\n", pcb->ipsec_ifp->if_xname);
			STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
			STATS_INC(nifs, NETIF_STATS_DROP);
		}

		// data == NULL means the mbuf copy/policy step failed; stop
		// draining for this sync and leave remaining slots for later.
		if (data == NULL) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s: no encrypted packet to send\n", pcb->ipsec_ifp->if_xname);
			break;
		}

		STATS_INC(nifs, NETIF_STATS_TX_PACKETS);
		STATS_INC(nifs, NETIF_STATS_TX_COPY_MBUF);

		tx_ring_stats.kcrsi_slots_transferred++;
		tx_ring_stats.kcrsi_bytes_transferred += length;
	}

	// Return all detached TX packets to the pool in one operation.
	if (tx_chain_ph != 0) {
		kern_pbufpool_free_chain(tx_ring->ckr_pp, tx_chain_ph);
	}

	if (tx_pslot) {
		kern_channel_advance_slot(tx_ring, tx_pslot);
		kern_channel_increment_ring_net_stats(tx_ring, pcb->ipsec_ifp, &tx_ring_stats);
		(void)kern_channel_reclaim(tx_ring);
	}

	lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
	ipsec_data_move_end(pcb);

	return 0;
}
1241 
/*
 * Kick a single netif TX ring: refill/sync it, then (in kpipe mode)
 * notify the matching kpipe RX ring so the client can read, possibly
 * disabling interface output while the TX ring has no room.
 *
 * Returns 0 on success, ENXIO if the ring was torn down underneath us.
 */
static errno_t
ipsec_netif_tx_doorbell_one(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t ring, uint32_t flags, uint8_t ring_idx)
{
#pragma unused(nxprov)
	struct ipsec_pcb *__single pcb = kern_nexus_get_context(nexus);
	boolean_t more = false;
	errno_t rc = 0;

	VERIFY((flags & KERN_NEXUS_TXDOORBELLF_ASYNC_REFILL) == 0);

	/*
	 * Refill and sync the ring; we may be racing against another thread doing
	 * an RX sync that also wants to do kr_enter(), and so use the blocking
	 * variant here.
	 */
	rc = kern_channel_tx_refill_canblock(ring, UINT32_MAX, UINT32_MAX, true, &more);
	if (rc != 0 && rc != EAGAIN && rc != EBUSY) {
		os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s tx refill failed %d\n", __func__,
		    pcb->ipsec_if_xname, ring->ckr_name, rc);
	}

	(void) kr_enter(ring, TRUE);
	lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
	// Re-validate under the pcb lock: the ring can be unregistered
	// while we were refilling above.
	if (ring != pcb->ipsec_netif_txring[ring_idx]) {
		// ring no longer valid
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		kr_exit(ring);
		os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d bad netif_txring 3\n", __func__,
		    pcb->ipsec_if_xname, ring->ckr_name, ring_idx);
		return ENXIO;
	}

	if (pcb->ipsec_kpipe_count) {
		uint32_t tx_available = kern_channel_available_slot_count(ring);
		if (pcb->ipsec_netif_txring_size > 0 &&
		    tx_available >= pcb->ipsec_netif_txring_size - 1) {
			// No room left in tx ring, disable output for now
			errno_t error = ifnet_disable_output(pcb->ipsec_ifp);
			if (error != 0) {
				os_log_error(OS_LOG_DEFAULT, "ipsec_netif_tx_doorbell: ifnet_disable_output returned error %d\n", error);
			}
		}
	}

	if (pcb->ipsec_kpipe_count) {
		kern_channel_ring_t __single rx_ring = pcb->ipsec_kpipe_rxring[ring_idx];

		// Unlock while calling notify
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		// Signal the kernel pipe ring to read
		if (rx_ring != NULL) {
			kern_channel_notify(rx_ring, 0);
		}
	} else {
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
	}

	kr_exit(ring);

	return 0;
}
1304 
1305 static errno_t
ipsec_netif_tx_doorbell(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring,__unused uint32_t flags)1306 ipsec_netif_tx_doorbell(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
1307     kern_channel_ring_t ring, __unused uint32_t flags)
1308 {
1309 	errno_t ret = 0;
1310 	struct ipsec_pcb *__single pcb = kern_nexus_get_context(nexus);
1311 
1312 	if (!ipsec_data_move_begin(pcb)) {
1313 		os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
1314 		return 0;
1315 	}
1316 
1317 	if (ipsec_in_wmm_mode(pcb)) {
1318 		for (uint8_t i = 0; i < IPSEC_IF_WMM_RING_COUNT; i++) {
1319 			kern_channel_ring_t __single nring = pcb->ipsec_netif_txring[i];
1320 			ret = ipsec_netif_tx_doorbell_one(nxprov, nexus, nring, flags, i);
1321 			if (ret) {
1322 				break;
1323 			}
1324 		}
1325 	} else {
1326 		ret = ipsec_netif_tx_doorbell_one(nxprov, nexus, ring, flags, 0);
1327 	}
1328 
1329 	ipsec_data_move_end(pcb);
1330 	return ret;
1331 }
1332 
/*
 * Convert one encrypted kpipe TX packet (kpipe_ph) into a decrypted
 * netif RX packet (netif_ph): copy the outer IP header into the netif
 * buffer, run ESP decryption via esp_kpipe_input(), propagate the
 * kpipe wake-packet flag, and finalize the netif packet.
 *
 * Returns 0 on success; on failure returns an errno and bumps the
 * netif drop statistics.
 */
static errno_t
ipsec_transform_kpipe_pkt_to_netif_pkt(struct ipsec_pcb *pcb,
    struct kern_channel_ring_stat_increment *tx_ring_stats,
    struct netif_stats *nifs, kern_packet_t kpipe_ph, kern_packet_t netif_ph)
{
	kern_buflet_t kpipe_buf = NULL, netif_buf = NULL;
	uint8_t *kpipe_baddr = NULL, *netif_baddr = NULL;
	uuid_t flow_uuid;
	size_t iphlen = 0;
	uint32_t kpipe_buf_len = 0, netif_buf_lim = 0;
	int err = 0;

	VERIFY(kpipe_ph != 0);
	VERIFY(netif_ph != 0);
	VERIFY(pcb != NULL);
	VERIFY(tx_ring_stats != NULL);
	VERIFY(nifs != NULL);

	// Resolve source buffer: base address plus current data offset.
	kpipe_buf = kern_packet_get_next_buflet(kpipe_ph, NULL);
	VERIFY(kpipe_buf != NULL);
	kpipe_baddr = ipsec_kern_buflet_to_buffer(kpipe_buf);
	VERIFY(kpipe_baddr != NULL);
	kpipe_baddr += kern_buflet_get_data_offset(kpipe_buf);
	kpipe_buf_len = kern_buflet_get_data_length(kpipe_buf);

	// Resolve destination buffer and the space remaining past its offset.
	netif_buf = kern_packet_get_next_buflet(netif_ph, NULL);
	VERIFY(netif_buf != NULL);
	netif_baddr = ipsec_kern_buflet_to_buffer(netif_buf);
	VERIFY(netif_baddr != NULL);
	netif_baddr += kern_buflet_get_data_offset(netif_buf);
	netif_buf_lim = __buflet_get_data_limit(netif_buf);
	netif_buf_lim -= __buflet_get_data_offset(netif_buf);

	if (kpipe_buf_len > pcb->ipsec_slot_size) {
		os_log_info(OS_LOG_DEFAULT,
		    "ipsec_transform_kpipe_pkt_to_netif_pkt %s: kpipe buffer length "
		    "%u > pcb ipsec slot size %u", pcb->ipsec_ifp->if_xname,
		    kpipe_buf_len, pcb->ipsec_slot_size);
		STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
		err = EMSGSIZE;
		goto bad;
	}

	tx_ring_stats->kcrsi_slots_transferred++;
	tx_ring_stats->kcrsi_bytes_transferred += kpipe_buf_len;

	if (__improbable(kpipe_buf_len < sizeof(struct ip))) {
		os_log_info(OS_LOG_DEFAULT, "ipsec_transform_kpipe_pkt_to_netif_pkt %s - bad "
		    "packet length %u\n", pcb->ipsec_ifp->if_xname, kpipe_buf_len);
		STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
		err = EBADMSG;
		goto bad;
	}

	struct ip *ip = (struct ip *)(void *)kpipe_baddr;
	ASSERT(IP_HDR_ALIGNED_P(ip));

	// Determine the outer IP header length to pre-copy into the
	// netif buffer before decryption.
	u_int ip_vers = ip->ip_v;
	switch (ip_vers) {
	case IPVERSION: {
#ifdef _IP_VHL
		iphlen = IP_VHL_HL(ip->ip_vhl) << 2;
#else /* _IP_VHL */
		iphlen = ip->ip_hl << 2;
#endif /* _IP_VHL */
		break;
	}
	case 6: {
		iphlen = sizeof(struct ip6_hdr);
		break;
	}
	default: {
		os_log_info(OS_LOG_DEFAULT, "ipsec_transform_kpipe_pkt_to_netif_pkt %s - bad "
		    "ip version %u\n", pcb->ipsec_ifp->if_xname, ip_vers);
		err = EBADMSG;
		goto bad;
	}
	}

	if (__improbable(kpipe_buf_len < iphlen)) {
		os_log_info(OS_LOG_DEFAULT, "ipsec_transform_kpipe_pkt_to_netif_pkt %s - bad "
		    "packet length %u\n", pcb->ipsec_ifp->if_xname, kpipe_buf_len);
		STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
		err = EBADMSG;
		goto bad;
	}

	if (__improbable(netif_buf_lim < iphlen)) {
		os_log_info(OS_LOG_DEFAULT, "ipsec_transform_kpipe_pkt_to_netif_pkt %s - netif "
		    "buffer length %u too short\n", pcb->ipsec_ifp->if_xname, netif_buf_lim);
		STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
		err = EBADMSG;
		goto bad;
	}

	// Seed the netif buffer with the outer IP header; esp_kpipe_input
	// fills in the decrypted payload.
	memcpy(netif_baddr, kpipe_baddr, iphlen);
	__buflet_set_data_length(netif_buf, (uint16_t)iphlen);

	lck_mtx_lock(&pcb->ipsec_kpipe_decrypt_lock);
	err = esp_kpipe_input(pcb->ipsec_ifp, kpipe_ph, netif_ph);
	lck_mtx_unlock(&pcb->ipsec_kpipe_decrypt_lock);

	if (__improbable((err != 0))) {
		goto bad;
	}

	// The kpipe client signals a wake packet via the first byte of the
	// flow UUID; carry the flag over to the netif packet.
	kern_packet_get_flow_uuid(kpipe_ph, &flow_uuid);
	uint8_t *id_8 = (uint8_t *)flow_uuid;
	if (__improbable((id_8[0] & IPSEC_KPIPE_FLAG_WAKE_PKT) == IPSEC_KPIPE_FLAG_WAKE_PKT)) {
		os_log_info(OS_LOG_DEFAULT, "ipsec_transform_kpipe_pkt_to_netif_pkt %s: wake packet "
		    "flag is set\n", pcb->ipsec_ifp->if_xname);
		__packet_set_wake_flag(netif_ph);
	}

	kern_packet_clear_flow_uuid(netif_ph);
	err = kern_buflet_set_data_offset(netif_buf, 0);
	VERIFY(err == 0);
	err = kern_packet_set_link_header_offset(netif_ph, 0);
	VERIFY(err == 0);
	err = kern_packet_set_network_header_offset(netif_ph, 0);
	VERIFY(err == 0);
	err = kern_packet_finalize(netif_ph);
	VERIFY(err == 0);

	return 0;
bad:
	STATS_INC(nifs, NETIF_STATS_DROP);
	return err;
}
1462 
1463 
/*
 * Netif nexus RX sync callback: fill the netif RX ring from two
 * sources, in order:
 *
 *  1. The legacy mbuf input chain (pcb->ipsec_input_chain), filled by
 *     the BSD-stack decrypt path. Oversized packets are IP-fragmented
 *     and re-queued on the chain rather than delivered directly.
 *  2. Each kpipe TX ring: encrypted packets written by the kpipe
 *     client are decrypted straight into netif RX packets via
 *     ipsec_transform_kpipe_pkt_to_netif_pkt().
 *
 * Always returns 0; drops are accounted in the netif stats.
 */
static errno_t
ipsec_netif_sync_rx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t rx_ring, uint32_t flags)
{
#pragma unused(nxprov)
#pragma unused(flags)
	struct ipsec_pcb *__single pcb = kern_nexus_get_context(nexus);
	struct kern_channel_ring_stat_increment rx_ring_stats;

	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;

	// Bail out early if the data path is stopped (e.g. during teardown).
	if (!ipsec_data_move_begin(pcb)) {
		os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
		return 0;
	}

	lck_rw_lock_shared(&pcb->ipsec_pcb_lock);

	// Reclaim user-released slots
	(void) kern_channel_reclaim(rx_ring);

	STATS_INC(nifs, NETIF_STATS_RX_SYNC);

	uint32_t avail = kern_channel_available_slot_count(rx_ring);
	if (avail == 0) {
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		ipsec_data_move_end(pcb);
		return 0;
	}

	struct kern_pbufpool *rx_pp = rx_ring->ckr_pp;
	VERIFY(rx_pp != NULL);
	bzero(&rx_ring_stats, sizeof(rx_ring_stats));
	kern_channel_slot_t rx_pslot = NULL;
	kern_channel_slot_t rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);

	// Source 1: drain the legacy mbuf input chain into the RX ring.
	while (rx_slot != NULL) {
		// Check for a waiting packet
		lck_mtx_lock(&pcb->ipsec_input_chain_lock);
		mbuf_t __single data = pcb->ipsec_input_chain;
		if (data == NULL) {
			lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
			break;
		}

		// Allocate rx packet
		kern_packet_t rx_ph = 0;
		errno_t error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
		if (__improbable(error != 0)) {
			STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_PKT);
			STATS_INC(nifs, NETIF_STATS_DROP);
			lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
			break;
		}

		// Advance waiting packets
		if (pcb->ipsec_input_chain_count > 0) {
			pcb->ipsec_input_chain_count--;
		}
		pcb->ipsec_input_chain = data->m_nextpkt;
		data->m_nextpkt = NULL;
		if (pcb->ipsec_input_chain == NULL) {
			pcb->ipsec_input_chain_last = NULL;
		}
		lck_mtx_unlock(&pcb->ipsec_input_chain_lock);

		size_t length = mbuf_pkthdr_len(data);

		if (length < sizeof(struct ip)) {
			// Flush data
			mbuf_freem(data);
			kern_pbufpool_free(rx_pp, rx_ph);
			STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
			STATS_INC(nifs, NETIF_STATS_DROP);
			os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: legacy decrypted packet length cannot hold IP %zu < %zu\n",
			    pcb->ipsec_ifp->if_xname, length, sizeof(struct ip));
			continue;
		}

		// Classify the address family from the IP version nibble;
		// af stays 0 for unknown versions (handled by the fragment
		// switch's default arm below, if fragmentation is needed).
		uint32_t af = 0;
		struct ip *ip = mtod(data, struct ip *);
		u_int ip_version = ip->ip_v;
		switch (ip_version) {
		case 4: {
			af = AF_INET;
			break;
		}
		case 6: {
			af = AF_INET6;
			break;
		}
		default: {
			os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: legacy unknown ip version %u\n",
			    pcb->ipsec_ifp->if_xname, ip_version);
			break;
		}
		}

		if (length > PP_BUF_SIZE_DEF(rx_pp) ||
		    (pcb->ipsec_frag_size_set && length > pcb->ipsec_input_frag_size)) {
			// We need to fragment to send up into the netif

			u_int32_t fragment_mtu = PP_BUF_SIZE_DEF(rx_pp);
			if (pcb->ipsec_frag_size_set &&
			    pcb->ipsec_input_frag_size < PP_BUF_SIZE_DEF(rx_pp)) {
				fragment_mtu = pcb->ipsec_input_frag_size;
			}

			mbuf_t fragment_chain = NULL;
			switch (af) {
			case AF_INET: {
				// ip_fragment expects the length in host order
				ip->ip_len = ntohs(ip->ip_len);

				// ip_fragment will modify the original data, don't free
				int fragment_error = ip_fragment(data, pcb->ipsec_ifp, fragment_mtu, TRUE);
				if (fragment_error == 0 && data != NULL) {
					fragment_chain = data;
				} else {
					STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
					STATS_INC(nifs, NETIF_STATS_DROP);
					os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: failed to fragment IPv4 packet of length %zu (%d)\n",
					    pcb->ipsec_ifp->if_xname, length, fragment_error);
				}
				break;
			}
			case AF_INET6: {
				if (length < sizeof(struct ip6_hdr)) {
					mbuf_freem(data);
					STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
					STATS_INC(nifs, NETIF_STATS_DROP);
					os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: failed to fragment IPv6 packet of length %zu < %zu\n",
					    pcb->ipsec_ifp->if_xname, length, sizeof(struct ip6_hdr));
				} else {
					// ip6_do_fragmentation will free the original data on success only
					struct ip6_hdr *ip6 = mtod(data, struct ip6_hdr *);

					int fragment_error = ip6_do_fragmentation(&data, 0, pcb->ipsec_ifp, sizeof(struct ip6_hdr),
					    ip6, NULL, fragment_mtu, ip6->ip6_nxt, htonl(ip6_randomid((uint64_t)data)));
					if (fragment_error == 0 && data != NULL) {
						fragment_chain = data;
					} else {
						mbuf_freem(data);
						STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
						STATS_INC(nifs, NETIF_STATS_DROP);
						os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: failed to fragment IPv6 packet of length %zu (%d)\n",
						    pcb->ipsec_ifp->if_xname, length, fragment_error);
					}
				}
				break;
			}
			default: {
				// Cannot fragment unknown families
				// NOTE(review): the log text below contains a typo
				// ("uknown"); left untouched since it is a runtime string.
				mbuf_freem(data);
				STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
				STATS_INC(nifs, NETIF_STATS_DROP);
				os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: uknown legacy decrypted packet length %zu > %u\n",
				    pcb->ipsec_ifp->if_xname, length, PP_BUF_SIZE_DEF(rx_pp));
				break;
			}
			}

			if (fragment_chain != NULL) {
				// Add fragments to chain before continuing
				lck_mtx_lock(&pcb->ipsec_input_chain_lock);
				if (pcb->ipsec_input_chain != NULL) {
					pcb->ipsec_input_chain_last->m_nextpkt = fragment_chain;
				} else {
					pcb->ipsec_input_chain = fragment_chain;
				}
				pcb->ipsec_input_chain_count++;
				while (fragment_chain->m_nextpkt) {
					VERIFY(fragment_chain != fragment_chain->m_nextpkt);
					fragment_chain = fragment_chain->m_nextpkt;
					pcb->ipsec_input_chain_count++;
				}
				pcb->ipsec_input_chain_last = fragment_chain;
				lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
			}

			// Make sure to free unused rx packet
			kern_pbufpool_free(rx_pp, rx_ph);

			continue;
		}

		mbuf_pkthdr_setrcvif(data, pcb->ipsec_ifp);

		// Fillout rx packet
		kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
		VERIFY(rx_buf != NULL);
		uint8_t *rx_baddr = ipsec_kern_buflet_to_buffer(rx_buf);
		VERIFY(rx_baddr != NULL);

		// Copy-in data from mbuf to buflet
		mbuf_copydata(data, 0, length, (void *)rx_baddr);
		kern_packet_clear_flow_uuid(rx_ph);         // Zero flow id

		// Finalize and attach the packet
		error = kern_buflet_set_data_offset(rx_buf, 0);
		VERIFY(error == 0);
		error = kern_buflet_set_data_length(rx_buf, (uint16_t)length);
		VERIFY(error == 0);
		error = kern_packet_set_headroom(rx_ph, 0);
		VERIFY(error == 0);
		error = kern_packet_finalize(rx_ph);
		VERIFY(error == 0);
		error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
		VERIFY(error == 0);

		STATS_INC(nifs, NETIF_STATS_RX_PACKETS);
		STATS_INC(nifs, NETIF_STATS_RX_COPY_MBUF);
		bpf_tap_packet_in(pcb->ipsec_ifp, DLT_RAW, rx_ph, NULL, 0);

		rx_ring_stats.kcrsi_slots_transferred++;
		rx_ring_stats.kcrsi_bytes_transferred += length;

		if (!pcb->ipsec_ext_ifdata_stats) {
			ifnet_stat_increment_in(pcb->ipsec_ifp, 1, (uint16_t)length, 0);
		}

		mbuf_freem(data);

		// Advance ring
		rx_pslot = rx_slot;
		rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
	}

	// Source 2: decrypt packets waiting on each kpipe TX ring.
	for (uint8_t ring_idx = 0; ring_idx < pcb->ipsec_kpipe_count; ring_idx++) {
		struct kern_channel_ring_stat_increment tx_ring_stats = {};
		kern_channel_slot_t tx_pslot = NULL;
		kern_channel_slot_t tx_slot = NULL;

		kern_channel_ring_t __single tx_ring = pcb->ipsec_kpipe_txring[ring_idx];
		if (tx_ring == NULL) {
			// Net-If TX ring not set up yet, nothing to read
			goto done;
		}

		// Unlock ipsec before entering ring
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);

		(void)kr_enter(tx_ring, TRUE);

		// Lock again after entering and validate
		lck_rw_lock_shared(&pcb->ipsec_pcb_lock);

		// The ring may have been replaced while the lock was dropped.
		if (tx_ring != pcb->ipsec_kpipe_txring[ring_idx]) {
			goto done;
		}

		tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
		if (tx_slot == NULL) {
			// Nothing to read, don't bother signalling
			goto done;
		}

		while (rx_slot != NULL && tx_slot != NULL) {
			errno_t error = 0;

			// Allocate rx packet
			kern_packet_t rx_ph = 0;
			error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
			if (__improbable(error != 0)) {
				STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_PKT);
				STATS_INC(nifs, NETIF_STATS_DROP);
				break;
			}

			kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);
			tx_pslot = tx_slot;
			tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
			if (tx_ph == 0) {
				kern_pbufpool_free(rx_pp, rx_ph);
				continue;
			}

			error = ipsec_transform_kpipe_pkt_to_netif_pkt(pcb,
			    &tx_ring_stats, nifs, tx_ph, rx_ph);
			if (error != 0) {
				// Failed to get decrypted packet
				kern_pbufpool_free(rx_pp, rx_ph);
				continue;
			}

			error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
			VERIFY(error == 0);

			STATS_INC(nifs, NETIF_STATS_RX_PACKETS);
			STATS_INC(nifs, NETIF_STATS_RX_COPY_DIRECT);

			bpf_tap_packet_in(pcb->ipsec_ifp, DLT_RAW, rx_ph, NULL, 0);

			rx_ring_stats.kcrsi_slots_transferred++;
			rx_ring_stats.kcrsi_bytes_transferred += kern_packet_get_data_length(rx_ph);

			if (!pcb->ipsec_ext_ifdata_stats) {
				ifnet_stat_increment_in(pcb->ipsec_ifp, 1,
				    kern_packet_get_data_length(rx_ph), 0);
			}

			rx_pslot = rx_slot;
			rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
		}

done:
		if (tx_pslot) {
			kern_channel_advance_slot(tx_ring, tx_pslot);
			kern_channel_increment_ring_net_stats(tx_ring, pcb->ipsec_ifp, &tx_ring_stats);
			(void)kern_channel_reclaim(tx_ring);
		}

		// Unlock first, then exit ring
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		if (tx_ring != NULL) {
			if (tx_pslot != NULL) {
				kern_channel_notify(tx_ring, 0);
			}
			kr_exit(tx_ring);
		}

		lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
	}

	if (rx_pslot) {
		kern_channel_advance_slot(rx_ring, rx_pslot);
		kern_channel_increment_ring_net_stats(rx_ring, pcb->ipsec_ifp, &rx_ring_stats);
	}


	lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);

	ipsec_data_move_end(pcb);
	return 0;
}
1799 
1800 static errno_t
ipsec_nexus_ifattach(struct ipsec_pcb * pcb,struct ifnet_init_eparams * init_params,struct ifnet ** ifp)1801 ipsec_nexus_ifattach(struct ipsec_pcb *pcb,
1802     struct ifnet_init_eparams *init_params,
1803     struct ifnet **ifp)
1804 {
1805 	errno_t err;
1806 	nexus_controller_t controller = kern_nexus_shared_controller();
1807 	struct kern_nexus_net_init net_init;
1808 	struct kern_pbufpool_init pp_init;
1809 
1810 	nexus_name_t provider_name;
1811 	snprintf((char *)provider_name, sizeof(provider_name),
1812 	    "com.apple.netif.%s", pcb->ipsec_if_xname);
1813 
1814 	struct kern_nexus_provider_init prov_init = {
1815 		.nxpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
1816 		.nxpi_flags = NXPIF_VIRTUAL_DEVICE,
1817 		.nxpi_pre_connect = ipsec_nexus_pre_connect,
1818 		.nxpi_connected = ipsec_nexus_connected,
1819 		.nxpi_pre_disconnect = ipsec_netif_pre_disconnect,
1820 		.nxpi_disconnected = ipsec_nexus_disconnected,
1821 		.nxpi_ring_init = ipsec_netif_ring_init,
1822 		.nxpi_ring_fini = ipsec_netif_ring_fini,
1823 		.nxpi_slot_init = NULL,
1824 		.nxpi_slot_fini = NULL,
1825 		.nxpi_sync_tx = ipsec_netif_sync_tx,
1826 		.nxpi_sync_rx = ipsec_netif_sync_rx,
1827 		.nxpi_tx_doorbell = ipsec_netif_tx_doorbell,
1828 	};
1829 
1830 	nexus_attr_t __single nxa = NULL;
1831 	err = kern_nexus_attr_create(&nxa);
1832 	IPSEC_IF_VERIFY(err == 0);
1833 	if (err != 0) {
1834 		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_attr_create failed: %d\n",
1835 		    __func__, err);
1836 		goto failed;
1837 	}
1838 
1839 	uint64_t slot_buffer_size = pcb->ipsec_slot_size;
1840 	err = kern_nexus_attr_set(nxa, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
1841 	VERIFY(err == 0);
1842 
1843 	// Reset ring size for netif nexus to limit memory usage
1844 	uint64_t ring_size = pcb->ipsec_netif_ring_size;
1845 	err = kern_nexus_attr_set(nxa, NEXUS_ATTR_TX_SLOTS, ring_size);
1846 	VERIFY(err == 0);
1847 	err = kern_nexus_attr_set(nxa, NEXUS_ATTR_RX_SLOTS, ring_size);
1848 	VERIFY(err == 0);
1849 
1850 	assert(err == 0);
1851 
1852 	if (ipsec_in_wmm_mode(pcb)) {
1853 		os_log(OS_LOG_DEFAULT, "%s: %s enabling wmm mode\n",
1854 		    __func__, pcb->ipsec_if_xname);
1855 
1856 		init_params->output_sched_model = IFNET_SCHED_MODEL_DRIVER_MANAGED;
1857 
1858 		err = kern_nexus_attr_set(nxa, NEXUS_ATTR_TX_RINGS,
1859 		    IPSEC_NETIF_WMM_TX_RING_COUNT);
1860 		VERIFY(err == 0);
1861 		err = kern_nexus_attr_set(nxa, NEXUS_ATTR_RX_RINGS,
1862 		    IPSEC_NETIF_WMM_RX_RING_COUNT);
1863 		VERIFY(err == 0);
1864 
1865 		err = kern_nexus_attr_set(nxa, NEXUS_ATTR_QMAP, NEXUS_QMAP_TYPE_WMM);
1866 		VERIFY(err == 0);
1867 	}
1868 
1869 	pcb->ipsec_netif_txring_size = ring_size;
1870 
1871 	bzero(&pp_init, sizeof(pp_init));
1872 	pp_init.kbi_version = KERN_PBUFPOOL_CURRENT_VERSION;
1873 	pp_init.kbi_flags |= KBIF_VIRTUAL_DEVICE;
1874 	// Note: we need more packets than can be held in the tx and rx rings because
1875 	// packets can also be in the AQM queue(s)
1876 	pp_init.kbi_packets = pcb->ipsec_netif_ring_size * (2 * pcb->ipsec_kpipe_count + 1);
1877 	pp_init.kbi_bufsize = pcb->ipsec_slot_size;
1878 	pp_init.kbi_buf_seg_size = IPSEC_IF_DEFAULT_BUF_SEG_SIZE;
1879 	pp_init.kbi_max_frags = 1;
1880 	(void) snprintf((char *)pp_init.kbi_name, sizeof(pp_init.kbi_name),
1881 	    "%s", provider_name);
1882 	pp_init.kbi_ctx = NULL;
1883 	pp_init.kbi_ctx_retain = NULL;
1884 	pp_init.kbi_ctx_release = NULL;
1885 
1886 	err = kern_pbufpool_create(&pp_init, &pcb->ipsec_netif_pp, NULL);
1887 	if (err != 0) {
1888 		os_log_error(OS_LOG_DEFAULT, "%s pbufbool create failed, error %d\n", __func__, err);
1889 		goto failed;
1890 	}
1891 
1892 	err = kern_nexus_controller_register_provider(controller,
1893 	    ipsec_nx_dom_prov,
1894 	    provider_name,
1895 	    &prov_init,
1896 	    sizeof(prov_init),
1897 	    nxa,
1898 	    &pcb->ipsec_nx.if_provider);
1899 	IPSEC_IF_VERIFY(err == 0);
1900 	if (err != 0) {
1901 		os_log_error(OS_LOG_DEFAULT, "%s register provider failed, error %d\n",
1902 		    __func__, err);
1903 		goto failed;
1904 	}
1905 
1906 	bzero(&net_init, sizeof(net_init));
1907 	net_init.nxneti_version = KERN_NEXUS_NET_CURRENT_VERSION;
1908 	net_init.nxneti_flags = 0;
1909 	net_init.nxneti_eparams = init_params;
1910 	net_init.nxneti_lladdr = NULL;
1911 	net_init.nxneti_prepare = ipsec_netif_prepare;
1912 	net_init.nxneti_rx_pbufpool = pcb->ipsec_netif_pp;
1913 	net_init.nxneti_tx_pbufpool = pcb->ipsec_netif_pp;
1914 	err = kern_nexus_controller_alloc_net_provider_instance(controller,
1915 	    pcb->ipsec_nx.if_provider,
1916 	    pcb,
1917 	    NULL,
1918 	    &pcb->ipsec_nx.if_instance,
1919 	    &net_init,
1920 	    ifp);
1921 	IPSEC_IF_VERIFY(err == 0);
1922 	if (err != 0) {
1923 		os_log_error(OS_LOG_DEFAULT, "%s alloc_net_provider_instance failed, %d\n",
1924 		    __func__, err);
1925 		kern_nexus_controller_deregister_provider(controller,
1926 		    pcb->ipsec_nx.if_provider);
1927 		uuid_clear(pcb->ipsec_nx.if_provider);
1928 		goto failed;
1929 	}
1930 
1931 failed:
1932 	if (nxa) {
1933 		kern_nexus_attr_destroy(nxa);
1934 	}
1935 	if (err && pcb->ipsec_netif_pp != NULL) {
1936 		kern_pbufpool_destroy(pcb->ipsec_netif_pp);
1937 		pcb->ipsec_netif_pp = NULL;
1938 	}
1939 	return err;
1940 }
1941 
1942 static void
ipsec_detach_provider_and_instance(uuid_t provider,uuid_t instance)1943 ipsec_detach_provider_and_instance(uuid_t provider, uuid_t instance)
1944 {
1945 	nexus_controller_t controller = kern_nexus_shared_controller();
1946 	errno_t err;
1947 
1948 	if (!uuid_is_null(instance)) {
1949 		err = kern_nexus_controller_free_provider_instance(controller,
1950 		    instance);
1951 		if (err != 0) {
1952 			os_log_error(OS_LOG_DEFAULT, "%s free_provider_instance failed %d\n",
1953 			    __func__, err);
1954 		}
1955 		uuid_clear(instance);
1956 	}
1957 	if (!uuid_is_null(provider)) {
1958 		err = kern_nexus_controller_deregister_provider(controller,
1959 		    provider);
1960 		if (err != 0) {
1961 			os_log_error(OS_LOG_DEFAULT, "%s deregister_provider %d\n", __func__, err);
1962 		}
1963 		uuid_clear(provider);
1964 	}
1965 	return;
1966 }
1967 
/*
 * Tear down all nexus state for an ipsec interface: detach the
 * flowswitch from the netif device port, free the flowswitch and netif
 * provider/instance pairs, destroy the netif packet pool, and zero the
 * cached nexus UUIDs in the pcb.
 */
static void
ipsec_nexus_detach(struct ipsec_pcb *pcb)
{
	ipsec_nx_t nx = &pcb->ipsec_nx;
	nexus_controller_t controller = kern_nexus_shared_controller();
	errno_t err;

	// Detach the flowswitch from the netif device port first
	if (!uuid_is_null(nx->fsw_device)) {
		err = kern_nexus_ifdetach(controller,
		    nx->fsw_instance,
		    nx->fsw_device);
		if (err != 0) {
			os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_ifdetach ms device failed %d\n",
			    __func__, err);
		}
	}

	// Free the flowswitch pair, then the netif pair
	ipsec_detach_provider_and_instance(nx->fsw_provider,
	    nx->fsw_instance);
	ipsec_detach_provider_and_instance(nx->if_provider,
	    nx->if_instance);

	if (pcb->ipsec_netif_pp != NULL) {
		kern_pbufpool_destroy(pcb->ipsec_netif_pp);
		pcb->ipsec_netif_pp = NULL;
	}
	// Clear all remaining cached nexus UUIDs
	memset(nx, 0, sizeof(*nx));
}
1996 
/*
 * Register a flowswitch nexus provider named "com.apple.<type_name>.<ifname>"
 * and allocate one instance of it.  On success, *provider and *instance hold
 * the new UUIDs.  On failure an errno is returned and both UUIDs are left
 * cleared (the provider is deregistered if instance allocation fails).
 */
static errno_t
ipsec_create_fs_provider_and_instance(struct ipsec_pcb *pcb,
    const char *type_name,
    const char *ifname,
    uuid_t *provider, uuid_t *instance)
{
	nexus_attr_t __single attr = NULL;
	nexus_controller_t controller = kern_nexus_shared_controller();
	uuid_t dom_prov;
	errno_t err;
	struct kern_nexus_init init;
	nexus_name_t    provider_name;

	err = kern_nexus_get_default_domain_provider(NEXUS_TYPE_FLOW_SWITCH,
	    &dom_prov);
	IPSEC_IF_VERIFY(err == 0);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s can't get %s provider, error %d\n",
		    __func__, type_name, err);
		goto failed;
	}

	err = kern_nexus_attr_create(&attr);
	IPSEC_IF_VERIFY(err == 0);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_attr_create failed: %d\n",
		    __func__, err);
		goto failed;
	}

	uint64_t slot_buffer_size = pcb->ipsec_slot_size;
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
	VERIFY(err == 0);

	// Reset ring size for flowswitch nexus to limit memory usage. Larger RX than netif.
	uint64_t tx_ring_size = pcb->ipsec_tx_fsw_ring_size;
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_TX_SLOTS, tx_ring_size);
	VERIFY(err == 0);
	uint64_t rx_ring_size = pcb->ipsec_rx_fsw_ring_size;
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_RX_SLOTS, rx_ring_size);
	VERIFY(err == 0);
	/*
	 * Configure flowswitch to use super-packet (multi-buflet).
	 * This allows flowswitch to perform intra-stack packet aggregation.
	 */
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_MAX_FRAGS,
	    NX_FSW_TCP_RX_AGG_ENABLED() ? NX_PBUF_FRAGS_MAX : 1);
	VERIFY(err == 0);

	snprintf((char *)provider_name, sizeof(provider_name),
	    "com.apple.%s.%s", type_name, ifname);
	err = kern_nexus_controller_register_provider(controller,
	    dom_prov,
	    provider_name,
	    NULL,
	    0,
	    attr,
	    provider);
	// attr is no longer needed regardless of outcome; destroy it now
	// so the failed: path never has to
	kern_nexus_attr_destroy(attr);
	attr = NULL;
	IPSEC_IF_VERIFY(err == 0);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s register %s provider failed, error %d\n",
		    __func__, type_name, err);
		goto failed;
	}
	bzero(&init, sizeof(init));
	init.nxi_version = KERN_NEXUS_CURRENT_VERSION;
	err = kern_nexus_controller_alloc_provider_instance(controller,
	    *provider,
	    NULL, NULL,
	    instance, &init);
	IPSEC_IF_VERIFY(err == 0);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s alloc_provider_instance %s failed, %d\n",
		    __func__, type_name, err);
		// Roll back the provider registration so nothing leaks
		kern_nexus_controller_deregister_provider(controller,
		    *provider);
		uuid_clear(*provider);
	}
failed:
	return err;
}
2080 
/*
 * Create a flowswitch nexus for this ipsec interface and attach it to
 * the netif device port, then cache the flowswitch's netagent UUID in
 * the pcb for later use.
 *
 * On failure, all nexus state is torn down and the ifnet is detached;
 * a failed ifnet_detach() is fatal (panic).  The pcb itself is freed
 * later via the interface's detach callback.
 */
static errno_t
ipsec_flowswitch_attach(struct ipsec_pcb *pcb)
{
	nexus_controller_t controller = kern_nexus_shared_controller();
	errno_t err = 0;
	ipsec_nx_t nx = &pcb->ipsec_nx;

	// Allocate flowswitch
	err = ipsec_create_fs_provider_and_instance(pcb,
	    "flowswitch",
	    pcb->ipsec_ifp->if_xname,
	    &nx->fsw_provider,
	    &nx->fsw_instance);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s: failed to create bridge provider and instance\n",
		    __func__);
		goto failed;
	}

	// Attach flowswitch to device port
	err = kern_nexus_ifattach(controller, nx->fsw_instance,
	    NULL, nx->if_instance,
	    FALSE, &nx->fsw_device);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s kern_nexus_ifattach ms device %d\n", __func__, err);
		goto failed;
	}

	// Extract the agent UUID and save for later
	struct kern_nexus *flowswitch_nx = nx_find(nx->fsw_instance, false);
	if (flowswitch_nx != NULL) {
		struct nx_flowswitch *flowswitch = NX_FSW_PRIVATE(flowswitch_nx);
		if (flowswitch != NULL) {
			// Copy the agent UUID under the flowswitch read lock
			FSW_RLOCK(flowswitch);
			uuid_copy(nx->fsw_agent, flowswitch->fsw_agent_uuid);
			FSW_UNLOCK(flowswitch);
		} else {
			os_log_error(OS_LOG_DEFAULT, "ipsec_flowswitch_attach - flowswitch is NULL\n");
		}
		// Drop the reference taken by nx_find()
		nx_release(flowswitch_nx);
	} else {
		os_log_error(OS_LOG_DEFAULT, "ipsec_flowswitch_attach - unable to find flowswitch nexus\n");
	}

	return 0;

failed:
	ipsec_nexus_detach(pcb);

	errno_t detach_error = 0;
	if ((detach_error = ifnet_detach(pcb->ipsec_ifp)) != 0) {
		panic("ipsec_flowswitch_attach - ifnet_detach failed: %d", detach_error);
		/* NOT REACHED */
	}

	return err;
}
2138 
2139 #pragma mark Kernel Pipe Nexus
2140 
/*
 * Register the shared "com.apple.nexus.ipsec.kpipe" nexus provider.
 *
 * The controller (ipsec_ncd) and the provider registration are shared
 * by all ipsec interfaces and refcounted via ipsec_ncd_refcount under
 * ipsec_lock: only the first caller performs the actual setup, later
 * callers just bump the refcount.  Ring sizes fall back from the
 * pcb-specific kpipe sizes to the netif ring size to the
 * if_ipsec_ring_size default.  On failure all shared state is torn
 * down and the refcount reset to zero.
 */
static errno_t
ipsec_register_kernel_pipe_nexus(struct ipsec_pcb *pcb)
{
	nexus_attr_t __single nxa = NULL;
	nexus_name_t provider_name = "com.apple.nexus.ipsec.kpipe";
	errno_t result;

	lck_mtx_lock(&ipsec_lock);
	if (ipsec_ncd_refcount++) {
		// Already registered by an earlier caller; just take a reference
		lck_mtx_unlock(&ipsec_lock);
		return 0;
	}

	result = kern_nexus_controller_create(&ipsec_ncd);
	if (result) {
		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_controller_create failed: %d\n",
		    __FUNCTION__, result);
		goto done;
	}

	uuid_t dom_prov;
	result = kern_nexus_get_default_domain_provider(
		NEXUS_TYPE_KERNEL_PIPE, &dom_prov);
	if (result) {
		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_get_default_domain_provider failed: %d\n",
		    __FUNCTION__, result);
		goto done;
	}

	struct kern_nexus_provider_init prov_init = {
		.nxpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
		.nxpi_flags = NXPIF_VIRTUAL_DEVICE,
		.nxpi_pre_connect = ipsec_nexus_pre_connect,
		.nxpi_connected = ipsec_nexus_connected,
		.nxpi_pre_disconnect = ipsec_nexus_pre_disconnect,
		.nxpi_disconnected = ipsec_nexus_disconnected,
		.nxpi_ring_init = ipsec_kpipe_ring_init,
		.nxpi_ring_fini = ipsec_kpipe_ring_fini,
		.nxpi_slot_init = NULL,
		.nxpi_slot_fini = NULL,
		.nxpi_sync_tx = ipsec_kpipe_sync_tx,
		.nxpi_sync_rx = ipsec_kpipe_sync_rx,
		.nxpi_tx_doorbell = NULL,
	};

	result = kern_nexus_attr_create(&nxa);
	if (result) {
		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_attr_create failed: %d\n",
		    __FUNCTION__, result);
		goto done;
	}

	uint64_t slot_buffer_size = IPSEC_IF_DEFAULT_SLOT_SIZE;
	result = kern_nexus_attr_set(nxa, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
	VERIFY(result == 0);

	// Reset ring size for kernel pipe nexus to limit memory usage
	// Note: It's better to have less on slots on the kpipe TX ring than the netif
	// so back pressure is applied at the AQM layer
	uint64_t ring_size =
	    pcb->ipsec_kpipe_tx_ring_size != 0 ? pcb->ipsec_kpipe_tx_ring_size :
	    pcb->ipsec_netif_ring_size != 0 ? pcb->ipsec_netif_ring_size :
	    if_ipsec_ring_size;
	result = kern_nexus_attr_set(nxa, NEXUS_ATTR_TX_SLOTS, ring_size);
	VERIFY(result == 0);

	ring_size =
	    pcb->ipsec_kpipe_rx_ring_size != 0 ? pcb->ipsec_kpipe_rx_ring_size :
	    pcb->ipsec_netif_ring_size != 0 ? pcb->ipsec_netif_ring_size :
	    if_ipsec_ring_size;
	result = kern_nexus_attr_set(nxa, NEXUS_ATTR_RX_SLOTS, ring_size);
	VERIFY(result == 0);

	result = kern_nexus_controller_register_provider(ipsec_ncd,
	    dom_prov,
	    provider_name,
	    &prov_init,
	    sizeof(prov_init),
	    nxa,
	    &ipsec_kpipe_uuid);
	if (result) {
		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_controller_register_provider failed: %d\n",
		    __FUNCTION__, result);
		goto done;
	}

done:
	// The attribute object is temporary in all cases
	if (nxa) {
		kern_nexus_attr_destroy(nxa);
	}

	// On any failure, undo the shared controller and drop our reference
	if (result) {
		if (ipsec_ncd) {
			kern_nexus_controller_destroy(ipsec_ncd);
			ipsec_ncd = NULL;
		}
		ipsec_ncd_refcount = 0;
	}

	lck_mtx_unlock(&ipsec_lock);

	return result;
}
2244 
2245 static void
ipsec_unregister_kernel_pipe_nexus(void)2246 ipsec_unregister_kernel_pipe_nexus(void)
2247 {
2248 	lck_mtx_lock(&ipsec_lock);
2249 
2250 	VERIFY(ipsec_ncd_refcount > 0);
2251 
2252 	if (--ipsec_ncd_refcount == 0) {
2253 		kern_nexus_controller_destroy(ipsec_ncd);
2254 		ipsec_ncd = NULL;
2255 	}
2256 
2257 	lck_mtx_unlock(&ipsec_lock);
2258 }
2259 
/* This structure only holds onto kpipe channels that need to be
 * freed in the future, but are cleared from the pcb under lock
 */
struct ipsec_detached_channels {
	int count;                              /* number of valid entries in uuids[]; 0 means nothing to free */
	kern_pbufpool_t pp;                     /* kpipe packet pool to destroy; NULL when count == 0 */
	uuid_t uuids[IPSEC_IF_MAX_RING_COUNT];  /* instance UUIDs of the detached kpipe channels */
};
2268 
/*
 * Move the pcb's kpipe channel UUIDs and packet pool into 'dc' so they
 * can be freed later without holding the pcb lock.  Clears
 * IPSEC_FLAGS_KPIPE_ALLOCATED.  Caller must hold the pcb lock
 * exclusively.  If kpipes were never allocated, dc->count is set to 0.
 */
static void
ipsec_detach_channels(struct ipsec_pcb *pcb, struct ipsec_detached_channels *dc)
{
	LCK_RW_ASSERT(&pcb->ipsec_pcb_lock, LCK_RW_TYPE_EXCLUSIVE);

	if (!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
		// Nothing allocated: every slot must already be clear
		for (int i = 0; i < IPSEC_IF_MAX_RING_COUNT; i++) {
			VERIFY(uuid_is_null(pcb->ipsec_kpipe_uuid[i]));
		}
		dc->count = 0;
		return;
	}

	dc->count = pcb->ipsec_kpipe_count;

	VERIFY(dc->count >= 0);
	VERIFY(dc->count <= IPSEC_IF_MAX_RING_COUNT);

	// Transfer each allocated channel UUID out of the pcb
	for (int i = 0; i < dc->count; i++) {
		VERIFY(!uuid_is_null(pcb->ipsec_kpipe_uuid[i]));
		uuid_copy(dc->uuids[i], pcb->ipsec_kpipe_uuid[i]);
		uuid_clear(pcb->ipsec_kpipe_uuid[i]);
	}
	// Remaining slots beyond the count must be unused
	for (int i = dc->count; i < IPSEC_IF_MAX_RING_COUNT; i++) {
		VERIFY(uuid_is_null(pcb->ipsec_kpipe_uuid[i]));
	}

	// Pool existence must match whether any channels were allocated
	if (dc->count) {
		VERIFY(pcb->ipsec_kpipe_pp);
	} else {
		VERIFY(!pcb->ipsec_kpipe_pp);
	}

	// Transfer pool ownership to dc
	dc->pp = pcb->ipsec_kpipe_pp;

	pcb->ipsec_kpipe_pp = NULL;

	ipsec_flag_clr(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED);
}
2308 
2309 static void
ipsec_free_channels(struct ipsec_detached_channels * dc)2310 ipsec_free_channels(struct ipsec_detached_channels *dc)
2311 {
2312 	if (!dc->count) {
2313 		return;
2314 	}
2315 
2316 	for (int i = 0; i < dc->count; i++) {
2317 		errno_t result;
2318 		result = kern_nexus_controller_free_provider_instance(ipsec_ncd,
2319 		    dc->uuids[i]);
2320 		VERIFY(!result);
2321 	}
2322 
2323 	VERIFY(dc->pp);
2324 	kern_pbufpool_destroy(dc->pp);
2325 
2326 	ipsec_unregister_kernel_pipe_nexus();
2327 
2328 	memset(dc, 0, sizeof(*dc));
2329 }
2330 
2331 static errno_t
ipsec_enable_channel(struct ipsec_pcb * pcb,struct proc * proc)2332 ipsec_enable_channel(struct ipsec_pcb *pcb, struct proc *proc)
2333 {
2334 	struct kern_nexus_init init;
2335 	struct kern_pbufpool_init pp_init;
2336 	uuid_t uuid_null = {0};
2337 	errno_t result;
2338 
2339 	kauth_cred_t cred = kauth_cred_get();
2340 	result = priv_check_cred(cred, PRIV_SKYWALK_REGISTER_KERNEL_PIPE, 0);
2341 	if (result) {
2342 		return result;
2343 	}
2344 
2345 	VERIFY(pcb->ipsec_kpipe_count);
2346 	VERIFY(!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED));
2347 
2348 	result = ipsec_register_kernel_pipe_nexus(pcb);
2349 
2350 	lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
2351 
2352 	if (result) {
2353 		os_log_error(OS_LOG_DEFAULT, "%s: %s failed to register kernel pipe nexus\n",
2354 		    __func__, pcb->ipsec_if_xname);
2355 		goto done;
2356 	}
2357 
2358 	VERIFY(ipsec_ncd);
2359 
2360 	bzero(&pp_init, sizeof(pp_init));
2361 	pp_init.kbi_version = KERN_PBUFPOOL_CURRENT_VERSION;
2362 	pp_init.kbi_flags |= KBIF_VIRTUAL_DEVICE;
2363 	// Note: We only needs are many packets as can be held in the tx and rx rings
2364 	pp_init.kbi_packets = pcb->ipsec_netif_ring_size * 2 * pcb->ipsec_kpipe_count;
2365 	pp_init.kbi_bufsize = pcb->ipsec_slot_size;
2366 	pp_init.kbi_buf_seg_size = IPSEC_IF_DEFAULT_BUF_SEG_SIZE;
2367 	pp_init.kbi_max_frags = 1;
2368 	pp_init.kbi_flags |= KBIF_QUANTUM;
2369 	(void) snprintf((char *)pp_init.kbi_name, sizeof(pp_init.kbi_name),
2370 	    "com.apple.kpipe.%s", pcb->ipsec_if_xname);
2371 	pp_init.kbi_ctx = NULL;
2372 	pp_init.kbi_ctx_retain = NULL;
2373 	pp_init.kbi_ctx_release = NULL;
2374 
2375 	result = kern_pbufpool_create(&pp_init, &pcb->ipsec_kpipe_pp,
2376 	    NULL);
2377 	if (result != 0) {
2378 		os_log_error(OS_LOG_DEFAULT, "%s: %s pbufbool create failed, error %d\n",
2379 		    __func__, pcb->ipsec_if_xname, result);
2380 		goto done;
2381 	}
2382 
2383 	bzero(&init, sizeof(init));
2384 	init.nxi_version = KERN_NEXUS_CURRENT_VERSION;
2385 	init.nxi_tx_pbufpool = pcb->ipsec_kpipe_pp;
2386 
2387 	for (unsigned int i = 0; i < pcb->ipsec_kpipe_count; i++) {
2388 		VERIFY(uuid_is_null(pcb->ipsec_kpipe_uuid[i]));
2389 		result = kern_nexus_controller_alloc_provider_instance(ipsec_ncd,
2390 		    ipsec_kpipe_uuid, pcb, NULL, &pcb->ipsec_kpipe_uuid[i], &init);
2391 
2392 		if (result == 0) {
2393 			nexus_port_t port = NEXUS_PORT_KERNEL_PIPE_CLIENT;
2394 			const bool has_proc_uuid = !uuid_is_null(pcb->ipsec_kpipe_proc_uuid);
2395 			pid_t pid = pcb->ipsec_kpipe_pid;
2396 			if (!pid && !has_proc_uuid) {
2397 				pid = proc_pid(proc);
2398 			}
2399 			result = kern_nexus_controller_bind_provider_instance(ipsec_ncd,
2400 			    (const uint8_t *)pcb->ipsec_kpipe_uuid[i], &port,
2401 			    pid, has_proc_uuid ? pcb->ipsec_kpipe_proc_uuid : uuid_null, NULL,
2402 			    0, has_proc_uuid ? NEXUS_BIND_EXEC_UUID:NEXUS_BIND_PID);
2403 		}
2404 
2405 		if (result) {
2406 			/* Unwind all of them on error */
2407 			for (int j = 0; j < IPSEC_IF_MAX_RING_COUNT; j++) {
2408 				if (!uuid_is_null(pcb->ipsec_kpipe_uuid[j])) {
2409 					kern_nexus_controller_free_provider_instance(ipsec_ncd,
2410 					    pcb->ipsec_kpipe_uuid[j]);
2411 					uuid_clear(pcb->ipsec_kpipe_uuid[j]);
2412 				}
2413 			}
2414 			goto done;
2415 		}
2416 	}
2417 
2418 done:
2419 	lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
2420 
2421 	if (result) {
2422 		if (pcb->ipsec_kpipe_pp != NULL) {
2423 			kern_pbufpool_destroy(pcb->ipsec_kpipe_pp);
2424 			pcb->ipsec_kpipe_pp = NULL;
2425 		}
2426 		ipsec_unregister_kernel_pipe_nexus();
2427 	} else {
2428 		ipsec_flag_set(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED);
2429 	}
2430 
2431 	return result;
2432 }
2433 
2434 #endif // IPSEC_NEXUS
2435 
2436 
2437 /* Kernel control functions */
2438 
2439 static inline int
ipsec_find_by_unit(u_int32_t unit)2440 ipsec_find_by_unit(u_int32_t unit)
2441 {
2442 	struct ipsec_pcb *next_pcb = NULL;
2443 	int found = 0;
2444 
2445 	TAILQ_FOREACH(next_pcb, &ipsec_head, ipsec_chain) {
2446 		if (next_pcb->ipsec_unit == unit) {
2447 			found = 1;
2448 			break;
2449 		}
2450 	}
2451 
2452 	return found;
2453 }
2454 
/*
 * Free an ipsec pcb: release any queued input mbufs, destroy the pcb's
 * locks, unlink it from the global ipsec_head list, and return it to
 * the zone.  'locked' indicates whether the caller already holds
 * ipsec_lock; if not, it is taken just around the list removal.
 */
static inline void
ipsec_free_pcb(struct ipsec_pcb *pcb, bool locked)
{
#if IPSEC_NEXUS
	// Drop any mbufs still queued on the input chain
	mbuf_freem_list(pcb->ipsec_input_chain);
	pcb->ipsec_input_chain_count = 0;
	lck_mtx_destroy(&pcb->ipsec_input_chain_lock, &ipsec_lck_grp);
	lck_mtx_destroy(&pcb->ipsec_kpipe_encrypt_lock, &ipsec_lck_grp);
	lck_mtx_destroy(&pcb->ipsec_kpipe_decrypt_lock, &ipsec_lck_grp);
#endif // IPSEC_NEXUS
	lck_mtx_destroy(&pcb->ipsec_pcb_data_move_lock, &ipsec_lck_grp);
	lck_rw_destroy(&pcb->ipsec_pcb_lock, &ipsec_lck_grp);
	// Take ipsec_lock for the list removal unless the caller holds it
	if (!locked) {
		lck_mtx_lock(&ipsec_lock);
	}
	TAILQ_REMOVE(&ipsec_head, pcb, ipsec_chain);
	if (!locked) {
		lck_mtx_unlock(&ipsec_lock);
	}
	zfree(ipsec_pcb_zone, pcb);
}
2476 
/*
 * Allocate a new ipsec pcb and pick its control unit and unique
 * interface id.  If *unit is 0, the first free unit is chosen;
 * otherwise the requested unit must be unused (EBUSY if taken).  The
 * pcb is inserted into ipsec_head so that the list stays ordered by
 * unique id, and returned to the caller via *unitinfo.
 */
static errno_t
ipsec_ctl_setup(u_int32_t *unit, void **unitinfo)
{
	if (unit == NULL || unitinfo == NULL) {
		return EINVAL;
	}

	lck_mtx_lock(&ipsec_lock);

	/* Find next available unit */
	if (*unit == 0) {
		*unit = 1;
		while (*unit != ctl_maxunit) {
			if (ipsec_find_by_unit(*unit)) {
				(*unit)++;
			} else {
				break;
			}
		}
		if (*unit == ctl_maxunit) {
			// Every unit number is taken
			lck_mtx_unlock(&ipsec_lock);
			return EBUSY;
		}
	} else if (ipsec_find_by_unit(*unit)) {
		lck_mtx_unlock(&ipsec_lock);
		return EBUSY;
	}

	/* Find some open interface id */
	u_int32_t chosen_unique_id = 1;
	struct ipsec_pcb *next_pcb = TAILQ_LAST(&ipsec_head, ipsec_list);
	if (next_pcb != NULL) {
		/* List was not empty, add one to the last item */
		chosen_unique_id = next_pcb->ipsec_unique_id + 1;
		next_pcb = NULL;

		/*
		 * If this wrapped the id number, start looking at
		 * the front of the list for an unused id.
		 */
		if (chosen_unique_id == 0) {
			/* Find the next unused ID */
			chosen_unique_id = 1;
			TAILQ_FOREACH(next_pcb, &ipsec_head, ipsec_chain) {
				if (next_pcb->ipsec_unique_id > chosen_unique_id) {
					/* We found a gap */
					break;
				}

				chosen_unique_id = next_pcb->ipsec_unique_id + 1;
			}
		}
	}

	struct ipsec_pcb *pcb = zalloc_flags(ipsec_pcb_zone, Z_WAITOK | Z_ZERO);

	*unitinfo = pcb;
	pcb->ipsec_unit = *unit;
	pcb->ipsec_unique_id = chosen_unique_id;

	// Insert before the gap (keeps the list sorted by unique id),
	// or at the tail when no gap was found
	if (next_pcb != NULL) {
		TAILQ_INSERT_BEFORE(next_pcb, pcb, ipsec_chain);
	} else {
		TAILQ_INSERT_TAIL(&ipsec_head, pcb, ipsec_chain);
	}

	lck_mtx_unlock(&ipsec_lock);

	return 0;
}
2547 
/*
 * Kernel-control bind handler: initialize the pcb for the control
 * socket.  Creates the pcb via ipsec_ctl_setup() if it does not exist
 * yet, then records the control ref/unit, sets defaults, and creates
 * the pcb's locks.  A second bind on the same pcb returns EINVAL.
 */
static errno_t
ipsec_ctl_bind(kern_ctl_ref kctlref,
    struct sockaddr_ctl *sac,
    void **unitinfo)
{
	// Create the pcb on first contact if setup was not called yet
	if (*unitinfo == NULL) {
		u_int32_t unit = 0;
		(void)ipsec_ctl_setup(&unit, unitinfo);
	}

	struct ipsec_pcb *pcb = (struct ipsec_pcb *)*unitinfo;
	if (pcb == NULL) {
		return EINVAL;
	}

	if (pcb->ipsec_ctlref != NULL) {
		// Return if bind was already called
		return EINVAL;
	}

	/* Setup the protocol control block */
	pcb->ipsec_ctlref = kctlref;
	pcb->ipsec_unit = sac->sc_unit;
	pcb->ipsec_output_service_class = MBUF_SC_OAM;

#if IPSEC_NEXUS
	// Defaults; may be overridden by control-socket options before connect
	pcb->ipsec_use_netif = false;
	pcb->ipsec_slot_size = IPSEC_IF_DEFAULT_SLOT_SIZE;
	pcb->ipsec_netif_ring_size = if_ipsec_ring_size;
	pcb->ipsec_tx_fsw_ring_size = if_ipsec_tx_fsw_ring_size;
	pcb->ipsec_rx_fsw_ring_size = if_ipsec_rx_fsw_ring_size;
#endif // IPSEC_NEXUS

	lck_rw_init(&pcb->ipsec_pcb_lock, &ipsec_lck_grp, &ipsec_lck_attr);
	lck_mtx_init(&pcb->ipsec_pcb_data_move_lock, &ipsec_lck_grp, &ipsec_lck_attr);
#if IPSEC_NEXUS
	pcb->ipsec_input_chain_count = 0;
	lck_mtx_init(&pcb->ipsec_input_chain_lock, &ipsec_lck_grp, &ipsec_lck_attr);
	lck_mtx_init(&pcb->ipsec_kpipe_encrypt_lock, &ipsec_lck_grp, &ipsec_lck_attr);
	lck_mtx_init(&pcb->ipsec_kpipe_decrypt_lock, &ipsec_lck_grp, &ipsec_lck_attr);
#endif // IPSEC_NEXUS

	return 0;
}
2592 
/*
 * Kernel-control connect handler: create and attach the ipsec ifnet.
 *
 * Ensures the pcb is set up and bound, then creates the interface
 * either as a Skywalk-native netif (with optional kpipe channels and a
 * flowswitch) or as a classic ifnet with a start callback.  On most
 * failures the pcb is freed and *unitinfo cleared; after a successful
 * nexus attach, cleanup is instead deferred to ipsec_detached().
 */
static errno_t
ipsec_ctl_connect(kern_ctl_ref kctlref,
    struct sockaddr_ctl *sac,
    void **unitinfo)
{
	struct ifnet_init_eparams ipsec_init = {};
	errno_t result = 0;

	if (*unitinfo == NULL) {
		(void)ipsec_ctl_bind(kctlref, sac, unitinfo);
	}

	struct ipsec_pcb *__single pcb = *unitinfo;
	if (pcb == NULL) {
		return EINVAL;
	}

	/* Handle case where ipsec_ctl_setup() was called, but ipsec_ctl_bind() was not */
	if (pcb->ipsec_ctlref == NULL) {
		(void)ipsec_ctl_bind(kctlref, sac, unitinfo);
	}

	// Interface names are zero-based; unit numbers are one-based
	snprintf(pcb->ipsec_if_xname, sizeof(pcb->ipsec_if_xname), "ipsec%d", pcb->ipsec_unit - 1);
	snprintf(pcb->ipsec_unique_name, sizeof(pcb->ipsec_unique_name), "ipsecid%d", pcb->ipsec_unique_id - 1);
	os_log(OS_LOG_DEFAULT, "ipsec_ctl_connect: creating interface %s (id %s)\n", pcb->ipsec_if_xname, pcb->ipsec_unique_name);

	/* Create the interface */
	bzero(&ipsec_init, sizeof(ipsec_init));
	ipsec_init.ver = IFNET_INIT_CURRENT_VERSION;
	ipsec_init.len = sizeof(ipsec_init);

#if IPSEC_NEXUS
	if (pcb->ipsec_use_netif) {
		ipsec_init.flags = (IFNET_INIT_SKYWALK_NATIVE | IFNET_INIT_NX_NOAUTO);
	} else
#endif // IPSEC_NEXUS
	{
		// Classic path: packets are driven through the start callback
		ipsec_init.flags = IFNET_INIT_NX_NOAUTO;
		ipsec_init.start = ipsec_start;
	}
	ipsec_init.name = "ipsec";
	ipsec_init.unit = pcb->ipsec_unit - 1;
	ipsec_init.uniqueid = pcb->ipsec_unique_name;
	ipsec_init.uniqueid_len = (uint32_t)strbuflen(pcb->ipsec_unique_name,
	    sizeof(pcb->ipsec_unique_name));
	ipsec_init.family = IFNET_FAMILY_IPSEC;
	ipsec_init.type = IFT_OTHER;
	ipsec_init.demux = ipsec_demux;
	ipsec_init.add_proto = ipsec_add_proto;
	ipsec_init.del_proto = ipsec_del_proto;
	ipsec_init.softc = pcb;
	ipsec_init.ioctl = ipsec_ioctl;
	ipsec_init.free = ipsec_detached;

#if IPSEC_NEXUS
	/* We don't support kpipes without a netif */
	if (pcb->ipsec_kpipe_count && !pcb->ipsec_use_netif) {
		result = ENOTSUP;
		os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - kpipe requires netif: failed %d\n", result);
		ipsec_free_pcb(pcb, false);
		*unitinfo = NULL;
		return result;
	}

	if (if_ipsec_debug != 0) {
		printf("%s: %s%d use_netif %d kpipe_count %d slot_size %u ring_size %u "
		    "kpipe_tx_ring_size %u kpipe_rx_ring_size %u\n",
		    __func__,
		    ipsec_init.name, ipsec_init.unit,
		    pcb->ipsec_use_netif,
		    pcb->ipsec_kpipe_count,
		    pcb->ipsec_slot_size,
		    pcb->ipsec_netif_ring_size,
		    pcb->ipsec_kpipe_tx_ring_size,
		    pcb->ipsec_kpipe_rx_ring_size);
	}
	if (pcb->ipsec_use_netif) {
		// Skywalk-native path: kpipes (optional), netif, then flowswitch
		if (pcb->ipsec_kpipe_count) {
			result = ipsec_enable_channel(pcb, current_proc());
			if (result) {
				os_log_error(OS_LOG_DEFAULT, "%s: %s failed to enable channels\n",
				    __func__, pcb->ipsec_if_xname);
				ipsec_free_pcb(pcb, false);
				*unitinfo = NULL;
				return result;
			}
		}

		result = ipsec_nexus_ifattach(pcb, &ipsec_init, &pcb->ipsec_ifp);
		if (result != 0) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - ipsec_nexus_ifattach failed: %d\n", result);
			ipsec_free_pcb(pcb, false);
			*unitinfo = NULL;
			return result;
		}

		result = ipsec_flowswitch_attach(pcb);
		if (result != 0) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - ipsec_flowswitch_attach failed: %d\n", result);
			// Do not call ipsec_free_pcb(). We will be attached already, and will be freed later
			// in ipsec_detached().
			*unitinfo = NULL;
			return result;
		}

		/* Attach to bpf */
		bpfattach(pcb->ipsec_ifp, DLT_RAW, 0);
	} else
#endif // IPSEC_NEXUS
	{
		// Classic path: allocate and attach the ifnet directly
		result = ifnet_allocate_extended(&ipsec_init, &pcb->ipsec_ifp);
		if (result != 0) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - ifnet_allocate failed: %d\n", result);
			ipsec_free_pcb(pcb, false);
			*unitinfo = NULL;
			return result;
		}
		ipsec_ifnet_set_attrs(pcb->ipsec_ifp);

		/* Attach the interface */
		result = ifnet_attach(pcb->ipsec_ifp, NULL);
		if (result != 0) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - ifnet_attach failed: %d\n", result);
			ifnet_release(pcb->ipsec_ifp);
			ipsec_free_pcb(pcb, false);
			*unitinfo = NULL;
			return result;
		}

		/* Attach to bpf */
		bpfattach(pcb->ipsec_ifp, DLT_NULL, 0);
	}

#if IPSEC_NEXUS
	/*
	 * Mark the data path as ready.
	 * If kpipe nexus is being used then the data path is marked ready only when a kpipe channel is connected.
	 */
	if (pcb->ipsec_kpipe_count == 0) {
		lck_mtx_lock(&pcb->ipsec_pcb_data_move_lock);
		IPSEC_SET_DATA_PATH_READY(pcb);
		lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
	}
#endif

	/* The interface's resources are allocated, mark it as running */
	ifnet_set_flags(pcb->ipsec_ifp, IFF_RUNNING, IFF_RUNNING);

	return 0;
}
2743 
2744 static errno_t
ipsec_detach_ip(ifnet_t interface,protocol_family_t protocol,socket_t pf_socket)2745 ipsec_detach_ip(ifnet_t                         interface,
2746     protocol_family_t       protocol,
2747     socket_t                        pf_socket)
2748 {
2749 	errno_t result = EPROTONOSUPPORT;
2750 
2751 	/* Attempt a detach */
2752 	if (protocol == PF_INET) {
2753 		struct ifreq    ifr;
2754 
2755 		bzero(&ifr, sizeof(ifr));
2756 		snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s%d",
2757 		    ifnet_name(interface), ifnet_unit(interface));
2758 
2759 		result = sock_ioctl(pf_socket, SIOCPROTODETACH, &ifr);
2760 	} else if (protocol == PF_INET6) {
2761 		struct in6_ifreq        ifr6;
2762 
2763 		bzero(&ifr6, sizeof(ifr6));
2764 		snprintf(ifr6.ifr_name, sizeof(ifr6.ifr_name), "%s%d",
2765 		    ifnet_name(interface), ifnet_unit(interface));
2766 
2767 		result = sock_ioctl(pf_socket, SIOCPROTODETACH_IN6, &ifr6);
2768 	}
2769 
2770 	return result;
2771 }
2772 
2773 static void
ipsec_remove_address(ifnet_t interface,protocol_family_t protocol,ifaddr_t address,socket_t pf_socket)2774 ipsec_remove_address(ifnet_t                            interface,
2775     protocol_family_t      protocol,
2776     ifaddr_t                       address,
2777     socket_t                       pf_socket)
2778 {
2779 	errno_t result = 0;
2780 
2781 	/* Attempt a detach */
2782 	if (protocol == PF_INET) {
2783 		struct ifreq    ifr;
2784 
2785 		bzero(&ifr, sizeof(ifr));
2786 		snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s%d",
2787 		    ifnet_name(interface), ifnet_unit(interface));
2788 		result = ifaddr_address(address, &ifr.ifr_addr, sizeof(ifr.ifr_addr));
2789 		if (result != 0) {
2790 			os_log_error(OS_LOG_DEFAULT, "ipsec_remove_address - ifaddr_address failed: %d", result);
2791 		} else {
2792 			result = sock_ioctl(pf_socket, SIOCDIFADDR, &ifr);
2793 			if (result != 0) {
2794 				os_log_error(OS_LOG_DEFAULT, "ipsec_remove_address - SIOCDIFADDR failed: %d", result);
2795 			}
2796 		}
2797 	} else if (protocol == PF_INET6) {
2798 		struct in6_ifreq        ifr6;
2799 
2800 		bzero(&ifr6, sizeof(ifr6));
2801 		snprintf(ifr6.ifr_name, sizeof(ifr6.ifr_name), "%s%d",
2802 		    ifnet_name(interface), ifnet_unit(interface));
2803 		result = ifaddr_address(address, (struct sockaddr*)&ifr6.ifr_addr,
2804 		    sizeof(ifr6.ifr_addr));
2805 		if (result != 0) {
2806 			os_log_error(OS_LOG_DEFAULT, "ipsec_remove_address - ifaddr_address failed (v6): %d",
2807 			    result);
2808 		} else {
2809 			result = sock_ioctl(pf_socket, SIOCDIFADDR_IN6, &ifr6);
2810 			if (result != 0) {
2811 				os_log_error(OS_LOG_DEFAULT, "ipsec_remove_address - SIOCDIFADDR_IN6 failed: %d",
2812 				    result);
2813 			}
2814 		}
2815 	}
2816 }
2817 
2818 static void
ipsec_cleanup_family(ifnet_t interface,protocol_family_t protocol)2819 ipsec_cleanup_family(ifnet_t                            interface,
2820     protocol_family_t      protocol)
2821 {
2822 	errno_t result = 0;
2823 	socket_t __single pf_socket = NULL;
2824 	uint16_t addresses_count = 0;
2825 	ifaddr_t *__counted_by(addresses_count) addresses = NULL;
2826 	int i;
2827 
2828 	if (protocol != PF_INET && protocol != PF_INET6) {
2829 		os_log_error(OS_LOG_DEFAULT, "ipsec_cleanup_family - invalid protocol family %d\n", protocol);
2830 		return;
2831 	}
2832 
2833 	/* Create a socket for removing addresses and detaching the protocol */
2834 	result = sock_socket(protocol, SOCK_DGRAM, 0, NULL, NULL, &pf_socket);
2835 	if (result != 0) {
2836 		if (result != EAFNOSUPPORT) {
2837 			os_log_error(OS_LOG_DEFAULT, "ipsec_cleanup_family - failed to create %s socket: %d\n",
2838 			    protocol == PF_INET ? "IP" : "IPv6", result);
2839 		}
2840 		goto cleanup;
2841 	}
2842 
2843 	/* always set SS_PRIV, we want to close and detach regardless */
2844 	sock_setpriv(pf_socket, 1);
2845 
2846 	result = ipsec_detach_ip(interface, protocol, pf_socket);
2847 	if (result == 0 || result == ENXIO) {
2848 		/* We are done! We either detached or weren't attached. */
2849 		goto cleanup;
2850 	} else if (result != EBUSY) {
2851 		/* Uh, not really sure what happened here... */
2852 		os_log_error(OS_LOG_DEFAULT, "ipsec_cleanup_family - ipsec_detach_ip failed: %d\n", result);
2853 		goto cleanup;
2854 	}
2855 
2856 	/*
2857 	 * At this point, we received an EBUSY error. This means there are
2858 	 * addresses attached. We should detach them and then try again.
2859 	 */
2860 	result = ifnet_get_address_list_family_with_count(interface, &addresses,
2861 	    &addresses_count, (sa_family_t)protocol);
2862 	if (result != 0) {
2863 		os_log_error(OS_LOG_DEFAULT, "fnet_get_address_list_family(%s%d, 0xblah, %s) - failed: %d\n",
2864 		    ifnet_name(interface), ifnet_unit(interface),
2865 		    protocol == PF_INET ? "PF_INET" : "PF_INET6", result);
2866 		goto cleanup;
2867 	}
2868 
2869 	for (i = 0; addresses[i] != 0; i++) {
2870 		ipsec_remove_address(interface, protocol, addresses[i], pf_socket);
2871 	}
2872 	ifnet_address_list_free_counted_by(addresses, addresses_count);
2873 
2874 	/*
2875 	 * The addresses should be gone, we should try the remove again.
2876 	 */
2877 	result = ipsec_detach_ip(interface, protocol, pf_socket);
2878 	if (result != 0 && result != ENXIO) {
2879 		os_log_error(OS_LOG_DEFAULT, "ipsec_cleanup_family - ipsec_detach_ip failed: %d\n", result);
2880 	}
2881 
2882 cleanup:
2883 	if (pf_socket != NULL) {
2884 		sock_close(pf_socket);
2885 	}
2886 
2887 	if (addresses != NULL) {
2888 		ifnet_address_list_free_counted_by(addresses, addresses_count);
2889 	}
2890 }
2891 
/*
 * Kernel-control disconnect callback: tear down the pcb created at
 * connect time. Drains in-flight data-path threads, stops the netif
 * nexus (if any), detaches channels, removes addresses/protocols and
 * the ifnet itself. Returns EINVAL if unitinfo is NULL, otherwise 0.
 *
 * NOTE: the ordering here (drain -> nexus stop -> lock -> detach) is
 * deliberate; do not reorder without understanding the data-path
 * reference counting.
 */
static errno_t
ipsec_ctl_disconnect(__unused kern_ctl_ref      kctlref,
    __unused u_int32_t             unit,
    void                                   *unitinfo)
{
	struct ipsec_pcb *__single pcb = unitinfo;
	ifnet_t ifp = NULL;
	errno_t result = 0;

	if (pcb == NULL) {
		return EINVAL;
	}

	/* Wait until all threads in the data paths are done. */
	ipsec_wait_data_move_drain(pcb);

#if IPSEC_NEXUS
	// Tell the nexus to stop all rings
	if (pcb->ipsec_netif_nexus != NULL) {
		kern_nexus_stop(pcb->ipsec_netif_nexus);
	}
#endif // IPSEC_NEXUS

	lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);

#if IPSEC_NEXUS
	if (if_ipsec_debug != 0) {
		printf("ipsec_ctl_disconnect: detaching interface %s (id %s)\n",
		    pcb->ipsec_if_xname, pcb->ipsec_unique_name);
	}

	/* Channels must be detached under the lock; freed after it is dropped. */
	struct ipsec_detached_channels dc;
	ipsec_detach_channels(pcb, &dc);
#endif // IPSEC_NEXUS

	pcb->ipsec_ctlref = NULL;

	ifp = pcb->ipsec_ifp;
	if (ifp != NULL) {
#if IPSEC_NEXUS
		if (pcb->ipsec_netif_nexus != NULL) {
			/*
			 * Quiesce the interface and flush any pending outbound packets.
			 */
			if_down(ifp);

			/*
			 * Suspend data movement and wait for IO threads to exit.
			 * We can't rely on the logic in dlil_quiesce_and_detach_nexuses() to
			 * do this because ipsec nexuses are attached/detached separately.
			 */
			ifnet_datamov_suspend_and_drain(ifp);
			if ((result = ifnet_detach(ifp)) != 0) {
				panic("ipsec_ctl_disconnect - ifnet_detach failed: %d", result);
				/* NOT REACHED */
			}

			/*
			 * We want to do everything in our power to ensure that the interface
			 * really goes away when the socket is closed. We must remove IP/IPv6
			 * addresses and detach the protocols. Finally, we can remove and
			 * release the interface.
			 */
			key_delsp_for_ipsec_if(ifp);

			ipsec_cleanup_family(ifp, AF_INET);
			ipsec_cleanup_family(ifp, AF_INET6);

			lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);

			ipsec_free_channels(&dc);

			ipsec_nexus_detach(pcb);

			/* Decrement refcnt added by ifnet_datamov_suspend_and_drain(). */
			ifnet_datamov_resume(ifp);
		} else
#endif // IPSEC_NEXUS
		{
			lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);

#if IPSEC_NEXUS
			ipsec_free_channels(&dc);
#endif // IPSEC_NEXUS

			/*
			 * We want to do everything in our power to ensure that the interface
			 * really goes away when the socket is closed. We must remove IP/IPv6
			 * addresses and detach the protocols. Finally, we can remove and
			 * release the interface.
			 */
			key_delsp_for_ipsec_if(ifp);

			ipsec_cleanup_family(ifp, AF_INET);
			ipsec_cleanup_family(ifp, AF_INET6);

			/*
			 * Detach now; ipsec_detach() will be called asynchronously once
			 * the I/O reference count drops to 0.  There we will invoke
			 * ifnet_release().
			 */
			if ((result = ifnet_detach(ifp)) != 0) {
				os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_disconnect - ifnet_detach failed: %d\n", result);
			}
		}
	} else {
		// Bound, but not connected
		lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
		ipsec_free_pcb(pcb, false);
	}

	return 0;
}
3005 
3006 static errno_t
ipsec_ctl_send(__unused kern_ctl_ref kctlref,__unused u_int32_t unit,__unused void * unitinfo,mbuf_t m,__unused int flags)3007 ipsec_ctl_send(__unused kern_ctl_ref    kctlref,
3008     __unused u_int32_t           unit,
3009     __unused void                        *unitinfo,
3010     mbuf_t                  m,
3011     __unused int                 flags)
3012 {
3013 	/* Receive messages from the control socket. Currently unused. */
3014 	mbuf_freem(m);
3015 	return 0;
3016 }
3017 
3018 static errno_t
ipsec_ctl_setopt(__unused kern_ctl_ref kctlref,__unused u_int32_t unit,void * unitinfo,int opt,void * __sized_by (len)data,size_t len)3019 ipsec_ctl_setopt(__unused kern_ctl_ref  kctlref, __unused u_int32_t unit,
3020     void *unitinfo, int opt, void *__sized_by(len)data, size_t len)
3021 {
3022 	errno_t result = 0;
3023 	struct ipsec_pcb *__single pcb = unitinfo;
3024 	if (pcb == NULL) {
3025 		return EINVAL;
3026 	}
3027 
3028 	/* check for privileges for privileged options */
3029 	switch (opt) {
3030 	case IPSEC_OPT_FLAGS:
3031 	case IPSEC_OPT_EXT_IFDATA_STATS:
3032 	case IPSEC_OPT_SET_DELEGATE_INTERFACE:
3033 	case IPSEC_OPT_OUTPUT_TRAFFIC_CLASS:
3034 	case IPSEC_OPT_OUTPUT_DSCP_MAPPING:
3035 		if (kauth_cred_issuser(kauth_cred_get()) == 0) {
3036 			return EPERM;
3037 		}
3038 		break;
3039 	}
3040 
3041 	switch (opt) {
3042 	case IPSEC_OPT_FLAGS: {
3043 		if (len != sizeof(u_int32_t)) {
3044 			result = EMSGSIZE;
3045 		} else {
3046 			pcb->ipsec_external_flags = *(u_int32_t *)data;
3047 		}
3048 		break;
3049 	}
3050 
3051 	case IPSEC_OPT_EXT_IFDATA_STATS: {
3052 		if (len != sizeof(int)) {
3053 			result = EMSGSIZE;
3054 			break;
3055 		}
3056 		if (pcb->ipsec_ifp == NULL) {
3057 			// Only can set after connecting
3058 			result = EINVAL;
3059 			break;
3060 		}
3061 		pcb->ipsec_ext_ifdata_stats = (*(int *)data) ? 1 : 0;
3062 		break;
3063 	}
3064 
3065 	case IPSEC_OPT_INC_IFDATA_STATS_IN:
3066 	case IPSEC_OPT_INC_IFDATA_STATS_OUT: {
3067 		struct ipsec_stats_param *utsp = (struct ipsec_stats_param *)data;
3068 
3069 		if (utsp == NULL || len < sizeof(struct ipsec_stats_param)) {
3070 			result = EINVAL;
3071 			break;
3072 		}
3073 		if (pcb->ipsec_ifp == NULL) {
3074 			// Only can set after connecting
3075 			result = EINVAL;
3076 			break;
3077 		}
3078 		if (!pcb->ipsec_ext_ifdata_stats) {
3079 			result = EINVAL;
3080 			break;
3081 		}
3082 		if (opt == IPSEC_OPT_INC_IFDATA_STATS_IN) {
3083 			ifnet_stat_increment_in(pcb->ipsec_ifp, (uint32_t)utsp->utsp_packets,
3084 			    (uint32_t)utsp->utsp_bytes, (uint32_t)utsp->utsp_errors);
3085 		} else {
3086 			ifnet_stat_increment_out(pcb->ipsec_ifp, (uint32_t)utsp->utsp_packets,
3087 			    (uint32_t)utsp->utsp_bytes, (uint32_t)utsp->utsp_errors);
3088 		}
3089 		break;
3090 	}
3091 
3092 	case IPSEC_OPT_SET_DELEGATE_INTERFACE: {
3093 		ifnet_t __single del_ifp = NULL;
3094 		char name[IFNAMSIZ];
3095 
3096 		if (len > IFNAMSIZ - 1) {
3097 			result = EMSGSIZE;
3098 			break;
3099 		}
3100 		if (pcb->ipsec_ifp == NULL) {
3101 			// Only can set after connecting
3102 			result = EINVAL;
3103 			break;
3104 		}
3105 		if (len != 0) {                   /* if len==0, del_ifp will be NULL causing the delegate to be removed */
3106 			bcopy(data, name, len);
3107 			name[len] = 0;
3108 			result = ifnet_find_by_name(__unsafe_null_terminated_from_indexable(name), &del_ifp);
3109 		}
3110 		if (result == 0) {
3111 			os_log_error(OS_LOG_DEFAULT, "%s IPSEC_OPT_SET_DELEGATE_INTERFACE %s to %s\n",
3112 			    __func__, pcb->ipsec_ifp->if_xname,
3113 			    del_ifp ? del_ifp->if_xname : "NULL");
3114 
3115 			result = ifnet_set_delegate(pcb->ipsec_ifp, del_ifp);
3116 			if (del_ifp) {
3117 				ifnet_release(del_ifp);
3118 			}
3119 		}
3120 		break;
3121 	}
3122 
3123 	case IPSEC_OPT_OUTPUT_TRAFFIC_CLASS: {
3124 		if (len != sizeof(int)) {
3125 			result = EMSGSIZE;
3126 			break;
3127 		}
3128 		if (pcb->ipsec_ifp == NULL) {
3129 			// Only can set after connecting
3130 			result = EINVAL;
3131 			break;
3132 		}
3133 		mbuf_svc_class_t output_service_class = so_tc2msc(*(int *)data);
3134 		if (output_service_class == MBUF_SC_UNSPEC) {
3135 			pcb->ipsec_output_service_class = MBUF_SC_OAM;
3136 		} else {
3137 			pcb->ipsec_output_service_class = output_service_class;
3138 		}
3139 		os_log_error(OS_LOG_DEFAULT, "%s IPSEC_OPT_OUTPUT_TRAFFIC_CLASS %s svc %d\n",
3140 		    __func__, pcb->ipsec_ifp->if_xname,
3141 		    pcb->ipsec_output_service_class);
3142 		break;
3143 	}
3144 
3145 #if IPSEC_NEXUS
3146 	case IPSEC_OPT_ENABLE_CHANNEL: {
3147 		if (len != sizeof(int)) {
3148 			result = EMSGSIZE;
3149 			break;
3150 		}
3151 		if (pcb->ipsec_ifp != NULL) {
3152 			// Only can set before connecting
3153 			result = EINVAL;
3154 			break;
3155 		}
3156 		if ((*(int *)data) != 0 &&
3157 		    (*(int *)data) != 1 &&
3158 		    (*(int *)data) != IPSEC_IF_WMM_RING_COUNT) {
3159 			result = EINVAL;
3160 			break;
3161 		}
3162 		lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
3163 		pcb->ipsec_kpipe_count = *(int *)data;
3164 		lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
3165 		break;
3166 	}
3167 
3168 	case IPSEC_OPT_CHANNEL_BIND_PID: {
3169 		if (len != sizeof(pid_t)) {
3170 			result = EMSGSIZE;
3171 			break;
3172 		}
3173 		if (pcb->ipsec_ifp != NULL) {
3174 			// Only can set before connecting
3175 			result = EINVAL;
3176 			break;
3177 		}
3178 		lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
3179 		pcb->ipsec_kpipe_pid = *(pid_t *)data;
3180 		lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
3181 		break;
3182 	}
3183 
3184 	case IPSEC_OPT_CHANNEL_BIND_UUID: {
3185 		if (len != sizeof(uuid_t)) {
3186 			result = EMSGSIZE;
3187 			break;
3188 		}
3189 		if (pcb->ipsec_ifp != NULL) {
3190 			// Only can set before connecting
3191 			result = EINVAL;
3192 			break;
3193 		}
3194 		lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
3195 		uuid_copy(pcb->ipsec_kpipe_proc_uuid, *((uuid_t *)data));
3196 		lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
3197 		break;
3198 	}
3199 
3200 	case IPSEC_OPT_ENABLE_FLOWSWITCH: {
3201 		if (len != sizeof(int)) {
3202 			result = EMSGSIZE;
3203 			break;
3204 		}
3205 		if (pcb->ipsec_ifp == NULL) {
3206 			// Only can set after connecting
3207 			result = EINVAL;
3208 			break;
3209 		}
3210 		if (!if_is_fsw_transport_netagent_enabled()) {
3211 			result = ENOTSUP;
3212 			break;
3213 		}
3214 		if (uuid_is_null(pcb->ipsec_nx.fsw_agent)) {
3215 			result = ENOENT;
3216 			break;
3217 		}
3218 
3219 		uint32_t flags = netagent_get_flags(pcb->ipsec_nx.fsw_agent);
3220 
3221 		if (*(int *)data) {
3222 			flags |= (NETAGENT_FLAG_NEXUS_PROVIDER |
3223 			    NETAGENT_FLAG_NEXUS_LISTENER);
3224 			result = netagent_set_flags(pcb->ipsec_nx.fsw_agent, flags);
3225 			pcb->ipsec_needs_netagent = true;
3226 		} else {
3227 			pcb->ipsec_needs_netagent = false;
3228 			flags &= ~(NETAGENT_FLAG_NEXUS_PROVIDER |
3229 			    NETAGENT_FLAG_NEXUS_LISTENER);
3230 			result = netagent_set_flags(pcb->ipsec_nx.fsw_agent, flags);
3231 		}
3232 		break;
3233 	}
3234 
3235 	case IPSEC_OPT_INPUT_FRAG_SIZE: {
3236 		if (len != sizeof(u_int32_t)) {
3237 			result = EMSGSIZE;
3238 			break;
3239 		}
3240 		u_int32_t input_frag_size = *(u_int32_t *)data;
3241 		if (input_frag_size <= sizeof(struct ip6_hdr)) {
3242 			pcb->ipsec_frag_size_set = FALSE;
3243 			pcb->ipsec_input_frag_size = 0;
3244 		} else {
3245 			pcb->ipsec_frag_size_set = TRUE;
3246 			pcb->ipsec_input_frag_size = input_frag_size;
3247 		}
3248 		break;
3249 	}
3250 	case IPSEC_OPT_ENABLE_NETIF: {
3251 		if (len != sizeof(int)) {
3252 			result = EMSGSIZE;
3253 			break;
3254 		}
3255 		if (pcb->ipsec_ifp != NULL) {
3256 			// Only can set before connecting
3257 			result = EINVAL;
3258 			break;
3259 		}
3260 		lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
3261 		pcb->ipsec_use_netif = !!(*(int *)data);
3262 		lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
3263 		break;
3264 	}
3265 	case IPSEC_OPT_SLOT_SIZE: {
3266 		if (len != sizeof(u_int32_t)) {
3267 			result = EMSGSIZE;
3268 			break;
3269 		}
3270 		if (pcb->ipsec_ifp != NULL) {
3271 			// Only can set before connecting
3272 			result = EINVAL;
3273 			break;
3274 		}
3275 		u_int32_t slot_size = *(u_int32_t *)data;
3276 		if (slot_size < IPSEC_IF_MIN_SLOT_SIZE ||
3277 		    slot_size > IPSEC_IF_MAX_SLOT_SIZE) {
3278 			return EINVAL;
3279 		}
3280 		pcb->ipsec_slot_size = slot_size;
3281 		if (if_ipsec_debug != 0) {
3282 			printf("%s: IPSEC_OPT_SLOT_SIZE %u\n", __func__, slot_size);
3283 		}
3284 		break;
3285 	}
3286 	case IPSEC_OPT_NETIF_RING_SIZE: {
3287 		if (len != sizeof(u_int32_t)) {
3288 			result = EMSGSIZE;
3289 			break;
3290 		}
3291 		if (pcb->ipsec_ifp != NULL) {
3292 			// Only can set before connecting
3293 			result = EINVAL;
3294 			break;
3295 		}
3296 		u_int32_t ring_size = *(u_int32_t *)data;
3297 		if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
3298 		    ring_size > IPSEC_IF_MAX_RING_SIZE) {
3299 			return EINVAL;
3300 		}
3301 		pcb->ipsec_netif_ring_size = ring_size;
3302 		if (if_ipsec_debug != 0) {
3303 			printf("%s: IPSEC_OPT_NETIF_RING_SIZE %u\n", __func__, ring_size);
3304 		}
3305 		break;
3306 	}
3307 	case IPSEC_OPT_TX_FSW_RING_SIZE: {
3308 		if (len != sizeof(u_int32_t)) {
3309 			result = EMSGSIZE;
3310 			break;
3311 		}
3312 		if (pcb->ipsec_ifp != NULL) {
3313 			// Only can set before connecting
3314 			result = EINVAL;
3315 			break;
3316 		}
3317 		u_int32_t ring_size = *(u_int32_t *)data;
3318 		if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
3319 		    ring_size > IPSEC_IF_MAX_RING_SIZE) {
3320 			return EINVAL;
3321 		}
3322 		pcb->ipsec_tx_fsw_ring_size = ring_size;
3323 		if (if_ipsec_debug != 0) {
3324 			printf("%s: IPSEC_OPT_TX_FSW_RING_SIZE %u\n", __func__, ring_size);
3325 		}
3326 		break;
3327 	}
3328 	case IPSEC_OPT_RX_FSW_RING_SIZE: {
3329 		if (len != sizeof(u_int32_t)) {
3330 			result = EMSGSIZE;
3331 			break;
3332 		}
3333 		if (pcb->ipsec_ifp != NULL) {
3334 			// Only can set before connecting
3335 			result = EINVAL;
3336 			break;
3337 		}
3338 		u_int32_t ring_size = *(u_int32_t *)data;
3339 		if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
3340 		    ring_size > IPSEC_IF_MAX_RING_SIZE) {
3341 			return EINVAL;
3342 		}
3343 		pcb->ipsec_rx_fsw_ring_size = ring_size;
3344 		if (if_ipsec_debug != 0) {
3345 			printf("%s: IPSEC_OPT_TX_FSW_RING_SIZE %u\n", __func__, ring_size);
3346 		}
3347 		break;
3348 	}
3349 	case IPSEC_OPT_KPIPE_TX_RING_SIZE: {
3350 		if (len != sizeof(u_int32_t)) {
3351 			result = EMSGSIZE;
3352 			break;
3353 		}
3354 		if (pcb->ipsec_ifp != NULL) {
3355 			// Only can set before connecting
3356 			result = EINVAL;
3357 			break;
3358 		}
3359 		u_int32_t ring_size = *(u_int32_t *)data;
3360 		if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
3361 		    ring_size > IPSEC_IF_MAX_RING_SIZE) {
3362 			return EINVAL;
3363 		}
3364 		pcb->ipsec_kpipe_tx_ring_size = ring_size;
3365 		if (if_ipsec_debug != 0) {
3366 			printf("%s: IPSEC_OPT_KPIPE_TX_RING_SIZE %u\n", __func__, ring_size);
3367 		}
3368 		break;
3369 	}
3370 	case IPSEC_OPT_KPIPE_RX_RING_SIZE: {
3371 		if (len != sizeof(u_int32_t)) {
3372 			result = EMSGSIZE;
3373 			break;
3374 		}
3375 		if (pcb->ipsec_ifp != NULL) {
3376 			// Only can set before connecting
3377 			result = EINVAL;
3378 			break;
3379 		}
3380 		u_int32_t ring_size = *(u_int32_t *)data;
3381 		if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
3382 		    ring_size > IPSEC_IF_MAX_RING_SIZE) {
3383 			return EINVAL;
3384 		}
3385 		pcb->ipsec_kpipe_rx_ring_size = ring_size;
3386 		if (if_ipsec_debug != 0) {
3387 			printf("%s: IPSEC_OPT_KPIPE_RX_RING_SIZE %u\n", __func__, ring_size);
3388 		}
3389 		break;
3390 	}
3391 	case IPSEC_OPT_OUTPUT_DSCP_MAPPING: {
3392 		if (len != sizeof(int)) {
3393 			result = EMSGSIZE;
3394 			break;
3395 		}
3396 		if (pcb->ipsec_ifp == NULL) {
3397 			// Only can set after connecting
3398 			result = EINVAL;
3399 			break;
3400 		}
3401 
3402 		ipsec_dscp_mapping_t output_dscp_mapping = (ipsec_dscp_mapping_t)(*(int *)data);
3403 		if (output_dscp_mapping > IPSEC_DSCP_MAPPING_LEGACY) {
3404 			return EINVAL;
3405 		}
3406 
3407 		pcb->ipsec_output_dscp_mapping = output_dscp_mapping;
3408 
3409 		os_log(OS_LOG_DEFAULT, "%s IPSEC_OPT_OUTPUT_DSCP_MAPPING %s DSCP %d\n",
3410 		    __func__, pcb->ipsec_ifp->if_xname,
3411 		    pcb->ipsec_output_dscp_mapping);
3412 		break;
3413 	}
3414 
3415 #endif // IPSEC_NEXUS
3416 
3417 	default: {
3418 		result = ENOPROTOOPT;
3419 		break;
3420 	}
3421 	}
3422 
3423 	return result;
3424 }
3425 
/*
 * Kernel-control getsockopt handler for the ipsec interface.
 *
 * Each option validates the caller-supplied buffer length (*len) and
 * copies the corresponding pcb field out. Options touching kpipe state
 * are read under the pcb rwlock (shared). Returns 0 on success, or
 * EMSGSIZE / EINVAL / ENXIO / ENOPROTOOPT.
 */
static errno_t
ipsec_ctl_getopt(__unused kern_ctl_ref kctlref,
    __unused u_int32_t unit,
    void *unitinfo,
    int opt,
    void *__sized_by(*len)data,
    size_t *len)
{
	errno_t result = 0;
	struct ipsec_pcb *__single pcb = unitinfo;
	if (pcb == NULL) {
		return EINVAL;
	}

	switch (opt) {
	case IPSEC_OPT_FLAGS: {
		if (*len != sizeof(u_int32_t)) {
			result = EMSGSIZE;
		} else {
			*(u_int32_t *)data = pcb->ipsec_external_flags;
		}
		break;
	}

	case IPSEC_OPT_EXT_IFDATA_STATS: {
		if (*len != sizeof(int)) {
			result = EMSGSIZE;
		} else {
			*(int *)data = (pcb->ipsec_ext_ifdata_stats) ? 1 : 0;
		}
		break;
	}

	case IPSEC_OPT_IFNAME: {
		/* Buffer must fit the name plus terminating NUL. */
		if (*len < MIN(strbuflen(pcb->ipsec_if_xname,
		    sizeof(pcb->ipsec_if_xname)) + 1, sizeof(pcb->ipsec_if_xname))) {
			result = EMSGSIZE;
		} else {
			if (pcb->ipsec_ifp == NULL) {
				// Only can get after connecting
				result = EINVAL;
				break;
			}
			/* *len is updated to the copied length including the NUL. */
			*len = scnprintf(data, *len, "%s", pcb->ipsec_if_xname) + 1;
		}
		break;
	}

	case IPSEC_OPT_OUTPUT_TRAFFIC_CLASS: {
		if (*len != sizeof(int)) {
			result = EMSGSIZE;
		} else {
			*(int *)data = so_svc2tc(pcb->ipsec_output_service_class);
		}
		break;
	}

#if IPSEC_NEXUS

	case IPSEC_OPT_ENABLE_CHANNEL: {
		if (*len != sizeof(int)) {
			result = EMSGSIZE;
		} else {
			lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
			*(int *)data = pcb->ipsec_kpipe_count;
			lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		}
		break;
	}

	case IPSEC_OPT_CHANNEL_BIND_PID: {
		if (*len != sizeof(pid_t)) {
			result = EMSGSIZE;
		} else {
			lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
			*(pid_t *)data = pcb->ipsec_kpipe_pid;
			lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		}
		break;
	}

	case IPSEC_OPT_CHANNEL_BIND_UUID: {
		if (*len != sizeof(uuid_t)) {
			result = EMSGSIZE;
		} else {
			lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
			uuid_copy(*((uuid_t *)data), pcb->ipsec_kpipe_proc_uuid);
			lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		}
		break;
	}

	case IPSEC_OPT_ENABLE_FLOWSWITCH: {
		if (*len != sizeof(int)) {
			result = EMSGSIZE;
		} else {
			/*
			 * NOTE(review): unlike the setopt path, ipsec_ifp is not
			 * checked for NULL here — presumably if_check_netagent()
			 * tolerates a NULL ifp; confirm before relying on it.
			 */
			*(int *)data = if_check_netagent(pcb->ipsec_ifp, pcb->ipsec_nx.fsw_agent);
		}
		break;
	}

	case IPSEC_OPT_ENABLE_NETIF: {
		if (*len != sizeof(int)) {
			result = EMSGSIZE;
		} else {
			lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
			*(int *)data = !!pcb->ipsec_use_netif;
			lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		}
		break;
	}

	case IPSEC_OPT_GET_CHANNEL_UUID: {
		/* Lock is taken up front: flag, count and uuid array must be consistent. */
		lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
		if (!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
			result = ENXIO;
		} else if (*len != sizeof(uuid_t) * pcb->ipsec_kpipe_count) {
			result = EMSGSIZE;
		} else {
			for (unsigned int i = 0; i < pcb->ipsec_kpipe_count; i++) {
				uuid_copy(((uuid_t *)data)[i], pcb->ipsec_kpipe_uuid[i]);
			}
		}
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		break;
	}

	case IPSEC_OPT_INPUT_FRAG_SIZE: {
		if (*len != sizeof(u_int32_t)) {
			result = EMSGSIZE;
		} else {
			*(u_int32_t *)data = pcb->ipsec_input_frag_size;
		}
		break;
	}
	case IPSEC_OPT_SLOT_SIZE: {
		if (*len != sizeof(u_int32_t)) {
			result = EMSGSIZE;
		} else {
			*(u_int32_t *)data = pcb->ipsec_slot_size;
		}
		break;
	}
	case IPSEC_OPT_NETIF_RING_SIZE: {
		if (*len != sizeof(u_int32_t)) {
			result = EMSGSIZE;
		} else {
			*(u_int32_t *)data = pcb->ipsec_netif_ring_size;
		}
		break;
	}
	case IPSEC_OPT_TX_FSW_RING_SIZE: {
		if (*len != sizeof(u_int32_t)) {
			result = EMSGSIZE;
		} else {
			*(u_int32_t *)data = pcb->ipsec_tx_fsw_ring_size;
		}
		break;
	}
	case IPSEC_OPT_RX_FSW_RING_SIZE: {
		if (*len != sizeof(u_int32_t)) {
			result = EMSGSIZE;
		} else {
			*(u_int32_t *)data = pcb->ipsec_rx_fsw_ring_size;
		}
		break;
	}
	case IPSEC_OPT_KPIPE_TX_RING_SIZE: {
		if (*len != sizeof(u_int32_t)) {
			result = EMSGSIZE;
		} else {
			*(u_int32_t *)data = pcb->ipsec_kpipe_tx_ring_size;
		}
		break;
	}
	case IPSEC_OPT_KPIPE_RX_RING_SIZE: {
		if (*len != sizeof(u_int32_t)) {
			result = EMSGSIZE;
		} else {
			*(u_int32_t *)data = pcb->ipsec_kpipe_rx_ring_size;
		}
		break;
	}

#endif // IPSEC_NEXUS

	default: {
		result = ENOPROTOOPT;
		break;
	}
	}

	return result;
}
3620 
3621 /* Network Interface functions */
3622 static errno_t
ipsec_output(ifnet_t interface,mbuf_t data)3623 ipsec_output(ifnet_t interface,
3624     mbuf_t data)
3625 {
3626 	struct ipsec_pcb *__single pcb = ifnet_softc(interface);
3627 	struct ipsec_output_state ipsec_state;
3628 	struct route ro;
3629 	struct route_in6 ro6;
3630 	size_t length;
3631 	struct ip *ip = NULL;
3632 	struct ip6_hdr *ip6 = NULL;
3633 	struct ip_out_args ipoa;
3634 	struct ip6_out_args ip6oa;
3635 	int error = 0;
3636 	u_int ip_version = 0;
3637 	int flags = 0;
3638 	struct flowadv *adv = NULL;
3639 
3640 	// Make sure this packet isn't looping through the interface
3641 	if (necp_get_last_interface_index_from_packet(data) == interface->if_index) {
3642 		error = EINVAL;
3643 		goto ipsec_output_err;
3644 	}
3645 
3646 	// Mark the interface so NECP can evaluate tunnel policy
3647 	necp_mark_packet_from_interface(data, interface);
3648 
3649 	if (data->m_len < sizeof(*ip)) {
3650 		os_log_error(OS_LOG_DEFAULT, "ipsec_output: first mbuf length shorter than IP header length: %d.\n", data->m_len);
3651 		IPSEC_STAT_INCREMENT(ipsecstat.out_inval);
3652 		error = EINVAL;
3653 		goto ipsec_output_err;
3654 	}
3655 
3656 	ip = mtod(data, struct ip *);
3657 	ip_version = ip->ip_v;
3658 
3659 	switch (ip_version) {
3660 	case 4: {
3661 		u_int8_t ip_hlen = 0;
3662 #ifdef _IP_VHL
3663 		ip_hlen = _IP_VHL_HL(ip->ip_vhl) << 2;
3664 #else
3665 		ip_hlen = (uint8_t)(ip->ip_hl << 2);
3666 #endif
3667 		if (ip_hlen < sizeof(*ip)) {
3668 			os_log_error(OS_LOG_DEFAULT, "ipsec_output: Bad ip header length %d.\n", ip_hlen);
3669 			IPSEC_STAT_INCREMENT(ipsecstat.out_inval);
3670 			error = EINVAL;
3671 			goto ipsec_output_err;
3672 		}
3673 #if IPSEC_NEXUS
3674 		if (!pcb->ipsec_use_netif)
3675 #endif // IPSEC_NEXUS
3676 		{
3677 			int af = AF_INET;
3678 			bpf_tap_out(pcb->ipsec_ifp, DLT_NULL, data, &af, sizeof(af));
3679 		}
3680 
3681 		/* Apply encryption */
3682 		memset(&ipsec_state, 0, sizeof(ipsec_state));
3683 		ipsec_state.m = data;
3684 		ipsec_state.dst = (struct sockaddr *)&ip->ip_dst;
3685 		memset(&ipsec_state.ro, 0, sizeof(ipsec_state.ro));
3686 		ipsec_state.dscp_mapping = pcb->ipsec_output_dscp_mapping;
3687 
3688 		error = ipsec4_interface_output(&ipsec_state, interface);
3689 		/* Tunneled in IPv6 - packet is gone */
3690 		if (error == 0 && ipsec_state.tunneled == 6) {
3691 			goto done;
3692 		}
3693 
3694 		data = ipsec_state.m;
3695 		if (error || data == NULL) {
3696 			if (error) {
3697 				os_log_error(OS_LOG_DEFAULT, "ipsec_output: ipsec4_output error %d.\n", error);
3698 			}
3699 			goto ipsec_output_err;
3700 		}
3701 
3702 		/* Set traffic class, set flow */
3703 		m_set_service_class(data, pcb->ipsec_output_service_class);
3704 		data->m_pkthdr.pkt_flowsrc = FLOWSRC_IFNET;
3705 #if SKYWALK
3706 		data->m_pkthdr.pkt_mpriv_srcid = interface->if_flowhash;
3707 #else /* !SKYWALK */
3708 		data->m_pkthdr.pkt_flowid = interface->if_flowhash;
3709 #endif /* !SKYWALK */
3710 		data->m_pkthdr.pkt_proto = ip->ip_p;
3711 		data->m_pkthdr.pkt_flags = (PKTF_FLOW_ID | PKTF_FLOW_ADV | PKTF_FLOW_LOCALSRC);
3712 
3713 		/* Flip endian-ness for ip_output */
3714 		ip = mtod(data, struct ip *);
3715 		NTOHS(ip->ip_len);
3716 		NTOHS(ip->ip_off);
3717 
3718 		/* Increment statistics */
3719 		length = mbuf_pkthdr_len(data);
3720 		ifnet_stat_increment_out(interface, 1, (uint16_t)length, 0);
3721 
3722 		/* Send to ip_output */
3723 		memset(&ro, 0, sizeof(ro));
3724 
3725 		flags = (IP_OUTARGS |   /* Passing out args to specify interface */
3726 		    IP_NOIPSEC);                        /* To ensure the packet doesn't go through ipsec twice */
3727 
3728 		memset(&ipoa, 0, sizeof(ipoa));
3729 		ipoa.ipoa_flowadv.code = 0;
3730 		ipoa.ipoa_flags = IPOAF_SELECT_SRCIF | IPOAF_BOUND_SRCADDR;
3731 		if (ipsec_state.outgoing_if) {
3732 			ipoa.ipoa_boundif = ipsec_state.outgoing_if;
3733 			ipoa.ipoa_flags |= IPOAF_BOUND_IF;
3734 		}
3735 		ipsec_set_ipoa_for_interface(pcb->ipsec_ifp, &ipoa);
3736 
3737 		adv = &ipoa.ipoa_flowadv;
3738 
3739 		(void)ip_output(data, NULL, &ro, flags, NULL, &ipoa);
3740 		data = NULL;
3741 
3742 		if (adv->code == FADV_FLOW_CONTROLLED || adv->code == FADV_SUSPENDED) {
3743 			error = ENOBUFS;
3744 			ifnet_disable_output(interface);
3745 		}
3746 
3747 		goto done;
3748 	}
3749 	case 6: {
3750 		if (data->m_len < sizeof(*ip6)) {
3751 			os_log_error(OS_LOG_DEFAULT, "ipsec_output: first mbuf length shorter than IPv6 header length: %d.\n", data->m_len);
3752 			IPSEC_STAT_INCREMENT(ipsec6stat.out_inval);
3753 			error = EINVAL;
3754 			goto ipsec_output_err;
3755 		}
3756 #if IPSEC_NEXUS
3757 		if (!pcb->ipsec_use_netif)
3758 #endif // IPSEC_NEXUS
3759 		{
3760 			int af = AF_INET6;
3761 			bpf_tap_out(pcb->ipsec_ifp, DLT_NULL, data, &af, sizeof(af));
3762 		}
3763 
3764 		data = ipsec6_splithdr(data);
3765 		if (data == NULL) {
3766 			os_log_error(OS_LOG_DEFAULT, "ipsec_output: ipsec6_splithdr returned NULL\n");
3767 			goto ipsec_output_err;
3768 		}
3769 
3770 		ip6 = mtod(data, struct ip6_hdr *);
3771 
3772 		memset(&ipsec_state, 0, sizeof(ipsec_state));
3773 		ipsec_state.m = data;
3774 		ipsec_state.dst = (struct sockaddr *)&ip6->ip6_dst;
3775 		memset(&ipsec_state.ro, 0, sizeof(ipsec_state.ro));
3776 		ipsec_state.dscp_mapping = pcb->ipsec_output_dscp_mapping;
3777 
3778 		error = ipsec6_interface_output(&ipsec_state, interface, &ip6->ip6_nxt, ipsec_state.m);
3779 		if (error == 0 && ipsec_state.tunneled == 4) {          /* tunneled in IPv4 - packet is gone */
3780 			goto done;
3781 		}
3782 		data = ipsec_state.m;
3783 		if (error || data == NULL) {
3784 			if (error) {
3785 				os_log_error(OS_LOG_DEFAULT, "ipsec_output: ipsec6_output error %d\n", error);
3786 			}
3787 			goto ipsec_output_err;
3788 		}
3789 
3790 		/* Set traffic class, set flow */
3791 		m_set_service_class(data, pcb->ipsec_output_service_class);
3792 		data->m_pkthdr.pkt_flowsrc = FLOWSRC_IFNET;
3793 #if SKYWALK
3794 		data->m_pkthdr.pkt_mpriv_srcid = interface->if_flowhash;
3795 #else /* !SKYWALK */
3796 		data->m_pkthdr.pkt_flowid = interface->if_flowhash;
3797 #endif /* !SKYWALK */
3798 		data->m_pkthdr.pkt_proto = ip6->ip6_nxt;
3799 		data->m_pkthdr.pkt_flags = (PKTF_FLOW_ID | PKTF_FLOW_ADV | PKTF_FLOW_LOCALSRC);
3800 
3801 		/* Increment statistics */
3802 		length = mbuf_pkthdr_len(data);
3803 		ifnet_stat_increment_out(interface, 1, (uint16_t)length, 0);
3804 
3805 		/* Send to ip6_output */
3806 		memset(&ro6, 0, sizeof(ro6));
3807 
3808 		flags = IPV6_OUTARGS;
3809 
3810 		memset(&ip6oa, 0, sizeof(ip6oa));
3811 		ip6oa.ip6oa_flowadv.code = 0;
3812 		ip6oa.ip6oa_flags = IP6OAF_SELECT_SRCIF | IP6OAF_BOUND_SRCADDR;
3813 		if (ipsec_state.outgoing_if) {
3814 			ip6oa.ip6oa_boundif = ipsec_state.outgoing_if;
3815 			ip6oa.ip6oa_flags |= IP6OAF_BOUND_IF;
3816 			ip6_output_setsrcifscope(data, ipsec_state.outgoing_if, NULL);
3817 			ip6_output_setdstifscope(data, ipsec_state.outgoing_if, NULL);
3818 		} else {
3819 			ip6_output_setsrcifscope(data, IFSCOPE_UNKNOWN, NULL);
3820 			ip6_output_setdstifscope(data, IFSCOPE_UNKNOWN, NULL);
3821 		}
3822 		ipsec_set_ip6oa_for_interface(pcb->ipsec_ifp, &ip6oa);
3823 
3824 		adv = &ip6oa.ip6oa_flowadv;
3825 
3826 		(void) ip6_output(data, NULL, &ro6, flags, NULL, NULL, &ip6oa);
3827 		data = NULL;
3828 
3829 		if (adv->code == FADV_FLOW_CONTROLLED || adv->code == FADV_SUSPENDED) {
3830 			error = ENOBUFS;
3831 			ifnet_disable_output(interface);
3832 		}
3833 
3834 		goto done;
3835 	}
3836 	default: {
3837 		os_log_error(OS_LOG_DEFAULT, "ipsec_output: Received unknown packet version %d.\n", ip_version);
3838 		error = EINVAL;
3839 		goto ipsec_output_err;
3840 	}
3841 	}
3842 
3843 done:
3844 	return error;
3845 
3846 ipsec_output_err:
3847 	if (data) {
3848 		mbuf_freem(data);
3849 	}
3850 	goto done;
3851 }
3852 
3853 static void
ipsec_start(ifnet_t interface)3854 ipsec_start(ifnet_t     interface)
3855 {
3856 	mbuf_t __single data;
3857 	struct ipsec_pcb *__single pcb = ifnet_softc(interface);
3858 
3859 	VERIFY(pcb != NULL);
3860 	for (;;) {
3861 		if (ifnet_dequeue(interface, &data) != 0) {
3862 			break;
3863 		}
3864 		if (ipsec_output(interface, data) != 0) {
3865 			break;
3866 		}
3867 	}
3868 }
3869 
3870 /* Network Interface functions */
3871 static errno_t
ipsec_demux(__unused ifnet_t interface,mbuf_t data,__unused char * frame_header,protocol_family_t * protocol)3872 ipsec_demux(__unused ifnet_t    interface,
3873     mbuf_t                          data,
3874     __unused char           *frame_header,
3875     protocol_family_t       *protocol)
3876 {
3877 	struct ip *ip;
3878 	u_int ip_version;
3879 
3880 	while (data != NULL && mbuf_len(data) < 1) {
3881 		data = mbuf_next(data);
3882 	}
3883 
3884 	if (data == NULL) {
3885 		return ENOENT;
3886 	}
3887 
3888 	ip = mtod(data, struct ip *);
3889 	ip_version = ip->ip_v;
3890 
3891 	switch (ip_version) {
3892 	case 4:
3893 		*protocol = PF_INET;
3894 		return 0;
3895 	case 6:
3896 		*protocol = PF_INET6;
3897 		return 0;
3898 	default:
3899 		*protocol = PF_UNSPEC;
3900 		break;
3901 	}
3902 
3903 	return 0;
3904 }
3905 
3906 static errno_t
ipsec_add_proto(__unused ifnet_t interface,protocol_family_t protocol,__unused const struct ifnet_demux_desc * demux_array,__unused u_int32_t demux_count)3907 ipsec_add_proto(__unused ifnet_t                                                interface,
3908     protocol_family_t                                               protocol,
3909     __unused const struct ifnet_demux_desc  *demux_array,
3910     __unused u_int32_t                                              demux_count)
3911 {
3912 	switch (protocol) {
3913 	case PF_INET:
3914 		return 0;
3915 	case PF_INET6:
3916 		return 0;
3917 	default:
3918 		break;
3919 	}
3920 
3921 	return ENOPROTOOPT;
3922 }
3923 
3924 static errno_t
ipsec_del_proto(__unused ifnet_t interface,__unused protocol_family_t protocol)3925 ipsec_del_proto(__unused ifnet_t                        interface,
3926     __unused protocol_family_t      protocol)
3927 {
3928 	return 0;
3929 }
3930 
/*
 * Interface ioctl handler.
 *
 * Supported commands:
 *   SIOCSIFMTU  - set the interface MTU.  When the pcb uses a netif nexus,
 *                 the MTU is additionally capped at the channel slot size so
 *                 every packet fits in a channel buffer.
 *   SIOCSIFFLAGS - accepted but a no-op here; ifioctl() handles it.
 *   SIOCSIFSUBFAMILY - set the interface subfamily (bluetooth / wifi /
 *                 quickrelay / default only).
 *   SIOCSIFPEEREGRESSFUNCTIONALTYPE - record the peer's egress functional
 *                 type (wifi-infra / cellular / wired / unknown only).
 *
 * Returns 0 on success, EINVAL for out-of-range values, EOPNOTSUPP for any
 * other command.
 */
static errno_t
ipsec_ioctl(ifnet_t interface,
    u_long command,
    void *data)
{
#if IPSEC_NEXUS
	struct ipsec_pcb *__single pcb = ifnet_softc(interface);
#endif
	errno_t result = 0;

	switch (command) {
	case SIOCSIFMTU: {
#if IPSEC_NEXUS
		if (pcb->ipsec_use_netif) {
			// Make sure we can fit packets in the channel buffers
			if (((uint64_t)((struct ifreq*)data)->ifr_mtu) > pcb->ipsec_slot_size) {
				result = EINVAL;
			} else {
				ifnet_set_mtu(interface, (uint32_t)((struct ifreq*)data)->ifr_mtu);
			}
		} else
#endif // IPSEC_NEXUS
		{
			/* Non-netif path: no slot-size bound to enforce. */
			ifnet_set_mtu(interface, ((struct ifreq*)data)->ifr_mtu);
		}
		break;
	}

	case SIOCSIFFLAGS:
		/* ifioctl() takes care of it */
		break;

	case SIOCSIFSUBFAMILY: {
		uint32_t subfamily;

		/* Only a fixed whitelist of subfamilies may be assigned. */
		subfamily = ((struct ifreq*)data)->ifr_type.ift_subfamily;
		switch (subfamily) {
		case IFRTYPE_SUBFAMILY_BLUETOOTH:
			interface->if_subfamily = IFNET_SUBFAMILY_BLUETOOTH;
			break;
		case IFRTYPE_SUBFAMILY_WIFI:
			interface->if_subfamily = IFNET_SUBFAMILY_WIFI;
			break;
		case IFRTYPE_SUBFAMILY_QUICKRELAY:
			interface->if_subfamily = IFNET_SUBFAMILY_QUICKRELAY;
			break;
		case IFRTYPE_SUBFAMILY_DEFAULT:
			interface->if_subfamily = IFNET_SUBFAMILY_DEFAULT;
			break;
		default:
			result = EINVAL;
			break;
		}
		break;
	}

	case SIOCSIFPEEREGRESSFUNCTIONALTYPE: {
		uint32_t peeregressinterfacetype;
		/* Validate before storing; unknown types are rejected. */
		peeregressinterfacetype = ((struct ifreq*)data)->ifr_ifru.ifru_peer_egress_functional_type;
		switch (peeregressinterfacetype) {
		case IFRTYPE_FUNCTIONAL_WIFI_INFRA:
		case IFRTYPE_FUNCTIONAL_CELLULAR:
		case IFRTYPE_FUNCTIONAL_WIRED:
		case IFRTYPE_FUNCTIONAL_UNKNOWN:
			interface->peer_egress_functional_type = peeregressinterfacetype;
			break;
		default:
			result = EINVAL;
			break;
		}
		break;
	}

	default:
		result = EOPNOTSUPP;
	}

	return result;
}
4010 
/*
 * ifnet detach-complete callback: the interface is fully detached, so drop
 * an ifnet reference, free the pcb, and dispose of the ifnet.  The free and
 * dispose happen under ipsec_lock — presumably to serialize against control
 * connect/disconnect paths that also take it; TODO(review): confirm against
 * ipsec_free_pcb() callers.
 */
static void
ipsec_detached(ifnet_t interface)
{
	struct ipsec_pcb *__single pcb = ifnet_softc(interface);

	(void)ifnet_release(interface);
	lck_mtx_lock(&ipsec_lock);
	ipsec_free_pcb(pcb, true);
	(void)ifnet_dispose(interface);
	lck_mtx_unlock(&ipsec_lock);
}
4022 
4023 /* Protocol Handlers */
4024 
4025 static errno_t
ipsec_proto_input(ifnet_t interface,protocol_family_t protocol,mbuf_t m,__unused char * frame_header)4026 ipsec_proto_input(ifnet_t interface,
4027     protocol_family_t     protocol,
4028     mbuf_t m,
4029     __unused char *frame_header)
4030 {
4031 	mbuf_pkthdr_setrcvif(m, interface);
4032 
4033 #if IPSEC_NEXUS
4034 	struct ipsec_pcb *__single pcb = ifnet_softc(interface);
4035 	if (!pcb->ipsec_use_netif)
4036 #endif // IPSEC_NEXUS
4037 	{
4038 		uint32_t af = 0;
4039 		struct ip *ip = mtod(m, struct ip *);
4040 		if (ip->ip_v == 4) {
4041 			af = AF_INET;
4042 		} else if (ip->ip_v == 6) {
4043 			af = AF_INET6;
4044 		}
4045 		bpf_tap_in(interface, DLT_NULL, m, &af, sizeof(af));
4046 		pktap_input(interface, protocol, m, NULL);
4047 	}
4048 
4049 	int32_t pktlen = m->m_pkthdr.len;
4050 	if (proto_input(protocol, m) != 0) {
4051 		ifnet_stat_increment_in(interface, 0, 0, 1);
4052 		m_freem(m);
4053 	} else {
4054 		ifnet_stat_increment_in(interface, 1, pktlen, 0);
4055 	}
4056 
4057 	return 0;
4058 }
4059 
4060 static errno_t
ipsec_proto_pre_output(__unused ifnet_t interface,protocol_family_t protocol,__unused mbuf_t * packet,__unused const struct sockaddr * dest,__unused void * route,__unused char * frame_type,__unused char * link_layer_dest)4061 ipsec_proto_pre_output(__unused ifnet_t interface,
4062     protocol_family_t    protocol,
4063     __unused mbuf_t              *packet,
4064     __unused const struct sockaddr *dest,
4065     __unused void *route,
4066     __unused char *frame_type,
4067     __unused char *link_layer_dest)
4068 {
4069 	*(protocol_family_t *)(void *)frame_type = protocol;
4070 	return 0;
4071 }
4072 
4073 static errno_t
ipsec_attach_proto(ifnet_t interface,protocol_family_t protocol)4074 ipsec_attach_proto(ifnet_t                              interface,
4075     protocol_family_t    protocol)
4076 {
4077 	struct ifnet_attach_proto_param proto;
4078 	errno_t                                                 result;
4079 
4080 	bzero(&proto, sizeof(proto));
4081 	proto.input = ipsec_proto_input;
4082 	proto.pre_output = ipsec_proto_pre_output;
4083 
4084 	result = ifnet_attach_protocol(interface, protocol, &proto);
4085 	if (result != 0 && result != EEXIST) {
4086 		os_log_error(OS_LOG_DEFAULT, "ipsec_attach_inet - ifnet_attach_protocol %d failed: %d\n",
4087 		    protocol, result);
4088 	}
4089 
4090 	return result;
4091 }
4092 
/*
 * Inject a packet (or a chain linked via m_nextpkt) into the interface's
 * inbound path, as if it had been received on the wire.
 *
 * Netif-nexus path: append the chain to the pcb's pending input chain under
 * ipsec_input_chain_lock and kick the rx ring so the nexus picks it up.
 * Returns ENXIO if the data path is stopped, ENOSPC if the pending chain is
 * already over if_ipsec_max_pending_input packets, 0 on success.
 *
 * Non-netif path: classify the packet with ipsec_demux() and deliver it
 * directly via ipsec_proto_input().
 */
errno_t
ipsec_inject_inbound_packet(ifnet_t     interface,
    mbuf_t      packet)
{
#if IPSEC_NEXUS
	struct ipsec_pcb *__single pcb = ifnet_softc(interface);

	if (pcb->ipsec_use_netif) {
		/* Refuse injection once the data path has been stopped. */
		if (!ipsec_data_move_begin(pcb)) {
			os_log_info(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__,
			    if_name(pcb->ipsec_ifp));
			return ENXIO;
		}

		/* Lock order: pcb rwlock (shared), then input chain mutex. */
		lck_rw_lock_shared(&pcb->ipsec_pcb_lock);

		lck_mtx_lock(&pcb->ipsec_input_chain_lock);

		/* Back-pressure: drop when too many packets are pending. */
		if (pcb->ipsec_input_chain_count > (u_int32_t)if_ipsec_max_pending_input) {
			lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
			lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
			ipsec_data_move_end(pcb);
			return ENOSPC;
		}

		/* Splice the incoming chain onto the tail of the pending chain. */
		if (pcb->ipsec_input_chain != NULL) {
			pcb->ipsec_input_chain_last->m_nextpkt = packet;
		} else {
			pcb->ipsec_input_chain = packet;
		}
		pcb->ipsec_input_chain_count++;
		/* Walk to the end of the injected chain, counting each packet. */
		while (packet->m_nextpkt) {
			VERIFY(packet != packet->m_nextpkt);
			packet = packet->m_nextpkt;
			pcb->ipsec_input_chain_count++;
		}
		pcb->ipsec_input_chain_last = packet;
		lck_mtx_unlock(&pcb->ipsec_input_chain_lock);

		/* Snapshot the ring pointer while still holding the pcb lock. */
		kern_channel_ring_t __single rx_ring = pcb->ipsec_netif_rxring[0];
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);

		/* Notify outside the locks to avoid holding them across the kick. */
		if (rx_ring != NULL) {
			kern_channel_notify(rx_ring, 0);
		}

		ipsec_data_move_end(pcb);
		return 0;
	} else
#endif // IPSEC_NEXUS
	{
		errno_t error;
		protocol_family_t protocol;
		if ((error = ipsec_demux(interface, packet, NULL, &protocol)) != 0) {
			return error;
		}

		return ipsec_proto_input(interface, protocol, packet, NULL);
	}
}
4153 
4154 void
ipsec_set_pkthdr_for_interface(ifnet_t interface,mbuf_t packet,int family,uint32_t flowid)4155 ipsec_set_pkthdr_for_interface(ifnet_t interface, mbuf_t packet, int family,
4156     uint32_t flowid)
4157 {
4158 #pragma unused (flowid)
4159 	if (packet != NULL && interface != NULL) {
4160 		struct ipsec_pcb *__single pcb = ifnet_softc(interface);
4161 		if (pcb != NULL) {
4162 			/* Set traffic class, set flow */
4163 			m_set_service_class(packet, pcb->ipsec_output_service_class);
4164 			packet->m_pkthdr.pkt_flowsrc = FLOWSRC_IFNET;
4165 #if SKYWALK
4166 			packet->m_pkthdr.pkt_mpriv_srcid = interface->if_flowhash;
4167 			packet->m_pkthdr.pkt_flowid = flowid;
4168 #else /* !SKYWALK */
4169 			packet->m_pkthdr.pkt_flowid = interface->if_flowhash;
4170 #endif /* !SKYWALK */
4171 			if (family == AF_INET) {
4172 				struct ip *ip = mtod(packet, struct ip *);
4173 				packet->m_pkthdr.pkt_proto = ip->ip_p;
4174 			} else if (family == AF_INET6) {
4175 				struct ip6_hdr *ip6 = mtod(packet, struct ip6_hdr *);
4176 				packet->m_pkthdr.pkt_proto = ip6->ip6_nxt;
4177 			}
4178 			packet->m_pkthdr.pkt_flags = (PKTF_FLOW_ID | PKTF_FLOW_ADV | PKTF_FLOW_LOCALSRC);
4179 		}
4180 	}
4181 }
4182 
4183 void
ipsec_set_ipoa_for_interface(ifnet_t interface,struct ip_out_args * ipoa)4184 ipsec_set_ipoa_for_interface(ifnet_t interface, struct ip_out_args *ipoa)
4185 {
4186 	struct ipsec_pcb *__single pcb;
4187 
4188 	if (interface == NULL || ipoa == NULL) {
4189 		return;
4190 	}
4191 	pcb = ifnet_softc(interface);
4192 
4193 	if (net_qos_policy_restricted == 0) {
4194 		ipoa->ipoa_flags |= IPOAF_QOSMARKING_ALLOWED;
4195 		ipoa->ipoa_sotc = so_svc2tc(pcb->ipsec_output_service_class);
4196 	} else if (pcb->ipsec_output_service_class != MBUF_SC_VO ||
4197 	    net_qos_policy_restrict_avapps != 0) {
4198 		ipoa->ipoa_flags &= ~IPOAF_QOSMARKING_ALLOWED;
4199 	} else {
4200 		ipoa->ipoa_flags |= IP6OAF_QOSMARKING_ALLOWED;
4201 		ipoa->ipoa_sotc = SO_TC_VO;
4202 	}
4203 }
4204 
4205 void
ipsec_set_ip6oa_for_interface(ifnet_t interface,struct ip6_out_args * ip6oa)4206 ipsec_set_ip6oa_for_interface(ifnet_t interface, struct ip6_out_args *ip6oa)
4207 {
4208 	struct ipsec_pcb *__single pcb;
4209 
4210 	if (interface == NULL || ip6oa == NULL) {
4211 		return;
4212 	}
4213 	pcb = ifnet_softc(interface);
4214 
4215 	if (net_qos_policy_restricted == 0) {
4216 		ip6oa->ip6oa_flags |= IPOAF_QOSMARKING_ALLOWED;
4217 		ip6oa->ip6oa_sotc = so_svc2tc(pcb->ipsec_output_service_class);
4218 	} else if (pcb->ipsec_output_service_class != MBUF_SC_VO ||
4219 	    net_qos_policy_restrict_avapps != 0) {
4220 		ip6oa->ip6oa_flags &= ~IPOAF_QOSMARKING_ALLOWED;
4221 	} else {
4222 		ip6oa->ip6oa_flags |= IP6OAF_QOSMARKING_ALLOWED;
4223 		ip6oa->ip6oa_sotc = SO_TC_VO;
4224 	}
4225 }
4226 
4227 static boolean_t
ipsec_data_move_begin(struct ipsec_pcb * pcb)4228 ipsec_data_move_begin(struct ipsec_pcb *pcb)
4229 {
4230 	boolean_t ret = 0;
4231 
4232 	lck_mtx_lock_spin(&pcb->ipsec_pcb_data_move_lock);
4233 	if ((ret = IPSEC_IS_DATA_PATH_READY(pcb))) {
4234 		pcb->ipsec_pcb_data_move++;
4235 	}
4236 	lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
4237 
4238 	return ret;
4239 }
4240 
4241 static void
ipsec_data_move_end(struct ipsec_pcb * pcb)4242 ipsec_data_move_end(struct ipsec_pcb *pcb)
4243 {
4244 	lck_mtx_lock_spin(&pcb->ipsec_pcb_data_move_lock);
4245 	VERIFY(pcb->ipsec_pcb_data_move > 0);
4246 	/*
4247 	 * if there's no more thread moving data, wakeup any
4248 	 * drainers that's blocked waiting for this.
4249 	 */
4250 	if (--pcb->ipsec_pcb_data_move == 0 && pcb->ipsec_pcb_drainers > 0) {
4251 		wakeup(&(pcb->ipsec_pcb_data_move));
4252 	}
4253 	lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
4254 }
4255 
/*
 * Block until no thread remains in the data path.  The caller must have
 * already cleared the data-path-ready flag (see ipsec_wait_data_move_drain),
 * which guarantees ipsec_pcb_data_move can only decrease from here on.
 * Sleeps on ipsec_pcb_data_move; woken by ipsec_data_move_end() when the
 * count reaches zero.
 */
static void
ipsec_data_move_drain(struct ipsec_pcb *pcb)
{
	lck_mtx_lock(&pcb->ipsec_pcb_data_move_lock);
	/* data path must already be marked as not ready */
	VERIFY(!IPSEC_IS_DATA_PATH_READY(pcb));
	pcb->ipsec_pcb_drainers++;
	/* msleep drops and re-takes the lock around each wait. */
	while (pcb->ipsec_pcb_data_move != 0) {
		(void)msleep(&(pcb->ipsec_pcb_data_move), &pcb->ipsec_pcb_data_move_lock,
		    (PZERO - 1), __func__, NULL);
	}
	VERIFY(!IPSEC_IS_DATA_PATH_READY(pcb));
	VERIFY(pcb->ipsec_pcb_drainers > 0);
	pcb->ipsec_pcb_drainers--;
	lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
}
4272 
4273 static void
ipsec_wait_data_move_drain(struct ipsec_pcb * pcb)4274 ipsec_wait_data_move_drain(struct ipsec_pcb *pcb)
4275 {
4276 	/*
4277 	 * Mark the data path as not usable.
4278 	 */
4279 	lck_mtx_lock(&pcb->ipsec_pcb_data_move_lock);
4280 	IPSEC_CLR_DATA_PATH_READY(pcb);
4281 	lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
4282 
4283 	/* Wait until all threads in the data paths are done. */
4284 	ipsec_data_move_drain(pcb);
4285 }
4286