1 /*
2 * Copyright (c) 2012-2024 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29
30 #include <sys/systm.h>
31 #include <sys/kern_control.h>
32 #include <net/kpi_protocol.h>
33 #include <net/kpi_interface.h>
34 #include <sys/socket.h>
35 #include <sys/socketvar.h>
36 #include <net/if.h>
37 #include <net/if_types.h>
38 #include <net/bpf.h>
39 #include <net/if_ipsec.h>
40 #include <sys/mbuf.h>
41 #include <sys/sockio.h>
42 #include <netinet/in.h>
43 #include <netinet/ip6.h>
44 #include <netinet6/in6_var.h>
45 #include <netinet6/ip6_var.h>
46 #include <sys/kauth.h>
47 #include <netinet6/ipsec.h>
48 #include <netinet6/ipsec6.h>
49 #include <netinet6/esp.h>
50 #include <netinet6/esp6.h>
51 #include <netinet/ip.h>
52 #include <net/flowadv.h>
53 #include <net/necp.h>
54 #include <netkey/key.h>
55 #include <net/pktap.h>
56 #include <kern/zalloc.h>
57 #include <os/log.h>
58
59 #if SKYWALK
60 #include <skywalk/os_skywalk_private.h>
61 #include <skywalk/nexus/flowswitch/nx_flowswitch.h>
62 #include <skywalk/nexus/netif/nx_netif.h>
63 #define IPSEC_NEXUS 1
64 #else // SKYWALK
65 #define IPSEC_NEXUS 0
66 #endif // SKYWALK
67
68 extern int net_qos_policy_restricted;
69 extern int net_qos_policy_restrict_avapps;
70
71 /* Kernel Control functions */
72 static errno_t ipsec_ctl_setup(u_int32_t *unit, void **unitinfo);
73 static errno_t ipsec_ctl_bind(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
74 void **unitinfo);
75 static errno_t ipsec_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
76 void **unitinfo);
77 static errno_t ipsec_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t unit,
78 void *unitinfo);
79 static errno_t ipsec_ctl_send(kern_ctl_ref kctlref, u_int32_t unit,
80 void *unitinfo, mbuf_t m, int flags);
81 static errno_t ipsec_ctl_getopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo,
82 int opt, void *__sized_by(*len)data, size_t *len);
83 static errno_t ipsec_ctl_setopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo,
84 int opt, void *__sized_by(len)data, size_t len);
85
86 /* Network Interface functions */
87 static void ipsec_start(ifnet_t interface);
88 static errno_t ipsec_output(ifnet_t interface, mbuf_t data);
89 static errno_t ipsec_demux(ifnet_t interface, mbuf_t data, char *frame_header,
90 protocol_family_t *protocol);
91 static errno_t ipsec_add_proto(ifnet_t interface, protocol_family_t protocol,
92 const struct ifnet_demux_desc *demux_array,
93 u_int32_t demux_count);
94 static errno_t ipsec_del_proto(ifnet_t interface, protocol_family_t protocol);
95 static errno_t ipsec_ioctl(ifnet_t interface, u_long cmd, void *data);
96 static void ipsec_detached(ifnet_t interface);
97
98 /* Protocol handlers */
99 static errno_t ipsec_attach_proto(ifnet_t interface, protocol_family_t proto);
100 static errno_t ipsec_proto_input(ifnet_t interface, protocol_family_t protocol,
101 mbuf_t m, char *frame_header);
102 static errno_t ipsec_proto_pre_output(ifnet_t interface, protocol_family_t protocol,
103 mbuf_t *packet, const struct sockaddr *dest, void *route,
104 char *frame_type, char *link_layer_dest);
105
106 static kern_ctl_ref ipsec_kctlref;
107 static LCK_ATTR_DECLARE(ipsec_lck_attr, 0, 0);
108 static LCK_GRP_DECLARE(ipsec_lck_grp, "ipsec");
109 static LCK_MTX_DECLARE_ATTR(ipsec_lock, &ipsec_lck_grp, &ipsec_lck_attr);
110
111 #if IPSEC_NEXUS
112
113 SYSCTL_DECL(_net_ipsec);
114 SYSCTL_NODE(_net, OID_AUTO, ipsec, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "IPsec");
115 static int if_ipsec_verify_interface_creation = 0;
116 SYSCTL_INT(_net_ipsec, OID_AUTO, verify_interface_creation, CTLFLAG_RW | CTLFLAG_LOCKED, &if_ipsec_verify_interface_creation, 0, "");
117
118 #define IPSEC_IF_VERIFY(_e) if (__improbable(if_ipsec_verify_interface_creation)) { VERIFY(_e); }
119
120 #define IPSEC_IF_DEFAULT_SLOT_SIZE 2048
121 #define IPSEC_IF_DEFAULT_RING_SIZE 64
122 #define IPSEC_IF_DEFAULT_TX_FSW_RING_SIZE 64
123 #define IPSEC_IF_DEFAULT_RX_FSW_RING_SIZE 128
124 #define IPSEC_IF_DEFAULT_BUF_SEG_SIZE skmem_usr_buf_seg_size
125
126 #define IPSEC_IF_WMM_RING_COUNT NEXUS_NUM_WMM_QUEUES
127 #define IPSEC_IF_MAX_RING_COUNT IPSEC_IF_WMM_RING_COUNT
128 #define IPSEC_NETIF_WMM_TX_RING_COUNT IPSEC_IF_WMM_RING_COUNT
129 #define IPSEC_NETIF_WMM_RX_RING_COUNT 1
130 #define IPSEC_NETIF_MAX_TX_RING_COUNT IPSEC_NETIF_WMM_TX_RING_COUNT
131 #define IPSEC_NETIF_MAX_RX_RING_COUNT IPSEC_NETIF_WMM_RX_RING_COUNT
132
133 #define IPSEC_IF_MIN_RING_SIZE 8
134 #define IPSEC_IF_MAX_RING_SIZE 1024
135
136 #define IPSEC_IF_MIN_SLOT_SIZE 1024
137 #define IPSEC_IF_MAX_SLOT_SIZE (16 * 1024)
138
139 #define IPSEC_DEFAULT_MAX_PENDING_INPUT_COUNT 512
140
141 #define IPSEC_KPIPE_FLAG_WAKE_PKT 0x01
142
143 static int if_ipsec_max_pending_input = IPSEC_DEFAULT_MAX_PENDING_INPUT_COUNT;
144
145 static int sysctl_if_ipsec_ring_size SYSCTL_HANDLER_ARGS;
146 static int sysctl_if_ipsec_tx_fsw_ring_size SYSCTL_HANDLER_ARGS;
147 static int sysctl_if_ipsec_rx_fsw_ring_size SYSCTL_HANDLER_ARGS;
148
149 static int if_ipsec_ring_size = IPSEC_IF_DEFAULT_RING_SIZE;
150 static int if_ipsec_tx_fsw_ring_size = IPSEC_IF_DEFAULT_TX_FSW_RING_SIZE;
151 static int if_ipsec_rx_fsw_ring_size = IPSEC_IF_DEFAULT_RX_FSW_RING_SIZE;
152
153 SYSCTL_INT(_net_ipsec, OID_AUTO, max_pending_input, CTLFLAG_LOCKED | CTLFLAG_RW, &if_ipsec_max_pending_input, 0, "");
154 SYSCTL_PROC(_net_ipsec, OID_AUTO, ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
155 &if_ipsec_ring_size, IPSEC_IF_DEFAULT_RING_SIZE, &sysctl_if_ipsec_ring_size, "I", "");
156 SYSCTL_PROC(_net_ipsec, OID_AUTO, tx_fsw_ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
157 &if_ipsec_tx_fsw_ring_size, IPSEC_IF_DEFAULT_TX_FSW_RING_SIZE, &sysctl_if_ipsec_tx_fsw_ring_size, "I", "");
158 SYSCTL_PROC(_net_ipsec, OID_AUTO, rx_fsw_ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
159 &if_ipsec_rx_fsw_ring_size, IPSEC_IF_DEFAULT_RX_FSW_RING_SIZE, &sysctl_if_ipsec_rx_fsw_ring_size, "I", "");
160
161 static int if_ipsec_debug = 0;
162 SYSCTL_INT(_net_ipsec, OID_AUTO, debug, CTLFLAG_LOCKED | CTLFLAG_RW, &if_ipsec_debug, 0, "");
163
164 static errno_t
165 ipsec_register_nexus(void);
166
167 typedef struct ipsec_nx {
168 uuid_t if_provider;
169 uuid_t if_instance;
170 uuid_t fsw_provider;
171 uuid_t fsw_instance;
172 uuid_t fsw_device;
173 uuid_t fsw_agent;
174 } *ipsec_nx_t;
175
176 static nexus_controller_t ipsec_ncd;
177 static int ipsec_ncd_refcount;
178 static uuid_t ipsec_kpipe_uuid;
179
180 #endif // IPSEC_NEXUS
181
182 /* Control block allocated for each kernel control connection */
/*
 * Control block allocated for each kernel control connection.
 * One pcb backs one ipsec interface.
 *
 * Locking summary (from the field comments below):
 *  - ipsec_pcb_lock (rw) protects pcb state such as ipsec_kpipe_count;
 *  - ipsec_pcb_data_move_lock protects ipsec_pcb_data_move and
 *    ipsec_pcb_drainers;
 *  - ipsec_input_chain_lock protects the input mbuf chain and must be
 *    taken AFTER the PCB lock when both are held.
 */
struct ipsec_pcb {
	TAILQ_ENTRY(ipsec_pcb) ipsec_chain;     /* linkage on the global ipsec_head list */
	kern_ctl_ref ipsec_ctlref;              /* owning kernel control */
	ifnet_t ipsec_ifp;                      /* attached network interface */
	u_int32_t ipsec_unit;                   /* control unit; 0 once the ctl disconnects (see ipsec_interface_isvalid) */
	u_int32_t ipsec_unique_id;              /* presumably a stable per-pcb identifier — TODO confirm against allocator */
	// These external flags can be set with IPSEC_OPT_FLAGS
	u_int32_t ipsec_external_flags;
	// These internal flags are only used within this driver
	u_int32_t ipsec_internal_flags;         /* e.g. IPSEC_FLAGS_KPIPE_ALLOCATED */
	u_int32_t ipsec_input_frag_size;        /* input fragment size; meaningful when ipsec_frag_size_set */
	bool ipsec_frag_size_set;
	int ipsec_ext_ifdata_stats;             /* nonzero: stats kept externally, driver skips ifnet_stat_increment_* */
	mbuf_svc_class_t ipsec_output_service_class;
	char ipsec_if_xname[IFXNAMSIZ];         /* external interface name, used in logging */
	char ipsec_unique_name[IFXNAMSIZ];
	// PCB lock protects state fields, like ipsec_kpipe_count
	decl_lck_rw_data(, ipsec_pcb_lock);
	// lock to protect ipsec_pcb_data_move & ipsec_pcb_drainers
	decl_lck_mtx_data(, ipsec_pcb_data_move_lock);
	u_int32_t ipsec_pcb_data_move; /* number of data moving contexts */
	u_int32_t ipsec_pcb_drainers; /* number of threads waiting to drain */
	u_int32_t ipsec_pcb_data_path_state; /* internal state of interface data path */
	ipsec_dscp_mapping_t ipsec_output_dscp_mapping;

#if IPSEC_NEXUS
	lck_mtx_t ipsec_input_chain_lock;
	lck_mtx_t ipsec_kpipe_encrypt_lock;     /* serializes ipsec_encrypt_kpipe_pkt() calls */
	lck_mtx_t ipsec_kpipe_decrypt_lock;     /* serializes the kpipe decrypt path */
	struct mbuf * ipsec_input_chain;
	struct mbuf * ipsec_input_chain_last;
	u_int32_t ipsec_input_chain_count;
	// Input chain lock protects the list of input mbufs
	// The input chain lock must be taken AFTER the PCB lock if both are held
	struct ipsec_nx ipsec_nx;               /* nexus / flowswitch instance uuids */
	u_int32_t ipsec_kpipe_count;            /* number of kpipe rings; IPSEC_IF_WMM_RING_COUNT in WMM mode */
	pid_t ipsec_kpipe_pid;
	uuid_t ipsec_kpipe_proc_uuid;
	uuid_t ipsec_kpipe_uuid[IPSEC_IF_MAX_RING_COUNT];
	void * ipsec_kpipe_rxring[IPSEC_IF_MAX_RING_COUNT];
	void * ipsec_kpipe_txring[IPSEC_IF_MAX_RING_COUNT];
	kern_pbufpool_t ipsec_kpipe_pp;
	u_int32_t ipsec_kpipe_tx_ring_size;
	u_int32_t ipsec_kpipe_rx_ring_size;

	kern_nexus_t ipsec_netif_nexus;         /* set in ipsec_netif_prepare(), cleared in ipsec_nexus_disconnected() */
	kern_pbufpool_t ipsec_netif_pp;
	void * ipsec_netif_rxring[IPSEC_NETIF_MAX_RX_RING_COUNT];
	void * ipsec_netif_txring[IPSEC_NETIF_MAX_TX_RING_COUNT];
	uint64_t ipsec_netif_txring_size;

	u_int32_t ipsec_slot_size;              /* max packet length accepted on the kpipe sync path */
	u_int32_t ipsec_netif_ring_size;
	u_int32_t ipsec_tx_fsw_ring_size;
	u_int32_t ipsec_rx_fsw_ring_size;
	bool ipsec_use_netif;
	bool ipsec_needs_netagent;
#endif // IPSEC_NEXUS
};
242
243 /* These are internal flags not exposed outside this file */
244 #define IPSEC_FLAGS_KPIPE_ALLOCATED 1
245
246 /* data movement refcounting functions */
247 static boolean_t ipsec_data_move_begin(struct ipsec_pcb *pcb);
248 static void ipsec_data_move_end(struct ipsec_pcb *pcb);
249 static void ipsec_wait_data_move_drain(struct ipsec_pcb *pcb);
250
251 /* Data path states */
252 #define IPSEC_PCB_DATA_PATH_READY 0x1
253
254 /* Macros to set/clear/test data path states */
255 #define IPSEC_SET_DATA_PATH_READY(_pcb) ((_pcb)->ipsec_pcb_data_path_state |= IPSEC_PCB_DATA_PATH_READY)
256 #define IPSEC_CLR_DATA_PATH_READY(_pcb) ((_pcb)->ipsec_pcb_data_path_state &= ~IPSEC_PCB_DATA_PATH_READY)
257 #define IPSEC_IS_DATA_PATH_READY(_pcb) (((_pcb)->ipsec_pcb_data_path_state & IPSEC_PCB_DATA_PATH_READY) != 0)
258
259 #if IPSEC_NEXUS
260 /* Macros to clear/set/test flags. */
261 static inline void
ipsec_flag_set(struct ipsec_pcb * pcb,uint32_t flag)262 ipsec_flag_set(struct ipsec_pcb *pcb, uint32_t flag)
263 {
264 pcb->ipsec_internal_flags |= flag;
265 }
266 static inline void
ipsec_flag_clr(struct ipsec_pcb * pcb,uint32_t flag)267 ipsec_flag_clr(struct ipsec_pcb *pcb, uint32_t flag)
268 {
269 pcb->ipsec_internal_flags &= ~flag;
270 }
271
272 static inline bool
ipsec_flag_isset(struct ipsec_pcb * pcb,uint32_t flag)273 ipsec_flag_isset(struct ipsec_pcb *pcb, uint32_t flag)
274 {
275 return !!(pcb->ipsec_internal_flags & flag);
276 }
277 #endif // IPSEC_NEXUS
278
279 TAILQ_HEAD(ipsec_list, ipsec_pcb) ipsec_head;
280
281 static KALLOC_TYPE_DEFINE(ipsec_pcb_zone, struct ipsec_pcb, NET_KT_DEFAULT);
282
283 #define IPSECQ_MAXLEN 256
284
285 #if IPSEC_NEXUS
286 static int
287 sysctl_if_ipsec_ring_size SYSCTL_HANDLER_ARGS
288 {
289 #pragma unused(arg1, arg2)
290 int value = if_ipsec_ring_size;
291
292 int error = sysctl_handle_int(oidp, &value, 0, req);
293 if (error || !req->newptr) {
294 return error;
295 }
296
297 if (value < IPSEC_IF_MIN_RING_SIZE ||
298 value > IPSEC_IF_MAX_RING_SIZE) {
299 return EINVAL;
300 }
301
302 if_ipsec_ring_size = value;
303
304 return 0;
305 }
306
307 static int
308 sysctl_if_ipsec_tx_fsw_ring_size SYSCTL_HANDLER_ARGS
309 {
310 #pragma unused(arg1, arg2)
311 int value = if_ipsec_tx_fsw_ring_size;
312
313 int error = sysctl_handle_int(oidp, &value, 0, req);
314 if (error || !req->newptr) {
315 return error;
316 }
317
318 if (value < IPSEC_IF_MIN_RING_SIZE ||
319 value > IPSEC_IF_MAX_RING_SIZE) {
320 return EINVAL;
321 }
322
323 if_ipsec_tx_fsw_ring_size = value;
324
325 return 0;
326 }
327
328 static int
329 sysctl_if_ipsec_rx_fsw_ring_size SYSCTL_HANDLER_ARGS
330 {
331 #pragma unused(arg1, arg2)
332 int value = if_ipsec_rx_fsw_ring_size;
333
334 int error = sysctl_handle_int(oidp, &value, 0, req);
335 if (error || !req->newptr) {
336 return error;
337 }
338
339 if (value < IPSEC_IF_MIN_RING_SIZE ||
340 value > IPSEC_IF_MAX_RING_SIZE) {
341 return EINVAL;
342 }
343
344 if_ipsec_rx_fsw_ring_size = value;
345
346 return 0;
347 }
348
349
350 static inline bool
ipsec_in_wmm_mode(struct ipsec_pcb * pcb)351 ipsec_in_wmm_mode(struct ipsec_pcb *pcb)
352 {
353 return pcb->ipsec_kpipe_count == IPSEC_IF_WMM_RING_COUNT;
354 }
355
356 #endif // IPSEC_NEXUS
357
358 errno_t
ipsec_register_control(void)359 ipsec_register_control(void)
360 {
361 struct kern_ctl_reg kern_ctl;
362 errno_t result = 0;
363
364 #if IPSEC_NEXUS
365 ipsec_register_nexus();
366 #endif // IPSEC_NEXUS
367
368 TAILQ_INIT(&ipsec_head);
369
370 bzero(&kern_ctl, sizeof(kern_ctl));
371 strlcpy(kern_ctl.ctl_name, IPSEC_CONTROL_NAME, sizeof(kern_ctl.ctl_name));
372 kern_ctl.ctl_name[sizeof(kern_ctl.ctl_name) - 1] = 0;
373 kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED | CTL_FLAG_REG_SETUP; /* Require root */
374 kern_ctl.ctl_sendsize = 64 * 1024;
375 kern_ctl.ctl_recvsize = 64 * 1024;
376 kern_ctl.ctl_setup = ipsec_ctl_setup;
377 kern_ctl.ctl_bind = ipsec_ctl_bind;
378 kern_ctl.ctl_connect = ipsec_ctl_connect;
379 kern_ctl.ctl_disconnect = ipsec_ctl_disconnect;
380 kern_ctl.ctl_send = ipsec_ctl_send;
381 kern_ctl.ctl_setopt = ipsec_ctl_setopt;
382 kern_ctl.ctl_getopt = ipsec_ctl_getopt;
383
384 result = ctl_register(&kern_ctl, &ipsec_kctlref);
385 if (result != 0) {
386 os_log_error(OS_LOG_DEFAULT, "ipsec_register_control - ctl_register failed: %d\n", result);
387 return result;
388 }
389
390 /* Register the protocol plumbers */
391 if ((result = proto_register_plumber(PF_INET, IFNET_FAMILY_IPSEC,
392 ipsec_attach_proto, NULL)) != 0) {
393 os_log_error(OS_LOG_DEFAULT, "ipsec_register_control - proto_register_plumber(PF_INET, IFNET_FAMILY_IPSEC) failed: %d\n",
394 result);
395 ctl_deregister(ipsec_kctlref);
396 return result;
397 }
398
399 /* Register the protocol plumbers */
400 if ((result = proto_register_plumber(PF_INET6, IFNET_FAMILY_IPSEC,
401 ipsec_attach_proto, NULL)) != 0) {
402 proto_unregister_plumber(PF_INET, IFNET_FAMILY_IPSEC);
403 ctl_deregister(ipsec_kctlref);
404 os_log_error(OS_LOG_DEFAULT, "ipsec_register_control - proto_register_plumber(PF_INET6, IFNET_FAMILY_IPSEC) failed: %d\n",
405 result);
406 return result;
407 }
408
409 return 0;
410 }
411
412 /* Helpers */
413 int
ipsec_interface_isvalid(ifnet_t interface)414 ipsec_interface_isvalid(ifnet_t interface)
415 {
416 struct ipsec_pcb *__single pcb = NULL;
417
418 if (interface == NULL) {
419 return 0;
420 }
421
422 pcb = ifnet_softc(interface);
423
424 if (pcb == NULL) {
425 return 0;
426 }
427
428 /* When ctl disconnects, ipsec_unit is set to 0 */
429 if (pcb->ipsec_unit == 0) {
430 return 0;
431 }
432
433 return 1;
434 }
435
436 #if IPSEC_NEXUS
437 boolean_t
ipsec_interface_needs_netagent(ifnet_t interface)438 ipsec_interface_needs_netagent(ifnet_t interface)
439 {
440 struct ipsec_pcb *__single pcb = NULL;
441
442 if (interface == NULL) {
443 return FALSE;
444 }
445
446 pcb = ifnet_softc(interface);
447
448 if (pcb == NULL) {
449 return FALSE;
450 }
451
452 return pcb->ipsec_needs_netagent == true;
453 }
454 #endif // IPSEC_NEXUS
455
456 static errno_t
ipsec_ifnet_set_attrs(ifnet_t ifp)457 ipsec_ifnet_set_attrs(ifnet_t ifp)
458 {
459 /* Set flags and additional information. */
460 ifnet_set_mtu(ifp, 1500);
461 ifnet_set_flags(ifp, IFF_UP | IFF_MULTICAST | IFF_POINTOPOINT, 0xffff);
462
463 /* The interface must generate its own IPv6 LinkLocal address,
464 * if possible following the recommendation of RFC2472 to the 64bit interface ID
465 */
466 ifnet_set_eflags(ifp, IFEF_NOAUTOIPV6LL, IFEF_NOAUTOIPV6LL);
467
468 #if !IPSEC_NEXUS
469 /* Reset the stats in case as the interface may have been recycled */
470 struct ifnet_stats_param stats;
471 bzero(&stats, sizeof(struct ifnet_stats_param));
472 ifnet_set_stat(ifp, &stats);
473 #endif // !IPSEC_NEXUS
474
475 return 0;
476 }
477
478 #if IPSEC_NEXUS
479
480 static uuid_t ipsec_nx_dom_prov;
481
/* Nexus domain provider init callback: nothing to set up for ipsec. */
static errno_t
ipsec_nxdp_init(__unused kern_nexus_domain_provider_t domprov)
{
	return 0;
}
487
/* Nexus domain provider fini callback: nothing to tear down. */
static void
ipsec_nxdp_fini(__unused kern_nexus_domain_provider_t domprov)
{
	// Ignore
}
493
494 static errno_t
ipsec_register_nexus(void)495 ipsec_register_nexus(void)
496 {
497 const struct kern_nexus_domain_provider_init dp_init = {
498 .nxdpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
499 .nxdpi_flags = 0,
500 .nxdpi_init = ipsec_nxdp_init,
501 .nxdpi_fini = ipsec_nxdp_fini
502 };
503 nexus_domain_provider_name_t domain_provider_name = "com.apple.ipsec";
504 errno_t err = 0;
505
506 /* ipsec_nxdp_init() is called before this function returns */
507 err = kern_nexus_register_domain_provider(NEXUS_TYPE_NET_IF,
508 domain_provider_name,
509 &dp_init, sizeof(dp_init),
510 &ipsec_nx_dom_prov);
511 if (err != 0) {
512 os_log_error(OS_LOG_DEFAULT, "%s: failed to register domain provider\n", __func__);
513 return err;
514 }
515 return 0;
516 }
517
518 static errno_t
ipsec_netif_prepare(kern_nexus_t nexus,ifnet_t ifp)519 ipsec_netif_prepare(kern_nexus_t nexus, ifnet_t ifp)
520 {
521 struct ipsec_pcb *__single pcb = kern_nexus_get_context(nexus);
522 pcb->ipsec_netif_nexus = nexus;
523 return ipsec_ifnet_set_attrs(ifp);
524 }
525
526 static errno_t
ipsec_nexus_pre_connect(kern_nexus_provider_t nxprov,proc_t p,kern_nexus_t nexus,nexus_port_t nexus_port,kern_channel_t channel,void ** ch_ctx)527 ipsec_nexus_pre_connect(kern_nexus_provider_t nxprov,
528 proc_t p, kern_nexus_t nexus,
529 nexus_port_t nexus_port, kern_channel_t channel, void **ch_ctx)
530 {
531 #pragma unused(nxprov, p)
532 #pragma unused(nexus, nexus_port, channel, ch_ctx)
533 return 0;
534 }
535
536 static errno_t
ipsec_nexus_connected(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel)537 ipsec_nexus_connected(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
538 kern_channel_t channel)
539 {
540 #pragma unused(nxprov, channel)
541 struct ipsec_pcb *__single pcb = kern_nexus_get_context(nexus);
542 boolean_t ok = ifnet_get_ioref(pcb->ipsec_ifp);
543 /* Mark the data path as ready */
544 if (ok) {
545 lck_mtx_lock(&pcb->ipsec_pcb_data_move_lock);
546 IPSEC_SET_DATA_PATH_READY(pcb);
547 lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
548 }
549 return ok ? 0 : ENXIO;
550 }
551
552 static void
ipsec_nexus_pre_disconnect(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel)553 ipsec_nexus_pre_disconnect(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
554 kern_channel_t channel)
555 {
556 #pragma unused(nxprov, channel)
557 struct ipsec_pcb *__single pcb = kern_nexus_get_context(nexus);
558
559 VERIFY(pcb->ipsec_kpipe_count != 0);
560
561 /* Wait until all threads in the data paths are done. */
562 ipsec_wait_data_move_drain(pcb);
563 }
564
565 static void
ipsec_netif_pre_disconnect(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel)566 ipsec_netif_pre_disconnect(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
567 kern_channel_t channel)
568 {
569 #pragma unused(nxprov, channel)
570 struct ipsec_pcb *__single pcb = kern_nexus_get_context(nexus);
571
572 /* Wait until all threads in the data paths are done. */
573 ipsec_wait_data_move_drain(pcb);
574 }
575
576 static void
ipsec_nexus_disconnected(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel)577 ipsec_nexus_disconnected(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
578 kern_channel_t channel)
579 {
580 #pragma unused(nxprov, channel)
581 struct ipsec_pcb *__single pcb = kern_nexus_get_context(nexus);
582 if (pcb->ipsec_netif_nexus == nexus) {
583 pcb->ipsec_netif_nexus = NULL;
584 }
585 ifnet_decr_iorefcnt(pcb->ipsec_ifp);
586 }
587
588 static errno_t
ipsec_kpipe_ring_init(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel,kern_channel_ring_t ring,boolean_t is_tx_ring,void ** ring_ctx)589 ipsec_kpipe_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
590 kern_channel_t channel, kern_channel_ring_t ring, boolean_t is_tx_ring,
591 void **ring_ctx)
592 {
593 #pragma unused(nxprov)
594 #pragma unused(channel)
595 struct ipsec_pcb *__single pcb = kern_nexus_get_context(nexus);
596 uint8_t ring_idx;
597
598 for (ring_idx = 0; ring_idx < pcb->ipsec_kpipe_count; ring_idx++) {
599 if (!uuid_compare(channel->ch_info->cinfo_nx_uuid, pcb->ipsec_kpipe_uuid[ring_idx])) {
600 break;
601 }
602 }
603
604 if (ring_idx == pcb->ipsec_kpipe_count) {
605 uuid_string_t uuidstr;
606 uuid_unparse(channel->ch_info->cinfo_nx_uuid, uuidstr);
607 os_log_error(OS_LOG_DEFAULT, "%s: %s cannot find channel %s\n", __func__, pcb->ipsec_if_xname, uuidstr);
608 return ENOENT;
609 }
610
611 *ring_ctx = __unsafe_forge_single(void *, (uintptr_t)ring_idx);
612
613 if (!is_tx_ring) {
614 VERIFY(pcb->ipsec_kpipe_rxring[ring_idx] == NULL);
615 pcb->ipsec_kpipe_rxring[ring_idx] = ring;
616 } else {
617 VERIFY(pcb->ipsec_kpipe_txring[ring_idx] == NULL);
618 pcb->ipsec_kpipe_txring[ring_idx] = ring;
619 }
620 return 0;
621 }
622
623 static void
ipsec_kpipe_ring_fini(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring)624 ipsec_kpipe_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
625 kern_channel_ring_t ring)
626 {
627 #pragma unused(nxprov)
628 bool found = false;
629 struct ipsec_pcb *__single pcb = kern_nexus_get_context(nexus);
630
631 for (unsigned int i = 0; i < pcb->ipsec_kpipe_count; i++) {
632 if (pcb->ipsec_kpipe_rxring[i] == ring) {
633 pcb->ipsec_kpipe_rxring[i] = NULL;
634 found = true;
635 } else if (pcb->ipsec_kpipe_txring[i] == ring) {
636 pcb->ipsec_kpipe_txring[i] = NULL;
637 found = true;
638 }
639 }
640 VERIFY(found);
641 }
642
643 static errno_t
ipsec_kpipe_sync_tx(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t tx_ring,uint32_t flags)644 ipsec_kpipe_sync_tx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
645 kern_channel_ring_t tx_ring, uint32_t flags)
646 {
647 #pragma unused(nxprov)
648 #pragma unused(flags)
649 struct ipsec_pcb *__single pcb = kern_nexus_get_context(nexus);
650
651 if (!ipsec_data_move_begin(pcb)) {
652 os_log_info(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
653 return 0;
654 }
655
656 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
657
658 if (!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
659 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
660 ipsec_data_move_end(pcb);
661 return 0;
662 }
663
664 VERIFY(pcb->ipsec_kpipe_count);
665
666 kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
667 if (tx_slot == NULL) {
668 // Nothing to write, bail
669 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
670 ipsec_data_move_end(pcb);
671 return 0;
672 }
673
674 // Signal the netif ring to read
675 kern_channel_ring_t __single rx_ring = pcb->ipsec_netif_rxring[0];
676 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
677
678 if (rx_ring != NULL) {
679 kern_channel_notify(rx_ring, 0);
680 }
681
682 ipsec_data_move_end(pcb);
683 return 0;
684 }
685
686 static errno_t
ipsec_encrypt_kpipe_pkt(ifnet_t interface,kern_packet_t sph,kern_packet_t dph)687 ipsec_encrypt_kpipe_pkt(ifnet_t interface, kern_packet_t sph,
688 kern_packet_t dph)
689 {
690 uint8_t *sbaddr = NULL;
691 int err = 0;
692 uint32_t slen = 0;
693
694 VERIFY(interface != NULL);
695 VERIFY(sph != 0);
696 VERIFY(dph != 0);
697
698 kern_buflet_t __single sbuf = __packet_get_next_buflet(sph, NULL);
699 VERIFY(sbuf != NULL);
700 slen = __buflet_get_data_length(sbuf);
701
702 if (__improbable(slen < sizeof(struct ip))) {
703 os_log_error(OS_LOG_DEFAULT, "ipsec encrypt kpipe pkt: source "
704 "buffer shorter than ip header, %u\n", slen);
705 return EINVAL;
706 }
707
708 sbaddr = ipsec_kern_buflet_to_buffer(sbuf);
709 struct ip *ip = (struct ip *)(void *)sbaddr;
710 ASSERT(IP_HDR_ALIGNED_P(ip));
711
712 u_int ip_vers = ip->ip_v;
713 switch (ip_vers) {
714 case IPVERSION: {
715 err = ipsec4_interface_kpipe_output(interface, sph, dph);
716 if (__improbable(err != 0)) {
717 os_log_error(OS_LOG_DEFAULT, "ipsec4 interface kpipe "
718 "output error %d\n", err);
719 return err;
720 }
721 break;
722 }
723 case 6: {
724 err = ipsec6_interface_kpipe_output(interface, sph, dph);
725 if (__improbable(err != 0)) {
726 os_log_error(OS_LOG_DEFAULT, "ipsec6 interface kpipe "
727 "output error %d\n", err);
728 return err;
729 }
730 break;
731 }
732 default: {
733 os_log_error(OS_LOG_DEFAULT, "received unknown packet version: %d\n",
734 ip_vers);
735 return EINVAL;
736 }
737 }
738
739 return err;
740 }
741
742 static errno_t
ipsec_kpipe_sync_rx(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t rx_ring,uint32_t flags)743 ipsec_kpipe_sync_rx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
744 kern_channel_ring_t rx_ring, uint32_t flags)
745 {
746 #pragma unused(nxprov)
747 #pragma unused(flags)
748 struct ipsec_pcb *__single pcb = kern_nexus_get_context(nexus);
749 struct kern_channel_ring_stat_increment rx_ring_stats;
750 uint8_t ring_idx = (uint8_t)(uintptr_t)kern_channel_ring_get_context(rx_ring);
751
752 if (!ipsec_data_move_begin(pcb)) {
753 os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
754 return 0;
755 }
756
757 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
758
759 if (!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
760 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
761 ipsec_data_move_end(pcb);
762 return 0;
763 }
764
765 VERIFY(pcb->ipsec_kpipe_count);
766 VERIFY(ring_idx <= pcb->ipsec_kpipe_count);
767
768 // Reclaim user-released slots
769 (void) kern_channel_reclaim(rx_ring);
770
771 uint32_t avail = kern_channel_available_slot_count(rx_ring);
772 if (avail == 0) {
773 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
774 os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d no room in rx_ring\n", __func__,
775 pcb->ipsec_if_xname, rx_ring->ckr_name, ring_idx);
776 ipsec_data_move_end(pcb);
777 return 0;
778 }
779
780 kern_channel_ring_t __single tx_ring = pcb->ipsec_netif_txring[ring_idx];
781 if (tx_ring == NULL) {
782 // Net-If TX ring not set up yet, nothing to read
783 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
784 os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d bad netif_txring 1\n", __func__,
785 pcb->ipsec_if_xname, rx_ring->ckr_name, ring_idx);
786 ipsec_data_move_end(pcb);
787 return 0;
788 }
789
790 struct netif_stats *nifs = &NX_NETIF_PRIVATE(pcb->ipsec_netif_nexus)->nif_stats;
791
792 // Unlock ipsec before entering ring
793 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
794
795 (void)kr_enter(tx_ring, TRUE);
796
797 // Lock again after entering and validate
798 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
799 if (tx_ring != pcb->ipsec_netif_txring[ring_idx]) {
800 // Ring no longer valid
801 // Unlock first, then exit ring
802 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
803 kr_exit(tx_ring);
804 os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d bad netif_txring 2\n", __func__,
805 pcb->ipsec_if_xname, rx_ring->ckr_name, ring_idx);
806 ipsec_data_move_end(pcb);
807 return 0;
808 }
809
810 struct kern_channel_ring_stat_increment tx_ring_stats;
811 bzero(&tx_ring_stats, sizeof(tx_ring_stats));
812 kern_channel_slot_t tx_pslot = NULL;
813 kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
814 if (tx_slot == NULL) {
815 // Nothing to read, don't bother signalling
816 // Unlock first, then exit ring
817 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
818 kr_exit(tx_ring);
819 ipsec_data_move_end(pcb);
820 return 0;
821 }
822
823 struct kern_pbufpool *rx_pp = rx_ring->ckr_pp;
824 VERIFY(rx_pp != NULL);
825 struct kern_pbufpool *tx_pp = tx_ring->ckr_pp;
826 VERIFY(tx_pp != NULL);
827 bzero(&rx_ring_stats, sizeof(rx_ring_stats));
828 kern_channel_slot_t rx_pslot = NULL;
829 kern_channel_slot_t rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);
830 kern_packet_t tx_chain_ph = 0;
831
832 while (rx_slot != NULL && tx_slot != NULL) {
833 size_t tx_pkt_length = 0;
834 errno_t error = 0;
835
836 // Allocate rx packet
837 kern_packet_t rx_ph = 0;
838 error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
839 if (__improbable(error != 0)) {
840 os_log_info(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s: "
841 "failed to allocate packet\n", pcb->ipsec_ifp->if_xname);
842 STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
843 STATS_INC(nifs, NETIF_STATS_DROP);
844 break;
845 }
846
847 kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);
848 if (__improbable(tx_ph == 0)) {
849 // Advance TX ring
850 tx_pslot = tx_slot;
851 tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
852 kern_pbufpool_free(rx_pp, rx_ph);
853 continue;
854 }
855
856 (void) kern_channel_slot_detach_packet(tx_ring, tx_slot, tx_ph);
857 if (tx_chain_ph != 0) {
858 kern_packet_append(tx_ph, tx_chain_ph);
859 }
860 tx_chain_ph = tx_ph;
861
862 // Advance TX ring
863 tx_pslot = tx_slot;
864 tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
865
866 bpf_tap_packet_out(pcb->ipsec_ifp, DLT_RAW, tx_ph, NULL, 0);
867
868 tx_pkt_length = kern_packet_get_data_length(tx_ph);
869 if (tx_pkt_length == 0 || tx_pkt_length > pcb->ipsec_slot_size) {
870 os_log_info(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s: "
871 "packet length %zu", pcb->ipsec_ifp->if_xname,
872 tx_pkt_length);
873 kern_pbufpool_free(rx_pp, rx_ph);
874 STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
875 STATS_INC(nifs, NETIF_STATS_DROP);
876 continue;
877 }
878
879 // Increment TX stats
880 tx_ring_stats.kcrsi_slots_transferred++;
881 tx_ring_stats.kcrsi_bytes_transferred += tx_pkt_length;
882
883 // Encrypt packet
884 lck_mtx_lock(&pcb->ipsec_kpipe_encrypt_lock);
885 error = ipsec_encrypt_kpipe_pkt(pcb->ipsec_ifp, tx_ph, rx_ph);
886 lck_mtx_unlock(&pcb->ipsec_kpipe_encrypt_lock);
887 if (__improbable(error != 0)) {
888 os_log_info(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s: "
889 "failed to encrypt packet", pcb->ipsec_ifp->if_xname);
890 kern_pbufpool_free(rx_pp, rx_ph);
891 STATS_INC(nifs, NETIF_STATS_DROP);
892 continue;
893 }
894
895 kern_packet_clear_flow_uuid(rx_ph); // Zero flow id
896 // Finalize and attach the packet
897 kern_buflet_t __single rx_buf = __packet_get_next_buflet(rx_ph, NULL);
898 error = kern_buflet_set_data_offset(rx_buf, 0);
899 VERIFY(error == 0);
900 error = kern_packet_finalize(rx_ph);
901 VERIFY(error == 0);
902 error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
903 VERIFY(error == 0);
904
905 STATS_INC(nifs, NETIF_STATS_TX_PACKETS);
906 STATS_INC(nifs, NETIF_STATS_TX_COPY_DIRECT);
907
908 rx_ring_stats.kcrsi_slots_transferred++;
909 rx_ring_stats.kcrsi_bytes_transferred += kern_packet_get_data_length(rx_ph);
910
911 if (!pcb->ipsec_ext_ifdata_stats) {
912 ifnet_stat_increment_out(pcb->ipsec_ifp, 1,
913 kern_packet_get_data_length(rx_ph), 0);
914 }
915
916 rx_pslot = rx_slot;
917 rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
918 }
919
920 if (rx_pslot) {
921 kern_channel_advance_slot(rx_ring, rx_pslot);
922 kern_channel_increment_ring_net_stats(rx_ring, pcb->ipsec_ifp, &rx_ring_stats);
923 }
924
925 if (tx_chain_ph != 0) {
926 kern_pbufpool_free_chain(tx_pp, tx_chain_ph);
927 }
928
929 if (tx_pslot) {
930 kern_channel_advance_slot(tx_ring, tx_pslot);
931 kern_channel_increment_ring_net_stats(tx_ring, pcb->ipsec_ifp, &tx_ring_stats);
932 (void)kern_channel_reclaim(tx_ring);
933 }
934
935 /* always reenable output */
936 errno_t error = ifnet_enable_output(pcb->ipsec_ifp);
937 if (error != 0) {
938 os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx: ifnet_enable_output returned error %d\n", error);
939 }
940
941 // Unlock first, then exit ring
942 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
943
944 if (tx_pslot != NULL) {
945 kern_channel_notify(tx_ring, 0);
946 }
947 kr_exit(tx_ring);
948
949 ipsec_data_move_end(pcb);
950 return 0;
951 }
952
953 static uint8_t
ipsec_find_tx_ring_by_svc(kern_packet_svc_class_t svc_class)954 ipsec_find_tx_ring_by_svc(kern_packet_svc_class_t svc_class)
955 {
956 switch (svc_class) {
957 case KPKT_SC_VO: {
958 return 0;
959 }
960 case KPKT_SC_VI: {
961 return 1;
962 }
963 case KPKT_SC_BE: {
964 return 2;
965 }
966 case KPKT_SC_BK: {
967 return 3;
968 }
969 default: {
970 VERIFY(0);
971 return 0;
972 }
973 }
974 }
975
976 static errno_t
ipsec_netif_ring_init(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel,kern_channel_ring_t ring,boolean_t is_tx_ring,void ** ring_ctx)977 ipsec_netif_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
978 kern_channel_t channel, kern_channel_ring_t ring, boolean_t is_tx_ring,
979 void **ring_ctx)
980 {
981 #pragma unused(nxprov)
982 #pragma unused(channel)
983 struct ipsec_pcb *__single pcb = kern_nexus_get_context(nexus);
984
985 if (!is_tx_ring) {
986 VERIFY(pcb->ipsec_netif_rxring[0] == NULL);
987 pcb->ipsec_netif_rxring[0] = ring;
988 } else {
989 uint8_t ring_idx = 0;
990 if (ipsec_in_wmm_mode(pcb)) {
991 int err;
992 kern_packet_svc_class_t svc_class;
993 err = kern_channel_get_service_class(ring, &svc_class);
994 VERIFY(err == 0);
995 ring_idx = ipsec_find_tx_ring_by_svc(svc_class);
996 VERIFY(ring_idx < IPSEC_IF_WMM_RING_COUNT);
997 }
998
999 *ring_ctx = __unsafe_forge_single(void *, (uintptr_t)ring_idx);
1000
1001 VERIFY(pcb->ipsec_netif_txring[ring_idx] == NULL);
1002 pcb->ipsec_netif_txring[ring_idx] = ring;
1003 }
1004 return 0;
1005 }
1006
1007 static void
ipsec_netif_ring_fini(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring)1008 ipsec_netif_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
1009 kern_channel_ring_t ring)
1010 {
1011 #pragma unused(nxprov)
1012 struct ipsec_pcb *__single pcb = kern_nexus_get_context(nexus);
1013 bool found = false;
1014
1015 for (int i = 0; i < IPSEC_NETIF_MAX_RX_RING_COUNT; i++) {
1016 if (pcb->ipsec_netif_rxring[i] == ring) {
1017 pcb->ipsec_netif_rxring[i] = NULL;
1018 VERIFY(!found);
1019 found = true;
1020 }
1021 }
1022 for (int i = 0; i < IPSEC_NETIF_MAX_TX_RING_COUNT; i++) {
1023 if (pcb->ipsec_netif_txring[i] == ring) {
1024 pcb->ipsec_netif_txring[i] = NULL;
1025 VERIFY(!found);
1026 found = true;
1027 }
1028 }
1029 VERIFY(found);
1030 }
1031
1032 static bool
ipsec_netif_check_policy(ifnet_t interface,mbuf_t data)1033 ipsec_netif_check_policy(ifnet_t interface, mbuf_t data)
1034 {
1035 necp_kernel_policy_result necp_result = 0;
1036 necp_kernel_policy_result_parameter necp_result_parameter = {};
1037 uint32_t necp_matched_policy_id = 0;
1038 struct ip_out_args args4 = { };
1039 struct ip6_out_args args6 = { };
1040
1041 // This packet has been marked with IP level policy, do not mark again.
1042 if (data && data->m_pkthdr.necp_mtag.necp_policy_id >= NECP_KERNEL_POLICY_ID_FIRST_VALID_IP) {
1043 return true;
1044 }
1045
1046 size_t length = mbuf_pkthdr_len(data);
1047 if (length < sizeof(struct ip)) {
1048 return false;
1049 }
1050
1051 struct ip *ip = mtod(data, struct ip *);
1052 u_int ip_version = ip->ip_v;
1053 switch (ip_version) {
1054 case 4: {
1055 if (interface != NULL) {
1056 args4.ipoa_flags |= IPOAF_BOUND_IF;
1057 args4.ipoa_boundif = interface->if_index;
1058 }
1059 necp_matched_policy_id = necp_ip_output_find_policy_match(data, IP_OUTARGS, &args4, NULL,
1060 &necp_result, &necp_result_parameter);
1061 break;
1062 }
1063 case 6: {
1064 if (interface != NULL) {
1065 args6.ip6oa_flags |= IP6OAF_BOUND_IF;
1066 args6.ip6oa_boundif = interface->if_index;
1067 }
1068 necp_matched_policy_id = necp_ip6_output_find_policy_match(data, IPV6_OUTARGS, &args6, NULL,
1069 &necp_result, &necp_result_parameter);
1070 break;
1071 }
1072 default: {
1073 return false;
1074 }
1075 }
1076
1077 if (necp_result == NECP_KERNEL_POLICY_RESULT_DROP ||
1078 necp_result == NECP_KERNEL_POLICY_RESULT_SOCKET_DIVERT) {
1079 /* Drop and flow divert packets should be blocked at the IP layer */
1080 return false;
1081 }
1082
1083 necp_mark_packet_from_ip(data, necp_matched_policy_id);
1084 return true;
1085 }
1086
/*
 * Netif nexus TX sync callback: drain the netif TX ring.
 *
 * Two modes:
 *  - kpipe attached: leave the packets on the ring and just notify the
 *    matching kernel-pipe RX ring so the kpipe side consumes them.
 *  - no kpipe: copy each packet into an mbuf, run it through NECP
 *    policy, and inject it into the BSD stack via ipsec_output().
 *
 * Always returns 0; per-packet failures are counted in the netif stats.
 */
static errno_t
ipsec_netif_sync_tx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t tx_ring, uint32_t flags)
{
#pragma unused(nxprov)
#pragma unused(flags)
	struct ipsec_pcb *__single pcb = kern_nexus_get_context(nexus);

	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;

	// Bail out if the data path has been stopped
	if (!ipsec_data_move_begin(pcb)) {
		os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
		return 0;
	}

	lck_rw_lock_shared(&pcb->ipsec_pcb_lock);

	struct kern_channel_ring_stat_increment tx_ring_stats;
	bzero(&tx_ring_stats, sizeof(tx_ring_stats));
	kern_channel_slot_t tx_pslot = NULL;
	kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
	// Chain of detached packets, freed back to the pool in one call below
	kern_packet_t tx_chain_ph = 0;

	STATS_INC(nifs, NETIF_STATS_TX_SYNC);

	if (tx_slot == NULL) {
		// Nothing to write, don't bother signalling
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		ipsec_data_move_end(pcb);
		return 0;
	}

	if (pcb->ipsec_kpipe_count &&
	    ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
		// Select the corresponding kpipe rx ring
		uint8_t ring_idx = (uint8_t)(uintptr_t)kern_channel_ring_get_context(tx_ring);
		VERIFY(ring_idx < IPSEC_IF_MAX_RING_COUNT);
		kern_channel_ring_t __single rx_ring = pcb->ipsec_kpipe_rxring[ring_idx];

		// Unlock while calling notify
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);

		// Signal the kernel pipe ring to read
		if (rx_ring != NULL) {
			kern_channel_notify(rx_ring, 0);
		}

		ipsec_data_move_end(pcb);
		return 0;
	}

	// If we're here, we're injecting into the BSD stack
	while (tx_slot != NULL) {
		size_t length = 0;
		mbuf_t __single data = NULL;

		kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);

		if (tx_ph == 0) {
			// Advance TX ring
			tx_pslot = tx_slot;
			tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
			continue;
		}
		// Detach the packet and prepend it to the free chain; its
		// payload is copied into an mbuf below
		(void) kern_channel_slot_detach_packet(tx_ring, tx_slot, tx_ph);
		if (tx_chain_ph != 0) {
			kern_packet_append(tx_ph, tx_chain_ph);
		}
		tx_chain_ph = tx_ph;

		// Advance TX ring
		tx_pslot = tx_slot;
		tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);

		kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
		VERIFY(tx_buf != NULL);

		uint8_t *tx_baddr = ipsec_kern_buflet_to_buffer(tx_buf);
		VERIFY(tx_baddr != 0);
		tx_baddr += kern_buflet_get_data_offset(tx_buf);

		bpf_tap_packet_out(pcb->ipsec_ifp, DLT_RAW, tx_ph, NULL, 0);

		// Clamp the copy length to the configured slot size
		length = MIN(kern_packet_get_data_length(tx_ph),
		    pcb->ipsec_slot_size);

		if (length > 0) {
			errno_t error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, &data);
			if (error == 0) {
				error = mbuf_copyback(data, 0, length, tx_baddr, MBUF_DONTWAIT);
				if (error == 0) {
					// Mark packet from policy
					uint32_t policy_id = kern_packet_get_policy_id(tx_ph);
					uint32_t skip_policy_id = kern_packet_get_skip_policy_id(tx_ph);
					necp_mark_packet_from_ip_with_skip(data, policy_id, skip_policy_id);

					// Check policy with NECP
					if (!ipsec_netif_check_policy(pcb->ipsec_ifp, data)) {
						os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - failed policy check\n", pcb->ipsec_ifp->if_xname);
						STATS_INC(nifs, NETIF_STATS_DROP);
						mbuf_freem(data);
						data = NULL;
					} else {
						// Send through encryption
						// (ipsec_output takes ownership of data)
						error = ipsec_output(pcb->ipsec_ifp, data);
						if (error != 0) {
							os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - ipsec_output error %d\n", pcb->ipsec_ifp->if_xname, error);
						}
					}
				} else {
					os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - mbuf_copyback(%zu) error %d\n", pcb->ipsec_ifp->if_xname, length, error);
					STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
					STATS_INC(nifs, NETIF_STATS_DROP);
					mbuf_freem(data);
					data = NULL;
				}
			} else {
				os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - mbuf_gethdr error %d\n", pcb->ipsec_ifp->if_xname, error);
				STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
				STATS_INC(nifs, NETIF_STATS_DROP);
			}
		} else {
			os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - 0 length packet\n", pcb->ipsec_ifp->if_xname);
			STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
			STATS_INC(nifs, NETIF_STATS_DROP);
		}

		// data == NULL here means the packet was dropped above; stop
		// draining the ring for this sync
		if (data == NULL) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s: no encrypted packet to send\n", pcb->ipsec_ifp->if_xname);
			break;
		}

		STATS_INC(nifs, NETIF_STATS_TX_PACKETS);
		STATS_INC(nifs, NETIF_STATS_TX_COPY_MBUF);

		tx_ring_stats.kcrsi_slots_transferred++;
		tx_ring_stats.kcrsi_bytes_transferred += length;
	}

	// Return all detached packets to the pool in a single call
	if (tx_chain_ph != 0) {
		kern_pbufpool_free_chain(tx_ring->ckr_pp, tx_chain_ph);
	}

	if (tx_pslot) {
		kern_channel_advance_slot(tx_ring, tx_pslot);
		kern_channel_increment_ring_net_stats(tx_ring, pcb->ipsec_ifp, &tx_ring_stats);
		(void)kern_channel_reclaim(tx_ring);
	}

	lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
	ipsec_data_move_end(pcb);

	return 0;
}
1241
/*
 * Kick a single netif TX ring (identified by ring_idx).
 *
 * Refills the ring from AQM via kern_channel_tx_refill_canblock(), then
 * revalidates the ring under the pcb lock.  In kpipe mode it disables
 * interface output when the ring is (nearly) full of pending packets
 * (output is re-enabled by the kpipe RX sync once slots drain) and
 * notifies the matching kpipe RX ring so the client can read.
 *
 * Returns 0 on success, ENXIO if the ring was torn down underneath us.
 */
static errno_t
ipsec_netif_tx_doorbell_one(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t ring, uint32_t flags, uint8_t ring_idx)
{
#pragma unused(nxprov)
	struct ipsec_pcb *__single pcb = kern_nexus_get_context(nexus);
	boolean_t more = false;
	errno_t rc = 0;

	VERIFY((flags & KERN_NEXUS_TXDOORBELLF_ASYNC_REFILL) == 0);

	/*
	 * Refill and sync the ring; we may be racing against another thread doing
	 * an RX sync that also wants to do kr_enter(), and so use the blocking
	 * variant here.
	 */
	rc = kern_channel_tx_refill_canblock(ring, UINT32_MAX, UINT32_MAX, true, &more);
	if (rc != 0 && rc != EAGAIN && rc != EBUSY) {
		os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s tx refill failed %d\n", __func__,
		    pcb->ipsec_if_xname, ring->ckr_name, rc);
	}

	// Serialize against other ring users, then confirm under the pcb
	// lock that the ring is still the registered TX ring
	(void) kr_enter(ring, TRUE);
	lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
	if (ring != pcb->ipsec_netif_txring[ring_idx]) {
		// ring no longer valid
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		kr_exit(ring);
		os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d bad netif_txring 3\n", __func__,
		    pcb->ipsec_if_xname, ring->ckr_name, ring_idx);
		return ENXIO;
	}

	if (pcb->ipsec_kpipe_count) {
		uint32_t tx_available = kern_channel_available_slot_count(ring);
		if (pcb->ipsec_netif_txring_size > 0 &&
		    tx_available >= pcb->ipsec_netif_txring_size - 1) {
			// No room left in tx ring, disable output for now
			errno_t error = ifnet_disable_output(pcb->ipsec_ifp);
			if (error != 0) {
				os_log_error(OS_LOG_DEFAULT, "ipsec_netif_tx_doorbell: ifnet_disable_output returned error %d\n", error);
			}
		}
	}

	if (pcb->ipsec_kpipe_count) {
		kern_channel_ring_t __single rx_ring = pcb->ipsec_kpipe_rxring[ring_idx];

		// Unlock while calling notify
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		// Signal the kernel pipe ring to read
		if (rx_ring != NULL) {
			kern_channel_notify(rx_ring, 0);
		}
	} else {
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
	}

	kr_exit(ring);

	return 0;
}
1304
1305 static errno_t
ipsec_netif_tx_doorbell(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring,__unused uint32_t flags)1306 ipsec_netif_tx_doorbell(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
1307 kern_channel_ring_t ring, __unused uint32_t flags)
1308 {
1309 errno_t ret = 0;
1310 struct ipsec_pcb *__single pcb = kern_nexus_get_context(nexus);
1311
1312 if (!ipsec_data_move_begin(pcb)) {
1313 os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
1314 return 0;
1315 }
1316
1317 if (ipsec_in_wmm_mode(pcb)) {
1318 for (uint8_t i = 0; i < IPSEC_IF_WMM_RING_COUNT; i++) {
1319 kern_channel_ring_t __single nring = pcb->ipsec_netif_txring[i];
1320 ret = ipsec_netif_tx_doorbell_one(nxprov, nexus, nring, flags, i);
1321 if (ret) {
1322 break;
1323 }
1324 }
1325 } else {
1326 ret = ipsec_netif_tx_doorbell_one(nxprov, nexus, ring, flags, 0);
1327 }
1328
1329 ipsec_data_move_end(pcb);
1330 return ret;
1331 }
1332
/*
 * Transform one encrypted kpipe packet (kpipe_ph) into a decrypted
 * netif packet (netif_ph): validate lengths, copy the IP header into
 * the netif buffer, then let esp_kpipe_input() decrypt the payload
 * (serialized by ipsec_kpipe_decrypt_lock), propagate the wake-packet
 * flag, and finalize the netif packet.
 *
 * Returns 0 on success; on failure returns a non-zero errno after
 * bumping the drop stats — the caller frees netif_ph.
 */
static errno_t
ipsec_transform_kpipe_pkt_to_netif_pkt(struct ipsec_pcb *pcb,
    struct kern_channel_ring_stat_increment *tx_ring_stats,
    struct netif_stats *nifs, kern_packet_t kpipe_ph, kern_packet_t netif_ph)
{
	kern_buflet_t kpipe_buf = NULL, netif_buf = NULL;
	uint8_t *kpipe_baddr = NULL, *netif_baddr = NULL;
	uuid_t flow_uuid;
	size_t iphlen = 0;
	uint32_t kpipe_buf_len = 0, netif_buf_lim = 0;
	int err = 0;

	VERIFY(kpipe_ph != 0);
	VERIFY(netif_ph != 0);
	VERIFY(pcb != NULL);
	VERIFY(tx_ring_stats != NULL);
	VERIFY(nifs != NULL);

	// Locate the source (kpipe) buffer and its valid data range
	kpipe_buf = kern_packet_get_next_buflet(kpipe_ph, NULL);
	VERIFY(kpipe_buf != NULL);
	kpipe_baddr = ipsec_kern_buflet_to_buffer(kpipe_buf);
	VERIFY(kpipe_baddr != NULL);
	kpipe_baddr += kern_buflet_get_data_offset(kpipe_buf);
	kpipe_buf_len = kern_buflet_get_data_length(kpipe_buf);

	// Locate the destination (netif) buffer and the room left in it
	netif_buf = kern_packet_get_next_buflet(netif_ph, NULL);
	VERIFY(netif_buf != NULL);
	netif_baddr = ipsec_kern_buflet_to_buffer(netif_buf);
	VERIFY(netif_baddr != NULL);
	netif_baddr += kern_buflet_get_data_offset(netif_buf);
	netif_buf_lim = __buflet_get_data_limit(netif_buf);
	netif_buf_lim -= __buflet_get_data_offset(netif_buf);

	if (kpipe_buf_len > pcb->ipsec_slot_size) {
		os_log_info(OS_LOG_DEFAULT,
		    "ipsec_transform_kpipe_pkt_to_netif_pkt %s: kpipe buffer length "
		    "%u > pcb ipsec slot size %u", pcb->ipsec_ifp->if_xname,
		    kpipe_buf_len, pcb->ipsec_slot_size);
		STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
		err = EMSGSIZE;
		goto bad;
	}

	// TX ring stats are charged here, even if a later check drops the packet
	tx_ring_stats->kcrsi_slots_transferred++;
	tx_ring_stats->kcrsi_bytes_transferred += kpipe_buf_len;

	// Must hold at least an IPv4 header to read the version nibble
	if (__improbable(kpipe_buf_len < sizeof(struct ip))) {
		os_log_info(OS_LOG_DEFAULT, "ipsec_transform_kpipe_pkt_to_netif_pkt %s - bad "
		    "packet length %u\n", pcb->ipsec_ifp->if_xname, kpipe_buf_len);
		STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
		err = EBADMSG;
		goto bad;
	}

	struct ip *ip = (struct ip *)(void *)kpipe_baddr;
	ASSERT(IP_HDR_ALIGNED_P(ip));

	// Determine the IP header length to copy into the netif buffer
	u_int ip_vers = ip->ip_v;
	switch (ip_vers) {
	case IPVERSION: {
#ifdef _IP_VHL
		iphlen = IP_VHL_HL(ip->ip_vhl) << 2;
#else /* _IP_VHL */
		iphlen = ip->ip_hl << 2;
#endif /* _IP_VHL */
		break;
	}
	case 6: {
		iphlen = sizeof(struct ip6_hdr);
		break;
	}
	default: {
		os_log_info(OS_LOG_DEFAULT, "ipsec_transform_kpipe_pkt_to_netif_pkt %s - bad "
		    "ip version %u\n", pcb->ipsec_ifp->if_xname, ip_vers);
		err = EBADMSG;
		goto bad;
	}
	}

	// The source must contain the full IP header...
	if (__improbable(kpipe_buf_len < iphlen)) {
		os_log_info(OS_LOG_DEFAULT, "ipsec_transform_kpipe_pkt_to_netif_pkt %s - bad "
		    "packet length %u\n", pcb->ipsec_ifp->if_xname, kpipe_buf_len);
		STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
		err = EBADMSG;
		goto bad;
	}

	// ...and the destination must have room for it
	if (__improbable(netif_buf_lim < iphlen)) {
		os_log_info(OS_LOG_DEFAULT, "ipsec_transform_kpipe_pkt_to_netif_pkt %s - netif "
		    "buffer length %u too short\n", pcb->ipsec_ifp->if_xname, netif_buf_lim);
		STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
		err = EBADMSG;
		goto bad;
	}

	// Copy the IP header; esp_kpipe_input() fills in the decrypted payload
	memcpy(netif_baddr, kpipe_baddr, iphlen);
	__buflet_set_data_length(netif_buf, (uint16_t)iphlen);

	lck_mtx_lock(&pcb->ipsec_kpipe_decrypt_lock);
	err = esp_kpipe_input(pcb->ipsec_ifp, kpipe_ph, netif_ph);
	lck_mtx_unlock(&pcb->ipsec_kpipe_decrypt_lock);

	if (__improbable((err != 0))) {
		goto bad;
	}

	// The kpipe client smuggles the wake-packet flag in the first byte
	// of the flow uuid; propagate it to the netif packet
	kern_packet_get_flow_uuid(kpipe_ph, &flow_uuid);
	uint8_t *id_8 = (uint8_t *)flow_uuid;
	if (__improbable((id_8[0] & IPSEC_KPIPE_FLAG_WAKE_PKT) == IPSEC_KPIPE_FLAG_WAKE_PKT)) {
		os_log_info(OS_LOG_DEFAULT, "ipsec_transform_kpipe_pkt_to_netif_pkt %s: wake packet "
		    "flag is set\n", pcb->ipsec_ifp->if_xname);
		__packet_set_wake_flag(netif_ph);
	}

	// Reset metadata and finalize before handing to the RX ring
	kern_packet_clear_flow_uuid(netif_ph);
	err = kern_buflet_set_data_offset(netif_buf, 0);
	VERIFY(err == 0);
	err = kern_packet_set_link_header_offset(netif_ph, 0);
	VERIFY(err == 0);
	err = kern_packet_set_network_header_offset(netif_ph, 0);
	VERIFY(err == 0);
	err = kern_packet_finalize(netif_ph);
	VERIFY(err == 0);

	return 0;
bad:
	STATS_INC(nifs, NETIF_STATS_DROP);
	return err;
}
1462
1463
/*
 * Netif nexus RX sync callback: fill the netif RX ring from two sources.
 *
 * 1) Legacy path: mbufs queued on pcb->ipsec_input_chain are copied
 *    into freshly allocated ring packets.  Packets too large for the
 *    pool buffer (or for the configured input fragment size) are
 *    IP-fragmented and the fragments re-queued onto the input chain.
 * 2) Kpipe path: for each attached kernel pipe, encrypted packets
 *    waiting on the kpipe TX ring are decrypted directly into ring
 *    packets via ipsec_transform_kpipe_pkt_to_netif_pkt().
 *
 * Always returns 0; per-packet failures are counted in the netif stats.
 */
static errno_t
ipsec_netif_sync_rx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t rx_ring, uint32_t flags)
{
#pragma unused(nxprov)
#pragma unused(flags)
	struct ipsec_pcb *__single pcb = kern_nexus_get_context(nexus);
	struct kern_channel_ring_stat_increment rx_ring_stats;

	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;

	// Bail out if the data path has been stopped
	if (!ipsec_data_move_begin(pcb)) {
		os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
		return 0;
	}

	lck_rw_lock_shared(&pcb->ipsec_pcb_lock);

	// Reclaim user-released slots
	(void) kern_channel_reclaim(rx_ring);

	STATS_INC(nifs, NETIF_STATS_RX_SYNC);

	uint32_t avail = kern_channel_available_slot_count(rx_ring);
	if (avail == 0) {
		// No free RX slots; nothing can be delivered this sync
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		ipsec_data_move_end(pcb);
		return 0;
	}

	struct kern_pbufpool *rx_pp = rx_ring->ckr_pp;
	VERIFY(rx_pp != NULL);
	bzero(&rx_ring_stats, sizeof(rx_ring_stats));
	kern_channel_slot_t rx_pslot = NULL;
	kern_channel_slot_t rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);

	// Pass 1: drain the legacy mbuf input chain into the RX ring
	while (rx_slot != NULL) {
		// Check for a waiting packet
		lck_mtx_lock(&pcb->ipsec_input_chain_lock);
		mbuf_t __single data = pcb->ipsec_input_chain;
		if (data == NULL) {
			lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
			break;
		}

		// Allocate rx packet
		kern_packet_t rx_ph = 0;
		errno_t error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
		if (__improbable(error != 0)) {
			STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_PKT);
			STATS_INC(nifs, NETIF_STATS_DROP);
			lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
			break;
		}

		// Advance waiting packets
		// (unlink data from the chain while still holding the chain lock)
		if (pcb->ipsec_input_chain_count > 0) {
			pcb->ipsec_input_chain_count--;
		}
		pcb->ipsec_input_chain = data->m_nextpkt;
		data->m_nextpkt = NULL;
		if (pcb->ipsec_input_chain == NULL) {
			pcb->ipsec_input_chain_last = NULL;
		}
		lck_mtx_unlock(&pcb->ipsec_input_chain_lock);

		size_t length = mbuf_pkthdr_len(data);

		if (length < sizeof(struct ip)) {
			// Flush data
			mbuf_freem(data);
			kern_pbufpool_free(rx_pp, rx_ph);
			STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
			STATS_INC(nifs, NETIF_STATS_DROP);
			os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: legacy decrypted packet length cannot hold IP %zu < %zu\n",
			    pcb->ipsec_ifp->if_xname, length, sizeof(struct ip));
			continue;
		}

		// Classify the address family for the fragmentation path below
		uint32_t af = 0;
		struct ip *ip = mtod(data, struct ip *);
		u_int ip_version = ip->ip_v;
		switch (ip_version) {
		case 4: {
			af = AF_INET;
			break;
		}
		case 6: {
			af = AF_INET6;
			break;
		}
		default: {
			os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: legacy unknown ip version %u\n",
			    pcb->ipsec_ifp->if_xname, ip_version);
			break;
		}
		}

		if (length > PP_BUF_SIZE_DEF(rx_pp) ||
		    (pcb->ipsec_frag_size_set && length > pcb->ipsec_input_frag_size)) {
			// We need to fragment to send up into the netif

			u_int32_t fragment_mtu = PP_BUF_SIZE_DEF(rx_pp);
			if (pcb->ipsec_frag_size_set &&
			    pcb->ipsec_input_frag_size < PP_BUF_SIZE_DEF(rx_pp)) {
				fragment_mtu = pcb->ipsec_input_frag_size;
			}

			mbuf_t fragment_chain = NULL;
			switch (af) {
			case AF_INET: {
				// ip_fragment expects the length in host order
				ip->ip_len = ntohs(ip->ip_len);

				// ip_fragment will modify the original data, don't free
				int fragment_error = ip_fragment(data, pcb->ipsec_ifp, fragment_mtu, TRUE);
				if (fragment_error == 0 && data != NULL) {
					fragment_chain = data;
				} else {
					STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
					STATS_INC(nifs, NETIF_STATS_DROP);
					os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: failed to fragment IPv4 packet of length %zu (%d)\n",
					    pcb->ipsec_ifp->if_xname, length, fragment_error);
				}
				break;
			}
			case AF_INET6: {
				if (length < sizeof(struct ip6_hdr)) {
					mbuf_freem(data);
					STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
					STATS_INC(nifs, NETIF_STATS_DROP);
					os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: failed to fragment IPv6 packet of length %zu < %zu\n",
					    pcb->ipsec_ifp->if_xname, length, sizeof(struct ip6_hdr));
				} else {
					// ip6_do_fragmentation will free the original data on success only
					struct ip6_hdr *ip6 = mtod(data, struct ip6_hdr *);

					int fragment_error = ip6_do_fragmentation(&data, 0, pcb->ipsec_ifp, sizeof(struct ip6_hdr),
					    ip6, NULL, fragment_mtu, ip6->ip6_nxt, htonl(ip6_randomid((uint64_t)data)));
					if (fragment_error == 0 && data != NULL) {
						fragment_chain = data;
					} else {
						mbuf_freem(data);
						STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
						STATS_INC(nifs, NETIF_STATS_DROP);
						os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: failed to fragment IPv6 packet of length %zu (%d)\n",
						    pcb->ipsec_ifp->if_xname, length, fragment_error);
					}
				}
				break;
			}
			default: {
				// Cannot fragment unknown families
				mbuf_freem(data);
				STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
				STATS_INC(nifs, NETIF_STATS_DROP);
				os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: uknown legacy decrypted packet length %zu > %u\n",
				    pcb->ipsec_ifp->if_xname, length, PP_BUF_SIZE_DEF(rx_pp));
				break;
			}
			}

			if (fragment_chain != NULL) {
				// Add fragments to chain before continuing
				// (they are picked up by later iterations of this loop)
				lck_mtx_lock(&pcb->ipsec_input_chain_lock);
				if (pcb->ipsec_input_chain != NULL) {
					pcb->ipsec_input_chain_last->m_nextpkt = fragment_chain;
				} else {
					pcb->ipsec_input_chain = fragment_chain;
				}
				pcb->ipsec_input_chain_count++;
				while (fragment_chain->m_nextpkt) {
					VERIFY(fragment_chain != fragment_chain->m_nextpkt);
					fragment_chain = fragment_chain->m_nextpkt;
					pcb->ipsec_input_chain_count++;
				}
				pcb->ipsec_input_chain_last = fragment_chain;
				lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
			}

			// Make sure to free unused rx packet
			kern_pbufpool_free(rx_pp, rx_ph);

			continue;
		}

		mbuf_pkthdr_setrcvif(data, pcb->ipsec_ifp);

		// Fillout rx packet
		kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
		VERIFY(rx_buf != NULL);
		uint8_t *rx_baddr = ipsec_kern_buflet_to_buffer(rx_buf);
		VERIFY(rx_baddr != NULL);

		// Copy-in data from mbuf to buflet
		mbuf_copydata(data, 0, length, (void *)rx_baddr);
		kern_packet_clear_flow_uuid(rx_ph);     // Zero flow id

		// Finalize and attach the packet
		error = kern_buflet_set_data_offset(rx_buf, 0);
		VERIFY(error == 0);
		error = kern_buflet_set_data_length(rx_buf, (uint16_t)length);
		VERIFY(error == 0);
		error = kern_packet_set_headroom(rx_ph, 0);
		VERIFY(error == 0);
		error = kern_packet_finalize(rx_ph);
		VERIFY(error == 0);
		error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
		VERIFY(error == 0);

		STATS_INC(nifs, NETIF_STATS_RX_PACKETS);
		STATS_INC(nifs, NETIF_STATS_RX_COPY_MBUF);
		bpf_tap_packet_in(pcb->ipsec_ifp, DLT_RAW, rx_ph, NULL, 0);

		rx_ring_stats.kcrsi_slots_transferred++;
		rx_ring_stats.kcrsi_bytes_transferred += length;

		if (!pcb->ipsec_ext_ifdata_stats) {
			ifnet_stat_increment_in(pcb->ipsec_ifp, 1, (uint16_t)length, 0);
		}

		// The payload was copied into the ring packet; the mbuf is done
		mbuf_freem(data);

		// Advance ring
		rx_pslot = rx_slot;
		rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
	}

	// Pass 2: drain each kpipe TX ring, decrypting into the RX ring
	for (uint8_t ring_idx = 0; ring_idx < pcb->ipsec_kpipe_count; ring_idx++) {
		struct kern_channel_ring_stat_increment tx_ring_stats = {};
		kern_channel_slot_t tx_pslot = NULL;
		kern_channel_slot_t tx_slot = NULL;

		kern_channel_ring_t __single tx_ring = pcb->ipsec_kpipe_txring[ring_idx];
		if (tx_ring == NULL) {
			// Net-If TX ring not set up yet, nothing to read
			goto done;
		}

		// Unlock ipsec before entering ring
		// (lock order: kr_enter must not be attempted with the pcb lock held)
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);

		(void)kr_enter(tx_ring, TRUE);

		// Lock again after entering and validate
		lck_rw_lock_shared(&pcb->ipsec_pcb_lock);

		if (tx_ring != pcb->ipsec_kpipe_txring[ring_idx]) {
			// Ring was replaced/torn down while unlocked
			goto done;
		}

		tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
		if (tx_slot == NULL) {
			// Nothing to read, don't bother signalling
			goto done;
		}

		while (rx_slot != NULL && tx_slot != NULL) {
			errno_t error = 0;

			// Allocate rx packet
			kern_packet_t rx_ph = 0;
			error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
			if (__improbable(error != 0)) {
				STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_PKT);
				STATS_INC(nifs, NETIF_STATS_DROP);
				break;
			}

			kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);
			tx_pslot = tx_slot;
			tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
			if (tx_ph == 0) {
				// Empty slot; release the unused rx packet
				kern_pbufpool_free(rx_pp, rx_ph);
				continue;
			}

			error = ipsec_transform_kpipe_pkt_to_netif_pkt(pcb,
			    &tx_ring_stats, nifs, tx_ph, rx_ph);
			if (error != 0) {
				// Failed to get decrypted packet
				kern_pbufpool_free(rx_pp, rx_ph);
				continue;
			}

			error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
			VERIFY(error == 0);

			STATS_INC(nifs, NETIF_STATS_RX_PACKETS);
			STATS_INC(nifs, NETIF_STATS_RX_COPY_DIRECT);

			bpf_tap_packet_in(pcb->ipsec_ifp, DLT_RAW, rx_ph, NULL, 0);

			rx_ring_stats.kcrsi_slots_transferred++;
			rx_ring_stats.kcrsi_bytes_transferred += kern_packet_get_data_length(rx_ph);

			if (!pcb->ipsec_ext_ifdata_stats) {
				ifnet_stat_increment_in(pcb->ipsec_ifp, 1,
				    kern_packet_get_data_length(rx_ph), 0);
			}

			rx_pslot = rx_slot;
			rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
		}

done:
		if (tx_pslot) {
			kern_channel_advance_slot(tx_ring, tx_pslot);
			kern_channel_increment_ring_net_stats(tx_ring, pcb->ipsec_ifp, &tx_ring_stats);
			(void)kern_channel_reclaim(tx_ring);
		}

		// Unlock first, then exit ring
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		if (tx_ring != NULL) {
			if (tx_pslot != NULL) {
				kern_channel_notify(tx_ring, 0);
			}
			kr_exit(tx_ring);
		}

		// Re-take the pcb lock for the next loop iteration / epilogue
		lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
	}

	if (rx_pslot) {
		kern_channel_advance_slot(rx_ring, rx_pslot);
		kern_channel_increment_ring_net_stats(rx_ring, pcb->ipsec_ifp, &rx_ring_stats);
	}


	lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);

	ipsec_data_move_end(pcb);
	return 0;
}
1799
1800 static errno_t
ipsec_nexus_ifattach(struct ipsec_pcb * pcb,struct ifnet_init_eparams * init_params,struct ifnet ** ifp)1801 ipsec_nexus_ifattach(struct ipsec_pcb *pcb,
1802 struct ifnet_init_eparams *init_params,
1803 struct ifnet **ifp)
1804 {
1805 errno_t err;
1806 nexus_controller_t controller = kern_nexus_shared_controller();
1807 struct kern_nexus_net_init net_init;
1808 struct kern_pbufpool_init pp_init;
1809
1810 nexus_name_t provider_name;
1811 snprintf((char *)provider_name, sizeof(provider_name),
1812 "com.apple.netif.%s", pcb->ipsec_if_xname);
1813
1814 struct kern_nexus_provider_init prov_init = {
1815 .nxpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
1816 .nxpi_flags = NXPIF_VIRTUAL_DEVICE,
1817 .nxpi_pre_connect = ipsec_nexus_pre_connect,
1818 .nxpi_connected = ipsec_nexus_connected,
1819 .nxpi_pre_disconnect = ipsec_netif_pre_disconnect,
1820 .nxpi_disconnected = ipsec_nexus_disconnected,
1821 .nxpi_ring_init = ipsec_netif_ring_init,
1822 .nxpi_ring_fini = ipsec_netif_ring_fini,
1823 .nxpi_slot_init = NULL,
1824 .nxpi_slot_fini = NULL,
1825 .nxpi_sync_tx = ipsec_netif_sync_tx,
1826 .nxpi_sync_rx = ipsec_netif_sync_rx,
1827 .nxpi_tx_doorbell = ipsec_netif_tx_doorbell,
1828 };
1829
1830 nexus_attr_t __single nxa = NULL;
1831 err = kern_nexus_attr_create(&nxa);
1832 IPSEC_IF_VERIFY(err == 0);
1833 if (err != 0) {
1834 os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_attr_create failed: %d\n",
1835 __func__, err);
1836 goto failed;
1837 }
1838
1839 uint64_t slot_buffer_size = pcb->ipsec_slot_size;
1840 err = kern_nexus_attr_set(nxa, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
1841 VERIFY(err == 0);
1842
1843 // Reset ring size for netif nexus to limit memory usage
1844 uint64_t ring_size = pcb->ipsec_netif_ring_size;
1845 err = kern_nexus_attr_set(nxa, NEXUS_ATTR_TX_SLOTS, ring_size);
1846 VERIFY(err == 0);
1847 err = kern_nexus_attr_set(nxa, NEXUS_ATTR_RX_SLOTS, ring_size);
1848 VERIFY(err == 0);
1849
1850 assert(err == 0);
1851
1852 if (ipsec_in_wmm_mode(pcb)) {
1853 os_log(OS_LOG_DEFAULT, "%s: %s enabling wmm mode\n",
1854 __func__, pcb->ipsec_if_xname);
1855
1856 init_params->output_sched_model = IFNET_SCHED_MODEL_DRIVER_MANAGED;
1857
1858 err = kern_nexus_attr_set(nxa, NEXUS_ATTR_TX_RINGS,
1859 IPSEC_NETIF_WMM_TX_RING_COUNT);
1860 VERIFY(err == 0);
1861 err = kern_nexus_attr_set(nxa, NEXUS_ATTR_RX_RINGS,
1862 IPSEC_NETIF_WMM_RX_RING_COUNT);
1863 VERIFY(err == 0);
1864
1865 err = kern_nexus_attr_set(nxa, NEXUS_ATTR_QMAP, NEXUS_QMAP_TYPE_WMM);
1866 VERIFY(err == 0);
1867 }
1868
1869 pcb->ipsec_netif_txring_size = ring_size;
1870
1871 bzero(&pp_init, sizeof(pp_init));
1872 pp_init.kbi_version = KERN_PBUFPOOL_CURRENT_VERSION;
1873 pp_init.kbi_flags |= (KBIF_VIRTUAL_DEVICE | KBIF_USER_ACCESS);
1874 // Note: we need more packets than can be held in the tx and rx rings because
1875 // packets can also be in the AQM queue(s)
1876 pp_init.kbi_packets = pcb->ipsec_netif_ring_size * (2 * pcb->ipsec_kpipe_count + 1);
1877 pp_init.kbi_bufsize = pcb->ipsec_slot_size;
1878 pp_init.kbi_buf_seg_size = IPSEC_IF_DEFAULT_BUF_SEG_SIZE;
1879 pp_init.kbi_max_frags = 1;
1880 (void) snprintf((char *)pp_init.kbi_name, sizeof(pp_init.kbi_name),
1881 "%s", provider_name);
1882 pp_init.kbi_ctx = NULL;
1883 pp_init.kbi_ctx_retain = NULL;
1884 pp_init.kbi_ctx_release = NULL;
1885
1886 err = kern_pbufpool_create(&pp_init, &pcb->ipsec_netif_pp, NULL);
1887 if (err != 0) {
1888 os_log_error(OS_LOG_DEFAULT, "%s pbufbool create failed, error %d\n", __func__, err);
1889 goto failed;
1890 }
1891
1892 err = kern_nexus_controller_register_provider(controller,
1893 ipsec_nx_dom_prov,
1894 provider_name,
1895 &prov_init,
1896 sizeof(prov_init),
1897 nxa,
1898 &pcb->ipsec_nx.if_provider);
1899 IPSEC_IF_VERIFY(err == 0);
1900 if (err != 0) {
1901 os_log_error(OS_LOG_DEFAULT, "%s register provider failed, error %d\n",
1902 __func__, err);
1903 goto failed;
1904 }
1905
1906 bzero(&net_init, sizeof(net_init));
1907 net_init.nxneti_version = KERN_NEXUS_NET_CURRENT_VERSION;
1908 net_init.nxneti_flags = 0;
1909 net_init.nxneti_eparams = init_params;
1910 net_init.nxneti_lladdr = NULL;
1911 net_init.nxneti_prepare = ipsec_netif_prepare;
1912 net_init.nxneti_rx_pbufpool = pcb->ipsec_netif_pp;
1913 net_init.nxneti_tx_pbufpool = pcb->ipsec_netif_pp;
1914 err = kern_nexus_controller_alloc_net_provider_instance(controller,
1915 pcb->ipsec_nx.if_provider,
1916 pcb,
1917 NULL,
1918 &pcb->ipsec_nx.if_instance,
1919 &net_init,
1920 ifp);
1921 IPSEC_IF_VERIFY(err == 0);
1922 if (err != 0) {
1923 os_log_error(OS_LOG_DEFAULT, "%s alloc_net_provider_instance failed, %d\n",
1924 __func__, err);
1925 kern_nexus_controller_deregister_provider(controller,
1926 pcb->ipsec_nx.if_provider);
1927 uuid_clear(pcb->ipsec_nx.if_provider);
1928 goto failed;
1929 }
1930
1931 failed:
1932 if (nxa) {
1933 kern_nexus_attr_destroy(nxa);
1934 }
1935 if (err && pcb->ipsec_netif_pp != NULL) {
1936 kern_pbufpool_destroy(pcb->ipsec_netif_pp);
1937 pcb->ipsec_netif_pp = NULL;
1938 }
1939 return err;
1940 }
1941
1942 static void
ipsec_detach_provider_and_instance(uuid_t provider,uuid_t instance)1943 ipsec_detach_provider_and_instance(uuid_t provider, uuid_t instance)
1944 {
1945 nexus_controller_t controller = kern_nexus_shared_controller();
1946 errno_t err;
1947
1948 if (!uuid_is_null(instance)) {
1949 err = kern_nexus_controller_free_provider_instance(controller,
1950 instance);
1951 if (err != 0) {
1952 os_log_error(OS_LOG_DEFAULT, "%s free_provider_instance failed %d\n",
1953 __func__, err);
1954 }
1955 uuid_clear(instance);
1956 }
1957 if (!uuid_is_null(provider)) {
1958 err = kern_nexus_controller_deregister_provider(controller,
1959 provider);
1960 if (err != 0) {
1961 os_log_error(OS_LOG_DEFAULT, "%s deregister_provider %d\n", __func__, err);
1962 }
1963 uuid_clear(provider);
1964 }
1965 return;
1966 }
1967
/*
 * Tear down all nexus state attached to this pcb, in reverse order of
 * attach: detach the flowswitch from the netif device port, free and
 * deregister both the flowswitch and netif provider/instance pairs,
 * destroy the netif packet buffer pool, and finally zero the pcb's
 * nexus bookkeeping structure.
 */
static void
ipsec_nexus_detach(struct ipsec_pcb *pcb)
{
	ipsec_nx_t nx = &pcb->ipsec_nx;
	nexus_controller_t controller = kern_nexus_shared_controller();
	errno_t err;

	if (!uuid_is_null(nx->fsw_device)) {
		err = kern_nexus_ifdetach(controller,
		    nx->fsw_instance,
		    nx->fsw_device);
		if (err != 0) {
			/* Log and keep going; the remaining teardown still runs. */
			os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_ifdetach ms device failed %d\n",
			    __func__, err);
		}
	}

	ipsec_detach_provider_and_instance(nx->fsw_provider,
	    nx->fsw_instance);
	ipsec_detach_provider_and_instance(nx->if_provider,
	    nx->if_instance);

	if (pcb->ipsec_netif_pp != NULL) {
		kern_pbufpool_destroy(pcb->ipsec_netif_pp);
		pcb->ipsec_netif_pp = NULL;
	}
	/* Clear all remaining UUIDs so later teardown treats them as absent. */
	memset(nx, 0, sizeof(*nx));
}
1996
/*
 * Register a flowswitch nexus provider named "com.apple.<type_name>.<ifname>"
 * and allocate one instance of it.
 *
 * On success, *provider and *instance hold the new UUIDs. On failure an
 * errno is returned and any partially-registered provider is deregistered
 * and its UUID cleared. The temporary nexus_attr_t is always destroyed
 * before returning.
 */
static errno_t
ipsec_create_fs_provider_and_instance(struct ipsec_pcb *pcb,
    const char *type_name,
    const char *ifname,
    uuid_t *provider, uuid_t *instance)
{
	nexus_attr_t __single attr = NULL;
	nexus_controller_t controller = kern_nexus_shared_controller();
	uuid_t dom_prov;
	errno_t err;
	struct kern_nexus_init init;
	nexus_name_t provider_name;

	err = kern_nexus_get_default_domain_provider(NEXUS_TYPE_FLOW_SWITCH,
	    &dom_prov);
	IPSEC_IF_VERIFY(err == 0);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s can't get %s provider, error %d\n",
		    __func__, type_name, err);
		goto failed;
	}

	err = kern_nexus_attr_create(&attr);
	IPSEC_IF_VERIFY(err == 0);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_attr_create failed: %d\n",
		    __func__, err);
		goto failed;
	}

	/* Slot buffer size follows the pcb's configured slot size. */
	uint64_t slot_buffer_size = pcb->ipsec_slot_size;
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
	VERIFY(err == 0);

	// Reset ring size for flowswitch nexus to limit memory usage. Larger RX than netif.
	uint64_t tx_ring_size = pcb->ipsec_tx_fsw_ring_size;
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_TX_SLOTS, tx_ring_size);
	VERIFY(err == 0);
	uint64_t rx_ring_size = pcb->ipsec_rx_fsw_ring_size;
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_RX_SLOTS, rx_ring_size);
	VERIFY(err == 0);
	/*
	 * Configure flowswitch to use super-packet (multi-buflet).
	 * This allows flowswitch to perform intra-stack packet aggregation.
	 */
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_MAX_FRAGS,
	    NX_FSW_TCP_RX_AGG_ENABLED() ? NX_PBUF_FRAGS_MAX : 1);
	VERIFY(err == 0);

	snprintf((char *)provider_name, sizeof(provider_name),
	    "com.apple.%s.%s", type_name, ifname);
	err = kern_nexus_controller_register_provider(controller,
	    dom_prov,
	    provider_name,
	    NULL,
	    0,
	    attr,
	    provider);
	/* The attr is copied by the controller; safe to destroy now. */
	kern_nexus_attr_destroy(attr);
	attr = NULL;
	IPSEC_IF_VERIFY(err == 0);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s register %s provider failed, error %d\n",
		    __func__, type_name, err);
		goto failed;
	}
	bzero(&init, sizeof(init));
	init.nxi_version = KERN_NEXUS_CURRENT_VERSION;
	err = kern_nexus_controller_alloc_provider_instance(controller,
	    *provider,
	    NULL, NULL,
	    instance, &init);
	IPSEC_IF_VERIFY(err == 0);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s alloc_provider_instance %s failed, %d\n",
		    __func__, type_name, err);
		/* Unwind the registration so no orphan provider remains. */
		kern_nexus_controller_deregister_provider(controller,
		    *provider);
		uuid_clear(*provider);
	}
failed:
	return err;
}
2080
/*
 * Create a flowswitch nexus for the pcb's interface and attach it to the
 * netif device port. On success the flowswitch's netagent UUID is cached
 * in the pcb for later use. On failure all nexus state is torn down and
 * the ifnet is detached; a failing ifnet_detach there is fatal (panic),
 * since the interface would otherwise be left in an unrecoverable state.
 */
static errno_t
ipsec_flowswitch_attach(struct ipsec_pcb *pcb)
{
	nexus_controller_t controller = kern_nexus_shared_controller();
	errno_t err = 0;
	ipsec_nx_t nx = &pcb->ipsec_nx;

	// Allocate flowswitch
	err = ipsec_create_fs_provider_and_instance(pcb,
	    "flowswitch",
	    pcb->ipsec_ifp->if_xname,
	    &nx->fsw_provider,
	    &nx->fsw_instance);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s: failed to create bridge provider and instance\n",
		    __func__);
		goto failed;
	}

	// Attach flowswitch to device port
	err = kern_nexus_ifattach(controller, nx->fsw_instance,
	    NULL, nx->if_instance,
	    FALSE, &nx->fsw_device);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s kern_nexus_ifattach ms device %d\n", __func__, err);
		goto failed;
	}

	// Extract the agent UUID and save for later
	struct kern_nexus *flowswitch_nx = nx_find(nx->fsw_instance, false);
	if (flowswitch_nx != NULL) {
		struct nx_flowswitch *flowswitch = NX_FSW_PRIVATE(flowswitch_nx);
		if (flowswitch != NULL) {
			/* Read the agent UUID under the flowswitch read lock. */
			FSW_RLOCK(flowswitch);
			uuid_copy(nx->fsw_agent, flowswitch->fsw_agent_uuid);
			FSW_UNLOCK(flowswitch);
		} else {
			os_log_error(OS_LOG_DEFAULT, "ipsec_flowswitch_attach - flowswitch is NULL\n");
		}
		/* Drop the reference taken by nx_find(). */
		nx_release(flowswitch_nx);
	} else {
		os_log_error(OS_LOG_DEFAULT, "ipsec_flowswitch_attach - unable to find flowswitch nexus\n");
	}

	return 0;

failed:
	ipsec_nexus_detach(pcb);

	errno_t detach_error = 0;
	if ((detach_error = ifnet_detach(pcb->ipsec_ifp)) != 0) {
		panic("ipsec_flowswitch_attach - ifnet_detach failed: %d", detach_error);
		/* NOT REACHED */
	}

	return err;
}
2138
2139 #pragma mark Kernel Pipe Nexus
2140
/*
 * Lazily create the global kernel-pipe nexus controller and register the
 * shared "com.apple.nexus.ipsec.kpipe" provider. The controller is
 * refcounted under ipsec_lock: only the first caller performs the setup;
 * subsequent callers just bump ipsec_ncd_refcount. On any failure the
 * controller is destroyed and the refcount reset to 0 so a later attempt
 * can start from scratch.
 */
static errno_t
ipsec_register_kernel_pipe_nexus(struct ipsec_pcb *pcb)
{
	nexus_attr_t __single nxa = NULL;
	nexus_name_t provider_name = "com.apple.nexus.ipsec.kpipe";
	errno_t result;

	lck_mtx_lock(&ipsec_lock);
	if (ipsec_ncd_refcount++) {
		/* Already registered by an earlier caller. */
		lck_mtx_unlock(&ipsec_lock);
		return 0;
	}

	result = kern_nexus_controller_create(&ipsec_ncd);
	if (result) {
		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_controller_create failed: %d\n",
		    __FUNCTION__, result);
		goto done;
	}

	uuid_t dom_prov;
	result = kern_nexus_get_default_domain_provider(
		NEXUS_TYPE_KERNEL_PIPE, &dom_prov);
	if (result) {
		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_get_default_domain_provider failed: %d\n",
		    __FUNCTION__, result);
		goto done;
	}

	struct kern_nexus_provider_init prov_init = {
		.nxpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
		.nxpi_flags = NXPIF_VIRTUAL_DEVICE,
		.nxpi_pre_connect = ipsec_nexus_pre_connect,
		.nxpi_connected = ipsec_nexus_connected,
		.nxpi_pre_disconnect = ipsec_nexus_pre_disconnect,
		.nxpi_disconnected = ipsec_nexus_disconnected,
		.nxpi_ring_init = ipsec_kpipe_ring_init,
		.nxpi_ring_fini = ipsec_kpipe_ring_fini,
		.nxpi_slot_init = NULL,
		.nxpi_slot_fini = NULL,
		.nxpi_sync_tx = ipsec_kpipe_sync_tx,
		.nxpi_sync_rx = ipsec_kpipe_sync_rx,
		.nxpi_tx_doorbell = NULL,
	};

	result = kern_nexus_attr_create(&nxa);
	if (result) {
		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_attr_create failed: %d\n",
		    __FUNCTION__, result);
		goto done;
	}

	uint64_t slot_buffer_size = IPSEC_IF_DEFAULT_SLOT_SIZE;
	result = kern_nexus_attr_set(nxa, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
	VERIFY(result == 0);

	// Reset ring size for kernel pipe nexus to limit memory usage
	// Note: It's better to have less on slots on the kpipe TX ring than the netif
	// so back pressure is applied at the AQM layer
	uint64_t ring_size =
	    pcb->ipsec_kpipe_tx_ring_size != 0 ? pcb->ipsec_kpipe_tx_ring_size :
	    pcb->ipsec_netif_ring_size != 0 ? pcb->ipsec_netif_ring_size :
	    if_ipsec_ring_size;
	result = kern_nexus_attr_set(nxa, NEXUS_ATTR_TX_SLOTS, ring_size);
	VERIFY(result == 0);

	ring_size =
	    pcb->ipsec_kpipe_rx_ring_size != 0 ? pcb->ipsec_kpipe_rx_ring_size :
	    pcb->ipsec_netif_ring_size != 0 ? pcb->ipsec_netif_ring_size :
	    if_ipsec_ring_size;
	result = kern_nexus_attr_set(nxa, NEXUS_ATTR_RX_SLOTS, ring_size);
	VERIFY(result == 0);

	result = kern_nexus_controller_register_provider(ipsec_ncd,
	    dom_prov,
	    provider_name,
	    &prov_init,
	    sizeof(prov_init),
	    nxa,
	    &ipsec_kpipe_uuid);
	if (result) {
		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_controller_register_provider failed: %d\n",
		    __FUNCTION__, result);
		goto done;
	}

done:
	if (nxa) {
		kern_nexus_attr_destroy(nxa);
	}

	if (result) {
		/* Undo partial setup so the next caller retries cleanly. */
		if (ipsec_ncd) {
			kern_nexus_controller_destroy(ipsec_ncd);
			ipsec_ncd = NULL;
		}
		ipsec_ncd_refcount = 0;
	}

	lck_mtx_unlock(&ipsec_lock);

	return result;
}
2244
2245 static void
ipsec_unregister_kernel_pipe_nexus(void)2246 ipsec_unregister_kernel_pipe_nexus(void)
2247 {
2248 lck_mtx_lock(&ipsec_lock);
2249
2250 VERIFY(ipsec_ncd_refcount > 0);
2251
2252 if (--ipsec_ncd_refcount == 0) {
2253 kern_nexus_controller_destroy(ipsec_ncd);
2254 ipsec_ncd = NULL;
2255 }
2256
2257 lck_mtx_unlock(&ipsec_lock);
2258 }
2259
/* This structure only holds onto kpipe channels that need to be
 * freed in the future, but are cleared from the pcb under lock.
 * It lets ipsec_detach_channels() snapshot the state while holding
 * the pcb lock, and ipsec_free_channels() do the actual freeing later
 * without the lock held.
 */
struct ipsec_detached_channels {
	int count;                             /* number of valid entries in uuids[]; 0 means nothing to free */
	kern_pbufpool_t pp;                    /* kpipe packet buffer pool to destroy along with the channels */
	uuid_t uuids[IPSEC_IF_MAX_RING_COUNT]; /* detached kpipe provider-instance UUIDs */
};
2268
/*
 * Snapshot and clear the pcb's kpipe channel state into dc while the
 * pcb lock is held exclusively. The caller later frees the detached
 * channels via ipsec_free_channels() without holding the lock.
 * VERIFYs document the invariants: UUIDs are non-null exactly for the
 * first ipsec_kpipe_count slots, and the kpipe pbufpool exists iff
 * there is at least one channel.
 */
static void
ipsec_detach_channels(struct ipsec_pcb *pcb, struct ipsec_detached_channels *dc)
{
	LCK_RW_ASSERT(&pcb->ipsec_pcb_lock, LCK_RW_TYPE_EXCLUSIVE);

	if (!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
		/* No channels were ever allocated; every slot must be null. */
		for (int i = 0; i < IPSEC_IF_MAX_RING_COUNT; i++) {
			VERIFY(uuid_is_null(pcb->ipsec_kpipe_uuid[i]));
		}
		dc->count = 0;
		return;
	}

	dc->count = pcb->ipsec_kpipe_count;

	VERIFY(dc->count >= 0);
	VERIFY(dc->count <= IPSEC_IF_MAX_RING_COUNT);

	/* Move the first `count` UUIDs out of the pcb and into dc. */
	for (int i = 0; i < dc->count; i++) {
		VERIFY(!uuid_is_null(pcb->ipsec_kpipe_uuid[i]));
		uuid_copy(dc->uuids[i], pcb->ipsec_kpipe_uuid[i]);
		uuid_clear(pcb->ipsec_kpipe_uuid[i]);
	}
	/* Remaining slots must never have been populated. */
	for (int i = dc->count; i < IPSEC_IF_MAX_RING_COUNT; i++) {
		VERIFY(uuid_is_null(pcb->ipsec_kpipe_uuid[i]));
	}

	/* The kpipe pbufpool exists iff there is at least one channel. */
	if (dc->count) {
		VERIFY(pcb->ipsec_kpipe_pp);
	} else {
		VERIFY(!pcb->ipsec_kpipe_pp);
	}

	/* Transfer pbufpool ownership to dc. */
	dc->pp = pcb->ipsec_kpipe_pp;

	pcb->ipsec_kpipe_pp = NULL;

	ipsec_flag_clr(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED);
}
2308
/*
 * Free the kpipe channels previously snapshotted by ipsec_detach_channels():
 * free each provider instance, destroy the pbufpool, drop the kernel-pipe
 * nexus reference, and zero dc. Must be called without the pcb lock held.
 * A dc with count == 0 is a no-op.
 */
static void
ipsec_free_channels(struct ipsec_detached_channels *dc)
{
	if (!dc->count) {
		return;
	}

	for (int i = 0; i < dc->count; i++) {
		errno_t result;
		result = kern_nexus_controller_free_provider_instance(ipsec_ncd,
		    dc->uuids[i]);
		VERIFY(!result);
	}

	/* Pool must exist whenever there were channels (see ipsec_detach_channels). */
	VERIFY(dc->pp);
	kern_pbufpool_destroy(dc->pp);

	/* Drop the reference taken in ipsec_enable_channel(). */
	ipsec_unregister_kernel_pipe_nexus();

	memset(dc, 0, sizeof(*dc));
}
2330
/*
 * Allocate and bind the pcb's kernel-pipe channels. Requires the
 * PRIV_SKYWALK_REGISTER_KERNEL_PIPE privilege. Registers the shared
 * kpipe nexus (refcounted), creates a pbufpool sized for all rings,
 * then allocates and binds one provider instance per kpipe. Binding is
 * restricted to either a configured pid or process UUID (falling back
 * to the calling process when neither is set). On any failure all
 * allocated instances, the pbufpool, and the nexus reference are
 * unwound; on success IPSEC_FLAGS_KPIPE_ALLOCATED is set.
 */
static errno_t
ipsec_enable_channel(struct ipsec_pcb *pcb, struct proc *proc)
{
	struct kern_nexus_init init;
	struct kern_pbufpool_init pp_init;
	uuid_t uuid_null = {0};
	errno_t result;

	/* Only privileged callers may register kernel pipes. */
	kauth_cred_t cred = kauth_cred_get();
	result = priv_check_cred(cred, PRIV_SKYWALK_REGISTER_KERNEL_PIPE, 0);
	if (result) {
		return result;
	}

	VERIFY(pcb->ipsec_kpipe_count);
	VERIFY(!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED));

	result = ipsec_register_kernel_pipe_nexus(pcb);

	/* Take the pcb lock before touching kpipe state; checked below. */
	lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);

	if (result) {
		os_log_error(OS_LOG_DEFAULT, "%s: %s failed to register kernel pipe nexus\n",
		    __func__, pcb->ipsec_if_xname);
		goto done;
	}

	VERIFY(ipsec_ncd);

	bzero(&pp_init, sizeof(pp_init));
	pp_init.kbi_version = KERN_PBUFPOOL_CURRENT_VERSION;
	pp_init.kbi_flags |= (KBIF_VIRTUAL_DEVICE | KBIF_USER_ACCESS);
	// Note: we only need as many packets as can be held in the tx and rx rings
	pp_init.kbi_packets = pcb->ipsec_netif_ring_size * 2 * pcb->ipsec_kpipe_count;
	pp_init.kbi_bufsize = pcb->ipsec_slot_size;
	pp_init.kbi_buf_seg_size = IPSEC_IF_DEFAULT_BUF_SEG_SIZE;
	pp_init.kbi_max_frags = 1;
	(void) snprintf((char *)pp_init.kbi_name, sizeof(pp_init.kbi_name),
	    "com.apple.kpipe.%s", pcb->ipsec_if_xname);
	pp_init.kbi_ctx = NULL;
	pp_init.kbi_ctx_retain = NULL;
	pp_init.kbi_ctx_release = NULL;

	result = kern_pbufpool_create(&pp_init, &pcb->ipsec_kpipe_pp,
	    NULL);
	if (result != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s: %s pbufbool create failed, error %d\n",
		    __func__, pcb->ipsec_if_xname, result);
		goto done;
	}

	bzero(&init, sizeof(init));
	init.nxi_version = KERN_NEXUS_CURRENT_VERSION;
	init.nxi_tx_pbufpool = pcb->ipsec_kpipe_pp;

	/* Allocate and bind one kpipe provider instance per configured channel. */
	for (unsigned int i = 0; i < pcb->ipsec_kpipe_count; i++) {
		VERIFY(uuid_is_null(pcb->ipsec_kpipe_uuid[i]));
		result = kern_nexus_controller_alloc_provider_instance(ipsec_ncd,
		    ipsec_kpipe_uuid, pcb, NULL, &pcb->ipsec_kpipe_uuid[i], &init);

		if (result == 0) {
			nexus_port_t port = NEXUS_PORT_KERNEL_PIPE_CLIENT;
			const bool has_proc_uuid = !uuid_is_null(pcb->ipsec_kpipe_proc_uuid);
			pid_t pid = pcb->ipsec_kpipe_pid;
			/* Default to the caller's pid when no explicit pid/UUID was configured. */
			if (!pid && !has_proc_uuid) {
				pid = proc_pid(proc);
			}
			result = kern_nexus_controller_bind_provider_instance(ipsec_ncd,
			    (const uint8_t *)pcb->ipsec_kpipe_uuid[i], &port,
			    pid, has_proc_uuid ? pcb->ipsec_kpipe_proc_uuid : uuid_null, NULL,
			    0, has_proc_uuid ? NEXUS_BIND_EXEC_UUID:NEXUS_BIND_PID);
		}

		if (result) {
			/* Unwind all of them on error */
			for (int j = 0; j < IPSEC_IF_MAX_RING_COUNT; j++) {
				if (!uuid_is_null(pcb->ipsec_kpipe_uuid[j])) {
					kern_nexus_controller_free_provider_instance(ipsec_ncd,
					    pcb->ipsec_kpipe_uuid[j]);
					uuid_clear(pcb->ipsec_kpipe_uuid[j]);
				}
			}
			goto done;
		}
	}

done:
	lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);

	if (result) {
		/* Failure: tear down the pool and drop the nexus reference. */
		if (pcb->ipsec_kpipe_pp != NULL) {
			kern_pbufpool_destroy(pcb->ipsec_kpipe_pp);
			pcb->ipsec_kpipe_pp = NULL;
		}
		ipsec_unregister_kernel_pipe_nexus();
	} else {
		ipsec_flag_set(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED);
	}

	return result;
}
2432
2433 #endif // IPSEC_NEXUS
2434
2435
2436 /* Kernel control functions */
2437
2438 static inline int
ipsec_find_by_unit(u_int32_t unit)2439 ipsec_find_by_unit(u_int32_t unit)
2440 {
2441 struct ipsec_pcb *next_pcb = NULL;
2442 int found = 0;
2443
2444 TAILQ_FOREACH(next_pcb, &ipsec_head, ipsec_chain) {
2445 if (next_pcb->ipsec_unit == unit) {
2446 found = 1;
2447 break;
2448 }
2449 }
2450
2451 return found;
2452 }
2453
/*
 * Destroy a pcb's locks and pending input chain, unlink it from the
 * global list, and free it. `locked` indicates whether the caller
 * already holds ipsec_lock; when false it is taken around the list
 * removal only.
 */
static inline void
ipsec_free_pcb(struct ipsec_pcb *pcb, bool locked)
{
#if IPSEC_NEXUS
	/* Drop any mbufs still queued for input before tearing down locks. */
	mbuf_freem_list(pcb->ipsec_input_chain);
	pcb->ipsec_input_chain_count = 0;
	lck_mtx_destroy(&pcb->ipsec_input_chain_lock, &ipsec_lck_grp);
	lck_mtx_destroy(&pcb->ipsec_kpipe_encrypt_lock, &ipsec_lck_grp);
	lck_mtx_destroy(&pcb->ipsec_kpipe_decrypt_lock, &ipsec_lck_grp);
#endif // IPSEC_NEXUS
	lck_mtx_destroy(&pcb->ipsec_pcb_data_move_lock, &ipsec_lck_grp);
	lck_rw_destroy(&pcb->ipsec_pcb_lock, &ipsec_lck_grp);
	if (!locked) {
		lck_mtx_lock(&ipsec_lock);
	}
	TAILQ_REMOVE(&ipsec_head, pcb, ipsec_chain);
	if (!locked) {
		lck_mtx_unlock(&ipsec_lock);
	}
	zfree(ipsec_pcb_zone, pcb);
}
2475
/*
 * Allocate a new pcb for a kernel-control unit. If *unit is 0 the next
 * free unit number is chosen; otherwise the requested unit must be
 * unused (EBUSY if taken). A unique interface id is also assigned,
 * keeping the global pcb list sorted by unique id so the id space can
 * be scanned for gaps after a wrap. On success *unitinfo points at the
 * new pcb (inserted into ipsec_head) and 0 is returned.
 */
static errno_t
ipsec_ctl_setup(u_int32_t *unit, void **unitinfo)
{
	if (unit == NULL || unitinfo == NULL) {
		return EINVAL;
	}

	lck_mtx_lock(&ipsec_lock);

	/* Find next available unit */
	if (*unit == 0) {
		*unit = 1;
		while (*unit != ctl_maxunit) {
			if (ipsec_find_by_unit(*unit)) {
				(*unit)++;
			} else {
				break;
			}
		}
		if (*unit == ctl_maxunit) {
			/* Every unit number is in use. */
			lck_mtx_unlock(&ipsec_lock);
			return EBUSY;
		}
	} else if (ipsec_find_by_unit(*unit)) {
		lck_mtx_unlock(&ipsec_lock);
		return EBUSY;
	}

	/* Find some open interface id */
	u_int32_t chosen_unique_id = 1;
	struct ipsec_pcb *next_pcb = TAILQ_LAST(&ipsec_head, ipsec_list);
	if (next_pcb != NULL) {
		/* List was not empty, add one to the last item */
		chosen_unique_id = next_pcb->ipsec_unique_id + 1;
		next_pcb = NULL;

		/*
		 * If this wrapped the id number, start looking at
		 * the front of the list for an unused id.
		 */
		if (chosen_unique_id == 0) {
			/* Find the next unused ID */
			chosen_unique_id = 1;
			TAILQ_FOREACH(next_pcb, &ipsec_head, ipsec_chain) {
				if (next_pcb->ipsec_unique_id > chosen_unique_id) {
					/* We found a gap */
					break;
				}

				chosen_unique_id = next_pcb->ipsec_unique_id + 1;
			}
		}
	}

	struct ipsec_pcb *pcb = zalloc_flags(ipsec_pcb_zone, Z_WAITOK | Z_ZERO);

	*unitinfo = pcb;
	pcb->ipsec_unit = *unit;
	pcb->ipsec_unique_id = chosen_unique_id;

	/*
	 * Insert before the pcb that owns the id above the gap, or at the
	 * tail when the id came from extending the end of the list; either
	 * way the list stays sorted by unique id.
	 */
	if (next_pcb != NULL) {
		TAILQ_INSERT_BEFORE(next_pcb, pcb, ipsec_chain);
	} else {
		TAILQ_INSERT_TAIL(&ipsec_head, pcb, ipsec_chain);
	}

	lck_mtx_unlock(&ipsec_lock);

	return 0;
}
2546
/*
 * Kernel-control bind handler. Creates the pcb via ipsec_ctl_setup() if
 * it does not exist yet, then initializes its control reference, default
 * configuration, and locks. Returns EINVAL if no pcb could be obtained
 * or if bind was already called on this pcb.
 */
static errno_t
ipsec_ctl_bind(kern_ctl_ref kctlref,
    struct sockaddr_ctl *sac,
    void **unitinfo)
{
	if (*unitinfo == NULL) {
		u_int32_t unit = 0;
		/* On setup failure *unitinfo stays NULL and we fail below. */
		(void)ipsec_ctl_setup(&unit, unitinfo);
	}

	struct ipsec_pcb *pcb = (struct ipsec_pcb *)*unitinfo;
	if (pcb == NULL) {
		return EINVAL;
	}

	if (pcb->ipsec_ctlref != NULL) {
		// Return if bind was already called
		return EINVAL;
	}

	/* Setup the protocol control block */
	pcb->ipsec_ctlref = kctlref;
	pcb->ipsec_unit = sac->sc_unit;
	pcb->ipsec_output_service_class = MBUF_SC_OAM;

#if IPSEC_NEXUS
	/* Defaults; may be overridden by setsockopt before connect. */
	pcb->ipsec_use_netif = false;
	pcb->ipsec_slot_size = IPSEC_IF_DEFAULT_SLOT_SIZE;
	pcb->ipsec_netif_ring_size = if_ipsec_ring_size;
	pcb->ipsec_tx_fsw_ring_size = if_ipsec_tx_fsw_ring_size;
	pcb->ipsec_rx_fsw_ring_size = if_ipsec_rx_fsw_ring_size;
#endif // IPSEC_NEXUS

	lck_rw_init(&pcb->ipsec_pcb_lock, &ipsec_lck_grp, &ipsec_lck_attr);
	lck_mtx_init(&pcb->ipsec_pcb_data_move_lock, &ipsec_lck_grp, &ipsec_lck_attr);
#if IPSEC_NEXUS
	pcb->ipsec_input_chain_count = 0;
	lck_mtx_init(&pcb->ipsec_input_chain_lock, &ipsec_lck_grp, &ipsec_lck_attr);
	lck_mtx_init(&pcb->ipsec_kpipe_encrypt_lock, &ipsec_lck_grp, &ipsec_lck_attr);
	lck_mtx_init(&pcb->ipsec_kpipe_decrypt_lock, &ipsec_lck_grp, &ipsec_lck_attr);
#endif // IPSEC_NEXUS

	return 0;
}
2591
/*
 * Kernel-control connect handler: creates and attaches the ipsec ifnet
 * for this pcb. Two attach paths exist: the Skywalk-native path (netif
 * nexus + optional kpipe channels + flowswitch) when ipsec_use_netif is
 * set, and the classic ifnet path otherwise. On success the interface
 * is marked IFF_RUNNING; on failure the pcb is freed (except after
 * flowswitch-attach failure, where ipsec_detached() frees it later).
 */
static errno_t
ipsec_ctl_connect(kern_ctl_ref kctlref,
    struct sockaddr_ctl *sac,
    void **unitinfo)
{
	struct ifnet_init_eparams ipsec_init = {};
	errno_t result = 0;

	if (*unitinfo == NULL) {
		(void)ipsec_ctl_bind(kctlref, sac, unitinfo);
	}

	struct ipsec_pcb *__single pcb = *unitinfo;
	if (pcb == NULL) {
		return EINVAL;
	}

	/* Handle case where ipsec_ctl_setup() was called, but ipsec_ctl_bind() was not */
	if (pcb->ipsec_ctlref == NULL) {
		(void)ipsec_ctl_bind(kctlref, sac, unitinfo);
	}

	/* Interface names are 0-based while control units are 1-based. */
	snprintf(pcb->ipsec_if_xname, sizeof(pcb->ipsec_if_xname), "ipsec%d", pcb->ipsec_unit - 1);
	snprintf(pcb->ipsec_unique_name, sizeof(pcb->ipsec_unique_name), "ipsecid%d", pcb->ipsec_unique_id - 1);
	os_log(OS_LOG_DEFAULT, "ipsec_ctl_connect: creating interface %s (id %s)\n", pcb->ipsec_if_xname, pcb->ipsec_unique_name);

	/* Create the interface */
	bzero(&ipsec_init, sizeof(ipsec_init));
	ipsec_init.ver = IFNET_INIT_CURRENT_VERSION;
	ipsec_init.len = sizeof(ipsec_init);

#if IPSEC_NEXUS
	if (pcb->ipsec_use_netif) {
		ipsec_init.flags = (IFNET_INIT_SKYWALK_NATIVE | IFNET_INIT_NX_NOAUTO);
	} else
#endif // IPSEC_NEXUS
	{
		/* Classic path drives output through the start callback. */
		ipsec_init.flags = IFNET_INIT_NX_NOAUTO;
		ipsec_init.start = ipsec_start;
	}
	ipsec_init.name = "ipsec";
	ipsec_init.unit = pcb->ipsec_unit - 1;
	ipsec_init.uniqueid = pcb->ipsec_unique_name;
	ipsec_init.uniqueid_len = (uint32_t)strbuflen(pcb->ipsec_unique_name,
	    sizeof(pcb->ipsec_unique_name));
	ipsec_init.family = IFNET_FAMILY_IPSEC;
	ipsec_init.type = IFT_OTHER;
	ipsec_init.demux = ipsec_demux;
	ipsec_init.add_proto = ipsec_add_proto;
	ipsec_init.del_proto = ipsec_del_proto;
	ipsec_init.softc = pcb;
	ipsec_init.ioctl = ipsec_ioctl;
	ipsec_init.free = ipsec_detached;

#if IPSEC_NEXUS
	/* We don't support kpipes without a netif */
	if (pcb->ipsec_kpipe_count && !pcb->ipsec_use_netif) {
		result = ENOTSUP;
		os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - kpipe requires netif: failed %d\n", result);
		ipsec_free_pcb(pcb, false);
		*unitinfo = NULL;
		return result;
	}

	if (if_ipsec_debug != 0) {
		printf("%s: %s%d use_netif %d kpipe_count %d slot_size %u ring_size %u "
		    "kpipe_tx_ring_size %u kpipe_rx_ring_size %u\n",
		    __func__,
		    ipsec_init.name, ipsec_init.unit,
		    pcb->ipsec_use_netif,
		    pcb->ipsec_kpipe_count,
		    pcb->ipsec_slot_size,
		    pcb->ipsec_netif_ring_size,
		    pcb->ipsec_kpipe_tx_ring_size,
		    pcb->ipsec_kpipe_rx_ring_size);
	}
	if (pcb->ipsec_use_netif) {
		/* Skywalk-native path: kpipes (optional), netif, then flowswitch. */
		if (pcb->ipsec_kpipe_count) {
			result = ipsec_enable_channel(pcb, current_proc());
			if (result) {
				os_log_error(OS_LOG_DEFAULT, "%s: %s failed to enable channels\n",
				    __func__, pcb->ipsec_if_xname);
				ipsec_free_pcb(pcb, false);
				*unitinfo = NULL;
				return result;
			}
		}

		result = ipsec_nexus_ifattach(pcb, &ipsec_init, &pcb->ipsec_ifp);
		if (result != 0) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - ipsec_nexus_ifattach failed: %d\n", result);
			ipsec_free_pcb(pcb, false);
			*unitinfo = NULL;
			return result;
		}

		result = ipsec_flowswitch_attach(pcb);
		if (result != 0) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - ipsec_flowswitch_attach failed: %d\n", result);
			// Do not call ipsec_free_pcb(). We will be attached already, and will be freed later
			// in ipsec_detached().
			*unitinfo = NULL;
			return result;
		}

		/* Attach to bpf */
		bpfattach(pcb->ipsec_ifp, DLT_RAW, 0);
	} else
#endif // IPSEC_NEXUS
	{
		/* Classic ifnet path. */
		result = ifnet_allocate_extended(&ipsec_init, &pcb->ipsec_ifp);
		if (result != 0) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - ifnet_allocate failed: %d\n", result);
			ipsec_free_pcb(pcb, false);
			*unitinfo = NULL;
			return result;
		}
		ipsec_ifnet_set_attrs(pcb->ipsec_ifp);

		/* Attach the interface */
		result = ifnet_attach(pcb->ipsec_ifp, NULL);
		if (result != 0) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - ifnet_attach failed: %d\n", result);
			ifnet_release(pcb->ipsec_ifp);
			ipsec_free_pcb(pcb, false);
			*unitinfo = NULL;
			return result;
		}

		/* Attach to bpf */
		bpfattach(pcb->ipsec_ifp, DLT_NULL, 0);
	}

#if IPSEC_NEXUS
	/*
	 * Mark the data path as ready.
	 * If kpipe nexus is being used then the data path is marked ready only when a kpipe channel is connected.
	 */
	if (pcb->ipsec_kpipe_count == 0) {
		lck_mtx_lock(&pcb->ipsec_pcb_data_move_lock);
		IPSEC_SET_DATA_PATH_READY(pcb);
		lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
	}
#endif

	/* The interface's resources are allocated; mark it as running */
	ifnet_set_flags(pcb->ipsec_ifp, IFF_RUNNING, IFF_RUNNING);

	return 0;
}
2742
2743 static errno_t
ipsec_detach_ip(ifnet_t interface,protocol_family_t protocol,socket_t pf_socket)2744 ipsec_detach_ip(ifnet_t interface,
2745 protocol_family_t protocol,
2746 socket_t pf_socket)
2747 {
2748 errno_t result = EPROTONOSUPPORT;
2749
2750 /* Attempt a detach */
2751 if (protocol == PF_INET) {
2752 struct ifreq ifr;
2753
2754 bzero(&ifr, sizeof(ifr));
2755 snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s%d",
2756 ifnet_name(interface), ifnet_unit(interface));
2757
2758 result = sock_ioctl(pf_socket, SIOCPROTODETACH, &ifr);
2759 } else if (protocol == PF_INET6) {
2760 struct in6_ifreq ifr6;
2761
2762 bzero(&ifr6, sizeof(ifr6));
2763 snprintf(ifr6.ifr_name, sizeof(ifr6.ifr_name), "%s%d",
2764 ifnet_name(interface), ifnet_unit(interface));
2765
2766 result = sock_ioctl(pf_socket, SIOCPROTODETACH_IN6, &ifr6);
2767 }
2768
2769 return result;
2770 }
2771
2772 static void
ipsec_remove_address(ifnet_t interface,protocol_family_t protocol,ifaddr_t address,socket_t pf_socket)2773 ipsec_remove_address(ifnet_t interface,
2774 protocol_family_t protocol,
2775 ifaddr_t address,
2776 socket_t pf_socket)
2777 {
2778 errno_t result = 0;
2779
2780 /* Attempt a detach */
2781 if (protocol == PF_INET) {
2782 struct ifreq ifr;
2783
2784 bzero(&ifr, sizeof(ifr));
2785 snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s%d",
2786 ifnet_name(interface), ifnet_unit(interface));
2787 result = ifaddr_address(address, &ifr.ifr_addr, sizeof(ifr.ifr_addr));
2788 if (result != 0) {
2789 os_log_error(OS_LOG_DEFAULT, "ipsec_remove_address - ifaddr_address failed: %d", result);
2790 } else {
2791 result = sock_ioctl(pf_socket, SIOCDIFADDR, &ifr);
2792 if (result != 0) {
2793 os_log_error(OS_LOG_DEFAULT, "ipsec_remove_address - SIOCDIFADDR failed: %d", result);
2794 }
2795 }
2796 } else if (protocol == PF_INET6) {
2797 struct in6_ifreq ifr6;
2798
2799 bzero(&ifr6, sizeof(ifr6));
2800 snprintf(ifr6.ifr_name, sizeof(ifr6.ifr_name), "%s%d",
2801 ifnet_name(interface), ifnet_unit(interface));
2802 result = ifaddr_address(address, (struct sockaddr*)&ifr6.ifr_addr,
2803 sizeof(ifr6.ifr_addr));
2804 if (result != 0) {
2805 os_log_error(OS_LOG_DEFAULT, "ipsec_remove_address - ifaddr_address failed (v6): %d",
2806 result);
2807 } else {
2808 result = sock_ioctl(pf_socket, SIOCDIFADDR_IN6, &ifr6);
2809 if (result != 0) {
2810 os_log_error(OS_LOG_DEFAULT, "ipsec_remove_address - SIOCDIFADDR_IN6 failed: %d",
2811 result);
2812 }
2813 }
2814 }
2815 }
2816
2817 static void
ipsec_cleanup_family(ifnet_t interface,protocol_family_t protocol)2818 ipsec_cleanup_family(ifnet_t interface,
2819 protocol_family_t protocol)
2820 {
2821 errno_t result = 0;
2822 socket_t __single pf_socket = NULL;
2823 uint16_t addresses_count = 0;
2824 ifaddr_t *__counted_by(addresses_count) addresses = NULL;
2825 int i;
2826
2827 if (protocol != PF_INET && protocol != PF_INET6) {
2828 os_log_error(OS_LOG_DEFAULT, "ipsec_cleanup_family - invalid protocol family %d\n", protocol);
2829 return;
2830 }
2831
2832 /* Create a socket for removing addresses and detaching the protocol */
2833 result = sock_socket(protocol, SOCK_DGRAM, 0, NULL, NULL, &pf_socket);
2834 if (result != 0) {
2835 if (result != EAFNOSUPPORT) {
2836 os_log_error(OS_LOG_DEFAULT, "ipsec_cleanup_family - failed to create %s socket: %d\n",
2837 protocol == PF_INET ? "IP" : "IPv6", result);
2838 }
2839 goto cleanup;
2840 }
2841
2842 /* always set SS_PRIV, we want to close and detach regardless */
2843 sock_setpriv(pf_socket, 1);
2844
2845 result = ipsec_detach_ip(interface, protocol, pf_socket);
2846 if (result == 0 || result == ENXIO) {
2847 /* We are done! We either detached or weren't attached. */
2848 goto cleanup;
2849 } else if (result != EBUSY) {
2850 /* Uh, not really sure what happened here... */
2851 os_log_error(OS_LOG_DEFAULT, "ipsec_cleanup_family - ipsec_detach_ip failed: %d\n", result);
2852 goto cleanup;
2853 }
2854
2855 /*
2856 * At this point, we received an EBUSY error. This means there are
2857 * addresses attached. We should detach them and then try again.
2858 */
2859 result = ifnet_get_address_list_family_with_count(interface, &addresses,
2860 &addresses_count, (sa_family_t)protocol);
2861 if (result != 0) {
2862 os_log_error(OS_LOG_DEFAULT, "fnet_get_address_list_family(%s%d, 0xblah, %s) - failed: %d\n",
2863 ifnet_name(interface), ifnet_unit(interface),
2864 protocol == PF_INET ? "PF_INET" : "PF_INET6", result);
2865 goto cleanup;
2866 }
2867
2868 for (i = 0; addresses[i] != 0; i++) {
2869 ipsec_remove_address(interface, protocol, addresses[i], pf_socket);
2870 }
2871 ifnet_address_list_free_counted_by(addresses, addresses_count);
2872
2873 /*
2874 * The addresses should be gone, we should try the remove again.
2875 */
2876 result = ipsec_detach_ip(interface, protocol, pf_socket);
2877 if (result != 0 && result != ENXIO) {
2878 os_log_error(OS_LOG_DEFAULT, "ipsec_cleanup_family - ipsec_detach_ip failed: %d\n", result);
2879 }
2880
2881 cleanup:
2882 if (pf_socket != NULL) {
2883 sock_close(pf_socket);
2884 }
2885
2886 if (addresses != NULL) {
2887 ifnet_address_list_free_counted_by(addresses, addresses_count);
2888 }
2889 }
2890
/*
 * ipsec_ctl_disconnect - kernel-control disconnect callback.
 *
 * Tears down the pcb associated with a control-socket unit: drains the data
 * paths, stops the nexus rings (IPSEC_NEXUS), and either detaches the ifnet
 * (connected case) or frees the pcb directly (bound-but-not-connected case).
 * Always returns 0 once the pcb is valid; EINVAL if unitinfo is NULL.
 */
static errno_t
ipsec_ctl_disconnect(__unused kern_ctl_ref kctlref,
    __unused u_int32_t unit,
    void *unitinfo)
{
	struct ipsec_pcb *__single pcb = unitinfo;
	ifnet_t ifp = NULL;
	errno_t result = 0;

	if (pcb == NULL) {
		return EINVAL;
	}

	/* Wait until all threads in the data paths are done. */
	ipsec_wait_data_move_drain(pcb);

#if IPSEC_NEXUS
	// Tell the nexus to stop all rings
	if (pcb->ipsec_netif_nexus != NULL) {
		kern_nexus_stop(pcb->ipsec_netif_nexus);
	}
#endif // IPSEC_NEXUS

	lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);

#if IPSEC_NEXUS
	if (if_ipsec_debug != 0) {
		printf("ipsec_ctl_disconnect: detaching interface %s (id %s)\n",
		    pcb->ipsec_if_xname, pcb->ipsec_unique_name);
	}

	/* Channels are detached under the lock; freed after it is dropped. */
	struct ipsec_detached_channels dc;
	ipsec_detach_channels(pcb, &dc);
#endif // IPSEC_NEXUS

	pcb->ipsec_ctlref = NULL;

	ifp = pcb->ipsec_ifp;
	if (ifp != NULL) {
#if IPSEC_NEXUS
		if (pcb->ipsec_netif_nexus != NULL) {
			/*
			 * Quiesce the interface and flush any pending outbound packets.
			 */
			if_down(ifp);

			/*
			 * Suspend data movement and wait for IO threads to exit.
			 * We can't rely on the logic in dlil_quiesce_and_detach_nexuses() to
			 * do this because ipsec nexuses are attached/detached separately.
			 */
			ifnet_datamov_suspend_and_drain(ifp);
			if ((result = ifnet_detach(ifp)) != 0) {
				panic("ipsec_ctl_disconnect - ifnet_detach failed: %d", result);
				/* NOT REACHED */
			}

			/*
			 * We want to do everything in our power to ensure that the interface
			 * really goes away when the socket is closed. We must remove IP/IPv6
			 * addresses and detach the protocols. Finally, we can remove and
			 * release the interface.
			 */
			key_delsp_for_ipsec_if(ifp);

			ipsec_cleanup_family(ifp, AF_INET);
			ipsec_cleanup_family(ifp, AF_INET6);

			lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);

			ipsec_free_channels(&dc);

			ipsec_nexus_detach(pcb);

			/* Decrement refcnt added by ifnet_datamov_suspend_and_drain(). */
			ifnet_datamov_resume(ifp);
		} else
#endif // IPSEC_NEXUS
		{
			/* Non-netif path: drop the lock before the cleanup calls. */
			lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);

#if IPSEC_NEXUS
			ipsec_free_channels(&dc);
#endif // IPSEC_NEXUS

			/*
			 * We want to do everything in our power to ensure that the interface
			 * really goes away when the socket is closed. We must remove IP/IPv6
			 * addresses and detach the protocols. Finally, we can remove and
			 * release the interface.
			 */
			key_delsp_for_ipsec_if(ifp);

			ipsec_cleanup_family(ifp, AF_INET);
			ipsec_cleanup_family(ifp, AF_INET6);

			/*
			 * Detach now; ipsec_detach() will be called asynchronously once
			 * the I/O reference count drops to 0. There we will invoke
			 * ifnet_release().
			 */
			if ((result = ifnet_detach(ifp)) != 0) {
				os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_disconnect - ifnet_detach failed: %d\n", result);
			}
		}
	} else {
		// Bound, but not connected
		lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
		ipsec_free_pcb(pcb, false);
	}

	return 0;
}
3004
3005 static errno_t
ipsec_ctl_send(__unused kern_ctl_ref kctlref,__unused u_int32_t unit,__unused void * unitinfo,mbuf_t m,__unused int flags)3006 ipsec_ctl_send(__unused kern_ctl_ref kctlref,
3007 __unused u_int32_t unit,
3008 __unused void *unitinfo,
3009 mbuf_t m,
3010 __unused int flags)
3011 {
3012 /* Receive messages from the control socket. Currently unused. */
3013 mbuf_freem(m);
3014 return 0;
3015 }
3016
3017 static errno_t
ipsec_ctl_setopt(__unused kern_ctl_ref kctlref,__unused u_int32_t unit,void * unitinfo,int opt,void * __sized_by (len)data,size_t len)3018 ipsec_ctl_setopt(__unused kern_ctl_ref kctlref, __unused u_int32_t unit,
3019 void *unitinfo, int opt, void *__sized_by(len)data, size_t len)
3020 {
3021 errno_t result = 0;
3022 struct ipsec_pcb *__single pcb = unitinfo;
3023 if (pcb == NULL) {
3024 return EINVAL;
3025 }
3026
3027 /* check for privileges for privileged options */
3028 switch (opt) {
3029 case IPSEC_OPT_FLAGS:
3030 case IPSEC_OPT_EXT_IFDATA_STATS:
3031 case IPSEC_OPT_SET_DELEGATE_INTERFACE:
3032 case IPSEC_OPT_OUTPUT_TRAFFIC_CLASS:
3033 case IPSEC_OPT_OUTPUT_DSCP_MAPPING:
3034 if (kauth_cred_issuser(kauth_cred_get()) == 0) {
3035 return EPERM;
3036 }
3037 break;
3038 }
3039
3040 switch (opt) {
3041 case IPSEC_OPT_FLAGS: {
3042 if (len != sizeof(u_int32_t)) {
3043 result = EMSGSIZE;
3044 } else {
3045 pcb->ipsec_external_flags = *(u_int32_t *)data;
3046 }
3047 break;
3048 }
3049
3050 case IPSEC_OPT_EXT_IFDATA_STATS: {
3051 if (len != sizeof(int)) {
3052 result = EMSGSIZE;
3053 break;
3054 }
3055 if (pcb->ipsec_ifp == NULL) {
3056 // Only can set after connecting
3057 result = EINVAL;
3058 break;
3059 }
3060 pcb->ipsec_ext_ifdata_stats = (*(int *)data) ? 1 : 0;
3061 break;
3062 }
3063
3064 case IPSEC_OPT_INC_IFDATA_STATS_IN:
3065 case IPSEC_OPT_INC_IFDATA_STATS_OUT: {
3066 struct ipsec_stats_param *utsp = (struct ipsec_stats_param *)data;
3067
3068 if (utsp == NULL || len < sizeof(struct ipsec_stats_param)) {
3069 result = EINVAL;
3070 break;
3071 }
3072 if (pcb->ipsec_ifp == NULL) {
3073 // Only can set after connecting
3074 result = EINVAL;
3075 break;
3076 }
3077 if (!pcb->ipsec_ext_ifdata_stats) {
3078 result = EINVAL;
3079 break;
3080 }
3081 if (opt == IPSEC_OPT_INC_IFDATA_STATS_IN) {
3082 ifnet_stat_increment_in(pcb->ipsec_ifp, (uint32_t)utsp->utsp_packets,
3083 (uint32_t)utsp->utsp_bytes, (uint32_t)utsp->utsp_errors);
3084 } else {
3085 ifnet_stat_increment_out(pcb->ipsec_ifp, (uint32_t)utsp->utsp_packets,
3086 (uint32_t)utsp->utsp_bytes, (uint32_t)utsp->utsp_errors);
3087 }
3088 break;
3089 }
3090
3091 case IPSEC_OPT_SET_DELEGATE_INTERFACE: {
3092 ifnet_t __single del_ifp = NULL;
3093 char name[IFNAMSIZ];
3094
3095 if (len > IFNAMSIZ - 1) {
3096 result = EMSGSIZE;
3097 break;
3098 }
3099 if (pcb->ipsec_ifp == NULL) {
3100 // Only can set after connecting
3101 result = EINVAL;
3102 break;
3103 }
3104 if (len != 0) { /* if len==0, del_ifp will be NULL causing the delegate to be removed */
3105 bcopy(data, name, len);
3106 name[len] = 0;
3107 result = ifnet_find_by_name(__unsafe_null_terminated_from_indexable(name), &del_ifp);
3108 }
3109 if (result == 0) {
3110 os_log_error(OS_LOG_DEFAULT, "%s IPSEC_OPT_SET_DELEGATE_INTERFACE %s to %s\n",
3111 __func__, pcb->ipsec_ifp->if_xname,
3112 del_ifp ? del_ifp->if_xname : "NULL");
3113
3114 result = ifnet_set_delegate(pcb->ipsec_ifp, del_ifp);
3115 if (del_ifp) {
3116 ifnet_release(del_ifp);
3117 }
3118 }
3119 break;
3120 }
3121
3122 case IPSEC_OPT_OUTPUT_TRAFFIC_CLASS: {
3123 if (len != sizeof(int)) {
3124 result = EMSGSIZE;
3125 break;
3126 }
3127 if (pcb->ipsec_ifp == NULL) {
3128 // Only can set after connecting
3129 result = EINVAL;
3130 break;
3131 }
3132 mbuf_svc_class_t output_service_class = so_tc2msc(*(int *)data);
3133 if (output_service_class == MBUF_SC_UNSPEC) {
3134 pcb->ipsec_output_service_class = MBUF_SC_OAM;
3135 } else {
3136 pcb->ipsec_output_service_class = output_service_class;
3137 }
3138 os_log_error(OS_LOG_DEFAULT, "%s IPSEC_OPT_OUTPUT_TRAFFIC_CLASS %s svc %d\n",
3139 __func__, pcb->ipsec_ifp->if_xname,
3140 pcb->ipsec_output_service_class);
3141 break;
3142 }
3143
3144 #if IPSEC_NEXUS
3145 case IPSEC_OPT_ENABLE_CHANNEL: {
3146 if (len != sizeof(int)) {
3147 result = EMSGSIZE;
3148 break;
3149 }
3150 if (pcb->ipsec_ifp != NULL) {
3151 // Only can set before connecting
3152 result = EINVAL;
3153 break;
3154 }
3155 if ((*(int *)data) != 0 &&
3156 (*(int *)data) != 1 &&
3157 (*(int *)data) != IPSEC_IF_WMM_RING_COUNT) {
3158 result = EINVAL;
3159 break;
3160 }
3161 lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
3162 pcb->ipsec_kpipe_count = *(int *)data;
3163 lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
3164 break;
3165 }
3166
3167 case IPSEC_OPT_CHANNEL_BIND_PID: {
3168 if (len != sizeof(pid_t)) {
3169 result = EMSGSIZE;
3170 break;
3171 }
3172 if (pcb->ipsec_ifp != NULL) {
3173 // Only can set before connecting
3174 result = EINVAL;
3175 break;
3176 }
3177 lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
3178 pcb->ipsec_kpipe_pid = *(pid_t *)data;
3179 lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
3180 break;
3181 }
3182
3183 case IPSEC_OPT_CHANNEL_BIND_UUID: {
3184 if (len != sizeof(uuid_t)) {
3185 result = EMSGSIZE;
3186 break;
3187 }
3188 if (pcb->ipsec_ifp != NULL) {
3189 // Only can set before connecting
3190 result = EINVAL;
3191 break;
3192 }
3193 lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
3194 uuid_copy(pcb->ipsec_kpipe_proc_uuid, *((uuid_t *)data));
3195 lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
3196 break;
3197 }
3198
3199 case IPSEC_OPT_ENABLE_FLOWSWITCH: {
3200 if (len != sizeof(int)) {
3201 result = EMSGSIZE;
3202 break;
3203 }
3204 if (pcb->ipsec_ifp == NULL) {
3205 // Only can set after connecting
3206 result = EINVAL;
3207 break;
3208 }
3209 if (!if_is_fsw_transport_netagent_enabled()) {
3210 result = ENOTSUP;
3211 break;
3212 }
3213 if (uuid_is_null(pcb->ipsec_nx.fsw_agent)) {
3214 result = ENOENT;
3215 break;
3216 }
3217
3218 uint32_t flags = netagent_get_flags(pcb->ipsec_nx.fsw_agent);
3219
3220 if (*(int *)data) {
3221 flags |= (NETAGENT_FLAG_NEXUS_PROVIDER |
3222 NETAGENT_FLAG_NEXUS_LISTENER);
3223 result = netagent_set_flags(pcb->ipsec_nx.fsw_agent, flags);
3224 pcb->ipsec_needs_netagent = true;
3225 } else {
3226 pcb->ipsec_needs_netagent = false;
3227 flags &= ~(NETAGENT_FLAG_NEXUS_PROVIDER |
3228 NETAGENT_FLAG_NEXUS_LISTENER);
3229 result = netagent_set_flags(pcb->ipsec_nx.fsw_agent, flags);
3230 }
3231 break;
3232 }
3233
3234 case IPSEC_OPT_INPUT_FRAG_SIZE: {
3235 if (len != sizeof(u_int32_t)) {
3236 result = EMSGSIZE;
3237 break;
3238 }
3239 u_int32_t input_frag_size = *(u_int32_t *)data;
3240 if (input_frag_size <= sizeof(struct ip6_hdr)) {
3241 pcb->ipsec_frag_size_set = FALSE;
3242 pcb->ipsec_input_frag_size = 0;
3243 } else {
3244 pcb->ipsec_frag_size_set = TRUE;
3245 pcb->ipsec_input_frag_size = input_frag_size;
3246 }
3247 break;
3248 }
3249 case IPSEC_OPT_ENABLE_NETIF: {
3250 if (len != sizeof(int)) {
3251 result = EMSGSIZE;
3252 break;
3253 }
3254 if (pcb->ipsec_ifp != NULL) {
3255 // Only can set before connecting
3256 result = EINVAL;
3257 break;
3258 }
3259 lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
3260 pcb->ipsec_use_netif = !!(*(int *)data);
3261 lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
3262 break;
3263 }
3264 case IPSEC_OPT_SLOT_SIZE: {
3265 if (len != sizeof(u_int32_t)) {
3266 result = EMSGSIZE;
3267 break;
3268 }
3269 if (pcb->ipsec_ifp != NULL) {
3270 // Only can set before connecting
3271 result = EINVAL;
3272 break;
3273 }
3274 u_int32_t slot_size = *(u_int32_t *)data;
3275 if (slot_size < IPSEC_IF_MIN_SLOT_SIZE ||
3276 slot_size > IPSEC_IF_MAX_SLOT_SIZE) {
3277 return EINVAL;
3278 }
3279 pcb->ipsec_slot_size = slot_size;
3280 if (if_ipsec_debug != 0) {
3281 printf("%s: IPSEC_OPT_SLOT_SIZE %u\n", __func__, slot_size);
3282 }
3283 break;
3284 }
3285 case IPSEC_OPT_NETIF_RING_SIZE: {
3286 if (len != sizeof(u_int32_t)) {
3287 result = EMSGSIZE;
3288 break;
3289 }
3290 if (pcb->ipsec_ifp != NULL) {
3291 // Only can set before connecting
3292 result = EINVAL;
3293 break;
3294 }
3295 u_int32_t ring_size = *(u_int32_t *)data;
3296 if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
3297 ring_size > IPSEC_IF_MAX_RING_SIZE) {
3298 return EINVAL;
3299 }
3300 pcb->ipsec_netif_ring_size = ring_size;
3301 if (if_ipsec_debug != 0) {
3302 printf("%s: IPSEC_OPT_NETIF_RING_SIZE %u\n", __func__, ring_size);
3303 }
3304 break;
3305 }
3306 case IPSEC_OPT_TX_FSW_RING_SIZE: {
3307 if (len != sizeof(u_int32_t)) {
3308 result = EMSGSIZE;
3309 break;
3310 }
3311 if (pcb->ipsec_ifp != NULL) {
3312 // Only can set before connecting
3313 result = EINVAL;
3314 break;
3315 }
3316 u_int32_t ring_size = *(u_int32_t *)data;
3317 if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
3318 ring_size > IPSEC_IF_MAX_RING_SIZE) {
3319 return EINVAL;
3320 }
3321 pcb->ipsec_tx_fsw_ring_size = ring_size;
3322 if (if_ipsec_debug != 0) {
3323 printf("%s: IPSEC_OPT_TX_FSW_RING_SIZE %u\n", __func__, ring_size);
3324 }
3325 break;
3326 }
3327 case IPSEC_OPT_RX_FSW_RING_SIZE: {
3328 if (len != sizeof(u_int32_t)) {
3329 result = EMSGSIZE;
3330 break;
3331 }
3332 if (pcb->ipsec_ifp != NULL) {
3333 // Only can set before connecting
3334 result = EINVAL;
3335 break;
3336 }
3337 u_int32_t ring_size = *(u_int32_t *)data;
3338 if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
3339 ring_size > IPSEC_IF_MAX_RING_SIZE) {
3340 return EINVAL;
3341 }
3342 pcb->ipsec_rx_fsw_ring_size = ring_size;
3343 if (if_ipsec_debug != 0) {
3344 printf("%s: IPSEC_OPT_TX_FSW_RING_SIZE %u\n", __func__, ring_size);
3345 }
3346 break;
3347 }
3348 case IPSEC_OPT_KPIPE_TX_RING_SIZE: {
3349 if (len != sizeof(u_int32_t)) {
3350 result = EMSGSIZE;
3351 break;
3352 }
3353 if (pcb->ipsec_ifp != NULL) {
3354 // Only can set before connecting
3355 result = EINVAL;
3356 break;
3357 }
3358 u_int32_t ring_size = *(u_int32_t *)data;
3359 if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
3360 ring_size > IPSEC_IF_MAX_RING_SIZE) {
3361 return EINVAL;
3362 }
3363 pcb->ipsec_kpipe_tx_ring_size = ring_size;
3364 if (if_ipsec_debug != 0) {
3365 printf("%s: IPSEC_OPT_KPIPE_TX_RING_SIZE %u\n", __func__, ring_size);
3366 }
3367 break;
3368 }
3369 case IPSEC_OPT_KPIPE_RX_RING_SIZE: {
3370 if (len != sizeof(u_int32_t)) {
3371 result = EMSGSIZE;
3372 break;
3373 }
3374 if (pcb->ipsec_ifp != NULL) {
3375 // Only can set before connecting
3376 result = EINVAL;
3377 break;
3378 }
3379 u_int32_t ring_size = *(u_int32_t *)data;
3380 if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
3381 ring_size > IPSEC_IF_MAX_RING_SIZE) {
3382 return EINVAL;
3383 }
3384 pcb->ipsec_kpipe_rx_ring_size = ring_size;
3385 if (if_ipsec_debug != 0) {
3386 printf("%s: IPSEC_OPT_KPIPE_RX_RING_SIZE %u\n", __func__, ring_size);
3387 }
3388 break;
3389 }
3390 case IPSEC_OPT_OUTPUT_DSCP_MAPPING: {
3391 if (len != sizeof(int)) {
3392 result = EMSGSIZE;
3393 break;
3394 }
3395 if (pcb->ipsec_ifp == NULL) {
3396 // Only can set after connecting
3397 result = EINVAL;
3398 break;
3399 }
3400
3401 ipsec_dscp_mapping_t output_dscp_mapping = (ipsec_dscp_mapping_t)(*(int *)data);
3402 if (output_dscp_mapping > IPSEC_DSCP_MAPPING_LEGACY) {
3403 return EINVAL;
3404 }
3405
3406 pcb->ipsec_output_dscp_mapping = output_dscp_mapping;
3407
3408 os_log(OS_LOG_DEFAULT, "%s IPSEC_OPT_OUTPUT_DSCP_MAPPING %s DSCP %d\n",
3409 __func__, pcb->ipsec_ifp->if_xname,
3410 pcb->ipsec_output_dscp_mapping);
3411 break;
3412 }
3413
3414 #endif // IPSEC_NEXUS
3415
3416 default: {
3417 result = ENOPROTOOPT;
3418 break;
3419 }
3420 }
3421
3422 return result;
3423 }
3424
3425 static errno_t
ipsec_ctl_getopt(__unused kern_ctl_ref kctlref,__unused u_int32_t unit,void * unitinfo,int opt,void * __sized_by (* len)data,size_t * len)3426 ipsec_ctl_getopt(__unused kern_ctl_ref kctlref,
3427 __unused u_int32_t unit,
3428 void *unitinfo,
3429 int opt,
3430 void *__sized_by(*len)data,
3431 size_t *len)
3432 {
3433 errno_t result = 0;
3434 struct ipsec_pcb *__single pcb = unitinfo;
3435 if (pcb == NULL) {
3436 return EINVAL;
3437 }
3438
3439 switch (opt) {
3440 case IPSEC_OPT_FLAGS: {
3441 if (*len != sizeof(u_int32_t)) {
3442 result = EMSGSIZE;
3443 } else {
3444 *(u_int32_t *)data = pcb->ipsec_external_flags;
3445 }
3446 break;
3447 }
3448
3449 case IPSEC_OPT_EXT_IFDATA_STATS: {
3450 if (*len != sizeof(int)) {
3451 result = EMSGSIZE;
3452 } else {
3453 *(int *)data = (pcb->ipsec_ext_ifdata_stats) ? 1 : 0;
3454 }
3455 break;
3456 }
3457
3458 case IPSEC_OPT_IFNAME: {
3459 if (*len < MIN(strbuflen(pcb->ipsec_if_xname,
3460 sizeof(pcb->ipsec_if_xname)) + 1, sizeof(pcb->ipsec_if_xname))) {
3461 result = EMSGSIZE;
3462 } else {
3463 if (pcb->ipsec_ifp == NULL) {
3464 // Only can get after connecting
3465 result = EINVAL;
3466 break;
3467 }
3468 *len = scnprintf(data, *len, "%s", pcb->ipsec_if_xname) + 1;
3469 }
3470 break;
3471 }
3472
3473 case IPSEC_OPT_OUTPUT_TRAFFIC_CLASS: {
3474 if (*len != sizeof(int)) {
3475 result = EMSGSIZE;
3476 } else {
3477 *(int *)data = so_svc2tc(pcb->ipsec_output_service_class);
3478 }
3479 break;
3480 }
3481
3482 #if IPSEC_NEXUS
3483
3484 case IPSEC_OPT_ENABLE_CHANNEL: {
3485 if (*len != sizeof(int)) {
3486 result = EMSGSIZE;
3487 } else {
3488 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
3489 *(int *)data = pcb->ipsec_kpipe_count;
3490 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
3491 }
3492 break;
3493 }
3494
3495 case IPSEC_OPT_CHANNEL_BIND_PID: {
3496 if (*len != sizeof(pid_t)) {
3497 result = EMSGSIZE;
3498 } else {
3499 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
3500 *(pid_t *)data = pcb->ipsec_kpipe_pid;
3501 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
3502 }
3503 break;
3504 }
3505
3506 case IPSEC_OPT_CHANNEL_BIND_UUID: {
3507 if (*len != sizeof(uuid_t)) {
3508 result = EMSGSIZE;
3509 } else {
3510 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
3511 uuid_copy(*((uuid_t *)data), pcb->ipsec_kpipe_proc_uuid);
3512 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
3513 }
3514 break;
3515 }
3516
3517 case IPSEC_OPT_ENABLE_FLOWSWITCH: {
3518 if (*len != sizeof(int)) {
3519 result = EMSGSIZE;
3520 } else {
3521 *(int *)data = if_check_netagent(pcb->ipsec_ifp, pcb->ipsec_nx.fsw_agent);
3522 }
3523 break;
3524 }
3525
3526 case IPSEC_OPT_ENABLE_NETIF: {
3527 if (*len != sizeof(int)) {
3528 result = EMSGSIZE;
3529 } else {
3530 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
3531 *(int *)data = !!pcb->ipsec_use_netif;
3532 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
3533 }
3534 break;
3535 }
3536
3537 case IPSEC_OPT_GET_CHANNEL_UUID: {
3538 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
3539 if (!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
3540 result = ENXIO;
3541 } else if (*len != sizeof(uuid_t) * pcb->ipsec_kpipe_count) {
3542 result = EMSGSIZE;
3543 } else {
3544 for (unsigned int i = 0; i < pcb->ipsec_kpipe_count; i++) {
3545 uuid_copy(((uuid_t *)data)[i], pcb->ipsec_kpipe_uuid[i]);
3546 }
3547 }
3548 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
3549 break;
3550 }
3551
3552 case IPSEC_OPT_INPUT_FRAG_SIZE: {
3553 if (*len != sizeof(u_int32_t)) {
3554 result = EMSGSIZE;
3555 } else {
3556 *(u_int32_t *)data = pcb->ipsec_input_frag_size;
3557 }
3558 break;
3559 }
3560 case IPSEC_OPT_SLOT_SIZE: {
3561 if (*len != sizeof(u_int32_t)) {
3562 result = EMSGSIZE;
3563 } else {
3564 *(u_int32_t *)data = pcb->ipsec_slot_size;
3565 }
3566 break;
3567 }
3568 case IPSEC_OPT_NETIF_RING_SIZE: {
3569 if (*len != sizeof(u_int32_t)) {
3570 result = EMSGSIZE;
3571 } else {
3572 *(u_int32_t *)data = pcb->ipsec_netif_ring_size;
3573 }
3574 break;
3575 }
3576 case IPSEC_OPT_TX_FSW_RING_SIZE: {
3577 if (*len != sizeof(u_int32_t)) {
3578 result = EMSGSIZE;
3579 } else {
3580 *(u_int32_t *)data = pcb->ipsec_tx_fsw_ring_size;
3581 }
3582 break;
3583 }
3584 case IPSEC_OPT_RX_FSW_RING_SIZE: {
3585 if (*len != sizeof(u_int32_t)) {
3586 result = EMSGSIZE;
3587 } else {
3588 *(u_int32_t *)data = pcb->ipsec_rx_fsw_ring_size;
3589 }
3590 break;
3591 }
3592 case IPSEC_OPT_KPIPE_TX_RING_SIZE: {
3593 if (*len != sizeof(u_int32_t)) {
3594 result = EMSGSIZE;
3595 } else {
3596 *(u_int32_t *)data = pcb->ipsec_kpipe_tx_ring_size;
3597 }
3598 break;
3599 }
3600 case IPSEC_OPT_KPIPE_RX_RING_SIZE: {
3601 if (*len != sizeof(u_int32_t)) {
3602 result = EMSGSIZE;
3603 } else {
3604 *(u_int32_t *)data = pcb->ipsec_kpipe_rx_ring_size;
3605 }
3606 break;
3607 }
3608
3609 #endif // IPSEC_NEXUS
3610
3611 default: {
3612 result = ENOPROTOOPT;
3613 break;
3614 }
3615 }
3616
3617 return result;
3618 }
3619
3620 /* Network Interface functions */
/*
 * ipsec_output - ifnet output handler for the ipsec interface.
 *
 * Applies IPsec transforms to an outbound packet and hands the result to
 * ip_output()/ip6_output(). On any error path the mbuf chain is freed here.
 * Returns 0 on success (or when the packet was consumed by tunneling),
 * EINVAL on malformed input, or ENOBUFS when flow control kicks in.
 *
 * mbuf ownership: "data" is owned by this function until it is passed to
 * ip_output()/ip6_output() or stored in ipsec_state.m (the transform code
 * may replace or consume it; we always reload from ipsec_state.m after).
 */
static errno_t
ipsec_output(ifnet_t interface,
    mbuf_t data)
{
	struct ipsec_pcb *__single pcb = ifnet_softc(interface);
	struct ipsec_output_state ipsec_state;
	struct route ro;
	struct route_in6 ro6;
	size_t length;
	struct ip *ip = NULL;
	struct ip6_hdr *ip6 = NULL;
	struct ip_out_args ipoa;
	struct ip6_out_args ip6oa;
	int error = 0;
	u_int ip_version = 0;
	int flags = 0;
	struct flowadv *adv = NULL;

	// Make sure this packet isn't looping through the interface
	if (necp_get_last_interface_index_from_packet(data) == interface->if_index) {
		error = EINVAL;
		goto ipsec_output_err;
	}

	// Mark the interface so NECP can evaluate tunnel policy
	necp_mark_packet_from_interface(data, interface);

	/* Need at least a full IPv4 header in the first mbuf to read ip_v. */
	if (data->m_len < sizeof(*ip)) {
		os_log_error(OS_LOG_DEFAULT, "ipsec_output: first mbuf length shorter than IP header length: %d.\n", data->m_len);
		IPSEC_STAT_INCREMENT(ipsecstat.out_inval);
		error = EINVAL;
		goto ipsec_output_err;
	}

	ip = mtod(data, struct ip *);
	ip_version = ip->ip_v;

	switch (ip_version) {
	case 4: {
		u_int8_t ip_hlen = 0;
#ifdef _IP_VHL
		ip_hlen = _IP_VHL_HL(ip->ip_vhl) << 2;
#else
		ip_hlen = (uint8_t)(ip->ip_hl << 2);
#endif
		if (ip_hlen < sizeof(*ip)) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_output: Bad ip header length %d.\n", ip_hlen);
			IPSEC_STAT_INCREMENT(ipsecstat.out_inval);
			error = EINVAL;
			goto ipsec_output_err;
		}
#if IPSEC_NEXUS
		if (!pcb->ipsec_use_netif)
#endif // IPSEC_NEXUS
		{
			/* Legacy (non-netif) path taps BPF here. */
			int af = AF_INET;
			bpf_tap_out(pcb->ipsec_ifp, DLT_NULL, data, &af, sizeof(af));
		}

		/* Apply encryption */
		memset(&ipsec_state, 0, sizeof(ipsec_state));
		ipsec_state.m = data;
		ipsec_state.dst = (struct sockaddr *)&ip->ip_dst;
		memset(&ipsec_state.ro, 0, sizeof(ipsec_state.ro));
		ipsec_state.dscp_mapping = pcb->ipsec_output_dscp_mapping;

		error = ipsec4_interface_output(&ipsec_state, interface);
		/* Tunneled in IPv6 - packet is gone */
		if (error == 0 && ipsec_state.tunneled == 6) {
			goto done;
		}

		/* The transform may have replaced the chain; reload it. */
		data = ipsec_state.m;
		if (error || data == NULL) {
			if (error) {
				os_log_error(OS_LOG_DEFAULT, "ipsec_output: ipsec4_output error %d.\n", error);
			}
			goto ipsec_output_err;
		}

		/* Set traffic class, set flow */
		m_set_service_class(data, pcb->ipsec_output_service_class);
		data->m_pkthdr.pkt_flowsrc = FLOWSRC_IFNET;
#if SKYWALK
		data->m_pkthdr.pkt_mpriv_srcid = interface->if_flowhash;
#else /* !SKYWALK */
		data->m_pkthdr.pkt_flowid = interface->if_flowhash;
#endif /* !SKYWALK */
		data->m_pkthdr.pkt_proto = ip->ip_p;
		data->m_pkthdr.pkt_flags = (PKTF_FLOW_ID | PKTF_FLOW_ADV | PKTF_FLOW_LOCALSRC);

		/* Flip endian-ness for ip_output */
		ip = mtod(data, struct ip *);
		NTOHS(ip->ip_len);
		NTOHS(ip->ip_off);

		/* Increment statistics */
		length = mbuf_pkthdr_len(data);
		ifnet_stat_increment_out(interface, 1, (uint16_t)length, 0);

		/* Send to ip_output */
		memset(&ro, 0, sizeof(ro));

		flags = (IP_OUTARGS | /* Passing out args to specify interface */
		    IP_NOIPSEC);      /* To ensure the packet doesn't go through ipsec twice */

		memset(&ipoa, 0, sizeof(ipoa));
		ipoa.ipoa_flowadv.code = 0;
		ipoa.ipoa_flags = IPOAF_SELECT_SRCIF | IPOAF_BOUND_SRCADDR;
		if (ipsec_state.outgoing_if) {
			ipoa.ipoa_boundif = ipsec_state.outgoing_if;
			ipoa.ipoa_flags |= IPOAF_BOUND_IF;
		}
		ipsec_set_ipoa_for_interface(pcb->ipsec_ifp, &ipoa);

		adv = &ipoa.ipoa_flowadv;

		/* ip_output consumes the mbuf regardless of result. */
		(void)ip_output(data, NULL, &ro, flags, NULL, &ipoa);
		data = NULL;

		/* Flow advisory: stop dequeuing until the flow resumes. */
		if (adv->code == FADV_FLOW_CONTROLLED || adv->code == FADV_SUSPENDED) {
			error = ENOBUFS;
			ifnet_disable_output(interface);
		}

		goto done;
	}
	case 6: {
		if (data->m_len < sizeof(*ip6)) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_output: first mbuf length shorter than IPv6 header length: %d.\n", data->m_len);
			IPSEC_STAT_INCREMENT(ipsec6stat.out_inval);
			error = EINVAL;
			goto ipsec_output_err;
		}
#if IPSEC_NEXUS
		if (!pcb->ipsec_use_netif)
#endif // IPSEC_NEXUS
		{
			int af = AF_INET6;
			bpf_tap_out(pcb->ipsec_ifp, DLT_NULL, data, &af, sizeof(af));
		}

		/* splithdr may reallocate; NULL means the chain was freed. */
		data = ipsec6_splithdr(data);
		if (data == NULL) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_output: ipsec6_splithdr returned NULL\n");
			goto ipsec_output_err;
		}

		ip6 = mtod(data, struct ip6_hdr *);

		memset(&ipsec_state, 0, sizeof(ipsec_state));
		ipsec_state.m = data;
		ipsec_state.dst = (struct sockaddr *)&ip6->ip6_dst;
		memset(&ipsec_state.ro, 0, sizeof(ipsec_state.ro));
		ipsec_state.dscp_mapping = pcb->ipsec_output_dscp_mapping;

		error = ipsec6_interface_output(&ipsec_state, interface, &ip6->ip6_nxt, ipsec_state.m);
		if (error == 0 && ipsec_state.tunneled == 4) {        /* tunneled in IPv4 - packet is gone */
			goto done;
		}
		data = ipsec_state.m;
		if (error || data == NULL) {
			if (error) {
				os_log_error(OS_LOG_DEFAULT, "ipsec_output: ipsec6_output error %d\n", error);
			}
			goto ipsec_output_err;
		}

		/* Set traffic class, set flow */
		m_set_service_class(data, pcb->ipsec_output_service_class);
		data->m_pkthdr.pkt_flowsrc = FLOWSRC_IFNET;
#if SKYWALK
		data->m_pkthdr.pkt_mpriv_srcid = interface->if_flowhash;
#else /* !SKYWALK */
		data->m_pkthdr.pkt_flowid = interface->if_flowhash;
#endif /* !SKYWALK */
		data->m_pkthdr.pkt_proto = ip6->ip6_nxt;
		data->m_pkthdr.pkt_flags = (PKTF_FLOW_ID | PKTF_FLOW_ADV | PKTF_FLOW_LOCALSRC);

		/* Increment statistics */
		length = mbuf_pkthdr_len(data);
		ifnet_stat_increment_out(interface, 1, (uint16_t)length, 0);

		/* Send to ip6_output */
		memset(&ro6, 0, sizeof(ro6));

		flags = IPV6_OUTARGS;

		memset(&ip6oa, 0, sizeof(ip6oa));
		ip6oa.ip6oa_flowadv.code = 0;
		ip6oa.ip6oa_flags = IP6OAF_SELECT_SRCIF | IP6OAF_BOUND_SRCADDR;
		if (ipsec_state.outgoing_if) {
			ip6oa.ip6oa_boundif = ipsec_state.outgoing_if;
			ip6oa.ip6oa_flags |= IP6OAF_BOUND_IF;
			ip6_output_setsrcifscope(data, ipsec_state.outgoing_if, NULL);
			ip6_output_setdstifscope(data, ipsec_state.outgoing_if, NULL);
		} else {
			ip6_output_setsrcifscope(data, IFSCOPE_UNKNOWN, NULL);
			ip6_output_setdstifscope(data, IFSCOPE_UNKNOWN, NULL);
		}
		ipsec_set_ip6oa_for_interface(pcb->ipsec_ifp, &ip6oa);

		adv = &ip6oa.ip6oa_flowadv;

		/* ip6_output consumes the mbuf regardless of result. */
		(void) ip6_output(data, NULL, &ro6, flags, NULL, NULL, &ip6oa);
		data = NULL;

		if (adv->code == FADV_FLOW_CONTROLLED || adv->code == FADV_SUSPENDED) {
			error = ENOBUFS;
			ifnet_disable_output(interface);
		}

		goto done;
	}
	default: {
		os_log_error(OS_LOG_DEFAULT, "ipsec_output: Received unknown packet version %d.\n", ip_version);
		error = EINVAL;
		goto ipsec_output_err;
	}
	}

done:
	return error;

ipsec_output_err:
	/* data is NULL here whenever ownership already passed downstream. */
	if (data) {
		mbuf_freem(data);
	}
	goto done;
}
3851
3852 static void
ipsec_start(ifnet_t interface)3853 ipsec_start(ifnet_t interface)
3854 {
3855 mbuf_t __single data;
3856 struct ipsec_pcb *__single pcb = ifnet_softc(interface);
3857
3858 VERIFY(pcb != NULL);
3859 for (;;) {
3860 if (ifnet_dequeue(interface, &data) != 0) {
3861 break;
3862 }
3863 if (ipsec_output(interface, data) != 0) {
3864 break;
3865 }
3866 }
3867 }
3868
3869 /* Network Interface functions */
3870 static errno_t
ipsec_demux(__unused ifnet_t interface,mbuf_t data,__unused char * frame_header,protocol_family_t * protocol)3871 ipsec_demux(__unused ifnet_t interface,
3872 mbuf_t data,
3873 __unused char *frame_header,
3874 protocol_family_t *protocol)
3875 {
3876 struct ip *ip;
3877 u_int ip_version;
3878
3879 while (data != NULL && mbuf_len(data) < 1) {
3880 data = mbuf_next(data);
3881 }
3882
3883 if (data == NULL) {
3884 return ENOENT;
3885 }
3886
3887 ip = mtod(data, struct ip *);
3888 ip_version = ip->ip_v;
3889
3890 switch (ip_version) {
3891 case 4:
3892 *protocol = PF_INET;
3893 return 0;
3894 case 6:
3895 *protocol = PF_INET6;
3896 return 0;
3897 default:
3898 *protocol = PF_UNSPEC;
3899 break;
3900 }
3901
3902 return 0;
3903 }
3904
3905 static errno_t
ipsec_add_proto(__unused ifnet_t interface,protocol_family_t protocol,__unused const struct ifnet_demux_desc * demux_array,__unused u_int32_t demux_count)3906 ipsec_add_proto(__unused ifnet_t interface,
3907 protocol_family_t protocol,
3908 __unused const struct ifnet_demux_desc *demux_array,
3909 __unused u_int32_t demux_count)
3910 {
3911 switch (protocol) {
3912 case PF_INET:
3913 return 0;
3914 case PF_INET6:
3915 return 0;
3916 default:
3917 break;
3918 }
3919
3920 return ENOPROTOOPT;
3921 }
3922
/*
 * ipsec_del_proto - protocol detach callback.
 *
 * No per-protocol state is kept, so detaching always succeeds.
 */
static errno_t
ipsec_del_proto(__unused ifnet_t interface,
    __unused protocol_family_t protocol)
{
	return 0;
}
3929
/*
 * ipsec_ioctl - interface ioctl handler.
 *
 * Supports MTU changes (bounded by the channel slot size when netif mode
 * is on), subfamily and peer-egress-functional-type updates. SIOCSIFFLAGS
 * is accepted as a no-op (ifioctl handles it). Anything else: EOPNOTSUPP.
 */
static errno_t
ipsec_ioctl(ifnet_t interface,
    u_long command,
    void *data)
{
#if IPSEC_NEXUS
	struct ipsec_pcb *__single pcb = ifnet_softc(interface);
#endif
	errno_t result = 0;

	switch (command) {
	case SIOCSIFMTU: {
#if IPSEC_NEXUS
		if (pcb->ipsec_use_netif) {
			// Make sure we can fit packets in the channel buffers
			if (((uint64_t)((struct ifreq*)data)->ifr_mtu) > pcb->ipsec_slot_size) {
				result = EINVAL;
			} else {
				ifnet_set_mtu(interface, (uint32_t)((struct ifreq*)data)->ifr_mtu);
			}
		} else
#endif // IPSEC_NEXUS
		{
			/* Non-netif mode: no slot-size constraint on the MTU. */
			ifnet_set_mtu(interface, ((struct ifreq*)data)->ifr_mtu);
		}
		break;
	}

	case SIOCSIFFLAGS:
		/* ifioctl() takes care of it */
		break;

	case SIOCSIFSUBFAMILY: {
		uint32_t subfamily;

		/* Map the IFRTYPE_* request onto the IFNET_SUBFAMILY_* value. */
		subfamily = ((struct ifreq*)data)->ifr_type.ift_subfamily;
		switch (subfamily) {
		case IFRTYPE_SUBFAMILY_BLUETOOTH:
			interface->if_subfamily = IFNET_SUBFAMILY_BLUETOOTH;
			break;
		case IFRTYPE_SUBFAMILY_WIFI:
			interface->if_subfamily = IFNET_SUBFAMILY_WIFI;
			break;
		case IFRTYPE_SUBFAMILY_QUICKRELAY:
			interface->if_subfamily = IFNET_SUBFAMILY_QUICKRELAY;
			break;
		case IFRTYPE_SUBFAMILY_DEFAULT:
			interface->if_subfamily = IFNET_SUBFAMILY_DEFAULT;
			break;
		default:
			result = EINVAL;
			break;
		}
		break;
	}

	case SIOCSIFPEEREGRESSFUNCTIONALTYPE: {
		uint32_t peeregressinterfacetype;
		peeregressinterfacetype = ((struct ifreq*)data)->ifr_ifru.ifru_peer_egress_functional_type;
		switch (peeregressinterfacetype) {
		case IFRTYPE_FUNCTIONAL_WIFI_INFRA:
		case IFRTYPE_FUNCTIONAL_CELLULAR:
		case IFRTYPE_FUNCTIONAL_WIRED:
		case IFRTYPE_FUNCTIONAL_UNKNOWN:
			interface->peer_egress_functional_type = peeregressinterfacetype;
			break;
		default:
			result = EINVAL;
			break;
		}
		break;
	}

	default:
		result = EOPNOTSUPP;
	}

	return result;
}
4009
/*
 * ifnet detach-complete callback.
 *
 * Releases an ifnet reference (presumably the one held for the attach
 * lifetime — confirm against the creation path, which is outside this
 * view), then frees the pcb and disposes of the ifnet while holding
 * ipsec_lock so teardown is serialized with the global pcb list.
 */
static void
ipsec_detached(ifnet_t interface)
{
	struct ipsec_pcb *__single pcb = ifnet_softc(interface);

	(void)ifnet_release(interface);
	lck_mtx_lock(&ipsec_lock);
	ipsec_free_pcb(pcb, true);
	(void)ifnet_dispose(interface);
	lck_mtx_unlock(&ipsec_lock);
}
4021
4022 /* Protocol Handlers */
4023
4024 static errno_t
ipsec_proto_input(ifnet_t interface,protocol_family_t protocol,mbuf_t m,__unused char * frame_header)4025 ipsec_proto_input(ifnet_t interface,
4026 protocol_family_t protocol,
4027 mbuf_t m,
4028 __unused char *frame_header)
4029 {
4030 mbuf_pkthdr_setrcvif(m, interface);
4031
4032 #if IPSEC_NEXUS
4033 struct ipsec_pcb *__single pcb = ifnet_softc(interface);
4034 if (!pcb->ipsec_use_netif)
4035 #endif // IPSEC_NEXUS
4036 {
4037 uint32_t af = 0;
4038 struct ip *ip = mtod(m, struct ip *);
4039 if (ip->ip_v == 4) {
4040 af = AF_INET;
4041 } else if (ip->ip_v == 6) {
4042 af = AF_INET6;
4043 }
4044 bpf_tap_in(interface, DLT_NULL, m, &af, sizeof(af));
4045 pktap_input(interface, protocol, m, NULL);
4046 }
4047
4048 int32_t pktlen = m->m_pkthdr.len;
4049 if (proto_input(protocol, m) != 0) {
4050 ifnet_stat_increment_in(interface, 0, 0, 1);
4051 m_freem(m);
4052 } else {
4053 ifnet_stat_increment_in(interface, 1, pktlen, 0);
4054 }
4055
4056 return 0;
4057 }
4058
4059 static errno_t
ipsec_proto_pre_output(__unused ifnet_t interface,protocol_family_t protocol,__unused mbuf_t * packet,__unused const struct sockaddr * dest,__unused void * route,__unused char * frame_type,__unused char * link_layer_dest)4060 ipsec_proto_pre_output(__unused ifnet_t interface,
4061 protocol_family_t protocol,
4062 __unused mbuf_t *packet,
4063 __unused const struct sockaddr *dest,
4064 __unused void *route,
4065 __unused char *frame_type,
4066 __unused char *link_layer_dest)
4067 {
4068 *(protocol_family_t *)(void *)frame_type = protocol;
4069 return 0;
4070 }
4071
4072 static errno_t
ipsec_attach_proto(ifnet_t interface,protocol_family_t protocol)4073 ipsec_attach_proto(ifnet_t interface,
4074 protocol_family_t protocol)
4075 {
4076 struct ifnet_attach_proto_param proto;
4077 errno_t result;
4078
4079 bzero(&proto, sizeof(proto));
4080 proto.input = ipsec_proto_input;
4081 proto.pre_output = ipsec_proto_pre_output;
4082
4083 result = ifnet_attach_protocol(interface, protocol, &proto);
4084 if (result != 0 && result != EEXIST) {
4085 os_log_error(OS_LOG_DEFAULT, "ipsec_attach_inet - ifnet_attach_protocol %d failed: %d\n",
4086 protocol, result);
4087 }
4088
4089 return result;
4090 }
4091
/*
 * Inject a chain of inbound packets into the ipsec interface.
 *
 * Netif (nexus) path: the packets are appended to the pcb's pending
 * input chain and the rx ring is kicked; the actual delivery happens in
 * the ring sync path.  Returns ENXIO if the data path is stopped and
 * ENOSPC if the pending chain is already over if_ipsec_max_pending_input.
 *
 * Non-netif path: the packet is demuxed and delivered synchronously via
 * ipsec_proto_input().
 *
 * Lock order here: data-move gate, then ipsec_pcb_lock (shared), then
 * ipsec_input_chain_lock.
 */
errno_t
ipsec_inject_inbound_packet(ifnet_t interface,
    mbuf_t packet)
{
#if IPSEC_NEXUS
	struct ipsec_pcb *__single pcb = ifnet_softc(interface);

	if (pcb->ipsec_use_netif) {
		if (!ipsec_data_move_begin(pcb)) {
			os_log_info(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__,
			    if_name(pcb->ipsec_ifp));
			return ENXIO;
		}

		lck_rw_lock_shared(&pcb->ipsec_pcb_lock);

		lck_mtx_lock(&pcb->ipsec_input_chain_lock);

		/* Refuse to queue more once the backlog limit is exceeded. */
		if (pcb->ipsec_input_chain_count > (u_int32_t)if_ipsec_max_pending_input) {
			lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
			lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
			ipsec_data_move_end(pcb);
			return ENOSPC;
		}

		/* Append the incoming chain to the pending input chain. */
		if (pcb->ipsec_input_chain != NULL) {
			pcb->ipsec_input_chain_last->m_nextpkt = packet;
		} else {
			pcb->ipsec_input_chain = packet;
		}
		/* Count every packet in the injected chain and find its tail. */
		pcb->ipsec_input_chain_count++;
		while (packet->m_nextpkt) {
			VERIFY(packet != packet->m_nextpkt);
			packet = packet->m_nextpkt;
			pcb->ipsec_input_chain_count++;
		}
		pcb->ipsec_input_chain_last = packet;
		lck_mtx_unlock(&pcb->ipsec_input_chain_lock);

		/* Snapshot the ring under the pcb lock, notify after dropping it. */
		kern_channel_ring_t __single rx_ring = pcb->ipsec_netif_rxring[0];
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);

		if (rx_ring != NULL) {
			kern_channel_notify(rx_ring, 0);
		}

		ipsec_data_move_end(pcb);
		return 0;
	} else
#endif // IPSEC_NEXUS
	{
		/* Legacy path: demux and deliver inline. */
		errno_t error;
		protocol_family_t protocol;
		if ((error = ipsec_demux(interface, packet, NULL, &protocol)) != 0) {
			return error;
		}

		return ipsec_proto_input(interface, protocol, packet, NULL);
	}
}
4152
4153 void
ipsec_set_pkthdr_for_interface(ifnet_t interface,mbuf_t packet,int family,uint32_t flowid)4154 ipsec_set_pkthdr_for_interface(ifnet_t interface, mbuf_t packet, int family,
4155 uint32_t flowid)
4156 {
4157 #pragma unused (flowid)
4158 if (packet != NULL && interface != NULL) {
4159 struct ipsec_pcb *__single pcb = ifnet_softc(interface);
4160 if (pcb != NULL) {
4161 /* Set traffic class, set flow */
4162 m_set_service_class(packet, pcb->ipsec_output_service_class);
4163 packet->m_pkthdr.pkt_flowsrc = FLOWSRC_IFNET;
4164 #if SKYWALK
4165 packet->m_pkthdr.pkt_mpriv_srcid = interface->if_flowhash;
4166 packet->m_pkthdr.pkt_flowid = flowid;
4167 #else /* !SKYWALK */
4168 packet->m_pkthdr.pkt_flowid = interface->if_flowhash;
4169 #endif /* !SKYWALK */
4170 if (family == AF_INET) {
4171 struct ip *ip = mtod(packet, struct ip *);
4172 packet->m_pkthdr.pkt_proto = ip->ip_p;
4173 } else if (family == AF_INET6) {
4174 struct ip6_hdr *ip6 = mtod(packet, struct ip6_hdr *);
4175 packet->m_pkthdr.pkt_proto = ip6->ip6_nxt;
4176 }
4177 packet->m_pkthdr.pkt_flags = (PKTF_FLOW_ID | PKTF_FLOW_ADV | PKTF_FLOW_LOCALSRC);
4178 }
4179 }
4180 }
4181
4182 void
ipsec_set_ipoa_for_interface(ifnet_t interface,struct ip_out_args * ipoa)4183 ipsec_set_ipoa_for_interface(ifnet_t interface, struct ip_out_args *ipoa)
4184 {
4185 struct ipsec_pcb *__single pcb;
4186
4187 if (interface == NULL || ipoa == NULL) {
4188 return;
4189 }
4190 pcb = ifnet_softc(interface);
4191
4192 if (net_qos_policy_restricted == 0) {
4193 ipoa->ipoa_flags |= IPOAF_QOSMARKING_ALLOWED;
4194 ipoa->ipoa_sotc = so_svc2tc(pcb->ipsec_output_service_class);
4195 } else if (pcb->ipsec_output_service_class != MBUF_SC_VO ||
4196 net_qos_policy_restrict_avapps != 0) {
4197 ipoa->ipoa_flags &= ~IPOAF_QOSMARKING_ALLOWED;
4198 } else {
4199 ipoa->ipoa_flags |= IP6OAF_QOSMARKING_ALLOWED;
4200 ipoa->ipoa_sotc = SO_TC_VO;
4201 }
4202 }
4203
4204 void
ipsec_set_ip6oa_for_interface(ifnet_t interface,struct ip6_out_args * ip6oa)4205 ipsec_set_ip6oa_for_interface(ifnet_t interface, struct ip6_out_args *ip6oa)
4206 {
4207 struct ipsec_pcb *__single pcb;
4208
4209 if (interface == NULL || ip6oa == NULL) {
4210 return;
4211 }
4212 pcb = ifnet_softc(interface);
4213
4214 if (net_qos_policy_restricted == 0) {
4215 ip6oa->ip6oa_flags |= IPOAF_QOSMARKING_ALLOWED;
4216 ip6oa->ip6oa_sotc = so_svc2tc(pcb->ipsec_output_service_class);
4217 } else if (pcb->ipsec_output_service_class != MBUF_SC_VO ||
4218 net_qos_policy_restrict_avapps != 0) {
4219 ip6oa->ip6oa_flags &= ~IPOAF_QOSMARKING_ALLOWED;
4220 } else {
4221 ip6oa->ip6oa_flags |= IP6OAF_QOSMARKING_ALLOWED;
4222 ip6oa->ip6oa_sotc = SO_TC_VO;
4223 }
4224 }
4225
4226 static boolean_t
ipsec_data_move_begin(struct ipsec_pcb * pcb)4227 ipsec_data_move_begin(struct ipsec_pcb *pcb)
4228 {
4229 boolean_t ret = 0;
4230
4231 lck_mtx_lock_spin(&pcb->ipsec_pcb_data_move_lock);
4232 if ((ret = IPSEC_IS_DATA_PATH_READY(pcb))) {
4233 pcb->ipsec_pcb_data_move++;
4234 }
4235 lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
4236
4237 return ret;
4238 }
4239
4240 static void
ipsec_data_move_end(struct ipsec_pcb * pcb)4241 ipsec_data_move_end(struct ipsec_pcb *pcb)
4242 {
4243 lck_mtx_lock_spin(&pcb->ipsec_pcb_data_move_lock);
4244 VERIFY(pcb->ipsec_pcb_data_move > 0);
4245 /*
4246 * if there's no more thread moving data, wakeup any
4247 * drainers that's blocked waiting for this.
4248 */
4249 if (--pcb->ipsec_pcb_data_move == 0 && pcb->ipsec_pcb_drainers > 0) {
4250 wakeup(&(pcb->ipsec_pcb_data_move));
4251 }
4252 lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
4253 }
4254
/*
 * Block until every thread currently in the data path has called
 * ipsec_data_move_end().  The data path must already have been marked
 * not-ready (IPSEC_CLR_DATA_PATH_READY) so no new threads can enter
 * while we wait; callers normally reach this via
 * ipsec_wait_data_move_drain().
 */
static void
ipsec_data_move_drain(struct ipsec_pcb *pcb)
{
	lck_mtx_lock(&pcb->ipsec_pcb_data_move_lock);
	/* data path must already be marked as not ready */
	VERIFY(!IPSEC_IS_DATA_PATH_READY(pcb));
	pcb->ipsec_pcb_drainers++;
	/* msleep() drops the lock while sleeping and retakes it on wakeup. */
	while (pcb->ipsec_pcb_data_move != 0) {
		(void)msleep(&(pcb->ipsec_pcb_data_move), &pcb->ipsec_pcb_data_move_lock,
		    (PZERO - 1), __func__, NULL);
	}
	VERIFY(!IPSEC_IS_DATA_PATH_READY(pcb));
	VERIFY(pcb->ipsec_pcb_drainers > 0);
	pcb->ipsec_pcb_drainers--;
	lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
}
4271
/*
 * Shut the data path down and wait for it to quiesce: mark it not ready
 * (so ipsec_data_move_begin() starts failing), then block until all
 * in-flight data-path threads have exited.
 */
static void
ipsec_wait_data_move_drain(struct ipsec_pcb *pcb)
{
	/*
	 * Mark the data path as not usable.
	 */
	lck_mtx_lock(&pcb->ipsec_pcb_data_move_lock);
	IPSEC_CLR_DATA_PATH_READY(pcb);
	lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);

	/* Wait until all threads in the data paths are done. */
	ipsec_data_move_drain(pcb);
}
4285