1 /*
2 * Copyright (c) 2012-2022 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29
30 #include <sys/systm.h>
31 #include <sys/kern_control.h>
32 #include <net/kpi_protocol.h>
33 #include <net/kpi_interface.h>
34 #include <sys/socket.h>
35 #include <sys/socketvar.h>
36 #include <net/if.h>
37 #include <net/if_types.h>
38 #include <net/bpf.h>
39 #include <net/if_ipsec.h>
40 #include <sys/mbuf.h>
41 #include <sys/sockio.h>
42 #include <netinet/in.h>
43 #include <netinet/ip6.h>
44 #include <netinet6/in6_var.h>
45 #include <netinet6/ip6_var.h>
46 #include <sys/kauth.h>
47 #include <netinet6/ipsec.h>
48 #include <netinet6/ipsec6.h>
49 #include <netinet6/esp.h>
50 #include <netinet6/esp6.h>
51 #include <netinet/ip.h>
52 #include <net/flowadv.h>
53 #include <net/necp.h>
54 #include <netkey/key.h>
55 #include <net/pktap.h>
56 #include <kern/zalloc.h>
57 #include <os/log.h>
58
59 #if SKYWALK
60 #include <skywalk/os_skywalk_private.h>
61 #include <skywalk/nexus/flowswitch/nx_flowswitch.h>
62 #include <skywalk/nexus/netif/nx_netif.h>
63 #define IPSEC_NEXUS 1
64 #else // SKYWALK
65 #define IPSEC_NEXUS 0
66 #endif // SKYWALK
67
68 extern int net_qos_policy_restricted;
69 extern int net_qos_policy_restrict_avapps;
70
71 /* Kernel Control functions */
72 static errno_t ipsec_ctl_setup(u_int32_t *unit, void **unitinfo);
73 static errno_t ipsec_ctl_bind(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
74 void **unitinfo);
75 static errno_t ipsec_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
76 void **unitinfo);
77 static errno_t ipsec_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t unit,
78 void *unitinfo);
79 static errno_t ipsec_ctl_send(kern_ctl_ref kctlref, u_int32_t unit,
80 void *unitinfo, mbuf_t m, int flags);
81 static errno_t ipsec_ctl_getopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo,
82 int opt, void *data, size_t *len);
83 static errno_t ipsec_ctl_setopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo,
84 int opt, void *data, size_t len);
85
86 /* Network Interface functions */
87 static void ipsec_start(ifnet_t interface);
88 static errno_t ipsec_output(ifnet_t interface, mbuf_t data);
89 static errno_t ipsec_demux(ifnet_t interface, mbuf_t data, char *frame_header,
90 protocol_family_t *protocol);
91 static errno_t ipsec_add_proto(ifnet_t interface, protocol_family_t protocol,
92 const struct ifnet_demux_desc *demux_array,
93 u_int32_t demux_count);
94 static errno_t ipsec_del_proto(ifnet_t interface, protocol_family_t protocol);
95 static errno_t ipsec_ioctl(ifnet_t interface, u_long cmd, void *data);
96 static void ipsec_detached(ifnet_t interface);
97
98 /* Protocol handlers */
99 static errno_t ipsec_attach_proto(ifnet_t interface, protocol_family_t proto);
100 static errno_t ipsec_proto_input(ifnet_t interface, protocol_family_t protocol,
101 mbuf_t m, char *frame_header);
102 static errno_t ipsec_proto_pre_output(ifnet_t interface, protocol_family_t protocol,
103 mbuf_t *packet, const struct sockaddr *dest, void *route,
104 char *frame_type, char *link_layer_dest);
105
/* Kernel-control handle returned by ctl_register(); used for deregistration. */
static kern_ctl_ref ipsec_kctlref;
/* Lock group/attr shared by all ipsec interface locks. */
static LCK_ATTR_DECLARE(ipsec_lck_attr, 0, 0);
static LCK_GRP_DECLARE(ipsec_lck_grp, "ipsec");
/* Global mutex guarding the ipsec_head PCB list. */
static LCK_MTX_DECLARE_ATTR(ipsec_lock, &ipsec_lck_grp, &ipsec_lck_attr);
110
#if IPSEC_NEXUS

SYSCTL_DECL(_net_ipsec);
SYSCTL_NODE(_net, OID_AUTO, ipsec, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "IPsec");
/* When non-zero, run the extra IPSEC_IF_VERIFY() checks during interface creation. */
static int if_ipsec_verify_interface_creation = 0;
SYSCTL_INT(_net_ipsec, OID_AUTO, verify_interface_creation, CTLFLAG_RW | CTLFLAG_LOCKED, &if_ipsec_verify_interface_creation, 0, "");

#define IPSEC_IF_VERIFY(_e) if (__improbable(if_ipsec_verify_interface_creation)) { VERIFY(_e); }

/* Default geometry for kpipe/netif rings and packet buffers. */
#define IPSEC_IF_DEFAULT_SLOT_SIZE 2048
#define IPSEC_IF_DEFAULT_RING_SIZE 64
#define IPSEC_IF_DEFAULT_TX_FSW_RING_SIZE 64
#define IPSEC_IF_DEFAULT_RX_FSW_RING_SIZE 128
#define IPSEC_IF_DEFAULT_BUF_SEG_SIZE skmem_usr_buf_seg_size

/* WMM mode uses one ring per WMM access category. */
#define IPSEC_IF_WMM_RING_COUNT NEXUS_NUM_WMM_QUEUES
#define IPSEC_IF_MAX_RING_COUNT IPSEC_IF_WMM_RING_COUNT
#define IPSEC_NETIF_WMM_TX_RING_COUNT IPSEC_IF_WMM_RING_COUNT
#define IPSEC_NETIF_WMM_RX_RING_COUNT 1
#define IPSEC_NETIF_MAX_TX_RING_COUNT IPSEC_NETIF_WMM_TX_RING_COUNT
#define IPSEC_NETIF_MAX_RX_RING_COUNT IPSEC_NETIF_WMM_RX_RING_COUNT

/* Bounds enforced by the ring-size sysctl handlers below. */
#define IPSEC_IF_MIN_RING_SIZE 8
#define IPSEC_IF_MAX_RING_SIZE 1024

#define IPSEC_IF_MIN_SLOT_SIZE 1024
#define IPSEC_IF_MAX_SLOT_SIZE 4096

#define IPSEC_DEFAULT_MAX_PENDING_INPUT_COUNT 512

#define IPSEC_KPIPE_FLAG_WAKE_PKT 0x01

/* Cap on mbufs queued on the legacy input chain before drops. */
static int if_ipsec_max_pending_input = IPSEC_DEFAULT_MAX_PENDING_INPUT_COUNT;

static int sysctl_if_ipsec_ring_size SYSCTL_HANDLER_ARGS;
static int sysctl_if_ipsec_tx_fsw_ring_size SYSCTL_HANDLER_ARGS;
static int sysctl_if_ipsec_rx_fsw_ring_size SYSCTL_HANDLER_ARGS;

/* Current (sysctl-tunable) ring sizes; validated by the handlers above. */
static int if_ipsec_ring_size = IPSEC_IF_DEFAULT_RING_SIZE;
static int if_ipsec_tx_fsw_ring_size = IPSEC_IF_DEFAULT_TX_FSW_RING_SIZE;
static int if_ipsec_rx_fsw_ring_size = IPSEC_IF_DEFAULT_RX_FSW_RING_SIZE;

SYSCTL_INT(_net_ipsec, OID_AUTO, max_pending_input, CTLFLAG_LOCKED | CTLFLAG_RW, &if_ipsec_max_pending_input, 0, "");
SYSCTL_PROC(_net_ipsec, OID_AUTO, ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
    &if_ipsec_ring_size, IPSEC_IF_DEFAULT_RING_SIZE, &sysctl_if_ipsec_ring_size, "I", "");
SYSCTL_PROC(_net_ipsec, OID_AUTO, tx_fsw_ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
    &if_ipsec_tx_fsw_ring_size, IPSEC_IF_DEFAULT_TX_FSW_RING_SIZE, &sysctl_if_ipsec_tx_fsw_ring_size, "I", "");
SYSCTL_PROC(_net_ipsec, OID_AUTO, rx_fsw_ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
    &if_ipsec_rx_fsw_ring_size, IPSEC_IF_DEFAULT_RX_FSW_RING_SIZE, &sysctl_if_ipsec_rx_fsw_ring_size, "I", "");

static int if_ipsec_debug = 0;
SYSCTL_INT(_net_ipsec, OID_AUTO, debug, CTLFLAG_LOCKED | CTLFLAG_RW, &if_ipsec_debug, 0, "");

static errno_t
ipsec_register_nexus(void);

/* Nexus provider/instance UUIDs for one ipsec interface's netif + flowswitch. */
typedef struct ipsec_nx {
	uuid_t if_provider;
	uuid_t if_instance;
	uuid_t fsw_provider;
	uuid_t fsw_instance;
	uuid_t fsw_device;
	uuid_t fsw_agent;
} *ipsec_nx_t;

/* Shared nexus controller, its refcount, and the kpipe provider UUID. */
static nexus_controller_t ipsec_ncd;
static int ipsec_ncd_refcount;
static uuid_t ipsec_kpipe_uuid;

#endif // IPSEC_NEXUS
181
182 /* Control block allocated for each kernel control connection */
/*
 * Control block allocated for each kernel control connection.
 * One PCB backs one ipsec interface; it lives on the global ipsec_head list.
 */
struct ipsec_pcb {
	TAILQ_ENTRY(ipsec_pcb) ipsec_chain;       // linkage on ipsec_head (guarded by ipsec_lock)
	kern_ctl_ref ipsec_ctlref;                // owning kernel control
	ifnet_t ipsec_ifp;                        // attached network interface, NULL until created
	u_int32_t ipsec_unit;                     // control unit; 0 after ctl disconnect
	u_int32_t ipsec_unique_id;
	// These external flags can be set with IPSEC_OPT_FLAGS
	u_int32_t ipsec_external_flags;
	// These internal flags are only used within this driver
	u_int32_t ipsec_internal_flags;
	u_int32_t ipsec_input_frag_size;          // fragment size for inbound reassembly
	bool ipsec_frag_size_set;                 // true once ipsec_input_frag_size was configured
	int ipsec_ext_ifdata_stats;               // non-zero: caller maintains ifnet stats externally
	mbuf_svc_class_t ipsec_output_service_class;
	char ipsec_if_xname[IFXNAMSIZ];           // external interface name (e.g. "ipsec0")
	char ipsec_unique_name[IFXNAMSIZ];
	// PCB lock protects state fields, like ipsec_kpipe_count
	decl_lck_rw_data(, ipsec_pcb_lock);
	// lock to protect ipsec_pcb_data_move & ipsec_pcb_drainers
	decl_lck_mtx_data(, ipsec_pcb_data_move_lock);
	u_int32_t ipsec_pcb_data_move; /* number of data moving contexts */
	u_int32_t ipsec_pcb_drainers; /* number of threads waiting to drain */
	u_int32_t ipsec_pcb_data_path_state; /* internal state of interface data path */
	ipsec_dscp_mapping_t ipsec_output_dscp_mapping;

#if IPSEC_NEXUS
	lck_mtx_t ipsec_input_chain_lock;         // see ordering note below
	lck_mtx_t ipsec_kpipe_encrypt_lock;       // serializes kpipe-side encryption
	lck_mtx_t ipsec_kpipe_decrypt_lock;       // serializes kpipe-side decryption
	struct mbuf * ipsec_input_chain;          // head of pending input mbuf chain
	struct mbuf * ipsec_input_chain_last;     // tail, for O(1) append
	u_int32_t ipsec_input_chain_count;
	// Input chain lock protects the list of input mbufs
	// The input chain lock must be taken AFTER the PCB lock if both are held
	struct ipsec_nx ipsec_nx;                 // netif/flowswitch nexus UUIDs
	u_int32_t ipsec_kpipe_count;              // number of allocated kernel pipes (rings)
	pid_t ipsec_kpipe_pid;                    // pid allowed to bind the kpipes
	uuid_t ipsec_kpipe_proc_uuid;
	uuid_t ipsec_kpipe_uuid[IPSEC_IF_MAX_RING_COUNT];
	void * ipsec_kpipe_rxring[IPSEC_IF_MAX_RING_COUNT];
	void * ipsec_kpipe_txring[IPSEC_IF_MAX_RING_COUNT];
	kern_pbufpool_t ipsec_kpipe_pp;           // packet buffer pool backing the kpipes
	u_int32_t ipsec_kpipe_tx_ring_size;
	u_int32_t ipsec_kpipe_rx_ring_size;

	kern_nexus_t ipsec_netif_nexus;           // netif nexus, set in ipsec_netif_prepare()
	kern_pbufpool_t ipsec_netif_pp;
	void * ipsec_netif_rxring[IPSEC_NETIF_MAX_RX_RING_COUNT];
	void * ipsec_netif_txring[IPSEC_NETIF_MAX_TX_RING_COUNT];
	uint64_t ipsec_netif_txring_size;

	u_int32_t ipsec_slot_size;                // per-slot buffer size (bounded by MIN/MAX_SLOT_SIZE)
	u_int32_t ipsec_netif_ring_size;
	u_int32_t ipsec_tx_fsw_ring_size;
	u_int32_t ipsec_rx_fsw_ring_size;
	bool ipsec_use_netif;                     // true: skywalk netif data path; false: legacy mbuf path
	bool ipsec_needs_netagent;
#endif // IPSEC_NEXUS
};
242
/* These are internal flags not exposed outside this file */
#define IPSEC_FLAGS_KPIPE_ALLOCATED 1 // kernel pipe(s) have been allocated for this PCB

/* data movement refcounting functions */
static boolean_t ipsec_data_move_begin(struct ipsec_pcb *pcb);
static void ipsec_data_move_end(struct ipsec_pcb *pcb);
static void ipsec_wait_data_move_drain(struct ipsec_pcb *pcb);

/* Data path states */
#define IPSEC_PCB_DATA_PATH_READY 0x1

/* Macros to set/clear/test data path states (caller holds ipsec_pcb_data_move_lock) */
#define IPSEC_SET_DATA_PATH_READY(_pcb) ((_pcb)->ipsec_pcb_data_path_state |= IPSEC_PCB_DATA_PATH_READY)
#define IPSEC_CLR_DATA_PATH_READY(_pcb) ((_pcb)->ipsec_pcb_data_path_state &= ~IPSEC_PCB_DATA_PATH_READY)
#define IPSEC_IS_DATA_PATH_READY(_pcb) (((_pcb)->ipsec_pcb_data_path_state & IPSEC_PCB_DATA_PATH_READY) != 0)
258
259 #if IPSEC_NEXUS
260 /* Macros to clear/set/test flags. */
261 static inline void
ipsec_flag_set(struct ipsec_pcb * pcb,uint32_t flag)262 ipsec_flag_set(struct ipsec_pcb *pcb, uint32_t flag)
263 {
264 pcb->ipsec_internal_flags |= flag;
265 }
266 static inline void
ipsec_flag_clr(struct ipsec_pcb * pcb,uint32_t flag)267 ipsec_flag_clr(struct ipsec_pcb *pcb, uint32_t flag)
268 {
269 pcb->ipsec_internal_flags &= ~flag;
270 }
271
272 static inline bool
ipsec_flag_isset(struct ipsec_pcb * pcb,uint32_t flag)273 ipsec_flag_isset(struct ipsec_pcb *pcb, uint32_t flag)
274 {
275 return !!(pcb->ipsec_internal_flags & flag);
276 }
277 #endif // IPSEC_NEXUS
278
/* Global list of all ipsec PCBs, guarded by ipsec_lock. */
TAILQ_HEAD(ipsec_list, ipsec_pcb) ipsec_head;

/* Typed allocation zone for struct ipsec_pcb. */
static KALLOC_TYPE_DEFINE(ipsec_pcb_zone, struct ipsec_pcb, NET_KT_DEFAULT);

/* Maximum depth of the legacy input queue. */
#define IPSECQ_MAXLEN 256
284
285 #if IPSEC_NEXUS
286 static int
287 sysctl_if_ipsec_ring_size SYSCTL_HANDLER_ARGS
288 {
289 #pragma unused(arg1, arg2)
290 int value = if_ipsec_ring_size;
291
292 int error = sysctl_handle_int(oidp, &value, 0, req);
293 if (error || !req->newptr) {
294 return error;
295 }
296
297 if (value < IPSEC_IF_MIN_RING_SIZE ||
298 value > IPSEC_IF_MAX_RING_SIZE) {
299 return EINVAL;
300 }
301
302 if_ipsec_ring_size = value;
303
304 return 0;
305 }
306
307 static int
308 sysctl_if_ipsec_tx_fsw_ring_size SYSCTL_HANDLER_ARGS
309 {
310 #pragma unused(arg1, arg2)
311 int value = if_ipsec_tx_fsw_ring_size;
312
313 int error = sysctl_handle_int(oidp, &value, 0, req);
314 if (error || !req->newptr) {
315 return error;
316 }
317
318 if (value < IPSEC_IF_MIN_RING_SIZE ||
319 value > IPSEC_IF_MAX_RING_SIZE) {
320 return EINVAL;
321 }
322
323 if_ipsec_tx_fsw_ring_size = value;
324
325 return 0;
326 }
327
328 static int
329 sysctl_if_ipsec_rx_fsw_ring_size SYSCTL_HANDLER_ARGS
330 {
331 #pragma unused(arg1, arg2)
332 int value = if_ipsec_rx_fsw_ring_size;
333
334 int error = sysctl_handle_int(oidp, &value, 0, req);
335 if (error || !req->newptr) {
336 return error;
337 }
338
339 if (value < IPSEC_IF_MIN_RING_SIZE ||
340 value > IPSEC_IF_MAX_RING_SIZE) {
341 return EINVAL;
342 }
343
344 if_ipsec_rx_fsw_ring_size = value;
345
346 return 0;
347 }
348
349
350 static inline bool
ipsec_in_wmm_mode(struct ipsec_pcb * pcb)351 ipsec_in_wmm_mode(struct ipsec_pcb *pcb)
352 {
353 return pcb->ipsec_kpipe_count == IPSEC_IF_WMM_RING_COUNT;
354 }
355
356 #endif // IPSEC_NEXUS
357
358 errno_t
ipsec_register_control(void)359 ipsec_register_control(void)
360 {
361 struct kern_ctl_reg kern_ctl;
362 errno_t result = 0;
363
364 #if IPSEC_NEXUS
365 ipsec_register_nexus();
366 #endif // IPSEC_NEXUS
367
368 TAILQ_INIT(&ipsec_head);
369
370 bzero(&kern_ctl, sizeof(kern_ctl));
371 strlcpy(kern_ctl.ctl_name, IPSEC_CONTROL_NAME, sizeof(kern_ctl.ctl_name));
372 kern_ctl.ctl_name[sizeof(kern_ctl.ctl_name) - 1] = 0;
373 kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED | CTL_FLAG_REG_SETUP; /* Require root */
374 kern_ctl.ctl_sendsize = 64 * 1024;
375 kern_ctl.ctl_recvsize = 64 * 1024;
376 kern_ctl.ctl_setup = ipsec_ctl_setup;
377 kern_ctl.ctl_bind = ipsec_ctl_bind;
378 kern_ctl.ctl_connect = ipsec_ctl_connect;
379 kern_ctl.ctl_disconnect = ipsec_ctl_disconnect;
380 kern_ctl.ctl_send = ipsec_ctl_send;
381 kern_ctl.ctl_setopt = ipsec_ctl_setopt;
382 kern_ctl.ctl_getopt = ipsec_ctl_getopt;
383
384 result = ctl_register(&kern_ctl, &ipsec_kctlref);
385 if (result != 0) {
386 os_log_error(OS_LOG_DEFAULT, "ipsec_register_control - ctl_register failed: %d\n", result);
387 return result;
388 }
389
390 /* Register the protocol plumbers */
391 if ((result = proto_register_plumber(PF_INET, IFNET_FAMILY_IPSEC,
392 ipsec_attach_proto, NULL)) != 0) {
393 os_log_error(OS_LOG_DEFAULT, "ipsec_register_control - proto_register_plumber(PF_INET, IFNET_FAMILY_IPSEC) failed: %d\n",
394 result);
395 ctl_deregister(ipsec_kctlref);
396 return result;
397 }
398
399 /* Register the protocol plumbers */
400 if ((result = proto_register_plumber(PF_INET6, IFNET_FAMILY_IPSEC,
401 ipsec_attach_proto, NULL)) != 0) {
402 proto_unregister_plumber(PF_INET, IFNET_FAMILY_IPSEC);
403 ctl_deregister(ipsec_kctlref);
404 os_log_error(OS_LOG_DEFAULT, "ipsec_register_control - proto_register_plumber(PF_INET6, IFNET_FAMILY_IPSEC) failed: %d\n",
405 result);
406 return result;
407 }
408
409 return 0;
410 }
411
412 /* Helpers */
413 int
ipsec_interface_isvalid(ifnet_t interface)414 ipsec_interface_isvalid(ifnet_t interface)
415 {
416 struct ipsec_pcb *pcb = NULL;
417
418 if (interface == NULL) {
419 return 0;
420 }
421
422 pcb = ifnet_softc(interface);
423
424 if (pcb == NULL) {
425 return 0;
426 }
427
428 /* When ctl disconnects, ipsec_unit is set to 0 */
429 if (pcb->ipsec_unit == 0) {
430 return 0;
431 }
432
433 return 1;
434 }
435
436 #if IPSEC_NEXUS
437 boolean_t
ipsec_interface_needs_netagent(ifnet_t interface)438 ipsec_interface_needs_netagent(ifnet_t interface)
439 {
440 struct ipsec_pcb *pcb = NULL;
441
442 if (interface == NULL) {
443 return FALSE;
444 }
445
446 pcb = ifnet_softc(interface);
447
448 if (pcb == NULL) {
449 return FALSE;
450 }
451
452 return pcb->ipsec_needs_netagent == true;
453 }
454 #endif // IPSEC_NEXUS
455
456 static errno_t
ipsec_ifnet_set_attrs(ifnet_t ifp)457 ipsec_ifnet_set_attrs(ifnet_t ifp)
458 {
459 /* Set flags and additional information. */
460 ifnet_set_mtu(ifp, 1500);
461 ifnet_set_flags(ifp, IFF_UP | IFF_MULTICAST | IFF_POINTOPOINT, 0xffff);
462
463 /* The interface must generate its own IPv6 LinkLocal address,
464 * if possible following the recommendation of RFC2472 to the 64bit interface ID
465 */
466 ifnet_set_eflags(ifp, IFEF_NOAUTOIPV6LL, IFEF_NOAUTOIPV6LL);
467
468 #if !IPSEC_NEXUS
469 /* Reset the stats in case as the interface may have been recycled */
470 struct ifnet_stats_param stats;
471 bzero(&stats, sizeof(struct ifnet_stats_param));
472 ifnet_set_stat(ifp, &stats);
473 #endif // !IPSEC_NEXUS
474
475 return 0;
476 }
477
478 #if IPSEC_NEXUS
479
480 static uuid_t ipsec_nx_dom_prov;
481
482 static errno_t
ipsec_nxdp_init(__unused kern_nexus_domain_provider_t domprov)483 ipsec_nxdp_init(__unused kern_nexus_domain_provider_t domprov)
484 {
485 return 0;
486 }
487
488 static void
ipsec_nxdp_fini(__unused kern_nexus_domain_provider_t domprov)489 ipsec_nxdp_fini(__unused kern_nexus_domain_provider_t domprov)
490 {
491 // Ignore
492 }
493
494 static errno_t
ipsec_register_nexus(void)495 ipsec_register_nexus(void)
496 {
497 const struct kern_nexus_domain_provider_init dp_init = {
498 .nxdpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
499 .nxdpi_flags = 0,
500 .nxdpi_init = ipsec_nxdp_init,
501 .nxdpi_fini = ipsec_nxdp_fini
502 };
503 errno_t err = 0;
504
505 /* ipsec_nxdp_init() is called before this function returns */
506 err = kern_nexus_register_domain_provider(NEXUS_TYPE_NET_IF,
507 (const uint8_t *) "com.apple.ipsec",
508 &dp_init, sizeof(dp_init),
509 &ipsec_nx_dom_prov);
510 if (err != 0) {
511 os_log_error(OS_LOG_DEFAULT, "%s: failed to register domain provider\n", __func__);
512 return err;
513 }
514 return 0;
515 }
516
517 static errno_t
ipsec_netif_prepare(kern_nexus_t nexus,ifnet_t ifp)518 ipsec_netif_prepare(kern_nexus_t nexus, ifnet_t ifp)
519 {
520 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
521 pcb->ipsec_netif_nexus = nexus;
522 return ipsec_ifnet_set_attrs(ifp);
523 }
524
525 static errno_t
ipsec_nexus_pre_connect(kern_nexus_provider_t nxprov,proc_t p,kern_nexus_t nexus,nexus_port_t nexus_port,kern_channel_t channel,void ** ch_ctx)526 ipsec_nexus_pre_connect(kern_nexus_provider_t nxprov,
527 proc_t p, kern_nexus_t nexus,
528 nexus_port_t nexus_port, kern_channel_t channel, void **ch_ctx)
529 {
530 #pragma unused(nxprov, p)
531 #pragma unused(nexus, nexus_port, channel, ch_ctx)
532 return 0;
533 }
534
535 static errno_t
ipsec_nexus_connected(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel)536 ipsec_nexus_connected(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
537 kern_channel_t channel)
538 {
539 #pragma unused(nxprov, channel)
540 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
541 boolean_t ok = ifnet_is_attached(pcb->ipsec_ifp, 1);
542 /* Mark the data path as ready */
543 if (ok) {
544 lck_mtx_lock(&pcb->ipsec_pcb_data_move_lock);
545 IPSEC_SET_DATA_PATH_READY(pcb);
546 lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
547 }
548 return ok ? 0 : ENXIO;
549 }
550
551 static void
ipsec_nexus_pre_disconnect(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel)552 ipsec_nexus_pre_disconnect(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
553 kern_channel_t channel)
554 {
555 #pragma unused(nxprov, channel)
556 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
557
558 VERIFY(pcb->ipsec_kpipe_count != 0);
559
560 /* Wait until all threads in the data paths are done. */
561 ipsec_wait_data_move_drain(pcb);
562 }
563
564 static void
ipsec_netif_pre_disconnect(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel)565 ipsec_netif_pre_disconnect(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
566 kern_channel_t channel)
567 {
568 #pragma unused(nxprov, channel)
569 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
570
571 /* Wait until all threads in the data paths are done. */
572 ipsec_wait_data_move_drain(pcb);
573 }
574
575 static void
ipsec_nexus_disconnected(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel)576 ipsec_nexus_disconnected(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
577 kern_channel_t channel)
578 {
579 #pragma unused(nxprov, channel)
580 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
581 if (pcb->ipsec_netif_nexus == nexus) {
582 pcb->ipsec_netif_nexus = NULL;
583 }
584 ifnet_decr_iorefcnt(pcb->ipsec_ifp);
585 }
586
/*
 * Kpipe ring init callback: identify which kpipe slot the connecting
 * channel belongs to (by matching its nexus UUID against the PCB's
 * kpipe UUID table), stash the slot index as the ring context, and
 * record the ring pointer in the PCB's rx/tx ring table.
 */
static errno_t
ipsec_kpipe_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_t channel, kern_channel_ring_t ring, boolean_t is_tx_ring,
    void **ring_ctx)
{
#pragma unused(nxprov)
#pragma unused(channel)
	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
	uint8_t ring_idx;

	/* uuid_compare() returns 0 on match. */
	for (ring_idx = 0; ring_idx < pcb->ipsec_kpipe_count; ring_idx++) {
		if (!uuid_compare(channel->ch_info->cinfo_nx_uuid, pcb->ipsec_kpipe_uuid[ring_idx])) {
			break;
		}
	}

	/* No matching kpipe UUID: this channel does not belong to us. */
	if (ring_idx == pcb->ipsec_kpipe_count) {
		uuid_string_t uuidstr;
		uuid_unparse(channel->ch_info->cinfo_nx_uuid, uuidstr);
		os_log_error(OS_LOG_DEFAULT, "%s: %s cannot find channel %s\n", __func__, pcb->ipsec_if_xname, uuidstr);
		return ENOENT;
	}

	/* The slot index is recovered later via kern_channel_ring_get_context(). */
	*ring_ctx = (void *)(uintptr_t)ring_idx;

	/* Each slot must be initialized at most once per direction. */
	if (!is_tx_ring) {
		VERIFY(pcb->ipsec_kpipe_rxring[ring_idx] == NULL);
		pcb->ipsec_kpipe_rxring[ring_idx] = ring;
	} else {
		VERIFY(pcb->ipsec_kpipe_txring[ring_idx] == NULL);
		pcb->ipsec_kpipe_txring[ring_idx] = ring;
	}
	return 0;
}
621
622 static void
ipsec_kpipe_ring_fini(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring)623 ipsec_kpipe_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
624 kern_channel_ring_t ring)
625 {
626 #pragma unused(nxprov)
627 bool found = false;
628 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
629
630 for (unsigned int i = 0; i < pcb->ipsec_kpipe_count; i++) {
631 if (pcb->ipsec_kpipe_rxring[i] == ring) {
632 pcb->ipsec_kpipe_rxring[i] = NULL;
633 found = true;
634 } else if (pcb->ipsec_kpipe_txring[i] == ring) {
635 pcb->ipsec_kpipe_txring[i] = NULL;
636 found = true;
637 }
638 }
639 VERIFY(found);
640 }
641
/*
 * Kpipe TX sync callback. The kpipe TX slots are actually consumed by the
 * netif RX path, so this function only checks whether there is anything to
 * send and, if so, pokes the netif RX ring to come read it.
 *
 * Lock order: data-move ref first, then ipsec_pcb_lock (shared); the
 * pcb lock is dropped before kern_channel_notify() is called.
 */
static errno_t
ipsec_kpipe_sync_tx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t tx_ring, uint32_t flags)
{
#pragma unused(nxprov)
#pragma unused(flags)
	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);

	/* Bail quietly if the interface data path is being torn down. */
	if (!ipsec_data_move_begin(pcb)) {
		os_log_info(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
		return 0;
	}

	lck_rw_lock_shared(&pcb->ipsec_pcb_lock);

	/* Nothing to do if the kpipes have been torn down already. */
	if (!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		ipsec_data_move_end(pcb);
		return 0;
	}

	VERIFY(pcb->ipsec_kpipe_count);

	kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
	if (tx_slot == NULL) {
		// Nothing to write, bail
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		ipsec_data_move_end(pcb);
		return 0;
	}

	// Signal the netif ring to read
	kern_channel_ring_t rx_ring = pcb->ipsec_netif_rxring[0];
	lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);

	if (rx_ring != NULL) {
		kern_channel_notify(rx_ring, 0);
	}

	ipsec_data_move_end(pcb);
	return 0;
}
684
/*
 * Encrypt an outbound mbuf through the IPsec stack on behalf of the given
 * interface. Returns the encrypted mbuf (possibly a different chain than the
 * input), or NULL on failure — in which case the input mbuf has been freed.
 *
 * NULL is also returned (without error) when the packet was tunneled into
 * the other address family, in which case the stack has consumed it; see
 * the TODO notes below about the mbuf being lost in that path.
 */
static mbuf_t
ipsec_encrypt_mbuf(ifnet_t interface,
    mbuf_t data)
{
	struct ipsec_output_state ipsec_state;
	int error = 0;
	uint32_t af; /* NOTE(review): assigned per-family but not otherwise read here */

	// Make sure this packet isn't looping through the interface
	if (necp_get_last_interface_index_from_packet(data) == interface->if_index) {
		error = -1;
		goto ipsec_output_err;
	}

	// Mark the interface so NECP can evaluate tunnel policy
	necp_mark_packet_from_interface(data, interface);

	/* Dispatch on the IP version nibble of the leading header. */
	struct ip *ip = mtod(data, struct ip *);
	u_int ip_version = ip->ip_v;

	switch (ip_version) {
	case 4: {
		af = AF_INET;

		memset(&ipsec_state, 0, sizeof(ipsec_state));
		ipsec_state.m = data;
		ipsec_state.dst = (struct sockaddr *)&ip->ip_dst;
		memset(&ipsec_state.ro, 0, sizeof(ipsec_state.ro));

		error = ipsec4_interface_output(&ipsec_state, interface);
		if (error == 0 && ipsec_state.tunneled == 6) {
			// Tunneled in IPv6 - packet is gone
			// TODO: Don't lose mbuf
			data = NULL;
			goto done;
		}

		/* The stack may have replaced the mbuf chain. */
		data = ipsec_state.m;
		if (error || data == NULL) {
			if (error) {
				os_log_error(OS_LOG_DEFAULT, "ipsec_encrypt_mbuf: ipsec4_output error %d\n", error);
			}
			goto ipsec_output_err;
		}
		goto done;
	}
	case 6: {
		af = AF_INET6;

		/* Split the IPv6 header into its own mbuf before encrypting. */
		data = ipsec6_splithdr(data);
		if (data == NULL) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_encrypt_mbuf: ipsec6_splithdr returned NULL\n");
			goto ipsec_output_err;
		}

		struct ip6_hdr *ip6 = mtod(data, struct ip6_hdr *);

		memset(&ipsec_state, 0, sizeof(ipsec_state));
		ipsec_state.m = data;
		ipsec_state.dst = (struct sockaddr *)&ip6->ip6_dst;
		memset(&ipsec_state.ro, 0, sizeof(ipsec_state.ro));

		error = ipsec6_interface_output(&ipsec_state, interface, &ip6->ip6_nxt, ipsec_state.m);
		if (error == 0 && ipsec_state.tunneled == 4) {
			// Tunneled in IPv4 - packet is gone
			// TODO: Don't lose mbuf
			data = NULL;
			goto done;
		}
		/* The stack may have replaced the mbuf chain. */
		data = ipsec_state.m;
		if (error || data == NULL) {
			if (error) {
				os_log_error(OS_LOG_DEFAULT, "ipsec_encrypt_mbuf: ipsec6_output error %d\n", error);
			}
			goto ipsec_output_err;
		}
		goto done;
	}
	default: {
		os_log_error(OS_LOG_DEFAULT, "ipsec_encrypt_mbuf: Received unknown packet version %d\n", ip_version);
		error = -1;
		goto ipsec_output_err;
	}
	}

done:
	return data;

ipsec_output_err:
	/* On error, the caller's mbuf (or its replacement) is freed here. */
	if (data) {
		mbuf_freem(data);
	}
	return NULL;
}
779
780 static errno_t
ipsec_kpipe_sync_rx(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t rx_ring,uint32_t flags)781 ipsec_kpipe_sync_rx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
782 kern_channel_ring_t rx_ring, uint32_t flags)
783 {
784 #pragma unused(nxprov)
785 #pragma unused(flags)
786 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
787 struct kern_channel_ring_stat_increment rx_ring_stats;
788 uint8_t ring_idx = (uint8_t)(uintptr_t)kern_channel_ring_get_context(rx_ring);
789
790 if (!ipsec_data_move_begin(pcb)) {
791 os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
792 return 0;
793 }
794
795 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
796
797 if (!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
798 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
799 ipsec_data_move_end(pcb);
800 return 0;
801 }
802
803 VERIFY(pcb->ipsec_kpipe_count);
804 VERIFY(ring_idx <= pcb->ipsec_kpipe_count);
805
806 // Reclaim user-released slots
807 (void) kern_channel_reclaim(rx_ring);
808
809 uint32_t avail = kern_channel_available_slot_count(rx_ring);
810 if (avail == 0) {
811 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
812 os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d no room in rx_ring\n", __func__,
813 pcb->ipsec_if_xname, rx_ring->ckr_name, ring_idx);
814 ipsec_data_move_end(pcb);
815 return 0;
816 }
817
818 kern_channel_ring_t tx_ring = pcb->ipsec_netif_txring[ring_idx];
819 if (tx_ring == NULL) {
820 // Net-If TX ring not set up yet, nothing to read
821 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
822 os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d bad netif_txring 1\n", __func__,
823 pcb->ipsec_if_xname, rx_ring->ckr_name, ring_idx);
824 ipsec_data_move_end(pcb);
825 return 0;
826 }
827
828 struct netif_stats *nifs = &NX_NETIF_PRIVATE(pcb->ipsec_netif_nexus)->nif_stats;
829
830 // Unlock ipsec before entering ring
831 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
832
833 (void)kr_enter(tx_ring, TRUE);
834
835 // Lock again after entering and validate
836 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
837 if (tx_ring != pcb->ipsec_netif_txring[ring_idx]) {
838 // Ring no longer valid
839 // Unlock first, then exit ring
840 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
841 kr_exit(tx_ring);
842 os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d bad netif_txring 2\n", __func__,
843 pcb->ipsec_if_xname, rx_ring->ckr_name, ring_idx);
844 ipsec_data_move_end(pcb);
845 return 0;
846 }
847
848 struct kern_channel_ring_stat_increment tx_ring_stats;
849 bzero(&tx_ring_stats, sizeof(tx_ring_stats));
850 kern_channel_slot_t tx_pslot = NULL;
851 kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
852 if (tx_slot == NULL) {
853 // Nothing to read, don't bother signalling
854 // Unlock first, then exit ring
855 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
856 kr_exit(tx_ring);
857 ipsec_data_move_end(pcb);
858 return 0;
859 }
860
861 struct kern_pbufpool *rx_pp = rx_ring->ckr_pp;
862 VERIFY(rx_pp != NULL);
863 struct kern_pbufpool *tx_pp = tx_ring->ckr_pp;
864 VERIFY(tx_pp != NULL);
865 bzero(&rx_ring_stats, sizeof(rx_ring_stats));
866 kern_channel_slot_t rx_pslot = NULL;
867 kern_channel_slot_t rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);
868 kern_packet_t tx_chain_ph = 0;
869
870 while (rx_slot != NULL && tx_slot != NULL) {
871 size_t length = 0;
872 mbuf_t data = NULL;
873 errno_t error = 0;
874
875 // Allocate rx packet
876 kern_packet_t rx_ph = 0;
877 error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
878 if (__improbable(error != 0)) {
879 os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s: failed to allocate packet\n",
880 pcb->ipsec_ifp->if_xname);
881 break;
882 }
883
884 kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);
885
886 if (tx_ph == 0) {
887 // Advance TX ring
888 tx_pslot = tx_slot;
889 tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
890 kern_pbufpool_free(rx_pp, rx_ph);
891 continue;
892 }
893 (void) kern_channel_slot_detach_packet(tx_ring, tx_slot, tx_ph);
894 if (tx_chain_ph != 0) {
895 kern_packet_append(tx_ph, tx_chain_ph);
896 }
897 tx_chain_ph = tx_ph;
898
899 // Advance TX ring
900 tx_pslot = tx_slot;
901 tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
902
903 kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
904 VERIFY(tx_buf != NULL);
905 uint8_t *tx_baddr = kern_buflet_get_data_address(tx_buf);
906 VERIFY(tx_baddr != NULL);
907 tx_baddr += kern_buflet_get_data_offset(tx_buf);
908
909 bpf_tap_packet_out(pcb->ipsec_ifp, DLT_RAW, tx_ph, NULL, 0);
910
911 length = MIN(kern_packet_get_data_length(tx_ph),
912 pcb->ipsec_slot_size);
913
914 // Increment TX stats
915 tx_ring_stats.kcrsi_slots_transferred++;
916 tx_ring_stats.kcrsi_bytes_transferred += length;
917
918 if (length > 0) {
919 error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, &data);
920 if (error == 0) {
921 error = mbuf_copyback(data, 0, length, tx_baddr, MBUF_DONTWAIT);
922 if (error == 0) {
923 // Encrypt and send packet
924 lck_mtx_lock(&pcb->ipsec_kpipe_encrypt_lock);
925 data = ipsec_encrypt_mbuf(pcb->ipsec_ifp, data);
926 lck_mtx_unlock(&pcb->ipsec_kpipe_encrypt_lock);
927 } else {
928 os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s - mbuf_copyback(%zu) error %d\n", pcb->ipsec_ifp->if_xname, length, error);
929 STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
930 STATS_INC(nifs, NETIF_STATS_DROP);
931 mbuf_freem(data);
932 data = NULL;
933 }
934 } else {
935 os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s - mbuf_gethdr error %d\n", pcb->ipsec_ifp->if_xname, error);
936 STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
937 STATS_INC(nifs, NETIF_STATS_DROP);
938 }
939 } else {
940 os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s - 0 length packet\n", pcb->ipsec_ifp->if_xname);
941 STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
942 STATS_INC(nifs, NETIF_STATS_DROP);
943 }
944
945 if (data == NULL) {
946 os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s: no encrypted packet to send\n", pcb->ipsec_ifp->if_xname);
947 kern_pbufpool_free(rx_pp, rx_ph);
948 break;
949 }
950
951 length = mbuf_pkthdr_len(data);
952 if (length > PP_BUF_SIZE_DEF(rx_pp)) {
953 // Flush data
954 mbuf_freem(data);
955 kern_pbufpool_free(rx_pp, rx_ph);
956 os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s: encrypted packet length %zu > %u\n",
957 pcb->ipsec_ifp->if_xname, length, PP_BUF_SIZE_DEF(rx_pp));
958 continue;
959 }
960
961 // Fillout rx packet
962 kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
963 VERIFY(rx_buf != NULL);
964 void *rx_baddr = kern_buflet_get_data_address(rx_buf);
965 VERIFY(rx_baddr != NULL);
966
967 // Copy-in data from mbuf to buflet
968 mbuf_copydata(data, 0, length, (void *)rx_baddr);
969 kern_packet_clear_flow_uuid(rx_ph); // Zero flow id
970
971 // Finalize and attach the packet
972 error = kern_buflet_set_data_offset(rx_buf, 0);
973 VERIFY(error == 0);
974 error = kern_buflet_set_data_length(rx_buf, (uint16_t)length);
975 VERIFY(error == 0);
976 error = kern_packet_finalize(rx_ph);
977 VERIFY(error == 0);
978 error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
979 VERIFY(error == 0);
980
981 STATS_INC(nifs, NETIF_STATS_TX_PACKETS);
982 STATS_INC(nifs, NETIF_STATS_TX_COPY_DIRECT);
983
984 rx_ring_stats.kcrsi_slots_transferred++;
985 rx_ring_stats.kcrsi_bytes_transferred += length;
986
987 if (!pcb->ipsec_ext_ifdata_stats) {
988 ifnet_stat_increment_out(pcb->ipsec_ifp, 1, (uint16_t)length, 0);
989 }
990
991 mbuf_freem(data);
992
993 rx_pslot = rx_slot;
994 rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
995 }
996
997 if (rx_pslot) {
998 kern_channel_advance_slot(rx_ring, rx_pslot);
999 kern_channel_increment_ring_net_stats(rx_ring, pcb->ipsec_ifp, &rx_ring_stats);
1000 }
1001
1002 if (tx_chain_ph != 0) {
1003 kern_pbufpool_free_chain(tx_pp, tx_chain_ph);
1004 }
1005
1006 if (tx_pslot) {
1007 kern_channel_advance_slot(tx_ring, tx_pslot);
1008 kern_channel_increment_ring_net_stats(tx_ring, pcb->ipsec_ifp, &tx_ring_stats);
1009 (void)kern_channel_reclaim(tx_ring);
1010 }
1011
1012 /* always reenable output */
1013 errno_t error = ifnet_enable_output(pcb->ipsec_ifp);
1014 if (error != 0) {
1015 os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx: ifnet_enable_output returned error %d\n", error);
1016 }
1017
1018 // Unlock first, then exit ring
1019 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
1020
1021 if (tx_pslot != NULL) {
1022 kern_channel_notify(tx_ring, 0);
1023 }
1024 kr_exit(tx_ring);
1025
1026 ipsec_data_move_end(pcb);
1027 return 0;
1028 }
1029
1030 static uint8_t
ipsec_find_tx_ring_by_svc(kern_packet_svc_class_t svc_class)1031 ipsec_find_tx_ring_by_svc(kern_packet_svc_class_t svc_class)
1032 {
1033 switch (svc_class) {
1034 case KPKT_SC_VO: {
1035 return 0;
1036 }
1037 case KPKT_SC_VI: {
1038 return 1;
1039 }
1040 case KPKT_SC_BE: {
1041 return 2;
1042 }
1043 case KPKT_SC_BK: {
1044 return 3;
1045 }
1046 default: {
1047 VERIFY(0);
1048 return 0;
1049 }
1050 }
1051 }
1052
1053 static errno_t
ipsec_netif_ring_init(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel,kern_channel_ring_t ring,boolean_t is_tx_ring,void ** ring_ctx)1054 ipsec_netif_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
1055 kern_channel_t channel, kern_channel_ring_t ring, boolean_t is_tx_ring,
1056 void **ring_ctx)
1057 {
1058 #pragma unused(nxprov)
1059 #pragma unused(channel)
1060 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
1061
1062 if (!is_tx_ring) {
1063 VERIFY(pcb->ipsec_netif_rxring[0] == NULL);
1064 pcb->ipsec_netif_rxring[0] = ring;
1065 } else {
1066 uint8_t ring_idx = 0;
1067 if (ipsec_in_wmm_mode(pcb)) {
1068 int err;
1069 kern_packet_svc_class_t svc_class;
1070 err = kern_channel_get_service_class(ring, &svc_class);
1071 VERIFY(err == 0);
1072 ring_idx = ipsec_find_tx_ring_by_svc(svc_class);
1073 VERIFY(ring_idx < IPSEC_IF_WMM_RING_COUNT);
1074 }
1075
1076 *ring_ctx = (void *)(uintptr_t)ring_idx;
1077
1078 VERIFY(pcb->ipsec_netif_txring[ring_idx] == NULL);
1079 pcb->ipsec_netif_txring[ring_idx] = ring;
1080 }
1081 return 0;
1082 }
1083
1084 static void
ipsec_netif_ring_fini(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring)1085 ipsec_netif_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
1086 kern_channel_ring_t ring)
1087 {
1088 #pragma unused(nxprov)
1089 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
1090 bool found = false;
1091
1092 for (int i = 0; i < IPSEC_NETIF_MAX_RX_RING_COUNT; i++) {
1093 if (pcb->ipsec_netif_rxring[i] == ring) {
1094 pcb->ipsec_netif_rxring[i] = NULL;
1095 VERIFY(!found);
1096 found = true;
1097 }
1098 }
1099 for (int i = 0; i < IPSEC_NETIF_MAX_TX_RING_COUNT; i++) {
1100 if (pcb->ipsec_netif_txring[i] == ring) {
1101 pcb->ipsec_netif_txring[i] = NULL;
1102 VERIFY(!found);
1103 found = true;
1104 }
1105 }
1106 VERIFY(found);
1107 }
1108
1109 static bool
ipsec_netif_check_policy(ifnet_t interface,mbuf_t data)1110 ipsec_netif_check_policy(ifnet_t interface, mbuf_t data)
1111 {
1112 necp_kernel_policy_result necp_result = 0;
1113 necp_kernel_policy_result_parameter necp_result_parameter = {};
1114 uint32_t necp_matched_policy_id = 0;
1115 struct ip_out_args args4 = { };
1116 struct ip6_out_args args6 = { };
1117
1118 // This packet has been marked with IP level policy, do not mark again.
1119 if (data && data->m_pkthdr.necp_mtag.necp_policy_id >= NECP_KERNEL_POLICY_ID_FIRST_VALID_IP) {
1120 return true;
1121 }
1122
1123 size_t length = mbuf_pkthdr_len(data);
1124 if (length < sizeof(struct ip)) {
1125 return false;
1126 }
1127
1128 struct ip *ip = mtod(data, struct ip *);
1129 u_int ip_version = ip->ip_v;
1130 switch (ip_version) {
1131 case 4: {
1132 if (interface != NULL) {
1133 args4.ipoa_flags |= IPOAF_BOUND_IF;
1134 args4.ipoa_boundif = interface->if_index;
1135 }
1136 necp_matched_policy_id = necp_ip_output_find_policy_match(data, IP_OUTARGS, &args4, NULL,
1137 &necp_result, &necp_result_parameter);
1138 break;
1139 }
1140 case 6: {
1141 if (interface != NULL) {
1142 args6.ip6oa_flags |= IP6OAF_BOUND_IF;
1143 args6.ip6oa_boundif = interface->if_index;
1144 }
1145 necp_matched_policy_id = necp_ip6_output_find_policy_match(data, IPV6_OUTARGS, &args6, NULL,
1146 &necp_result, &necp_result_parameter);
1147 break;
1148 }
1149 default: {
1150 return false;
1151 }
1152 }
1153
1154 if (necp_result == NECP_KERNEL_POLICY_RESULT_DROP ||
1155 necp_result == NECP_KERNEL_POLICY_RESULT_SOCKET_DIVERT) {
1156 /* Drop and flow divert packets should be blocked at the IP layer */
1157 return false;
1158 }
1159
1160 necp_mark_packet_from_ip(data, necp_matched_policy_id);
1161 return true;
1162 }
1163
/*
 * Netif TX sync callback for the ipsec interface.
 *
 * Two operating modes:
 *  - Kernel-pipe mode: when kpipes are allocated, the payload is left on
 *    the TX ring and the matching kpipe RX ring is simply notified; the
 *    kpipe consumer will pull and encrypt the packets.
 *  - BSD-injection mode: otherwise each TX slot's packet is copied into
 *    an mbuf, policy-checked via NECP, and handed to ipsec_output() for
 *    encryption and transmission.
 *
 * Always returns 0; per-packet failures are counted in the netif stats.
 */
static errno_t
ipsec_netif_sync_tx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t tx_ring, uint32_t flags)
{
#pragma unused(nxprov)
#pragma unused(flags)
	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);

	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;

	/* Bail out (successfully) if the data path is being torn down. */
	if (!ipsec_data_move_begin(pcb)) {
		os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
		return 0;
	}

	lck_rw_lock_shared(&pcb->ipsec_pcb_lock);

	struct kern_channel_ring_stat_increment tx_ring_stats;
	bzero(&tx_ring_stats, sizeof(tx_ring_stats));
	kern_channel_slot_t tx_pslot = NULL;
	kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
	kern_packet_t tx_chain_ph = 0;

	STATS_INC(nifs, NETIF_STATS_TX_SYNC);

	if (tx_slot == NULL) {
		// Nothing to write, don't bother signalling
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		ipsec_data_move_end(pcb);
		return 0;
	}

	if (pcb->ipsec_kpipe_count &&
	    ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
		// Select the corresponding kpipe rx ring
		uint8_t ring_idx = (uint8_t)(uintptr_t)kern_channel_ring_get_context(tx_ring);
		VERIFY(ring_idx < IPSEC_IF_MAX_RING_COUNT);
		kern_channel_ring_t rx_ring = pcb->ipsec_kpipe_rxring[ring_idx];

		// Unlock while calling notify
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);

		// Signal the kernel pipe ring to read
		if (rx_ring != NULL) {
			kern_channel_notify(rx_ring, 0);
		}

		ipsec_data_move_end(pcb);
		return 0;
	}

	// If we're here, we're injecting into the BSD stack
	while (tx_slot != NULL) {
		size_t length = 0;
		mbuf_t data = NULL;

		kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);

		if (tx_ph == 0) {
			// Advance TX ring
			tx_pslot = tx_slot;
			tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
			continue;
		}
		/* Detach and accumulate packets on a chain, freed in one go below. */
		(void) kern_channel_slot_detach_packet(tx_ring, tx_slot, tx_ph);
		if (tx_chain_ph != 0) {
			kern_packet_append(tx_ph, tx_chain_ph);
		}
		tx_chain_ph = tx_ph;

		// Advance TX ring
		tx_pslot = tx_slot;
		tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);

		kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
		VERIFY(tx_buf != NULL);
		uint8_t *tx_baddr = kern_buflet_get_data_address(tx_buf);
		VERIFY(tx_baddr != 0);
		tx_baddr += kern_buflet_get_data_offset(tx_buf);

		bpf_tap_packet_out(pcb->ipsec_ifp, DLT_RAW, tx_ph, NULL, 0);

		/* Clamp copy length to the configured slot size. */
		length = MIN(kern_packet_get_data_length(tx_ph),
		    pcb->ipsec_slot_size);

		if (length > 0) {
			errno_t error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, &data);
			if (error == 0) {
				error = mbuf_copyback(data, 0, length, tx_baddr, MBUF_DONTWAIT);
				if (error == 0) {
					// Mark packet from policy
					uint32_t policy_id = kern_packet_get_policy_id(tx_ph);
					necp_mark_packet_from_ip(data, policy_id);

					// Check policy with NECP
					if (!ipsec_netif_check_policy(pcb->ipsec_ifp, data)) {
						os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - failed policy check\n", pcb->ipsec_ifp->if_xname);
						STATS_INC(nifs, NETIF_STATS_DROP);
						mbuf_freem(data);
						data = NULL;
					} else {
						// Send through encryption
						/* ipsec_output consumes the mbuf regardless of outcome. */
						error = ipsec_output(pcb->ipsec_ifp, data);
						if (error != 0) {
							os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - ipsec_output error %d\n", pcb->ipsec_ifp->if_xname, error);
						}
					}
				} else {
					os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - mbuf_copyback(%zu) error %d\n", pcb->ipsec_ifp->if_xname, length, error);
					STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
					STATS_INC(nifs, NETIF_STATS_DROP);
					mbuf_freem(data);
					data = NULL;
				}
			} else {
				os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - mbuf_gethdr error %d\n", pcb->ipsec_ifp->if_xname, error);
				STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
				STATS_INC(nifs, NETIF_STATS_DROP);
			}
		} else {
			os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - 0 length packet\n", pcb->ipsec_ifp->if_xname);
			STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
			STATS_INC(nifs, NETIF_STATS_DROP);
		}

		if (data == NULL) {
			/* Allocation/copy/policy failure: stop draining this sync. */
			os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s: no encrypted packet to send\n", pcb->ipsec_ifp->if_xname);
			break;
		}

		STATS_INC(nifs, NETIF_STATS_TX_PACKETS);
		STATS_INC(nifs, NETIF_STATS_TX_COPY_MBUF);

		tx_ring_stats.kcrsi_slots_transferred++;
		tx_ring_stats.kcrsi_bytes_transferred += length;
	}

	/* Free all detached TX packets in a single chain operation. */
	if (tx_chain_ph != 0) {
		kern_pbufpool_free_chain(tx_ring->ckr_pp, tx_chain_ph);
	}

	if (tx_pslot) {
		kern_channel_advance_slot(tx_ring, tx_pslot);
		kern_channel_increment_ring_net_stats(tx_ring, pcb->ipsec_ifp, &tx_ring_stats);
		(void)kern_channel_reclaim(tx_ring);
	}

	lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
	ipsec_data_move_end(pcb);

	return 0;
}
1316
/*
 * Service a single netif TX ring on a doorbell: refill/sync the ring,
 * apply flow control when it is close to full, and kick the paired
 * kpipe RX ring so the consumer drains it.
 *
 * Returns 0 on success, ENXIO if the ring was torn down while we were
 * entering it.
 */
static errno_t
ipsec_netif_tx_doorbell_one(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t ring, uint32_t flags, uint8_t ring_idx)
{
#pragma unused(nxprov)
	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
	boolean_t more = false;
	errno_t rc = 0;

	VERIFY((flags & KERN_NEXUS_TXDOORBELLF_ASYNC_REFILL) == 0);

	/*
	 * Refill and sync the ring; we may be racing against another thread doing
	 * an RX sync that also wants to do kr_enter(), and so use the blocking
	 * variant here.
	 */
	rc = kern_channel_tx_refill_canblock(ring, UINT32_MAX, UINT32_MAX, true, &more);
	if (rc != 0 && rc != EAGAIN && rc != EBUSY) {
		os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s tx refill failed %d\n", __func__,
		    pcb->ipsec_if_xname, ring->ckr_name, rc);
	}

	/* Enter the ring first, then take the pcb lock (lock ordering). */
	(void) kr_enter(ring, TRUE);
	lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
	if (ring != pcb->ipsec_netif_txring[ring_idx]) {
		// ring no longer valid
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		kr_exit(ring);
		os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d bad netif_txring 3\n", __func__,
		    pcb->ipsec_if_xname, ring->ckr_name, ring_idx);
		return ENXIO;
	}

	if (pcb->ipsec_kpipe_count) {
		uint32_t tx_available = kern_channel_available_slot_count(ring);
		if (pcb->ipsec_netif_txring_size > 0 &&
		    tx_available >= pcb->ipsec_netif_txring_size - 1) {
			// No room left in tx ring, disable output for now
			/* Output is re-enabled by the kpipe RX sync path. */
			errno_t error = ifnet_disable_output(pcb->ipsec_ifp);
			if (error != 0) {
				os_log_error(OS_LOG_DEFAULT, "ipsec_netif_tx_doorbell: ifnet_disable_output returned error %d\n", error);
			}
		}
	}

	if (pcb->ipsec_kpipe_count) {
		kern_channel_ring_t rx_ring = pcb->ipsec_kpipe_rxring[ring_idx];

		// Unlock while calling notify
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		// Signal the kernel pipe ring to read
		if (rx_ring != NULL) {
			kern_channel_notify(rx_ring, 0);
		}
	} else {
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
	}

	kr_exit(ring);

	return 0;
}
1379
1380 static errno_t
ipsec_netif_tx_doorbell(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring,__unused uint32_t flags)1381 ipsec_netif_tx_doorbell(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
1382 kern_channel_ring_t ring, __unused uint32_t flags)
1383 {
1384 errno_t ret = 0;
1385 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
1386
1387 if (!ipsec_data_move_begin(pcb)) {
1388 os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
1389 return 0;
1390 }
1391
1392 if (ipsec_in_wmm_mode(pcb)) {
1393 for (uint8_t i = 0; i < IPSEC_IF_WMM_RING_COUNT; i++) {
1394 kern_channel_ring_t nring = pcb->ipsec_netif_txring[i];
1395 ret = ipsec_netif_tx_doorbell_one(nxprov, nexus, nring, flags, i);
1396 if (ret) {
1397 break;
1398 }
1399 }
1400 } else {
1401 ret = ipsec_netif_tx_doorbell_one(nxprov, nexus, ring, flags, 0);
1402 }
1403
1404 ipsec_data_move_end(pcb);
1405 return ret;
1406 }
1407
/*
 * Netif RX sync callback for the ipsec interface.
 *
 * Fills the netif RX ring from two sources, in order:
 *  1) The legacy mbuf input chain (pcb->ipsec_input_chain).  Packets that
 *     do not fit a pool buffer (or exceed the configured input frag size)
 *     are IP-fragmented and the fragments are re-queued on the chain.
 *  2) When kernel pipes are enabled, packets waiting on each kpipe TX
 *     ring; these are ESP-decrypted inline (esp4/esp6 input) before
 *     being attached to the RX ring.
 *
 * Always returns 0; drops are accounted in the netif stats.
 */
static errno_t
ipsec_netif_sync_rx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t rx_ring, uint32_t flags)
{
#pragma unused(nxprov)
#pragma unused(flags)
	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
	struct kern_channel_ring_stat_increment rx_ring_stats;

	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;

	/* Bail out (successfully) if the data path is being torn down. */
	if (!ipsec_data_move_begin(pcb)) {
		os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
		return 0;
	}

	lck_rw_lock_shared(&pcb->ipsec_pcb_lock);

	// Reclaim user-released slots
	(void) kern_channel_reclaim(rx_ring);

	STATS_INC(nifs, NETIF_STATS_RX_SYNC);

	uint32_t avail = kern_channel_available_slot_count(rx_ring);
	if (avail == 0) {
		/* RX ring is full; nothing we can deliver right now. */
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		ipsec_data_move_end(pcb);
		return 0;
	}

	struct kern_pbufpool *rx_pp = rx_ring->ckr_pp;
	VERIFY(rx_pp != NULL);
	bzero(&rx_ring_stats, sizeof(rx_ring_stats));
	kern_channel_slot_t rx_pslot = NULL;
	kern_channel_slot_t rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);

	/* Phase 1: drain the legacy mbuf input chain into the RX ring. */
	while (rx_slot != NULL) {
		// Check for a waiting packet
		lck_mtx_lock(&pcb->ipsec_input_chain_lock);
		mbuf_t data = pcb->ipsec_input_chain;
		if (data == NULL) {
			lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
			break;
		}

		// Allocate rx packet
		kern_packet_t rx_ph = 0;
		errno_t error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
		if (__improbable(error != 0)) {
			STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_PKT);
			STATS_INC(nifs, NETIF_STATS_DROP);
			lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
			break;
		}

		// Advance waiting packets
		if (pcb->ipsec_input_chain_count > 0) {
			pcb->ipsec_input_chain_count--;
		}
		pcb->ipsec_input_chain = data->m_nextpkt;
		data->m_nextpkt = NULL;
		if (pcb->ipsec_input_chain == NULL) {
			pcb->ipsec_input_chain_last = NULL;
		}
		lck_mtx_unlock(&pcb->ipsec_input_chain_lock);

		size_t length = mbuf_pkthdr_len(data);

		if (length < sizeof(struct ip)) {
			// Flush data
			mbuf_freem(data);
			kern_pbufpool_free(rx_pp, rx_ph);
			STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
			STATS_INC(nifs, NETIF_STATS_DROP);
			os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: legacy decrypted packet length cannot hold IP %zu < %zu\n",
			    pcb->ipsec_ifp->if_xname, length, sizeof(struct ip));
			continue;
		}

		uint32_t af = 0;
		struct ip *ip = mtod(data, struct ip *);
		u_int ip_version = ip->ip_v;
		switch (ip_version) {
		case 4: {
			af = AF_INET;
			break;
		}
		case 6: {
			af = AF_INET6;
			break;
		}
		default: {
			/* af stays 0; the fragmentation path below drops it. */
			os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: legacy unknown ip version %u\n",
			    pcb->ipsec_ifp->if_xname, ip_version);
			break;
		}
		}

		if (length > PP_BUF_SIZE_DEF(rx_pp) ||
		    (pcb->ipsec_frag_size_set && length > pcb->ipsec_input_frag_size)) {
			// We need to fragment to send up into the netif

			u_int32_t fragment_mtu = PP_BUF_SIZE_DEF(rx_pp);
			if (pcb->ipsec_frag_size_set &&
			    pcb->ipsec_input_frag_size < PP_BUF_SIZE_DEF(rx_pp)) {
				fragment_mtu = pcb->ipsec_input_frag_size;
			}

			mbuf_t fragment_chain = NULL;
			switch (af) {
			case AF_INET: {
				// ip_fragment expects the length in host order
				ip->ip_len = ntohs(ip->ip_len);

				// ip_fragment will modify the original data, don't free
				int fragment_error = ip_fragment(data, pcb->ipsec_ifp, fragment_mtu, TRUE);
				if (fragment_error == 0 && data != NULL) {
					fragment_chain = data;
				} else {
					STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
					STATS_INC(nifs, NETIF_STATS_DROP);
					os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: failed to fragment IPv4 packet of length %zu (%d)\n",
					    pcb->ipsec_ifp->if_xname, length, fragment_error);
				}
				break;
			}
			case AF_INET6: {
				if (length < sizeof(struct ip6_hdr)) {
					mbuf_freem(data);
					STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
					STATS_INC(nifs, NETIF_STATS_DROP);
					os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: failed to fragment IPv6 packet of length %zu < %zu\n",
					    pcb->ipsec_ifp->if_xname, length, sizeof(struct ip6_hdr));
				} else {
					// ip6_do_fragmentation will free the original data on success only
					struct ip6_hdr *ip6 = mtod(data, struct ip6_hdr *);

					int fragment_error = ip6_do_fragmentation(&data, 0, pcb->ipsec_ifp, sizeof(struct ip6_hdr),
					    ip6, NULL, fragment_mtu, ip6->ip6_nxt, htonl(ip6_randomid()));
					if (fragment_error == 0 && data != NULL) {
						fragment_chain = data;
					} else {
						mbuf_freem(data);
						STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
						STATS_INC(nifs, NETIF_STATS_DROP);
						os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: failed to fragment IPv6 packet of length %zu (%d)\n",
						    pcb->ipsec_ifp->if_xname, length, fragment_error);
					}
				}
				break;
			}
			default: {
				// Cannot fragment unknown families
				mbuf_freem(data);
				STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
				STATS_INC(nifs, NETIF_STATS_DROP);
				os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: uknown legacy decrypted packet length %zu > %u\n",
				    pcb->ipsec_ifp->if_xname, length, PP_BUF_SIZE_DEF(rx_pp));
				break;
			}
			}

			if (fragment_chain != NULL) {
				// Add fragments to chain before continuing
				lck_mtx_lock(&pcb->ipsec_input_chain_lock);
				if (pcb->ipsec_input_chain != NULL) {
					pcb->ipsec_input_chain_last->m_nextpkt = fragment_chain;
				} else {
					pcb->ipsec_input_chain = fragment_chain;
				}
				pcb->ipsec_input_chain_count++;
				while (fragment_chain->m_nextpkt) {
					VERIFY(fragment_chain != fragment_chain->m_nextpkt);
					fragment_chain = fragment_chain->m_nextpkt;
					pcb->ipsec_input_chain_count++;
				}
				pcb->ipsec_input_chain_last = fragment_chain;
				lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
			}

			// Make sure to free unused rx packet
			kern_pbufpool_free(rx_pp, rx_ph);

			continue;
		}

		mbuf_pkthdr_setrcvif(data, pcb->ipsec_ifp);

		// Fillout rx packet
		kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
		VERIFY(rx_buf != NULL);
		void *rx_baddr = kern_buflet_get_data_address(rx_buf);
		VERIFY(rx_baddr != NULL);

		// Copy-in data from mbuf to buflet
		mbuf_copydata(data, 0, length, (void *)rx_baddr);
		kern_packet_clear_flow_uuid(rx_ph);     // Zero flow id

		// Finalize and attach the packet
		error = kern_buflet_set_data_offset(rx_buf, 0);
		VERIFY(error == 0);
		error = kern_buflet_set_data_length(rx_buf, (uint16_t)length);
		VERIFY(error == 0);
		error = kern_packet_set_headroom(rx_ph, 0);
		VERIFY(error == 0);
		error = kern_packet_finalize(rx_ph);
		VERIFY(error == 0);
		error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
		VERIFY(error == 0);

		STATS_INC(nifs, NETIF_STATS_RX_PACKETS);
		STATS_INC(nifs, NETIF_STATS_RX_COPY_MBUF);
		bpf_tap_packet_in(pcb->ipsec_ifp, DLT_RAW, rx_ph, NULL, 0);

		rx_ring_stats.kcrsi_slots_transferred++;
		rx_ring_stats.kcrsi_bytes_transferred += length;

		if (!pcb->ipsec_ext_ifdata_stats) {
			ifnet_stat_increment_in(pcb->ipsec_ifp, 1, (uint16_t)length, 0);
		}

		mbuf_freem(data);

		// Advance ring
		rx_pslot = rx_slot;
		rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
	}

	/* Phase 2: pull packets off each kpipe TX ring, decrypt, deliver. */
	for (uint8_t ring_idx = 0; ring_idx < pcb->ipsec_kpipe_count; ring_idx++) {
		struct kern_channel_ring_stat_increment tx_ring_stats;
		bzero(&tx_ring_stats, sizeof(tx_ring_stats));
		kern_channel_ring_t tx_ring = pcb->ipsec_kpipe_txring[ring_idx];
		kern_channel_slot_t tx_pslot = NULL;
		kern_channel_slot_t tx_slot = NULL;
		if (tx_ring == NULL) {
			// Net-If TX ring not set up yet, nothing to read
			goto done;
		}
		// Unlock ipsec before entering ring
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);

		(void)kr_enter(tx_ring, TRUE);

		// Lock again after entering and validate
		lck_rw_lock_shared(&pcb->ipsec_pcb_lock);

		if (tx_ring != pcb->ipsec_kpipe_txring[ring_idx]) {
			/* Ring was replaced while we were unlocked; skip it. */
			goto done;
		}

		tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
		if (tx_slot == NULL) {
			// Nothing to read, don't bother signalling
			goto done;
		}

		while (rx_slot != NULL && tx_slot != NULL) {
			size_t length = 0;
			mbuf_t data = NULL;
			errno_t error = 0;
			uint32_t af;

			// Allocate rx packet
			kern_packet_t rx_ph = 0;
			error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
			if (__improbable(error != 0)) {
				STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_PKT);
				STATS_INC(nifs, NETIF_STATS_DROP);
				break;
			}

			kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);

			// Advance TX ring
			tx_pslot = tx_slot;
			tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);

			if (tx_ph == 0) {
				kern_pbufpool_free(rx_pp, rx_ph);
				continue;
			}

			kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
			VERIFY(tx_buf != NULL);
			uint8_t *tx_baddr = kern_buflet_get_data_address(tx_buf);
			VERIFY(tx_baddr != 0);
			tx_baddr += kern_buflet_get_data_offset(tx_buf);

			length = MIN(kern_packet_get_data_length(tx_ph),
			    pcb->ipsec_slot_size);

			// Increment TX stats
			tx_ring_stats.kcrsi_slots_transferred++;
			tx_ring_stats.kcrsi_bytes_transferred += length;

			if (length >= sizeof(struct ip)) {
				error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, &data);
				if (error == 0) {
					error = mbuf_copyback(data, 0, length, tx_baddr, MBUF_DONTWAIT);
					if (error == 0) {
						// Check for wake packet flag
						/* Wake flag is smuggled in the first byte of the flow uuid. */
						uuid_t flow_uuid;
						kern_packet_get_flow_uuid(tx_ph, &flow_uuid);
						u_int8_t *id_8 = (u_int8_t *)(uintptr_t)flow_uuid;
						if ((id_8[0] & IPSEC_KPIPE_FLAG_WAKE_PKT) == IPSEC_KPIPE_FLAG_WAKE_PKT) {
							os_log_info(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: wake packet flag is set\n",
							    pcb->ipsec_ifp->if_xname);
							data->m_pkthdr.pkt_flags |= PKTF_WAKE_PKT;
						}

						/* Serialize ESP decryption for kpipe input. */
						lck_mtx_lock(&pcb->ipsec_kpipe_decrypt_lock);
						struct ip *ip = mtod(data, struct ip *);
						u_int ip_version = ip->ip_v;
						switch (ip_version) {
						case 4: {
							af = AF_INET;
							/* esp4_input expects host order and header-adjusted length. */
							ip->ip_len = ntohs(ip->ip_len) - sizeof(struct ip);
							ip->ip_off = ntohs(ip->ip_off);

							if (length < ip->ip_len) {
								os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: IPv4 packet length too short (%zu < %u)\n",
								    pcb->ipsec_ifp->if_xname, length, ip->ip_len);
								STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
								STATS_INC(nifs, NETIF_STATS_DROP);
								mbuf_freem(data);
								data = NULL;
							} else {
								data = esp4_input_extended(data, sizeof(struct ip), pcb->ipsec_ifp);
							}
							break;
						}
						case 6: {
							if (length < sizeof(struct ip6_hdr)) {
								os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: IPv6 packet length too short for header %zu\n",
								    pcb->ipsec_ifp->if_xname, length);
								STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
								STATS_INC(nifs, NETIF_STATS_DROP);
								mbuf_freem(data);
								data = NULL;
							} else {
								af = AF_INET6;
								struct ip6_hdr *ip6 = mtod(data, struct ip6_hdr *);
								const size_t ip6_len = sizeof(*ip6) + ntohs(ip6->ip6_plen);
								if (length < ip6_len) {
									os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: IPv6 packet length too short (%zu < %zu)\n",
									    pcb->ipsec_ifp->if_xname, length, ip6_len);
									STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
									STATS_INC(nifs, NETIF_STATS_DROP);
									mbuf_freem(data);
									data = NULL;
								} else {
									int offset = sizeof(struct ip6_hdr);
									esp6_input_extended(&data, &offset, ip6->ip6_nxt, pcb->ipsec_ifp);
								}
							}
							break;
						}
						default: {
							os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: unknown ip version %u\n",
							    pcb->ipsec_ifp->if_xname, ip_version);
							STATS_INC(nifs, NETIF_STATS_DROP);
							mbuf_freem(data);
							data = NULL;
							break;
						}
						}
						lck_mtx_unlock(&pcb->ipsec_kpipe_decrypt_lock);
					} else {
						os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s - mbuf_copyback(%zu) error %d\n", pcb->ipsec_ifp->if_xname, length, error);
						STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
						STATS_INC(nifs, NETIF_STATS_DROP);
						mbuf_freem(data);
						data = NULL;
					}
				} else {
					os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s - mbuf_gethdr error %d\n", pcb->ipsec_ifp->if_xname, error);
					STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
					STATS_INC(nifs, NETIF_STATS_DROP);
				}
			} else {
				os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s - bad packet length %zu\n", pcb->ipsec_ifp->if_xname, length);
				STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
				STATS_INC(nifs, NETIF_STATS_DROP);
			}

			if (data == NULL) {
				// Failed to get decrypted data data
				kern_pbufpool_free(rx_pp, rx_ph);
				continue;
			}

			length = mbuf_pkthdr_len(data);
			if (length > PP_BUF_SIZE_DEF(rx_pp)) {
				// Flush data
				mbuf_freem(data);
				kern_pbufpool_free(rx_pp, rx_ph);
				STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
				STATS_INC(nifs, NETIF_STATS_DROP);
				os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: decrypted packet length %zu > %u\n",
				    pcb->ipsec_ifp->if_xname, length, PP_BUF_SIZE_DEF(rx_pp));
				continue;
			}

			mbuf_pkthdr_setrcvif(data, pcb->ipsec_ifp);

			// Fillout rx packet
			kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
			VERIFY(rx_buf != NULL);
			void *rx_baddr = kern_buflet_get_data_address(rx_buf);
			VERIFY(rx_baddr != NULL);

			// Copy-in data from mbuf to buflet
			mbuf_copydata(data, 0, length, (void *)rx_baddr);
			kern_packet_clear_flow_uuid(rx_ph);     // Zero flow id

			// Finalize and attach the packet
			error = kern_buflet_set_data_offset(rx_buf, 0);
			VERIFY(error == 0);
			error = kern_buflet_set_data_length(rx_buf, (uint16_t)length);
			VERIFY(error == 0);
			error = kern_packet_set_link_header_offset(rx_ph, 0);
			VERIFY(error == 0);
			error = kern_packet_set_network_header_offset(rx_ph, 0);
			VERIFY(error == 0);
			error = kern_packet_finalize(rx_ph);
			VERIFY(error == 0);
			error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
			VERIFY(error == 0);

			STATS_INC(nifs, NETIF_STATS_RX_PACKETS);
			STATS_INC(nifs, NETIF_STATS_RX_COPY_DIRECT);
			bpf_tap_packet_in(pcb->ipsec_ifp, DLT_RAW, rx_ph, NULL, 0);

			rx_ring_stats.kcrsi_slots_transferred++;
			rx_ring_stats.kcrsi_bytes_transferred += length;

			if (!pcb->ipsec_ext_ifdata_stats) {
				ifnet_stat_increment_in(pcb->ipsec_ifp, 1, (uint16_t)length, 0);
			}

			mbuf_freem(data);

			rx_pslot = rx_slot;
			rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
		}

done:
		if (tx_pslot) {
			kern_channel_advance_slot(tx_ring, tx_pslot);
			kern_channel_increment_ring_net_stats(tx_ring, pcb->ipsec_ifp, &tx_ring_stats);
			(void)kern_channel_reclaim(tx_ring);
		}

		// Unlock first, then exit ring
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		if (tx_ring != NULL) {
			if (tx_pslot != NULL) {
				kern_channel_notify(tx_ring, 0);
			}
			kr_exit(tx_ring);
		}

		/* Re-take the pcb lock for the next iteration / final stats. */
		lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
	}

	if (rx_pslot) {
		kern_channel_advance_slot(rx_ring, rx_pslot);
		kern_channel_increment_ring_net_stats(rx_ring, pcb->ipsec_ifp, &rx_ring_stats);
	}


	lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);

	ipsec_data_move_end(pcb);
	return 0;
}
1884
1885 static errno_t
ipsec_nexus_ifattach(struct ipsec_pcb * pcb,struct ifnet_init_eparams * init_params,struct ifnet ** ifp)1886 ipsec_nexus_ifattach(struct ipsec_pcb *pcb,
1887 struct ifnet_init_eparams *init_params,
1888 struct ifnet **ifp)
1889 {
1890 errno_t err;
1891 nexus_controller_t controller = kern_nexus_shared_controller();
1892 struct kern_nexus_net_init net_init;
1893 struct kern_pbufpool_init pp_init;
1894
1895 nexus_name_t provider_name;
1896 snprintf((char *)provider_name, sizeof(provider_name),
1897 "com.apple.netif.%s", pcb->ipsec_if_xname);
1898
1899 struct kern_nexus_provider_init prov_init = {
1900 .nxpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
1901 .nxpi_flags = NXPIF_VIRTUAL_DEVICE,
1902 .nxpi_pre_connect = ipsec_nexus_pre_connect,
1903 .nxpi_connected = ipsec_nexus_connected,
1904 .nxpi_pre_disconnect = ipsec_netif_pre_disconnect,
1905 .nxpi_disconnected = ipsec_nexus_disconnected,
1906 .nxpi_ring_init = ipsec_netif_ring_init,
1907 .nxpi_ring_fini = ipsec_netif_ring_fini,
1908 .nxpi_slot_init = NULL,
1909 .nxpi_slot_fini = NULL,
1910 .nxpi_sync_tx = ipsec_netif_sync_tx,
1911 .nxpi_sync_rx = ipsec_netif_sync_rx,
1912 .nxpi_tx_doorbell = ipsec_netif_tx_doorbell,
1913 };
1914
1915 nexus_attr_t nxa = NULL;
1916 err = kern_nexus_attr_create(&nxa);
1917 IPSEC_IF_VERIFY(err == 0);
1918 if (err != 0) {
1919 os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_attr_create failed: %d\n",
1920 __func__, err);
1921 goto failed;
1922 }
1923
1924 uint64_t slot_buffer_size = pcb->ipsec_slot_size;
1925 err = kern_nexus_attr_set(nxa, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
1926 VERIFY(err == 0);
1927
1928 // Reset ring size for netif nexus to limit memory usage
1929 uint64_t ring_size = pcb->ipsec_netif_ring_size;
1930 err = kern_nexus_attr_set(nxa, NEXUS_ATTR_TX_SLOTS, ring_size);
1931 VERIFY(err == 0);
1932 err = kern_nexus_attr_set(nxa, NEXUS_ATTR_RX_SLOTS, ring_size);
1933 VERIFY(err == 0);
1934
1935 assert(err == 0);
1936
1937 if (ipsec_in_wmm_mode(pcb)) {
1938 os_log(OS_LOG_DEFAULT, "%s: %s enabling wmm mode\n",
1939 __func__, pcb->ipsec_if_xname);
1940
1941 init_params->output_sched_model = IFNET_SCHED_MODEL_DRIVER_MANAGED;
1942
1943 err = kern_nexus_attr_set(nxa, NEXUS_ATTR_TX_RINGS,
1944 IPSEC_NETIF_WMM_TX_RING_COUNT);
1945 VERIFY(err == 0);
1946 err = kern_nexus_attr_set(nxa, NEXUS_ATTR_RX_RINGS,
1947 IPSEC_NETIF_WMM_RX_RING_COUNT);
1948 VERIFY(err == 0);
1949
1950 err = kern_nexus_attr_set(nxa, NEXUS_ATTR_QMAP, NEXUS_QMAP_TYPE_WMM);
1951 VERIFY(err == 0);
1952 }
1953
1954 pcb->ipsec_netif_txring_size = ring_size;
1955
1956 bzero(&pp_init, sizeof(pp_init));
1957 pp_init.kbi_version = KERN_PBUFPOOL_CURRENT_VERSION;
1958 pp_init.kbi_flags |= KBIF_VIRTUAL_DEVICE;
1959 // Note: we need more packets than can be held in the tx and rx rings because
1960 // packets can also be in the AQM queue(s)
1961 pp_init.kbi_packets = pcb->ipsec_netif_ring_size * (2 * pcb->ipsec_kpipe_count + 1);
1962 pp_init.kbi_bufsize = pcb->ipsec_slot_size;
1963 pp_init.kbi_buf_seg_size = IPSEC_IF_DEFAULT_BUF_SEG_SIZE;
1964 pp_init.kbi_max_frags = 1;
1965 (void) snprintf((char *)pp_init.kbi_name, sizeof(pp_init.kbi_name),
1966 "%s", provider_name);
1967 pp_init.kbi_ctx = NULL;
1968 pp_init.kbi_ctx_retain = NULL;
1969 pp_init.kbi_ctx_release = NULL;
1970
1971 err = kern_pbufpool_create(&pp_init, &pcb->ipsec_netif_pp, NULL);
1972 if (err != 0) {
1973 os_log_error(OS_LOG_DEFAULT, "%s pbufbool create failed, error %d\n", __func__, err);
1974 goto failed;
1975 }
1976
1977 err = kern_nexus_controller_register_provider(controller,
1978 ipsec_nx_dom_prov,
1979 provider_name,
1980 &prov_init,
1981 sizeof(prov_init),
1982 nxa,
1983 &pcb->ipsec_nx.if_provider);
1984 IPSEC_IF_VERIFY(err == 0);
1985 if (err != 0) {
1986 os_log_error(OS_LOG_DEFAULT, "%s register provider failed, error %d\n",
1987 __func__, err);
1988 goto failed;
1989 }
1990
1991 bzero(&net_init, sizeof(net_init));
1992 net_init.nxneti_version = KERN_NEXUS_NET_CURRENT_VERSION;
1993 net_init.nxneti_flags = 0;
1994 net_init.nxneti_eparams = init_params;
1995 net_init.nxneti_lladdr = NULL;
1996 net_init.nxneti_prepare = ipsec_netif_prepare;
1997 net_init.nxneti_rx_pbufpool = pcb->ipsec_netif_pp;
1998 net_init.nxneti_tx_pbufpool = pcb->ipsec_netif_pp;
1999 err = kern_nexus_controller_alloc_net_provider_instance(controller,
2000 pcb->ipsec_nx.if_provider,
2001 pcb,
2002 NULL,
2003 &pcb->ipsec_nx.if_instance,
2004 &net_init,
2005 ifp);
2006 IPSEC_IF_VERIFY(err == 0);
2007 if (err != 0) {
2008 os_log_error(OS_LOG_DEFAULT, "%s alloc_net_provider_instance failed, %d\n",
2009 __func__, err);
2010 kern_nexus_controller_deregister_provider(controller,
2011 pcb->ipsec_nx.if_provider);
2012 uuid_clear(pcb->ipsec_nx.if_provider);
2013 goto failed;
2014 }
2015
2016 failed:
2017 if (nxa) {
2018 kern_nexus_attr_destroy(nxa);
2019 }
2020 if (err && pcb->ipsec_netif_pp != NULL) {
2021 kern_pbufpool_destroy(pcb->ipsec_netif_pp);
2022 pcb->ipsec_netif_pp = NULL;
2023 }
2024 return err;
2025 }
2026
2027 static void
ipsec_detach_provider_and_instance(uuid_t provider,uuid_t instance)2028 ipsec_detach_provider_and_instance(uuid_t provider, uuid_t instance)
2029 {
2030 nexus_controller_t controller = kern_nexus_shared_controller();
2031 errno_t err;
2032
2033 if (!uuid_is_null(instance)) {
2034 err = kern_nexus_controller_free_provider_instance(controller,
2035 instance);
2036 if (err != 0) {
2037 os_log_error(OS_LOG_DEFAULT, "%s free_provider_instance failed %d\n",
2038 __func__, err);
2039 }
2040 uuid_clear(instance);
2041 }
2042 if (!uuid_is_null(provider)) {
2043 err = kern_nexus_controller_deregister_provider(controller,
2044 provider);
2045 if (err != 0) {
2046 os_log_error(OS_LOG_DEFAULT, "%s deregister_provider %d\n", __func__, err);
2047 }
2048 uuid_clear(provider);
2049 }
2050 return;
2051 }
2052
2053 static void
ipsec_nexus_detach(struct ipsec_pcb * pcb)2054 ipsec_nexus_detach(struct ipsec_pcb *pcb)
2055 {
2056 ipsec_nx_t nx = &pcb->ipsec_nx;
2057 nexus_controller_t controller = kern_nexus_shared_controller();
2058 errno_t err;
2059
2060 if (!uuid_is_null(nx->fsw_device)) {
2061 err = kern_nexus_ifdetach(controller,
2062 nx->fsw_instance,
2063 nx->fsw_device);
2064 if (err != 0) {
2065 os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_ifdetach ms device failed %d\n",
2066 __func__, err);
2067 }
2068 }
2069
2070 ipsec_detach_provider_and_instance(nx->fsw_provider,
2071 nx->fsw_instance);
2072 ipsec_detach_provider_and_instance(nx->if_provider,
2073 nx->if_instance);
2074
2075 if (pcb->ipsec_netif_pp != NULL) {
2076 kern_pbufpool_destroy(pcb->ipsec_netif_pp);
2077 pcb->ipsec_netif_pp = NULL;
2078 }
2079 memset(nx, 0, sizeof(*nx));
2080 }
2081
/*
 * ipsec_create_fs_provider_and_instance - register a flowswitch nexus
 * provider named "com.apple.<type_name>.<ifname>" and allocate one instance
 * of it. On success *provider and *instance hold the new UUIDs; on failure
 * any partially-registered provider is deregistered and cleared before the
 * error is returned.
 */
static errno_t
ipsec_create_fs_provider_and_instance(struct ipsec_pcb *pcb,
    const char *type_name,
    const char *ifname,
    uuid_t *provider, uuid_t *instance)
{
	nexus_attr_t attr = NULL;
	nexus_controller_t controller = kern_nexus_shared_controller();
	uuid_t dom_prov;
	errno_t err;
	struct kern_nexus_init init;
	nexus_name_t provider_name;

	err = kern_nexus_get_default_domain_provider(NEXUS_TYPE_FLOW_SWITCH,
	    &dom_prov);
	IPSEC_IF_VERIFY(err == 0);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s can't get %s provider, error %d\n",
		    __func__, type_name, err);
		goto failed;
	}

	err = kern_nexus_attr_create(&attr);
	IPSEC_IF_VERIFY(err == 0);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_attr_create failed: %d\n",
		    __func__, err);
		goto failed;
	}

	// Slot buffers sized to match the pcb's configured slot size.
	uint64_t slot_buffer_size = pcb->ipsec_slot_size;
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
	VERIFY(err == 0);

	// Reset ring size for flowswitch nexus to limit memory usage. Larger RX than netif.
	uint64_t tx_ring_size = pcb->ipsec_tx_fsw_ring_size;
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_TX_SLOTS, tx_ring_size);
	VERIFY(err == 0);
	uint64_t rx_ring_size = pcb->ipsec_rx_fsw_ring_size;
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_RX_SLOTS, rx_ring_size);
	VERIFY(err == 0);
	/*
	 * Configure flowswitch to use super-packet (multi-buflet).
	 * This allows flowswitch to perform intra-stack packet aggregation.
	 */
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_MAX_FRAGS,
	    NX_FSW_TCP_RX_AGG_ENABLED() ? NX_PBUF_FRAGS_MAX : 1);
	VERIFY(err == 0);

	snprintf((char *)provider_name, sizeof(provider_name),
	    "com.apple.%s.%s", type_name, ifname);
	err = kern_nexus_controller_register_provider(controller,
	    dom_prov,
	    provider_name,
	    NULL,
	    0,
	    attr,
	    provider);
	// Attributes are copied by registration; destroy our copy either way.
	kern_nexus_attr_destroy(attr);
	attr = NULL;
	IPSEC_IF_VERIFY(err == 0);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s register %s provider failed, error %d\n",
		    __func__, type_name, err);
		goto failed;
	}
	bzero(&init, sizeof(init));
	init.nxi_version = KERN_NEXUS_CURRENT_VERSION;
	err = kern_nexus_controller_alloc_provider_instance(controller,
	    *provider,
	    NULL, NULL,
	    instance, &init);
	IPSEC_IF_VERIFY(err == 0);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s alloc_provider_instance %s failed, %d\n",
		    __func__, type_name, err);
		// Roll back the provider registration on instance failure.
		kern_nexus_controller_deregister_provider(controller,
		    *provider);
		uuid_clear(*provider);
	}
failed:
	return err;
}
2165
/*
 * ipsec_flowswitch_attach - create a flowswitch nexus for the interface,
 * attach it to the netif device port, and capture the flowswitch's agent
 * UUID for later use.
 *
 * On failure, all nexus state is detached and the ifnet is detached; an
 * ifnet_detach() failure at that point is unrecoverable and panics.
 */
static errno_t
ipsec_flowswitch_attach(struct ipsec_pcb *pcb)
{
	nexus_controller_t controller = kern_nexus_shared_controller();
	errno_t err = 0;
	ipsec_nx_t nx = &pcb->ipsec_nx;

	// Allocate flowswitch
	err = ipsec_create_fs_provider_and_instance(pcb,
	    "flowswitch",
	    pcb->ipsec_ifp->if_xname,
	    &nx->fsw_provider,
	    &nx->fsw_instance);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s: failed to create bridge provider and instance\n",
		    __func__);
		goto failed;
	}

	// Attach flowswitch to device port
	err = kern_nexus_ifattach(controller, nx->fsw_instance,
	    NULL, nx->if_instance,
	    FALSE, &nx->fsw_device);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s kern_nexus_ifattach ms device %d\n", __func__, err);
		goto failed;
	}

	// Extract the agent UUID and save for later
	struct kern_nexus *flowswitch_nx = nx_find(nx->fsw_instance, false);
	if (flowswitch_nx != NULL) {
		struct nx_flowswitch *flowswitch = NX_FSW_PRIVATE(flowswitch_nx);
		if (flowswitch != NULL) {
			// Copy under the flowswitch read lock to get a stable UUID.
			FSW_RLOCK(flowswitch);
			uuid_copy(nx->fsw_agent, flowswitch->fsw_agent_uuid);
			FSW_UNLOCK(flowswitch);
		} else {
			os_log_error(OS_LOG_DEFAULT, "ipsec_flowswitch_attach - flowswitch is NULL\n");
		}
		// Drop the reference taken by nx_find().
		nx_release(flowswitch_nx);
	} else {
		os_log_error(OS_LOG_DEFAULT, "ipsec_flowswitch_attach - unable to find flowswitch nexus\n");
	}

	return 0;

failed:
	ipsec_nexus_detach(pcb);

	errno_t detach_error = 0;
	if ((detach_error = ifnet_detach(pcb->ipsec_ifp)) != 0) {
		panic("ipsec_flowswitch_attach - ifnet_detach failed: %d", detach_error);
		/* NOT REACHED */
	}

	return err;
}
2223
2224 #pragma mark Kernel Pipe Nexus
2225
/*
 * ipsec_register_kernel_pipe_nexus - lazily create the global kernel-pipe
 * nexus controller and register the shared "com.apple.nexus.ipsec.kpipe"
 * provider. Reference-counted: only the first caller performs the setup;
 * subsequent callers just bump ipsec_ncd_refcount. On any failure the
 * controller is destroyed and the refcount reset to 0.
 */
static errno_t
ipsec_register_kernel_pipe_nexus(struct ipsec_pcb *pcb)
{
	nexus_attr_t nxa = NULL;
	errno_t result;

	lck_mtx_lock(&ipsec_lock);
	// Fast path: already registered by a previous caller.
	if (ipsec_ncd_refcount++) {
		lck_mtx_unlock(&ipsec_lock);
		return 0;
	}

	result = kern_nexus_controller_create(&ipsec_ncd);
	if (result) {
		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_controller_create failed: %d\n",
		    __FUNCTION__, result);
		goto done;
	}

	uuid_t dom_prov;
	result = kern_nexus_get_default_domain_provider(
		NEXUS_TYPE_KERNEL_PIPE, &dom_prov);
	if (result) {
		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_get_default_domain_provider failed: %d\n",
		    __FUNCTION__, result);
		goto done;
	}

	// Callbacks wiring the kpipe nexus to the ipsec kpipe datapath handlers.
	struct kern_nexus_provider_init prov_init = {
		.nxpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
		.nxpi_flags = NXPIF_VIRTUAL_DEVICE,
		.nxpi_pre_connect = ipsec_nexus_pre_connect,
		.nxpi_connected = ipsec_nexus_connected,
		.nxpi_pre_disconnect = ipsec_nexus_pre_disconnect,
		.nxpi_disconnected = ipsec_nexus_disconnected,
		.nxpi_ring_init = ipsec_kpipe_ring_init,
		.nxpi_ring_fini = ipsec_kpipe_ring_fini,
		.nxpi_slot_init = NULL,
		.nxpi_slot_fini = NULL,
		.nxpi_sync_tx = ipsec_kpipe_sync_tx,
		.nxpi_sync_rx = ipsec_kpipe_sync_rx,
		.nxpi_tx_doorbell = NULL,
	};

	result = kern_nexus_attr_create(&nxa);
	if (result) {
		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_attr_create failed: %d\n",
		    __FUNCTION__, result);
		goto done;
	}

	uint64_t slot_buffer_size = IPSEC_IF_DEFAULT_SLOT_SIZE;
	result = kern_nexus_attr_set(nxa, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
	VERIFY(result == 0);

	// Reset ring size for kernel pipe nexus to limit memory usage
	// Note: It's better to have fewer slots on the kpipe TX ring than the netif
	// so back pressure is applied at the AQM layer
	// Precedence: per-pcb kpipe ring size, then netif ring size, then the
	// global if_ipsec_ring_size default.
	uint64_t ring_size =
	    pcb->ipsec_kpipe_tx_ring_size != 0 ? pcb->ipsec_kpipe_tx_ring_size :
	    pcb->ipsec_netif_ring_size != 0 ? pcb->ipsec_netif_ring_size :
	    if_ipsec_ring_size;
	result = kern_nexus_attr_set(nxa, NEXUS_ATTR_TX_SLOTS, ring_size);
	VERIFY(result == 0);

	ring_size =
	    pcb->ipsec_kpipe_rx_ring_size != 0 ? pcb->ipsec_kpipe_rx_ring_size :
	    pcb->ipsec_netif_ring_size != 0 ? pcb->ipsec_netif_ring_size :
	    if_ipsec_ring_size;
	result = kern_nexus_attr_set(nxa, NEXUS_ATTR_RX_SLOTS, ring_size);
	VERIFY(result == 0);

	result = kern_nexus_controller_register_provider(ipsec_ncd,
	    dom_prov,
	    (const uint8_t *)"com.apple.nexus.ipsec.kpipe",
	    &prov_init,
	    sizeof(prov_init),
	    nxa,
	    &ipsec_kpipe_uuid);
	if (result) {
		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_controller_register_provider failed: %d\n",
		    __FUNCTION__, result);
		goto done;
	}

done:
	if (nxa) {
		kern_nexus_attr_destroy(nxa);
	}

	// On failure, undo everything so the next caller retries from scratch.
	if (result) {
		if (ipsec_ncd) {
			kern_nexus_controller_destroy(ipsec_ncd);
			ipsec_ncd = NULL;
		}
		ipsec_ncd_refcount = 0;
	}

	lck_mtx_unlock(&ipsec_lock);

	return result;
}
2328
2329 static void
ipsec_unregister_kernel_pipe_nexus(void)2330 ipsec_unregister_kernel_pipe_nexus(void)
2331 {
2332 lck_mtx_lock(&ipsec_lock);
2333
2334 VERIFY(ipsec_ncd_refcount > 0);
2335
2336 if (--ipsec_ncd_refcount == 0) {
2337 kern_nexus_controller_destroy(ipsec_ncd);
2338 ipsec_ncd = NULL;
2339 }
2340
2341 lck_mtx_unlock(&ipsec_lock);
2342 }
2343
/* This structure only holds onto kpipe channels that need to be
 * freed in the future, but are cleared from the pcb under lock
 */
struct ipsec_detached_channels {
	int count;                            // number of valid entries in uuids[]
	kern_pbufpool_t pp;                   // pool backing the channels; destroyed with them
	uuid_t uuids[IPSEC_IF_MAX_RING_COUNT]; // instance UUIDs moved out of the pcb
};
2352
/*
 * ipsec_detach_channels - move the pcb's kpipe channel UUIDs and pbufpool
 * into *dc so they can be freed later without holding the pcb lock.
 * Must be called with ipsec_pcb_lock held exclusively. Clears the
 * IPSEC_FLAGS_KPIPE_ALLOCATED flag; if it was never set, dc->count is 0.
 */
static void
ipsec_detach_channels(struct ipsec_pcb *pcb, struct ipsec_detached_channels *dc)
{
	LCK_RW_ASSERT(&pcb->ipsec_pcb_lock, LCK_RW_TYPE_EXCLUSIVE);

	if (!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
		// Nothing allocated: all slots must already be empty.
		for (int i = 0; i < IPSEC_IF_MAX_RING_COUNT; i++) {
			VERIFY(uuid_is_null(pcb->ipsec_kpipe_uuid[i]));
		}
		dc->count = 0;
		return;
	}

	dc->count = pcb->ipsec_kpipe_count;

	VERIFY(dc->count >= 0);
	VERIFY(dc->count <= IPSEC_IF_MAX_RING_COUNT);

	// Transfer ownership of each allocated channel UUID out of the pcb.
	for (int i = 0; i < dc->count; i++) {
		VERIFY(!uuid_is_null(pcb->ipsec_kpipe_uuid[i]));
		uuid_copy(dc->uuids[i], pcb->ipsec_kpipe_uuid[i]);
		uuid_clear(pcb->ipsec_kpipe_uuid[i]);
	}
	// Slots beyond the count must never have been populated.
	for (int i = dc->count; i < IPSEC_IF_MAX_RING_COUNT; i++) {
		VERIFY(uuid_is_null(pcb->ipsec_kpipe_uuid[i]));
	}

	// The pbufpool exists iff there is at least one channel.
	if (dc->count) {
		VERIFY(pcb->ipsec_kpipe_pp);
	} else {
		VERIFY(!pcb->ipsec_kpipe_pp);
	}

	dc->pp = pcb->ipsec_kpipe_pp;

	pcb->ipsec_kpipe_pp = NULL;

	ipsec_flag_clr(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED);
}
2392
2393 static void
ipsec_free_channels(struct ipsec_detached_channels * dc)2394 ipsec_free_channels(struct ipsec_detached_channels *dc)
2395 {
2396 if (!dc->count) {
2397 return;
2398 }
2399
2400 for (int i = 0; i < dc->count; i++) {
2401 errno_t result;
2402 result = kern_nexus_controller_free_provider_instance(ipsec_ncd, dc->uuids[i]);
2403 VERIFY(!result);
2404 }
2405
2406 VERIFY(dc->pp);
2407 kern_pbufpool_destroy(dc->pp);
2408
2409 ipsec_unregister_kernel_pipe_nexus();
2410
2411 memset(dc, 0, sizeof(*dc));
2412 }
2413
/*
 * ipsec_enable_channel - allocate and bind the kernel-pipe channels for the
 * pcb. Requires PRIV_SKYWALK_REGISTER_KERNEL_PIPE. Creates the kpipe
 * pbufpool, allocates ipsec_kpipe_count provider instances, and binds each
 * to either the configured proc UUID or pid (defaulting to the calling
 * proc's pid). On success sets IPSEC_FLAGS_KPIPE_ALLOCATED; on failure all
 * partially-created instances and the pool are torn down.
 */
static errno_t
ipsec_enable_channel(struct ipsec_pcb *pcb, struct proc *proc)
{
	struct kern_nexus_init init;
	struct kern_pbufpool_init pp_init;
	errno_t result;

	kauth_cred_t cred = kauth_cred_get();
	result = priv_check_cred(cred, PRIV_SKYWALK_REGISTER_KERNEL_PIPE, 0);
	if (result) {
		return result;
	}

	VERIFY(pcb->ipsec_kpipe_count);
	VERIFY(!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED));

	result = ipsec_register_kernel_pipe_nexus(pcb);

	lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);

	if (result) {
		os_log_error(OS_LOG_DEFAULT, "%s: %s failed to register kernel pipe nexus\n",
		    __func__, pcb->ipsec_if_xname);
		goto done;
	}

	VERIFY(ipsec_ncd);

	bzero(&pp_init, sizeof(pp_init));
	pp_init.kbi_version = KERN_PBUFPOOL_CURRENT_VERSION;
	pp_init.kbi_flags |= KBIF_VIRTUAL_DEVICE;
	// Note: We only need as many packets as can be held in the tx and rx rings
	pp_init.kbi_packets = pcb->ipsec_netif_ring_size * 2 * pcb->ipsec_kpipe_count;
	pp_init.kbi_bufsize = pcb->ipsec_slot_size;
	pp_init.kbi_buf_seg_size = IPSEC_IF_DEFAULT_BUF_SEG_SIZE;
	pp_init.kbi_max_frags = 1;
	pp_init.kbi_flags |= KBIF_QUANTUM;
	(void) snprintf((char *)pp_init.kbi_name, sizeof(pp_init.kbi_name),
	    "com.apple.kpipe.%s", pcb->ipsec_if_xname);
	pp_init.kbi_ctx = NULL;
	pp_init.kbi_ctx_retain = NULL;
	pp_init.kbi_ctx_release = NULL;

	result = kern_pbufpool_create(&pp_init, &pcb->ipsec_kpipe_pp,
	    NULL);
	if (result != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s: %s pbufbool create failed, error %d\n",
		    __func__, pcb->ipsec_if_xname, result);
		goto done;
	}

	bzero(&init, sizeof(init));
	init.nxi_version = KERN_NEXUS_CURRENT_VERSION;
	init.nxi_tx_pbufpool = pcb->ipsec_kpipe_pp;

	// Allocate and bind one kpipe instance per configured channel.
	for (unsigned int i = 0; i < pcb->ipsec_kpipe_count; i++) {
		VERIFY(uuid_is_null(pcb->ipsec_kpipe_uuid[i]));
		result = kern_nexus_controller_alloc_provider_instance(ipsec_ncd,
		    ipsec_kpipe_uuid, pcb, NULL, &pcb->ipsec_kpipe_uuid[i], &init);

		if (result == 0) {
			nexus_port_t port = NEXUS_PORT_KERNEL_PIPE_CLIENT;
			const bool has_proc_uuid = !uuid_is_null(pcb->ipsec_kpipe_proc_uuid);
			pid_t pid = pcb->ipsec_kpipe_pid;
			// With neither a configured pid nor proc uuid, bind to the caller.
			if (!pid && !has_proc_uuid) {
				pid = proc_pid(proc);
			}
			result = kern_nexus_controller_bind_provider_instance(ipsec_ncd,
			    pcb->ipsec_kpipe_uuid[i], &port,
			    pid, has_proc_uuid ? pcb->ipsec_kpipe_proc_uuid : NULL, NULL,
			    0, has_proc_uuid ? NEXUS_BIND_EXEC_UUID:NEXUS_BIND_PID);
		}

		if (result) {
			/* Unwind all of them on error */
			for (int j = 0; j < IPSEC_IF_MAX_RING_COUNT; j++) {
				if (!uuid_is_null(pcb->ipsec_kpipe_uuid[j])) {
					kern_nexus_controller_free_provider_instance(ipsec_ncd,
					    pcb->ipsec_kpipe_uuid[j]);
					uuid_clear(pcb->ipsec_kpipe_uuid[j]);
				}
			}
			goto done;
		}
	}

done:
	lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);

	if (result) {
		// Tear down the pool and drop the nexus reference taken above.
		if (pcb->ipsec_kpipe_pp != NULL) {
			kern_pbufpool_destroy(pcb->ipsec_kpipe_pp);
			pcb->ipsec_kpipe_pp = NULL;
		}
		ipsec_unregister_kernel_pipe_nexus();
	} else {
		ipsec_flag_set(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED);
	}

	return result;
}
2515
2516 #endif // IPSEC_NEXUS
2517
2518
2519 /* Kernel control functions */
2520
2521 static inline int
ipsec_find_by_unit(u_int32_t unit)2522 ipsec_find_by_unit(u_int32_t unit)
2523 {
2524 struct ipsec_pcb *next_pcb = NULL;
2525 int found = 0;
2526
2527 TAILQ_FOREACH(next_pcb, &ipsec_head, ipsec_chain) {
2528 if (next_pcb->ipsec_unit == unit) {
2529 found = 1;
2530 break;
2531 }
2532 }
2533
2534 return found;
2535 }
2536
/*
 * ipsec_free_pcb - destroy a pcb's locks and queued input, unlink it from
 * the global list, and free it. 'locked' indicates whether the caller
 * already holds ipsec_lock (which protects ipsec_head).
 */
static inline void
ipsec_free_pcb(struct ipsec_pcb *pcb, bool locked)
{
#if IPSEC_NEXUS
	// Drop any mbufs still queued for input before destroying their lock.
	mbuf_freem_list(pcb->ipsec_input_chain);
	pcb->ipsec_input_chain_count = 0;
	lck_mtx_destroy(&pcb->ipsec_input_chain_lock, &ipsec_lck_grp);
	lck_mtx_destroy(&pcb->ipsec_kpipe_encrypt_lock, &ipsec_lck_grp);
	lck_mtx_destroy(&pcb->ipsec_kpipe_decrypt_lock, &ipsec_lck_grp);
#endif // IPSEC_NEXUS
	lck_mtx_destroy(&pcb->ipsec_pcb_data_move_lock, &ipsec_lck_grp);
	lck_rw_destroy(&pcb->ipsec_pcb_lock, &ipsec_lck_grp);
	// Take ipsec_lock only if the caller doesn't already hold it.
	if (!locked) {
		lck_mtx_lock(&ipsec_lock);
	}
	TAILQ_REMOVE(&ipsec_head, pcb, ipsec_chain);
	if (!locked) {
		lck_mtx_unlock(&ipsec_lock);
	}
	zfree(ipsec_pcb_zone, pcb);
}
2558
/*
 * ipsec_ctl_setup - allocate a new pcb for a kernel-control unit.
 * If *unit is 0, picks the first free unit number; otherwise verifies the
 * requested unit is unused (EBUSY if taken). Also assigns a unique
 * interface id, handling 32-bit wraparound by scanning for a gap, and
 * inserts the pcb into the global list in id order.
 */
static errno_t
ipsec_ctl_setup(u_int32_t *unit, void **unitinfo)
{
	if (unit == NULL || unitinfo == NULL) {
		return EINVAL;
	}

	lck_mtx_lock(&ipsec_lock);

	/* Find next available unit */
	if (*unit == 0) {
		*unit = 1;
		while (*unit != ctl_maxunit) {
			if (ipsec_find_by_unit(*unit)) {
				(*unit)++;
			} else {
				break;
			}
		}
		if (*unit == ctl_maxunit) {
			// Every unit number is in use.
			lck_mtx_unlock(&ipsec_lock);
			return EBUSY;
		}
	} else if (ipsec_find_by_unit(*unit)) {
		lck_mtx_unlock(&ipsec_lock);
		return EBUSY;
	}

	/* Find some open interface id */
	u_int32_t chosen_unique_id = 1;
	struct ipsec_pcb *next_pcb = TAILQ_LAST(&ipsec_head, ipsec_list);
	if (next_pcb != NULL) {
		/* List was not empty, add one to the last item */
		chosen_unique_id = next_pcb->ipsec_unique_id + 1;
		next_pcb = NULL;

		/*
		 * If this wrapped the id number, start looking at
		 * the front of the list for an unused id.
		 */
		if (chosen_unique_id == 0) {
			/* Find the next unused ID */
			chosen_unique_id = 1;
			TAILQ_FOREACH(next_pcb, &ipsec_head, ipsec_chain) {
				if (next_pcb->ipsec_unique_id > chosen_unique_id) {
					/* We found a gap */
					break;
				}

				chosen_unique_id = next_pcb->ipsec_unique_id + 1;
			}
		}
	}

	struct ipsec_pcb *pcb = zalloc_flags(ipsec_pcb_zone, Z_WAITOK | Z_ZERO);

	*unitinfo = pcb;
	pcb->ipsec_unit = *unit;
	pcb->ipsec_unique_id = chosen_unique_id;

	// Insert before the gap found above (keeps the list sorted by id),
	// or at the tail when no wraparound scan was needed.
	if (next_pcb != NULL) {
		TAILQ_INSERT_BEFORE(next_pcb, pcb, ipsec_chain);
	} else {
		TAILQ_INSERT_TAIL(&ipsec_head, pcb, ipsec_chain);
	}

	lck_mtx_unlock(&ipsec_lock);

	return 0;
}
2629
/*
 * ipsec_ctl_bind - kernel-control bind callback. Allocates the pcb via
 * ipsec_ctl_setup() if one doesn't exist yet, then initializes its default
 * configuration and locks. Returns EINVAL if no pcb could be obtained.
 */
static errno_t
ipsec_ctl_bind(kern_ctl_ref kctlref,
    struct sockaddr_ctl *sac,
    void **unitinfo)
{
	// Lazily create the pcb if connect/bind is the first entry point.
	if (*unitinfo == NULL) {
		u_int32_t unit = 0;
		(void)ipsec_ctl_setup(&unit, unitinfo);
	}

	struct ipsec_pcb *pcb = (struct ipsec_pcb *)*unitinfo;
	if (pcb == NULL) {
		return EINVAL;
	}

	/* Setup the protocol control block */
	pcb->ipsec_ctlref = kctlref;
	pcb->ipsec_unit = sac->sc_unit;
	pcb->ipsec_output_service_class = MBUF_SC_OAM;

#if IPSEC_NEXUS
	// Defaults; may be overridden by setsockopt before connect.
	pcb->ipsec_use_netif = false;
	pcb->ipsec_slot_size = IPSEC_IF_DEFAULT_SLOT_SIZE;
	pcb->ipsec_netif_ring_size = if_ipsec_ring_size;
	pcb->ipsec_tx_fsw_ring_size = if_ipsec_tx_fsw_ring_size;
	pcb->ipsec_rx_fsw_ring_size = if_ipsec_rx_fsw_ring_size;
#endif // IPSEC_NEXUS

	lck_rw_init(&pcb->ipsec_pcb_lock, &ipsec_lck_grp, &ipsec_lck_attr);
	lck_mtx_init(&pcb->ipsec_pcb_data_move_lock, &ipsec_lck_grp, &ipsec_lck_attr);
#if IPSEC_NEXUS
	pcb->ipsec_input_chain_count = 0;
	lck_mtx_init(&pcb->ipsec_input_chain_lock, &ipsec_lck_grp, &ipsec_lck_attr);
	lck_mtx_init(&pcb->ipsec_kpipe_encrypt_lock, &ipsec_lck_grp, &ipsec_lck_attr);
	lck_mtx_init(&pcb->ipsec_kpipe_decrypt_lock, &ipsec_lck_grp, &ipsec_lck_attr);
#endif // IPSEC_NEXUS

	return 0;
}
2669
/*
 * ipsec_ctl_connect - kernel-control connect callback. Creates and attaches
 * the ipsec ifnet for this pcb, either as a Skywalk-native netif (with
 * optional kpipe channels and a flowswitch) or as a classic ifnet with an
 * ipsec_start output path. On success the interface is marked IFF_RUNNING.
 * On most failures the pcb is freed and *unitinfo cleared.
 */
static errno_t
ipsec_ctl_connect(kern_ctl_ref kctlref,
    struct sockaddr_ctl *sac,
    void **unitinfo)
{
	struct ifnet_init_eparams ipsec_init = {};
	errno_t result = 0;

	// Tolerate connect arriving before bind: create/initialize the pcb here.
	if (*unitinfo == NULL) {
		(void)ipsec_ctl_bind(kctlref, sac, unitinfo);
	}

	struct ipsec_pcb *pcb = *unitinfo;
	if (pcb == NULL) {
		return EINVAL;
	}

	/* Handle case where ipsec_ctl_setup() was called, but ipsec_ctl_bind() was not */
	if (pcb->ipsec_ctlref == NULL) {
		(void)ipsec_ctl_bind(kctlref, sac, unitinfo);
	}

	// Control units are 1-based; interface unit numbers are 0-based.
	snprintf(pcb->ipsec_if_xname, sizeof(pcb->ipsec_if_xname), "ipsec%d", pcb->ipsec_unit - 1);
	snprintf(pcb->ipsec_unique_name, sizeof(pcb->ipsec_unique_name), "ipsecid%d", pcb->ipsec_unique_id - 1);
	os_log(OS_LOG_DEFAULT, "ipsec_ctl_connect: creating interface %s (id %s)\n", pcb->ipsec_if_xname, pcb->ipsec_unique_name);

	/* Create the interface */
	bzero(&ipsec_init, sizeof(ipsec_init));
	ipsec_init.ver = IFNET_INIT_CURRENT_VERSION;
	ipsec_init.len = sizeof(ipsec_init);

#if IPSEC_NEXUS
	if (pcb->ipsec_use_netif) {
		ipsec_init.flags = (IFNET_INIT_SKYWALK_NATIVE | IFNET_INIT_NX_NOAUTO);
	} else
#endif // IPSEC_NEXUS
	{
		// Legacy path: classic ifnet with a start routine.
		ipsec_init.flags = IFNET_INIT_NX_NOAUTO;
		ipsec_init.start = ipsec_start;
	}
	ipsec_init.name = "ipsec";
	ipsec_init.unit = pcb->ipsec_unit - 1;
	ipsec_init.uniqueid = pcb->ipsec_unique_name;
	ipsec_init.uniqueid_len = (uint32_t)strlen(pcb->ipsec_unique_name);
	ipsec_init.family = IFNET_FAMILY_IPSEC;
	ipsec_init.type = IFT_OTHER;
	ipsec_init.demux = ipsec_demux;
	ipsec_init.add_proto = ipsec_add_proto;
	ipsec_init.del_proto = ipsec_del_proto;
	ipsec_init.softc = pcb;
	ipsec_init.ioctl = ipsec_ioctl;
	ipsec_init.free = ipsec_detached;

#if IPSEC_NEXUS
	/* We don't support kpipes without a netif */
	if (pcb->ipsec_kpipe_count && !pcb->ipsec_use_netif) {
		result = ENOTSUP;
		os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - kpipe requires netif: failed %d\n", result);
		ipsec_free_pcb(pcb, false);
		*unitinfo = NULL;
		return result;
	}

	if (if_ipsec_debug != 0) {
		printf("%s: %s%d use_netif %d kpipe_count %d slot_size %u ring_size %u "
		    "kpipe_tx_ring_size %u kpipe_rx_ring_size %u\n",
		    __func__,
		    ipsec_init.name, ipsec_init.unit,
		    pcb->ipsec_use_netif,
		    pcb->ipsec_kpipe_count,
		    pcb->ipsec_slot_size,
		    pcb->ipsec_netif_ring_size,
		    pcb->ipsec_kpipe_tx_ring_size,
		    pcb->ipsec_kpipe_rx_ring_size);
	}
	if (pcb->ipsec_use_netif) {
		// Kpipe channels must exist before the netif attach.
		if (pcb->ipsec_kpipe_count) {
			result = ipsec_enable_channel(pcb, current_proc());
			if (result) {
				os_log_error(OS_LOG_DEFAULT, "%s: %s failed to enable channels\n",
				    __func__, pcb->ipsec_if_xname);
				ipsec_free_pcb(pcb, false);
				*unitinfo = NULL;
				return result;
			}
		}

		result = ipsec_nexus_ifattach(pcb, &ipsec_init, &pcb->ipsec_ifp);
		if (result != 0) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - ipsec_nexus_ifattach failed: %d\n", result);
			ipsec_free_pcb(pcb, false);
			*unitinfo = NULL;
			return result;
		}

		result = ipsec_flowswitch_attach(pcb);
		if (result != 0) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - ipsec_flowswitch_attach failed: %d\n", result);
			// Do not call ipsec_free_pcb(). We will be attached already, and will be freed later
			// in ipsec_detached().
			*unitinfo = NULL;
			return result;
		}

		/* Attach to bpf */
		bpfattach(pcb->ipsec_ifp, DLT_RAW, 0);
	} else
#endif // IPSEC_NEXUS
	{
		result = ifnet_allocate_extended(&ipsec_init, &pcb->ipsec_ifp);
		if (result != 0) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - ifnet_allocate failed: %d\n", result);
			ipsec_free_pcb(pcb, false);
			*unitinfo = NULL;
			return result;
		}
		ipsec_ifnet_set_attrs(pcb->ipsec_ifp);

		/* Attach the interface */
		result = ifnet_attach(pcb->ipsec_ifp, NULL);
		if (result != 0) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - ifnet_attach failed: %d\n", result);
			ifnet_release(pcb->ipsec_ifp);
			ipsec_free_pcb(pcb, false);
			*unitinfo = NULL;
			return result;
		}

		/* Attach to bpf */
		bpfattach(pcb->ipsec_ifp, DLT_NULL, 0);
	}

#if IPSEC_NEXUS
	/*
	 * Mark the data path as ready.
	 * If kpipe nexus is being used then the data path is marked ready only when a kpipe channel is connected.
	 */
	if (pcb->ipsec_kpipe_count == 0) {
		lck_mtx_lock(&pcb->ipsec_pcb_data_move_lock);
		IPSEC_SET_DATA_PATH_READY(pcb);
		lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
	}
#endif

	/* The interface's resources are allocated, mark it as running */
	ifnet_set_flags(pcb->ipsec_ifp, IFF_RUNNING, IFF_RUNNING);

	return 0;
}
2819
2820 static errno_t
ipsec_detach_ip(ifnet_t interface,protocol_family_t protocol,socket_t pf_socket)2821 ipsec_detach_ip(ifnet_t interface,
2822 protocol_family_t protocol,
2823 socket_t pf_socket)
2824 {
2825 errno_t result = EPROTONOSUPPORT;
2826
2827 /* Attempt a detach */
2828 if (protocol == PF_INET) {
2829 struct ifreq ifr;
2830
2831 bzero(&ifr, sizeof(ifr));
2832 snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s%d",
2833 ifnet_name(interface), ifnet_unit(interface));
2834
2835 result = sock_ioctl(pf_socket, SIOCPROTODETACH, &ifr);
2836 } else if (protocol == PF_INET6) {
2837 struct in6_ifreq ifr6;
2838
2839 bzero(&ifr6, sizeof(ifr6));
2840 snprintf(ifr6.ifr_name, sizeof(ifr6.ifr_name), "%s%d",
2841 ifnet_name(interface), ifnet_unit(interface));
2842
2843 result = sock_ioctl(pf_socket, SIOCPROTODETACH_IN6, &ifr6);
2844 }
2845
2846 return result;
2847 }
2848
2849 static void
ipsec_remove_address(ifnet_t interface,protocol_family_t protocol,ifaddr_t address,socket_t pf_socket)2850 ipsec_remove_address(ifnet_t interface,
2851 protocol_family_t protocol,
2852 ifaddr_t address,
2853 socket_t pf_socket)
2854 {
2855 errno_t result = 0;
2856
2857 /* Attempt a detach */
2858 if (protocol == PF_INET) {
2859 struct ifreq ifr;
2860
2861 bzero(&ifr, sizeof(ifr));
2862 snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s%d",
2863 ifnet_name(interface), ifnet_unit(interface));
2864 result = ifaddr_address(address, &ifr.ifr_addr, sizeof(ifr.ifr_addr));
2865 if (result != 0) {
2866 os_log_error(OS_LOG_DEFAULT, "ipsec_remove_address - ifaddr_address failed: %d", result);
2867 } else {
2868 result = sock_ioctl(pf_socket, SIOCDIFADDR, &ifr);
2869 if (result != 0) {
2870 os_log_error(OS_LOG_DEFAULT, "ipsec_remove_address - SIOCDIFADDR failed: %d", result);
2871 }
2872 }
2873 } else if (protocol == PF_INET6) {
2874 struct in6_ifreq ifr6;
2875
2876 bzero(&ifr6, sizeof(ifr6));
2877 snprintf(ifr6.ifr_name, sizeof(ifr6.ifr_name), "%s%d",
2878 ifnet_name(interface), ifnet_unit(interface));
2879 result = ifaddr_address(address, (struct sockaddr*)&ifr6.ifr_addr,
2880 sizeof(ifr6.ifr_addr));
2881 if (result != 0) {
2882 os_log_error(OS_LOG_DEFAULT, "ipsec_remove_address - ifaddr_address failed (v6): %d",
2883 result);
2884 } else {
2885 result = sock_ioctl(pf_socket, SIOCDIFADDR_IN6, &ifr6);
2886 if (result != 0) {
2887 os_log_error(OS_LOG_DEFAULT, "ipsec_remove_address - SIOCDIFADDR_IN6 failed: %d",
2888 result);
2889 }
2890 }
2891 }
2892 }
2893
2894 static void
ipsec_cleanup_family(ifnet_t interface,protocol_family_t protocol)2895 ipsec_cleanup_family(ifnet_t interface,
2896 protocol_family_t protocol)
2897 {
2898 errno_t result = 0;
2899 socket_t pf_socket = NULL;
2900 ifaddr_t *addresses = NULL;
2901 int i;
2902
2903 if (protocol != PF_INET && protocol != PF_INET6) {
2904 os_log_error(OS_LOG_DEFAULT, "ipsec_cleanup_family - invalid protocol family %d\n", protocol);
2905 return;
2906 }
2907
2908 /* Create a socket for removing addresses and detaching the protocol */
2909 result = sock_socket(protocol, SOCK_DGRAM, 0, NULL, NULL, &pf_socket);
2910 if (result != 0) {
2911 if (result != EAFNOSUPPORT) {
2912 os_log_error(OS_LOG_DEFAULT, "ipsec_cleanup_family - failed to create %s socket: %d\n",
2913 protocol == PF_INET ? "IP" : "IPv6", result);
2914 }
2915 goto cleanup;
2916 }
2917
2918 /* always set SS_PRIV, we want to close and detach regardless */
2919 sock_setpriv(pf_socket, 1);
2920
2921 result = ipsec_detach_ip(interface, protocol, pf_socket);
2922 if (result == 0 || result == ENXIO) {
2923 /* We are done! We either detached or weren't attached. */
2924 goto cleanup;
2925 } else if (result != EBUSY) {
2926 /* Uh, not really sure what happened here... */
2927 os_log_error(OS_LOG_DEFAULT, "ipsec_cleanup_family - ipsec_detach_ip failed: %d\n", result);
2928 goto cleanup;
2929 }
2930
2931 /*
2932 * At this point, we received an EBUSY error. This means there are
2933 * addresses attached. We should detach them and then try again.
2934 */
2935 result = ifnet_get_address_list_family(interface, &addresses, (sa_family_t)protocol);
2936 if (result != 0) {
2937 os_log_error(OS_LOG_DEFAULT, "fnet_get_address_list_family(%s%d, 0xblah, %s) - failed: %d\n",
2938 ifnet_name(interface), ifnet_unit(interface),
2939 protocol == PF_INET ? "PF_INET" : "PF_INET6", result);
2940 goto cleanup;
2941 }
2942
2943 for (i = 0; addresses[i] != 0; i++) {
2944 ipsec_remove_address(interface, protocol, addresses[i], pf_socket);
2945 }
2946 ifnet_free_address_list(addresses);
2947 addresses = NULL;
2948
2949 /*
2950 * The addresses should be gone, we should try the remove again.
2951 */
2952 result = ipsec_detach_ip(interface, protocol, pf_socket);
2953 if (result != 0 && result != ENXIO) {
2954 os_log_error(OS_LOG_DEFAULT, "ipsec_cleanup_family - ipsec_detach_ip failed: %d\n", result);
2955 }
2956
2957 cleanup:
2958 if (pf_socket != NULL) {
2959 sock_close(pf_socket);
2960 }
2961
2962 if (addresses != NULL) {
2963 ifnet_free_address_list(addresses);
2964 }
2965 }
2966
/*
 * Kernel control disconnect handler.  Tears down the PCB associated with
 * the control socket: drains in-flight data-path threads, stops nexus
 * rings, detaches channels, removes addresses/protocols from the ifnet,
 * and detaches the interface.  Two teardown orders exist depending on
 * whether a netif nexus is attached (IPSEC_NEXUS path) or not.
 * Always returns 0; EINVAL only if there is no PCB.
 */
static errno_t
ipsec_ctl_disconnect(__unused kern_ctl_ref kctlref,
    __unused u_int32_t unit,
    void *unitinfo)
{
	struct ipsec_pcb *pcb = unitinfo;
	ifnet_t ifp = NULL;
	errno_t result = 0;

	if (pcb == NULL) {
		return EINVAL;
	}

	/* Wait until all threads in the data paths are done. */
	ipsec_wait_data_move_drain(pcb);

#if IPSEC_NEXUS
	// Tell the nexus to stop all rings
	if (pcb->ipsec_netif_nexus != NULL) {
		kern_nexus_stop(pcb->ipsec_netif_nexus);
	}
#endif // IPSEC_NEXUS

	lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);

#if IPSEC_NEXUS
	if (if_ipsec_debug != 0) {
		printf("ipsec_ctl_disconnect: detaching interface %s (id %s)\n",
		    pcb->ipsec_if_xname, pcb->ipsec_unique_name);
	}

	/* Channels are detached under the lock but freed after it is dropped. */
	struct ipsec_detached_channels dc;
	ipsec_detach_channels(pcb, &dc);
#endif // IPSEC_NEXUS

	pcb->ipsec_ctlref = NULL;

	ifp = pcb->ipsec_ifp;
	if (ifp != NULL) {
#if IPSEC_NEXUS
		if (pcb->ipsec_netif_nexus != NULL) {
			/*
			 * Quiesce the interface and flush any pending outbound packets.
			 */
			if_down(ifp);

			/*
			 * Suspend data movement and wait for IO threads to exit.
			 * We can't rely on the logic in dlil_quiesce_and_detach_nexuses() to
			 * do this because ipsec nexuses are attached/detached separately.
			 */
			ifnet_datamov_suspend_and_drain(ifp);
			if ((result = ifnet_detach(ifp)) != 0) {
				panic("ipsec_ctl_disconnect - ifnet_detach failed: %d", result);
				/* NOT REACHED */
			}

			/*
			 * We want to do everything in our power to ensure that the interface
			 * really goes away when the socket is closed. We must remove IP/IPv6
			 * addresses and detach the protocols. Finally, we can remove and
			 * release the interface.
			 */
			key_delsp_for_ipsec_if(ifp);

			ipsec_cleanup_family(ifp, AF_INET);
			ipsec_cleanup_family(ifp, AF_INET6);

			lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);

			ipsec_free_channels(&dc);

			ipsec_nexus_detach(pcb);

			/* Decrement refcnt added by ifnet_datamov_suspend_and_drain(). */
			ifnet_datamov_resume(ifp);
		} else
#endif // IPSEC_NEXUS
		{
			/* Non-nexus path: drop the lock before the (blocking) cleanup. */
			lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);

#if IPSEC_NEXUS
			ipsec_free_channels(&dc);
#endif // IPSEC_NEXUS

			/*
			 * We want to do everything in our power to ensure that the interface
			 * really goes away when the socket is closed. We must remove IP/IPv6
			 * addresses and detach the protocols. Finally, we can remove and
			 * release the interface.
			 */
			key_delsp_for_ipsec_if(ifp);

			ipsec_cleanup_family(ifp, AF_INET);
			ipsec_cleanup_family(ifp, AF_INET6);

			/*
			 * Detach now; ipsec_detach() will be called asynchronously once
			 * the I/O reference count drops to 0. There we will invoke
			 * ifnet_release().
			 */
			if ((result = ifnet_detach(ifp)) != 0) {
				os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_disconnect - ifnet_detach failed: %d\n", result);
			}
		}
	} else {
		// Bound, but not connected
		lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
		ipsec_free_pcb(pcb, false);
	}

	return 0;
}
3080
3081 static errno_t
ipsec_ctl_send(__unused kern_ctl_ref kctlref,__unused u_int32_t unit,__unused void * unitinfo,mbuf_t m,__unused int flags)3082 ipsec_ctl_send(__unused kern_ctl_ref kctlref,
3083 __unused u_int32_t unit,
3084 __unused void *unitinfo,
3085 mbuf_t m,
3086 __unused int flags)
3087 {
3088 /* Receive messages from the control socket. Currently unused. */
3089 mbuf_freem(m);
3090 return 0;
3091 }
3092
3093 static errno_t
ipsec_ctl_setopt(__unused kern_ctl_ref kctlref,__unused u_int32_t unit,void * unitinfo,int opt,void * data,size_t len)3094 ipsec_ctl_setopt(__unused kern_ctl_ref kctlref,
3095 __unused u_int32_t unit,
3096 void *unitinfo,
3097 int opt,
3098 void *data,
3099 size_t len)
3100 {
3101 errno_t result = 0;
3102 struct ipsec_pcb *pcb = unitinfo;
3103 if (pcb == NULL) {
3104 return EINVAL;
3105 }
3106
3107 /* check for privileges for privileged options */
3108 switch (opt) {
3109 case IPSEC_OPT_FLAGS:
3110 case IPSEC_OPT_EXT_IFDATA_STATS:
3111 case IPSEC_OPT_SET_DELEGATE_INTERFACE:
3112 case IPSEC_OPT_OUTPUT_TRAFFIC_CLASS:
3113 case IPSEC_OPT_OUTPUT_DSCP_MAPPING:
3114 if (kauth_cred_issuser(kauth_cred_get()) == 0) {
3115 return EPERM;
3116 }
3117 break;
3118 }
3119
3120 switch (opt) {
3121 case IPSEC_OPT_FLAGS: {
3122 if (len != sizeof(u_int32_t)) {
3123 result = EMSGSIZE;
3124 } else {
3125 pcb->ipsec_external_flags = *(u_int32_t *)data;
3126 }
3127 break;
3128 }
3129
3130 case IPSEC_OPT_EXT_IFDATA_STATS: {
3131 if (len != sizeof(int)) {
3132 result = EMSGSIZE;
3133 break;
3134 }
3135 if (pcb->ipsec_ifp == NULL) {
3136 // Only can set after connecting
3137 result = EINVAL;
3138 break;
3139 }
3140 pcb->ipsec_ext_ifdata_stats = (*(int *)data) ? 1 : 0;
3141 break;
3142 }
3143
3144 case IPSEC_OPT_INC_IFDATA_STATS_IN:
3145 case IPSEC_OPT_INC_IFDATA_STATS_OUT: {
3146 struct ipsec_stats_param *utsp = (struct ipsec_stats_param *)data;
3147
3148 if (utsp == NULL || len < sizeof(struct ipsec_stats_param)) {
3149 result = EINVAL;
3150 break;
3151 }
3152 if (pcb->ipsec_ifp == NULL) {
3153 // Only can set after connecting
3154 result = EINVAL;
3155 break;
3156 }
3157 if (!pcb->ipsec_ext_ifdata_stats) {
3158 result = EINVAL;
3159 break;
3160 }
3161 if (opt == IPSEC_OPT_INC_IFDATA_STATS_IN) {
3162 ifnet_stat_increment_in(pcb->ipsec_ifp, (uint32_t)utsp->utsp_packets,
3163 (uint32_t)utsp->utsp_bytes, (uint32_t)utsp->utsp_errors);
3164 } else {
3165 ifnet_stat_increment_out(pcb->ipsec_ifp, (uint32_t)utsp->utsp_packets,
3166 (uint32_t)utsp->utsp_bytes, (uint32_t)utsp->utsp_errors);
3167 }
3168 break;
3169 }
3170
3171 case IPSEC_OPT_SET_DELEGATE_INTERFACE: {
3172 ifnet_t del_ifp = NULL;
3173 char name[IFNAMSIZ];
3174
3175 if (len > IFNAMSIZ - 1) {
3176 result = EMSGSIZE;
3177 break;
3178 }
3179 if (pcb->ipsec_ifp == NULL) {
3180 // Only can set after connecting
3181 result = EINVAL;
3182 break;
3183 }
3184 if (len != 0) { /* if len==0, del_ifp will be NULL causing the delegate to be removed */
3185 bcopy(data, name, len);
3186 name[len] = 0;
3187 result = ifnet_find_by_name(name, &del_ifp);
3188 }
3189 if (result == 0) {
3190 os_log_error(OS_LOG_DEFAULT, "%s IPSEC_OPT_SET_DELEGATE_INTERFACE %s to %s\n",
3191 __func__, pcb->ipsec_ifp->if_xname,
3192 del_ifp ? del_ifp->if_xname : "NULL");
3193
3194 result = ifnet_set_delegate(pcb->ipsec_ifp, del_ifp);
3195 if (del_ifp) {
3196 ifnet_release(del_ifp);
3197 }
3198 }
3199 break;
3200 }
3201
3202 case IPSEC_OPT_OUTPUT_TRAFFIC_CLASS: {
3203 if (len != sizeof(int)) {
3204 result = EMSGSIZE;
3205 break;
3206 }
3207 if (pcb->ipsec_ifp == NULL) {
3208 // Only can set after connecting
3209 result = EINVAL;
3210 break;
3211 }
3212 mbuf_svc_class_t output_service_class = so_tc2msc(*(int *)data);
3213 if (output_service_class == MBUF_SC_UNSPEC) {
3214 pcb->ipsec_output_service_class = MBUF_SC_OAM;
3215 } else {
3216 pcb->ipsec_output_service_class = output_service_class;
3217 }
3218 os_log_error(OS_LOG_DEFAULT, "%s IPSEC_OPT_OUTPUT_TRAFFIC_CLASS %s svc %d\n",
3219 __func__, pcb->ipsec_ifp->if_xname,
3220 pcb->ipsec_output_service_class);
3221 break;
3222 }
3223
3224 #if IPSEC_NEXUS
3225 case IPSEC_OPT_ENABLE_CHANNEL: {
3226 if (len != sizeof(int)) {
3227 result = EMSGSIZE;
3228 break;
3229 }
3230 if (pcb->ipsec_ifp != NULL) {
3231 // Only can set before connecting
3232 result = EINVAL;
3233 break;
3234 }
3235 if ((*(int *)data) != 0 &&
3236 (*(int *)data) != 1 &&
3237 (*(int *)data) != IPSEC_IF_WMM_RING_COUNT) {
3238 result = EINVAL;
3239 break;
3240 }
3241 lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
3242 pcb->ipsec_kpipe_count = *(int *)data;
3243 lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
3244 break;
3245 }
3246
3247 case IPSEC_OPT_CHANNEL_BIND_PID: {
3248 if (len != sizeof(pid_t)) {
3249 result = EMSGSIZE;
3250 break;
3251 }
3252 if (pcb->ipsec_ifp != NULL) {
3253 // Only can set before connecting
3254 result = EINVAL;
3255 break;
3256 }
3257 lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
3258 pcb->ipsec_kpipe_pid = *(pid_t *)data;
3259 lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
3260 break;
3261 }
3262
3263 case IPSEC_OPT_CHANNEL_BIND_UUID: {
3264 if (len != sizeof(uuid_t)) {
3265 result = EMSGSIZE;
3266 break;
3267 }
3268 if (pcb->ipsec_ifp != NULL) {
3269 // Only can set before connecting
3270 result = EINVAL;
3271 break;
3272 }
3273 lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
3274 uuid_copy(pcb->ipsec_kpipe_proc_uuid, *((uuid_t *)data));
3275 lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
3276 break;
3277 }
3278
3279 case IPSEC_OPT_ENABLE_FLOWSWITCH: {
3280 if (len != sizeof(int)) {
3281 result = EMSGSIZE;
3282 break;
3283 }
3284 if (pcb->ipsec_ifp == NULL) {
3285 // Only can set after connecting
3286 result = EINVAL;
3287 break;
3288 }
3289 if (!if_is_fsw_transport_netagent_enabled()) {
3290 result = ENOTSUP;
3291 break;
3292 }
3293 if (uuid_is_null(pcb->ipsec_nx.fsw_agent)) {
3294 result = ENOENT;
3295 break;
3296 }
3297
3298 uint32_t flags = netagent_get_flags(pcb->ipsec_nx.fsw_agent);
3299
3300 if (*(int *)data) {
3301 flags |= (NETAGENT_FLAG_NEXUS_PROVIDER |
3302 NETAGENT_FLAG_NEXUS_LISTENER);
3303 result = netagent_set_flags(pcb->ipsec_nx.fsw_agent, flags);
3304 pcb->ipsec_needs_netagent = true;
3305 } else {
3306 pcb->ipsec_needs_netagent = false;
3307 flags &= ~(NETAGENT_FLAG_NEXUS_PROVIDER |
3308 NETAGENT_FLAG_NEXUS_LISTENER);
3309 result = netagent_set_flags(pcb->ipsec_nx.fsw_agent, flags);
3310 }
3311 break;
3312 }
3313
3314 case IPSEC_OPT_INPUT_FRAG_SIZE: {
3315 if (len != sizeof(u_int32_t)) {
3316 result = EMSGSIZE;
3317 break;
3318 }
3319 u_int32_t input_frag_size = *(u_int32_t *)data;
3320 if (input_frag_size <= sizeof(struct ip6_hdr)) {
3321 pcb->ipsec_frag_size_set = FALSE;
3322 pcb->ipsec_input_frag_size = 0;
3323 } else {
3324 pcb->ipsec_frag_size_set = TRUE;
3325 pcb->ipsec_input_frag_size = input_frag_size;
3326 }
3327 break;
3328 }
3329 case IPSEC_OPT_ENABLE_NETIF: {
3330 if (len != sizeof(int)) {
3331 result = EMSGSIZE;
3332 break;
3333 }
3334 if (pcb->ipsec_ifp != NULL) {
3335 // Only can set before connecting
3336 result = EINVAL;
3337 break;
3338 }
3339 lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
3340 pcb->ipsec_use_netif = !!(*(int *)data);
3341 lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
3342 break;
3343 }
3344 case IPSEC_OPT_SLOT_SIZE: {
3345 if (len != sizeof(u_int32_t)) {
3346 result = EMSGSIZE;
3347 break;
3348 }
3349 if (pcb->ipsec_ifp != NULL) {
3350 // Only can set before connecting
3351 result = EINVAL;
3352 break;
3353 }
3354 u_int32_t slot_size = *(u_int32_t *)data;
3355 if (slot_size < IPSEC_IF_MIN_SLOT_SIZE ||
3356 slot_size > IPSEC_IF_MAX_SLOT_SIZE) {
3357 return EINVAL;
3358 }
3359 pcb->ipsec_slot_size = slot_size;
3360 if (if_ipsec_debug != 0) {
3361 printf("%s: IPSEC_OPT_SLOT_SIZE %u\n", __func__, slot_size);
3362 }
3363 break;
3364 }
3365 case IPSEC_OPT_NETIF_RING_SIZE: {
3366 if (len != sizeof(u_int32_t)) {
3367 result = EMSGSIZE;
3368 break;
3369 }
3370 if (pcb->ipsec_ifp != NULL) {
3371 // Only can set before connecting
3372 result = EINVAL;
3373 break;
3374 }
3375 u_int32_t ring_size = *(u_int32_t *)data;
3376 if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
3377 ring_size > IPSEC_IF_MAX_RING_SIZE) {
3378 return EINVAL;
3379 }
3380 pcb->ipsec_netif_ring_size = ring_size;
3381 if (if_ipsec_debug != 0) {
3382 printf("%s: IPSEC_OPT_NETIF_RING_SIZE %u\n", __func__, ring_size);
3383 }
3384 break;
3385 }
3386 case IPSEC_OPT_TX_FSW_RING_SIZE: {
3387 if (len != sizeof(u_int32_t)) {
3388 result = EMSGSIZE;
3389 break;
3390 }
3391 if (pcb->ipsec_ifp != NULL) {
3392 // Only can set before connecting
3393 result = EINVAL;
3394 break;
3395 }
3396 u_int32_t ring_size = *(u_int32_t *)data;
3397 if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
3398 ring_size > IPSEC_IF_MAX_RING_SIZE) {
3399 return EINVAL;
3400 }
3401 pcb->ipsec_tx_fsw_ring_size = ring_size;
3402 if (if_ipsec_debug != 0) {
3403 printf("%s: IPSEC_OPT_TX_FSW_RING_SIZE %u\n", __func__, ring_size);
3404 }
3405 break;
3406 }
3407 case IPSEC_OPT_RX_FSW_RING_SIZE: {
3408 if (len != sizeof(u_int32_t)) {
3409 result = EMSGSIZE;
3410 break;
3411 }
3412 if (pcb->ipsec_ifp != NULL) {
3413 // Only can set before connecting
3414 result = EINVAL;
3415 break;
3416 }
3417 u_int32_t ring_size = *(u_int32_t *)data;
3418 if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
3419 ring_size > IPSEC_IF_MAX_RING_SIZE) {
3420 return EINVAL;
3421 }
3422 pcb->ipsec_rx_fsw_ring_size = ring_size;
3423 if (if_ipsec_debug != 0) {
3424 printf("%s: IPSEC_OPT_TX_FSW_RING_SIZE %u\n", __func__, ring_size);
3425 }
3426 break;
3427 }
3428 case IPSEC_OPT_KPIPE_TX_RING_SIZE: {
3429 if (len != sizeof(u_int32_t)) {
3430 result = EMSGSIZE;
3431 break;
3432 }
3433 if (pcb->ipsec_ifp != NULL) {
3434 // Only can set before connecting
3435 result = EINVAL;
3436 break;
3437 }
3438 u_int32_t ring_size = *(u_int32_t *)data;
3439 if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
3440 ring_size > IPSEC_IF_MAX_RING_SIZE) {
3441 return EINVAL;
3442 }
3443 pcb->ipsec_kpipe_tx_ring_size = ring_size;
3444 if (if_ipsec_debug != 0) {
3445 printf("%s: IPSEC_OPT_KPIPE_TX_RING_SIZE %u\n", __func__, ring_size);
3446 }
3447 break;
3448 }
3449 case IPSEC_OPT_KPIPE_RX_RING_SIZE: {
3450 if (len != sizeof(u_int32_t)) {
3451 result = EMSGSIZE;
3452 break;
3453 }
3454 if (pcb->ipsec_ifp != NULL) {
3455 // Only can set before connecting
3456 result = EINVAL;
3457 break;
3458 }
3459 u_int32_t ring_size = *(u_int32_t *)data;
3460 if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
3461 ring_size > IPSEC_IF_MAX_RING_SIZE) {
3462 return EINVAL;
3463 }
3464 pcb->ipsec_kpipe_rx_ring_size = ring_size;
3465 if (if_ipsec_debug != 0) {
3466 printf("%s: IPSEC_OPT_KPIPE_RX_RING_SIZE %u\n", __func__, ring_size);
3467 }
3468 break;
3469 }
3470 case IPSEC_OPT_OUTPUT_DSCP_MAPPING: {
3471 if (len != sizeof(int)) {
3472 result = EMSGSIZE;
3473 break;
3474 }
3475 if (pcb->ipsec_ifp == NULL) {
3476 // Only can set after connecting
3477 result = EINVAL;
3478 break;
3479 }
3480
3481 ipsec_dscp_mapping_t output_dscp_mapping = (ipsec_dscp_mapping_t)(*(int *)data);
3482 if (output_dscp_mapping > IPSEC_DSCP_MAPPING_LEGACY) {
3483 return EINVAL;
3484 }
3485
3486 pcb->ipsec_output_dscp_mapping = output_dscp_mapping;
3487
3488 os_log(OS_LOG_DEFAULT, "%s IPSEC_OPT_OUTPUT_DSCP_MAPPING %s DSCP %d\n",
3489 __func__, pcb->ipsec_ifp->if_xname,
3490 pcb->ipsec_output_dscp_mapping);
3491 break;
3492 }
3493
3494 #endif // IPSEC_NEXUS
3495
3496 default: {
3497 result = ENOPROTOOPT;
3498 break;
3499 }
3500 }
3501
3502 return result;
3503 }
3504
/*
 * Kernel control getsockopt handler.  Copies the requested PCB setting
 * into the caller's buffer.  Most options require *len to match the
 * option's exact size; IPSEC_OPT_IFNAME updates *len to the number of
 * bytes written (including the NUL).  Options that touch kpipe state
 * are read under the PCB rwlock.
 */
static errno_t
ipsec_ctl_getopt(__unused kern_ctl_ref kctlref,
    __unused u_int32_t unit,
    void *unitinfo,
    int opt,
    void *data,
    size_t *len)
{
	errno_t result = 0;
	struct ipsec_pcb *pcb = unitinfo;
	if (pcb == NULL) {
		return EINVAL;
	}

	switch (opt) {
	case IPSEC_OPT_FLAGS: {
		if (*len != sizeof(u_int32_t)) {
			result = EMSGSIZE;
		} else {
			*(u_int32_t *)data = pcb->ipsec_external_flags;
		}
		break;
	}

	case IPSEC_OPT_EXT_IFDATA_STATS: {
		if (*len != sizeof(int)) {
			result = EMSGSIZE;
		} else {
			*(int *)data = (pcb->ipsec_ext_ifdata_stats) ? 1 : 0;
		}
		break;
	}

	case IPSEC_OPT_IFNAME: {
		/* Caller must supply at least strlen(name)+1 bytes. */
		if (*len < MIN(strlen(pcb->ipsec_if_xname) + 1, sizeof(pcb->ipsec_if_xname))) {
			result = EMSGSIZE;
		} else {
			if (pcb->ipsec_ifp == NULL) {
				// Only can get after connecting
				result = EINVAL;
				break;
			}
			*len = scnprintf(data, *len, "%s", pcb->ipsec_if_xname) + 1;
		}
		break;
	}

	case IPSEC_OPT_OUTPUT_TRAFFIC_CLASS: {
		if (*len != sizeof(int)) {
			result = EMSGSIZE;
		} else {
			*(int *)data = so_svc2tc(pcb->ipsec_output_service_class);
		}
		break;
	}

#if IPSEC_NEXUS

	case IPSEC_OPT_ENABLE_CHANNEL: {
		if (*len != sizeof(int)) {
			result = EMSGSIZE;
		} else {
			lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
			*(int *)data = pcb->ipsec_kpipe_count;
			lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		}
		break;
	}

	case IPSEC_OPT_CHANNEL_BIND_PID: {
		if (*len != sizeof(pid_t)) {
			result = EMSGSIZE;
		} else {
			lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
			*(pid_t *)data = pcb->ipsec_kpipe_pid;
			lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		}
		break;
	}

	case IPSEC_OPT_CHANNEL_BIND_UUID: {
		if (*len != sizeof(uuid_t)) {
			result = EMSGSIZE;
		} else {
			lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
			uuid_copy(*((uuid_t *)data), pcb->ipsec_kpipe_proc_uuid);
			lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		}
		break;
	}

	case IPSEC_OPT_ENABLE_FLOWSWITCH: {
		if (*len != sizeof(int)) {
			result = EMSGSIZE;
		} else {
			/*
			 * NOTE(review): unlike the setter, this path does not check
			 * pcb->ipsec_ifp for NULL before passing it to
			 * if_check_netagent() — confirm that helper tolerates a
			 * NULL ifp when the control socket is not yet connected.
			 */
			*(int *)data = if_check_netagent(pcb->ipsec_ifp, pcb->ipsec_nx.fsw_agent);
		}
		break;
	}

	case IPSEC_OPT_ENABLE_NETIF: {
		if (*len != sizeof(int)) {
			result = EMSGSIZE;
		} else {
			lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
			*(int *)data = !!pcb->ipsec_use_netif;
			lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		}
		break;
	}

	case IPSEC_OPT_GET_CHANNEL_UUID: {
		/* Returns one UUID per allocated kpipe; *len must match exactly. */
		lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
		if (!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
			result = ENXIO;
		} else if (*len != sizeof(uuid_t) * pcb->ipsec_kpipe_count) {
			result = EMSGSIZE;
		} else {
			for (unsigned int i = 0; i < pcb->ipsec_kpipe_count; i++) {
				uuid_copy(((uuid_t *)data)[i], pcb->ipsec_kpipe_uuid[i]);
			}
		}
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		break;
	}

	case IPSEC_OPT_INPUT_FRAG_SIZE: {
		if (*len != sizeof(u_int32_t)) {
			result = EMSGSIZE;
		} else {
			*(u_int32_t *)data = pcb->ipsec_input_frag_size;
		}
		break;
	}
	case IPSEC_OPT_SLOT_SIZE: {
		if (*len != sizeof(u_int32_t)) {
			result = EMSGSIZE;
		} else {
			*(u_int32_t *)data = pcb->ipsec_slot_size;
		}
		break;
	}
	case IPSEC_OPT_NETIF_RING_SIZE: {
		if (*len != sizeof(u_int32_t)) {
			result = EMSGSIZE;
		} else {
			*(u_int32_t *)data = pcb->ipsec_netif_ring_size;
		}
		break;
	}
	case IPSEC_OPT_TX_FSW_RING_SIZE: {
		if (*len != sizeof(u_int32_t)) {
			result = EMSGSIZE;
		} else {
			*(u_int32_t *)data = pcb->ipsec_tx_fsw_ring_size;
		}
		break;
	}
	case IPSEC_OPT_RX_FSW_RING_SIZE: {
		if (*len != sizeof(u_int32_t)) {
			result = EMSGSIZE;
		} else {
			*(u_int32_t *)data = pcb->ipsec_rx_fsw_ring_size;
		}
		break;
	}
	case IPSEC_OPT_KPIPE_TX_RING_SIZE: {
		if (*len != sizeof(u_int32_t)) {
			result = EMSGSIZE;
		} else {
			*(u_int32_t *)data = pcb->ipsec_kpipe_tx_ring_size;
		}
		break;
	}
	case IPSEC_OPT_KPIPE_RX_RING_SIZE: {
		if (*len != sizeof(u_int32_t)) {
			result = EMSGSIZE;
		} else {
			*(u_int32_t *)data = pcb->ipsec_kpipe_rx_ring_size;
		}
		break;
	}

#endif // IPSEC_NEXUS

	default: {
		result = ENOPROTOOPT;
		break;
	}
	}

	return result;
}
3698
3699 /* Network Interface functions */
/*
 * Interface output routine.  Applies IPsec transforms to an outbound
 * packet and hands the result to ip_output()/ip6_output().  The IP
 * version is read from the first byte of the packet; anything other
 * than 4 or 6 is rejected.  On flow-advisory feedback (flow controlled
 * or suspended) the interface output is disabled and ENOBUFS returned
 * so the caller's dequeue loop stops.  Consumes the mbuf on all paths.
 */
static errno_t
ipsec_output(ifnet_t interface,
    mbuf_t data)
{
	struct ipsec_pcb *pcb = ifnet_softc(interface);
	struct ipsec_output_state ipsec_state;
	struct route ro;
	struct route_in6 ro6;
	size_t length;
	struct ip *ip = NULL;
	struct ip6_hdr *ip6 = NULL;
	struct ip_out_args ipoa;
	struct ip6_out_args ip6oa;
	int error = 0;
	u_int ip_version = 0;
	int flags = 0;
	struct flowadv *adv = NULL;

	// Make sure this packet isn't looping through the interface
	if (necp_get_last_interface_index_from_packet(data) == interface->if_index) {
		error = EINVAL;
		goto ipsec_output_err;
	}

	// Mark the interface so NECP can evaluate tunnel policy
	necp_mark_packet_from_interface(data, interface);

	/* Need at least a full IPv4 header in the first mbuf to read ip_v. */
	if (data->m_len < sizeof(*ip)) {
		os_log_error(OS_LOG_DEFAULT, "ipsec_output: first mbuf length shorter than IP header length: %d.\n", data->m_len);
		IPSEC_STAT_INCREMENT(ipsecstat.out_inval);
		error = EINVAL;
		goto ipsec_output_err;
	}

	ip = mtod(data, struct ip *);
	ip_version = ip->ip_v;

	switch (ip_version) {
	case 4: {
		u_int8_t ip_hlen = 0;
#ifdef _IP_VHL
		ip_hlen = _IP_VHL_HL(ip->ip_vhl) << 2;
#else
		ip_hlen = (uint8_t)(ip->ip_hl << 2);
#endif
		if (ip_hlen < sizeof(*ip)) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_output: Bad ip header length %d.\n", ip_hlen);
			IPSEC_STAT_INCREMENT(ipsecstat.out_inval);
			error = EINVAL;
			goto ipsec_output_err;
		}
#if IPSEC_NEXUS
		if (!pcb->ipsec_use_netif)
#endif // IPSEC_NEXUS
		{
			/* Legacy (non-netif) path taps BPF here. */
			int af = AF_INET;
			bpf_tap_out(pcb->ipsec_ifp, DLT_NULL, data, &af, sizeof(af));
		}

		/* Apply encryption */
		memset(&ipsec_state, 0, sizeof(ipsec_state));
		ipsec_state.m = data;
		ipsec_state.dst = (struct sockaddr *)&ip->ip_dst;
		memset(&ipsec_state.ro, 0, sizeof(ipsec_state.ro));
		ipsec_state.dscp_mapping = pcb->ipsec_output_dscp_mapping;

		error = ipsec4_interface_output(&ipsec_state, interface);
		/* Tunneled in IPv6 - packet is gone */
		if (error == 0 && ipsec_state.tunneled == 6) {
			goto done;
		}

		/* ipsec4_interface_output may have replaced the mbuf. */
		data = ipsec_state.m;
		if (error || data == NULL) {
			if (error) {
				os_log_error(OS_LOG_DEFAULT, "ipsec_output: ipsec4_output error %d.\n", error);
			}
			goto ipsec_output_err;
		}

		/* Set traffic class, set flow */
		m_set_service_class(data, pcb->ipsec_output_service_class);
		data->m_pkthdr.pkt_flowsrc = FLOWSRC_IFNET;
#if SKYWALK
		data->m_pkthdr.pkt_mpriv_srcid = interface->if_flowhash;
#else /* !SKYWALK */
		data->m_pkthdr.pkt_flowid = interface->if_flowhash;
#endif /* !SKYWALK */
		data->m_pkthdr.pkt_proto = ip->ip_p;
		data->m_pkthdr.pkt_flags = (PKTF_FLOW_ID | PKTF_FLOW_ADV | PKTF_FLOW_LOCALSRC);

		/* Flip endian-ness for ip_output */
		ip = mtod(data, struct ip *);
		NTOHS(ip->ip_len);
		NTOHS(ip->ip_off);

		/* Increment statistics */
		length = mbuf_pkthdr_len(data);
		ifnet_stat_increment_out(interface, 1, (uint16_t)length, 0);

		/* Send to ip_output */
		memset(&ro, 0, sizeof(ro));

		flags = (IP_OUTARGS | /* Passing out args to specify interface */
		    IP_NOIPSEC);      /* To ensure the packet doesn't go through ipsec twice */

		memset(&ipoa, 0, sizeof(ipoa));
		ipoa.ipoa_flowadv.code = 0;
		ipoa.ipoa_flags = IPOAF_SELECT_SRCIF | IPOAF_BOUND_SRCADDR;
		if (ipsec_state.outgoing_if) {
			ipoa.ipoa_boundif = ipsec_state.outgoing_if;
			ipoa.ipoa_flags |= IPOAF_BOUND_IF;
		}
		ipsec_set_ipoa_for_interface(pcb->ipsec_ifp, &ipoa);

		adv = &ipoa.ipoa_flowadv;

		/* ip_output consumes the mbuf regardless of outcome. */
		(void)ip_output(data, NULL, &ro, flags, NULL, &ipoa);
		data = NULL;

		if (adv->code == FADV_FLOW_CONTROLLED || adv->code == FADV_SUSPENDED) {
			error = ENOBUFS;
			ifnet_disable_output(interface);
		}

		goto done;
	}
	case 6: {
		if (data->m_len < sizeof(*ip6)) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_output: first mbuf length shorter than IPv6 header length: %d.\n", data->m_len);
			IPSEC_STAT_INCREMENT(ipsec6stat.out_inval);
			error = EINVAL;
			goto ipsec_output_err;
		}
#if IPSEC_NEXUS
		if (!pcb->ipsec_use_netif)
#endif // IPSEC_NEXUS
		{
			int af = AF_INET6;
			bpf_tap_out(pcb->ipsec_ifp, DLT_NULL, data, &af, sizeof(af));
		}

		/* Ensure the IPv6 header sits alone in the first mbuf. */
		data = ipsec6_splithdr(data);
		if (data == NULL) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_output: ipsec6_splithdr returned NULL\n");
			goto ipsec_output_err;
		}

		ip6 = mtod(data, struct ip6_hdr *);

		memset(&ipsec_state, 0, sizeof(ipsec_state));
		ipsec_state.m = data;
		ipsec_state.dst = (struct sockaddr *)&ip6->ip6_dst;
		memset(&ipsec_state.ro, 0, sizeof(ipsec_state.ro));
		ipsec_state.dscp_mapping = pcb->ipsec_output_dscp_mapping;

		error = ipsec6_interface_output(&ipsec_state, interface, &ip6->ip6_nxt, ipsec_state.m);
		if (error == 0 && ipsec_state.tunneled == 4) { /* tunneled in IPv4 - packet is gone */
			goto done;
		}
		data = ipsec_state.m;
		if (error || data == NULL) {
			if (error) {
				os_log_error(OS_LOG_DEFAULT, "ipsec_output: ipsec6_output error %d\n", error);
			}
			goto ipsec_output_err;
		}

		/* Set traffic class, set flow */
		m_set_service_class(data, pcb->ipsec_output_service_class);
		data->m_pkthdr.pkt_flowsrc = FLOWSRC_IFNET;
#if SKYWALK
		data->m_pkthdr.pkt_mpriv_srcid = interface->if_flowhash;
#else /* !SKYWALK */
		data->m_pkthdr.pkt_flowid = interface->if_flowhash;
#endif /* !SKYWALK */
		data->m_pkthdr.pkt_proto = ip6->ip6_nxt;
		data->m_pkthdr.pkt_flags = (PKTF_FLOW_ID | PKTF_FLOW_ADV | PKTF_FLOW_LOCALSRC);

		/* Increment statistics */
		length = mbuf_pkthdr_len(data);
		ifnet_stat_increment_out(interface, 1, (uint16_t)length, 0);

		/* Send to ip6_output */
		memset(&ro6, 0, sizeof(ro6));

		flags = IPV6_OUTARGS;

		memset(&ip6oa, 0, sizeof(ip6oa));
		ip6oa.ip6oa_flowadv.code = 0;
		ip6oa.ip6oa_flags = IP6OAF_SELECT_SRCIF | IP6OAF_BOUND_SRCADDR;
		if (ipsec_state.outgoing_if) {
			ip6oa.ip6oa_boundif = ipsec_state.outgoing_if;
			ip6oa.ip6oa_flags |= IP6OAF_BOUND_IF;
			ip6_output_setsrcifscope(data, ipsec_state.outgoing_if, NULL);
			ip6_output_setdstifscope(data, ipsec_state.outgoing_if, NULL);
		} else {
			ip6_output_setsrcifscope(data, IFSCOPE_UNKNOWN, NULL);
			ip6_output_setdstifscope(data, IFSCOPE_UNKNOWN, NULL);
		}
		ipsec_set_ip6oa_for_interface(pcb->ipsec_ifp, &ip6oa);

		adv = &ip6oa.ip6oa_flowadv;

		(void) ip6_output(data, NULL, &ro6, flags, NULL, NULL, &ip6oa);
		data = NULL;

		if (adv->code == FADV_FLOW_CONTROLLED || adv->code == FADV_SUSPENDED) {
			error = ENOBUFS;
			ifnet_disable_output(interface);
		}

		goto done;
	}
	default: {
		os_log_error(OS_LOG_DEFAULT, "ipsec_output: Received unknown packet version %d.\n", ip_version);
		error = EINVAL;
		goto ipsec_output_err;
	}
	}

done:
	return error;

ipsec_output_err:
	if (data) {
		mbuf_freem(data);
	}
	goto done;
}
3930
3931 static void
ipsec_start(ifnet_t interface)3932 ipsec_start(ifnet_t interface)
3933 {
3934 mbuf_t data;
3935 struct ipsec_pcb *pcb = ifnet_softc(interface);
3936
3937 VERIFY(pcb != NULL);
3938 for (;;) {
3939 if (ifnet_dequeue(interface, &data) != 0) {
3940 break;
3941 }
3942 if (ipsec_output(interface, data) != 0) {
3943 break;
3944 }
3945 }
3946 }
3947
3948 /* Network Interface functions */
3949 static errno_t
ipsec_demux(__unused ifnet_t interface,mbuf_t data,__unused char * frame_header,protocol_family_t * protocol)3950 ipsec_demux(__unused ifnet_t interface,
3951 mbuf_t data,
3952 __unused char *frame_header,
3953 protocol_family_t *protocol)
3954 {
3955 struct ip *ip;
3956 u_int ip_version;
3957
3958 while (data != NULL && mbuf_len(data) < 1) {
3959 data = mbuf_next(data);
3960 }
3961
3962 if (data == NULL) {
3963 return ENOENT;
3964 }
3965
3966 ip = mtod(data, struct ip *);
3967 ip_version = ip->ip_v;
3968
3969 switch (ip_version) {
3970 case 4:
3971 *protocol = PF_INET;
3972 return 0;
3973 case 6:
3974 *protocol = PF_INET6;
3975 return 0;
3976 default:
3977 *protocol = PF_UNSPEC;
3978 break;
3979 }
3980
3981 return 0;
3982 }
3983
3984 static errno_t
ipsec_add_proto(__unused ifnet_t interface,protocol_family_t protocol,__unused const struct ifnet_demux_desc * demux_array,__unused u_int32_t demux_count)3985 ipsec_add_proto(__unused ifnet_t interface,
3986 protocol_family_t protocol,
3987 __unused const struct ifnet_demux_desc *demux_array,
3988 __unused u_int32_t demux_count)
3989 {
3990 switch (protocol) {
3991 case PF_INET:
3992 return 0;
3993 case PF_INET6:
3994 return 0;
3995 default:
3996 break;
3997 }
3998
3999 return ENOPROTOOPT;
4000 }
4001
4002 static errno_t
ipsec_del_proto(__unused ifnet_t interface,__unused protocol_family_t protocol)4003 ipsec_del_proto(__unused ifnet_t interface,
4004 __unused protocol_family_t protocol)
4005 {
4006 return 0;
4007 }
4008
4009 static errno_t
ipsec_ioctl(ifnet_t interface,u_long command,void * data)4010 ipsec_ioctl(ifnet_t interface,
4011 u_long command,
4012 void *data)
4013 {
4014 #if IPSEC_NEXUS
4015 struct ipsec_pcb *pcb = ifnet_softc(interface);
4016 #endif
4017 errno_t result = 0;
4018
4019 switch (command) {
4020 case SIOCSIFMTU: {
4021 #if IPSEC_NEXUS
4022 if (pcb->ipsec_use_netif) {
4023 // Make sure we can fit packets in the channel buffers
4024 if (((uint64_t)((struct ifreq*)data)->ifr_mtu) > pcb->ipsec_slot_size) {
4025 result = EINVAL;
4026 } else {
4027 ifnet_set_mtu(interface, (uint32_t)((struct ifreq*)data)->ifr_mtu);
4028 }
4029 } else
4030 #endif // IPSEC_NEXUS
4031 {
4032 ifnet_set_mtu(interface, ((struct ifreq*)data)->ifr_mtu);
4033 }
4034 break;
4035 }
4036
4037 case SIOCSIFFLAGS:
4038 /* ifioctl() takes care of it */
4039 break;
4040
4041 case SIOCSIFSUBFAMILY: {
4042 uint32_t subfamily;
4043
4044 subfamily = ((struct ifreq*)data)->ifr_type.ift_subfamily;
4045 switch (subfamily) {
4046 case IFRTYPE_SUBFAMILY_BLUETOOTH:
4047 interface->if_subfamily = IFNET_SUBFAMILY_BLUETOOTH;
4048 break;
4049 case IFRTYPE_SUBFAMILY_WIFI:
4050 interface->if_subfamily = IFNET_SUBFAMILY_WIFI;
4051 break;
4052 case IFRTYPE_SUBFAMILY_QUICKRELAY:
4053 interface->if_subfamily = IFNET_SUBFAMILY_QUICKRELAY;
4054 break;
4055 case IFRTYPE_SUBFAMILY_DEFAULT:
4056 interface->if_subfamily = IFNET_SUBFAMILY_DEFAULT;
4057 break;
4058 default:
4059 result = EINVAL;
4060 break;
4061 }
4062 break;
4063 }
4064
4065 default:
4066 result = EOPNOTSUPP;
4067 }
4068
4069 return result;
4070 }
4071
/*
 * ipsec_detached - ifnet detach-completion callback.
 *
 * Drops the ifnet reference held by the pcb, then frees the pcb and
 * disposes of the ifnet under ipsec_lock.
 */
static void
ipsec_detached(ifnet_t interface)
{
	struct ipsec_pcb *pcb = ifnet_softc(interface);

	/* Drop the reference the pcb holds on this ifnet. */
	(void)ifnet_release(interface);
	lck_mtx_lock(&ipsec_lock);
	/*
	 * NOTE(review): the pcb is freed and the ifnet disposed while
	 * ipsec_lock is held — presumably to serialize against concurrent
	 * pcb lookups; confirm against ipsec_free_pcb's locking contract.
	 */
	ipsec_free_pcb(pcb, true);
	(void)ifnet_dispose(interface);
	lck_mtx_unlock(&ipsec_lock);
}
4083
4084 /* Protocol Handlers */
4085
4086 static errno_t
ipsec_proto_input(ifnet_t interface,protocol_family_t protocol,mbuf_t m,__unused char * frame_header)4087 ipsec_proto_input(ifnet_t interface,
4088 protocol_family_t protocol,
4089 mbuf_t m,
4090 __unused char *frame_header)
4091 {
4092 mbuf_pkthdr_setrcvif(m, interface);
4093
4094 #if IPSEC_NEXUS
4095 struct ipsec_pcb *pcb = ifnet_softc(interface);
4096 if (!pcb->ipsec_use_netif)
4097 #endif // IPSEC_NEXUS
4098 {
4099 uint32_t af = 0;
4100 struct ip *ip = mtod(m, struct ip *);
4101 if (ip->ip_v == 4) {
4102 af = AF_INET;
4103 } else if (ip->ip_v == 6) {
4104 af = AF_INET6;
4105 }
4106 bpf_tap_in(interface, DLT_NULL, m, &af, sizeof(af));
4107 pktap_input(interface, protocol, m, NULL);
4108 }
4109
4110 int32_t pktlen = m->m_pkthdr.len;
4111 if (proto_input(protocol, m) != 0) {
4112 ifnet_stat_increment_in(interface, 0, 0, 1);
4113 m_freem(m);
4114 } else {
4115 ifnet_stat_increment_in(interface, 1, pktlen, 0);
4116 }
4117
4118 return 0;
4119 }
4120
4121 static errno_t
ipsec_proto_pre_output(__unused ifnet_t interface,protocol_family_t protocol,__unused mbuf_t * packet,__unused const struct sockaddr * dest,__unused void * route,__unused char * frame_type,__unused char * link_layer_dest)4122 ipsec_proto_pre_output(__unused ifnet_t interface,
4123 protocol_family_t protocol,
4124 __unused mbuf_t *packet,
4125 __unused const struct sockaddr *dest,
4126 __unused void *route,
4127 __unused char *frame_type,
4128 __unused char *link_layer_dest)
4129 {
4130 *(protocol_family_t *)(void *)frame_type = protocol;
4131 return 0;
4132 }
4133
4134 static errno_t
ipsec_attach_proto(ifnet_t interface,protocol_family_t protocol)4135 ipsec_attach_proto(ifnet_t interface,
4136 protocol_family_t protocol)
4137 {
4138 struct ifnet_attach_proto_param proto;
4139 errno_t result;
4140
4141 bzero(&proto, sizeof(proto));
4142 proto.input = ipsec_proto_input;
4143 proto.pre_output = ipsec_proto_pre_output;
4144
4145 result = ifnet_attach_protocol(interface, protocol, &proto);
4146 if (result != 0 && result != EEXIST) {
4147 os_log_error(OS_LOG_DEFAULT, "ipsec_attach_inet - ifnet_attach_protocol %d failed: %d\n",
4148 protocol, result);
4149 }
4150
4151 return result;
4152 }
4153
/*
 * ipsec_inject_inbound_packet - hand a (chain of) decrypted packet(s) to the
 * interface's input path.
 *
 * Netif (nexus) mode: append the chain to the pcb's pending input chain and
 * notify the rx ring so the channel drains it.  Legacy mode: demux the packet
 * and deliver it synchronously through ipsec_proto_input().
 *
 * Returns 0 on success; ENXIO if the data path is stopped; ENOSPC if the
 * pending input chain is full; or the ipsec_demux() error in legacy mode.
 */
errno_t
ipsec_inject_inbound_packet(ifnet_t interface,
    mbuf_t packet)
{
#if IPSEC_NEXUS
	struct ipsec_pcb *pcb = ifnet_softc(interface);

	if (pcb->ipsec_use_netif) {
		/* Refuse new work while the data path is being drained. */
		if (!ipsec_data_move_begin(pcb)) {
			os_log_info(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__,
			    if_name(pcb->ipsec_ifp));
			return ENXIO;
		}

		lck_rw_lock_shared(&pcb->ipsec_pcb_lock);

		lck_mtx_lock(&pcb->ipsec_input_chain_lock);

		/* Back-pressure: drop if too many packets are already queued. */
		if (pcb->ipsec_input_chain_count > (u_int32_t)if_ipsec_max_pending_input) {
			lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
			lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
			ipsec_data_move_end(pcb);
			return ENOSPC;
		}

		/* Append the new chain at the tail of the pending chain. */
		if (pcb->ipsec_input_chain != NULL) {
			pcb->ipsec_input_chain_last->m_nextpkt = packet;
		} else {
			pcb->ipsec_input_chain = packet;
		}
		pcb->ipsec_input_chain_count++;
		/* Walk to the end of the injected chain, counting each packet. */
		while (packet->m_nextpkt) {
			VERIFY(packet != packet->m_nextpkt);
			packet = packet->m_nextpkt;
			pcb->ipsec_input_chain_count++;
		}
		pcb->ipsec_input_chain_last = packet;
		lck_mtx_unlock(&pcb->ipsec_input_chain_lock);

		/* Snapshot the ring pointer under the pcb lock before notifying. */
		kern_channel_ring_t rx_ring = pcb->ipsec_netif_rxring[0];
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);

		if (rx_ring != NULL) {
			kern_channel_notify(rx_ring, 0);
		}

		ipsec_data_move_end(pcb);
		return 0;
	} else
#endif // IPSEC_NEXUS
	{
		/* Legacy path: classify by IP version, deliver synchronously. */
		errno_t error;
		protocol_family_t protocol;
		if ((error = ipsec_demux(interface, packet, NULL, &protocol)) != 0) {
			return error;
		}

		return ipsec_proto_input(interface, protocol, packet, NULL);
	}
}
4214
4215 void
ipsec_set_pkthdr_for_interface(ifnet_t interface,mbuf_t packet,int family,uint32_t flowid)4216 ipsec_set_pkthdr_for_interface(ifnet_t interface, mbuf_t packet, int family,
4217 uint32_t flowid)
4218 {
4219 #pragma unused (flowid)
4220 if (packet != NULL && interface != NULL) {
4221 struct ipsec_pcb *pcb = ifnet_softc(interface);
4222 if (pcb != NULL) {
4223 /* Set traffic class, set flow */
4224 m_set_service_class(packet, pcb->ipsec_output_service_class);
4225 packet->m_pkthdr.pkt_flowsrc = FLOWSRC_IFNET;
4226 #if SKYWALK
4227 packet->m_pkthdr.pkt_mpriv_srcid = interface->if_flowhash;
4228 packet->m_pkthdr.pkt_flowid = flowid;
4229 #else /* !SKYWALK */
4230 packet->m_pkthdr.pkt_flowid = interface->if_flowhash;
4231 #endif /* !SKYWALK */
4232 if (family == AF_INET) {
4233 struct ip *ip = mtod(packet, struct ip *);
4234 packet->m_pkthdr.pkt_proto = ip->ip_p;
4235 } else if (family == AF_INET6) {
4236 struct ip6_hdr *ip6 = mtod(packet, struct ip6_hdr *);
4237 packet->m_pkthdr.pkt_proto = ip6->ip6_nxt;
4238 }
4239 packet->m_pkthdr.pkt_flags = (PKTF_FLOW_ID | PKTF_FLOW_ADV | PKTF_FLOW_LOCALSRC);
4240 }
4241 }
4242 }
4243
4244 void
ipsec_set_ipoa_for_interface(ifnet_t interface,struct ip_out_args * ipoa)4245 ipsec_set_ipoa_for_interface(ifnet_t interface, struct ip_out_args *ipoa)
4246 {
4247 struct ipsec_pcb *pcb;
4248
4249 if (interface == NULL || ipoa == NULL) {
4250 return;
4251 }
4252 pcb = ifnet_softc(interface);
4253
4254 if (net_qos_policy_restricted == 0) {
4255 ipoa->ipoa_flags |= IPOAF_QOSMARKING_ALLOWED;
4256 ipoa->ipoa_sotc = so_svc2tc(pcb->ipsec_output_service_class);
4257 } else if (pcb->ipsec_output_service_class != MBUF_SC_VO ||
4258 net_qos_policy_restrict_avapps != 0) {
4259 ipoa->ipoa_flags &= ~IPOAF_QOSMARKING_ALLOWED;
4260 } else {
4261 ipoa->ipoa_flags |= IP6OAF_QOSMARKING_ALLOWED;
4262 ipoa->ipoa_sotc = SO_TC_VO;
4263 }
4264 }
4265
4266 void
ipsec_set_ip6oa_for_interface(ifnet_t interface,struct ip6_out_args * ip6oa)4267 ipsec_set_ip6oa_for_interface(ifnet_t interface, struct ip6_out_args *ip6oa)
4268 {
4269 struct ipsec_pcb *pcb;
4270
4271 if (interface == NULL || ip6oa == NULL) {
4272 return;
4273 }
4274 pcb = ifnet_softc(interface);
4275
4276 if (net_qos_policy_restricted == 0) {
4277 ip6oa->ip6oa_flags |= IPOAF_QOSMARKING_ALLOWED;
4278 ip6oa->ip6oa_sotc = so_svc2tc(pcb->ipsec_output_service_class);
4279 } else if (pcb->ipsec_output_service_class != MBUF_SC_VO ||
4280 net_qos_policy_restrict_avapps != 0) {
4281 ip6oa->ip6oa_flags &= ~IPOAF_QOSMARKING_ALLOWED;
4282 } else {
4283 ip6oa->ip6oa_flags |= IP6OAF_QOSMARKING_ALLOWED;
4284 ip6oa->ip6oa_sotc = SO_TC_VO;
4285 }
4286 }
4287
4288 static boolean_t
ipsec_data_move_begin(struct ipsec_pcb * pcb)4289 ipsec_data_move_begin(struct ipsec_pcb *pcb)
4290 {
4291 boolean_t ret = 0;
4292
4293 lck_mtx_lock_spin(&pcb->ipsec_pcb_data_move_lock);
4294 if ((ret = IPSEC_IS_DATA_PATH_READY(pcb))) {
4295 pcb->ipsec_pcb_data_move++;
4296 }
4297 lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
4298
4299 return ret;
4300 }
4301
/*
 * ipsec_data_move_end - drop the data-move reference taken by
 * ipsec_data_move_begin(), waking any drainer blocked in
 * ipsec_data_move_drain() once the last reference goes away.
 */
static void
ipsec_data_move_end(struct ipsec_pcb *pcb)
{
	lck_mtx_lock_spin(&pcb->ipsec_pcb_data_move_lock);
	VERIFY(pcb->ipsec_pcb_data_move > 0);
	/*
	 * if there's no more thread moving data, wakeup any
	 * drainers that's blocked waiting for this.
	 */
	if (--pcb->ipsec_pcb_data_move == 0 && pcb->ipsec_pcb_drainers > 0) {
		wakeup(&(pcb->ipsec_pcb_data_move));
	}
	lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
}
4316
/*
 * ipsec_data_move_drain - block until every in-flight data-move reference
 * has been released.  The data path must already be marked not-ready (see
 * ipsec_wait_data_move_drain()), so no new references can be taken while
 * we sleep.
 */
static void
ipsec_data_move_drain(struct ipsec_pcb *pcb)
{
	lck_mtx_lock(&pcb->ipsec_pcb_data_move_lock);
	/* data path must already be marked as not ready */
	VERIFY(!IPSEC_IS_DATA_PATH_READY(pcb));
	pcb->ipsec_pcb_drainers++;
	/* Sleep until ipsec_data_move_end() drops the last reference. */
	while (pcb->ipsec_pcb_data_move != 0) {
		(void)msleep(&(pcb->ipsec_pcb_data_move), &pcb->ipsec_pcb_data_move_lock,
		    (PZERO - 1), __func__, NULL);
	}
	VERIFY(!IPSEC_IS_DATA_PATH_READY(pcb));
	VERIFY(pcb->ipsec_pcb_drainers > 0);
	pcb->ipsec_pcb_drainers--;
	lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
}
4333
/*
 * ipsec_wait_data_move_drain - shut down the data path and wait until all
 * threads currently inside it (holding data-move references) have finished.
 * After this returns, ipsec_data_move_begin() will fail until the ready
 * flag is set again.
 */
static void
ipsec_wait_data_move_drain(struct ipsec_pcb *pcb)
{
	/*
	 * Mark the data path as not usable.
	 */
	lck_mtx_lock(&pcb->ipsec_pcb_data_move_lock);
	IPSEC_CLR_DATA_PATH_READY(pcb);
	lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);

	/* Wait until all threads in the data paths are done. */
	ipsec_data_move_drain(pcb);
}
4347