1 /*
2 * Copyright (c) 2012-2022 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29
30 #include <sys/systm.h>
31 #include <sys/kern_control.h>
32 #include <net/kpi_protocol.h>
33 #include <net/kpi_interface.h>
34 #include <sys/socket.h>
35 #include <sys/socketvar.h>
36 #include <net/if.h>
37 #include <net/if_types.h>
38 #include <net/bpf.h>
39 #include <net/if_ipsec.h>
40 #include <sys/mbuf.h>
41 #include <sys/sockio.h>
42 #include <netinet/in.h>
43 #include <netinet/ip6.h>
44 #include <netinet6/in6_var.h>
45 #include <netinet6/ip6_var.h>
46 #include <sys/kauth.h>
47 #include <netinet6/ipsec.h>
48 #include <netinet6/ipsec6.h>
49 #include <netinet6/esp.h>
50 #include <netinet6/esp6.h>
51 #include <netinet/ip.h>
52 #include <net/flowadv.h>
53 #include <net/necp.h>
54 #include <netkey/key.h>
55 #include <net/pktap.h>
56 #include <kern/zalloc.h>
57 #include <os/log.h>
58
59 #if SKYWALK
60 #include <skywalk/os_skywalk_private.h>
61 #include <skywalk/nexus/flowswitch/nx_flowswitch.h>
62 #include <skywalk/nexus/netif/nx_netif.h>
63 #define IPSEC_NEXUS 1
64 #else // SKYWALK
65 #define IPSEC_NEXUS 0
66 #endif // SKYWALK
67
68 extern int net_qos_policy_restricted;
69 extern int net_qos_policy_restrict_avapps;
70
71 /* Kernel Control functions */
72 static errno_t ipsec_ctl_setup(u_int32_t *unit, void **unitinfo);
73 static errno_t ipsec_ctl_bind(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
74 void **unitinfo);
75 static errno_t ipsec_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
76 void **unitinfo);
77 static errno_t ipsec_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t unit,
78 void *unitinfo);
79 static errno_t ipsec_ctl_send(kern_ctl_ref kctlref, u_int32_t unit,
80 void *unitinfo, mbuf_t m, int flags);
81 static errno_t ipsec_ctl_getopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo,
82 int opt, void *data, size_t *len);
83 static errno_t ipsec_ctl_setopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo,
84 int opt, void *data, size_t len);
85
86 /* Network Interface functions */
87 static void ipsec_start(ifnet_t interface);
88 static errno_t ipsec_output(ifnet_t interface, mbuf_t data);
89 static errno_t ipsec_demux(ifnet_t interface, mbuf_t data, char *frame_header,
90 protocol_family_t *protocol);
91 static errno_t ipsec_add_proto(ifnet_t interface, protocol_family_t protocol,
92 const struct ifnet_demux_desc *demux_array,
93 u_int32_t demux_count);
94 static errno_t ipsec_del_proto(ifnet_t interface, protocol_family_t protocol);
95 static errno_t ipsec_ioctl(ifnet_t interface, u_long cmd, void *data);
96 static void ipsec_detached(ifnet_t interface);
97
98 /* Protocol handlers */
99 static errno_t ipsec_attach_proto(ifnet_t interface, protocol_family_t proto);
100 static errno_t ipsec_proto_input(ifnet_t interface, protocol_family_t protocol,
101 mbuf_t m, char *frame_header);
102 static errno_t ipsec_proto_pre_output(ifnet_t interface, protocol_family_t protocol,
103 mbuf_t *packet, const struct sockaddr *dest, void *route,
104 char *frame_type, char *link_layer_dest);
105
106 static kern_ctl_ref ipsec_kctlref;
107 static LCK_ATTR_DECLARE(ipsec_lck_attr, 0, 0);
108 static LCK_GRP_DECLARE(ipsec_lck_grp, "ipsec");
109 static LCK_MTX_DECLARE_ATTR(ipsec_lock, &ipsec_lck_grp, &ipsec_lck_attr);
110
111 #if IPSEC_NEXUS
112
113 SYSCTL_DECL(_net_ipsec);
114 SYSCTL_NODE(_net, OID_AUTO, ipsec, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "IPsec");
115 static int if_ipsec_verify_interface_creation = 0;
116 SYSCTL_INT(_net_ipsec, OID_AUTO, verify_interface_creation, CTLFLAG_RW | CTLFLAG_LOCKED, &if_ipsec_verify_interface_creation, 0, "");
117
118 #define IPSEC_IF_VERIFY(_e) if (__improbable(if_ipsec_verify_interface_creation)) { VERIFY(_e); }
119
120 #define IPSEC_IF_DEFAULT_SLOT_SIZE 2048
121 #define IPSEC_IF_DEFAULT_RING_SIZE 64
122 #define IPSEC_IF_DEFAULT_TX_FSW_RING_SIZE 64
123 #define IPSEC_IF_DEFAULT_RX_FSW_RING_SIZE 128
124 #define IPSEC_IF_DEFAULT_BUF_SEG_SIZE skmem_usr_buf_seg_size
125
126 #define IPSEC_IF_WMM_RING_COUNT NEXUS_NUM_WMM_QUEUES
127 #define IPSEC_IF_MAX_RING_COUNT IPSEC_IF_WMM_RING_COUNT
128 #define IPSEC_NETIF_WMM_TX_RING_COUNT IPSEC_IF_WMM_RING_COUNT
129 #define IPSEC_NETIF_WMM_RX_RING_COUNT 1
130 #define IPSEC_NETIF_MAX_TX_RING_COUNT IPSEC_NETIF_WMM_TX_RING_COUNT
131 #define IPSEC_NETIF_MAX_RX_RING_COUNT IPSEC_NETIF_WMM_RX_RING_COUNT
132
133 #define IPSEC_IF_MIN_RING_SIZE 8
134 #define IPSEC_IF_MAX_RING_SIZE 1024
135
136 #define IPSEC_IF_MIN_SLOT_SIZE 1024
137 #define IPSEC_IF_MAX_SLOT_SIZE 4096
138
139 #define IPSEC_DEFAULT_MAX_PENDING_INPUT_COUNT 512
140
141 #define IPSEC_KPIPE_FLAG_WAKE_PKT 0x01
142
143 static int if_ipsec_max_pending_input = IPSEC_DEFAULT_MAX_PENDING_INPUT_COUNT;
144
145 static int sysctl_if_ipsec_ring_size SYSCTL_HANDLER_ARGS;
146 static int sysctl_if_ipsec_tx_fsw_ring_size SYSCTL_HANDLER_ARGS;
147 static int sysctl_if_ipsec_rx_fsw_ring_size SYSCTL_HANDLER_ARGS;
148
149 static int if_ipsec_ring_size = IPSEC_IF_DEFAULT_RING_SIZE;
150 static int if_ipsec_tx_fsw_ring_size = IPSEC_IF_DEFAULT_TX_FSW_RING_SIZE;
151 static int if_ipsec_rx_fsw_ring_size = IPSEC_IF_DEFAULT_RX_FSW_RING_SIZE;
152
153 SYSCTL_INT(_net_ipsec, OID_AUTO, max_pending_input, CTLFLAG_LOCKED | CTLFLAG_RW, &if_ipsec_max_pending_input, 0, "");
154 SYSCTL_PROC(_net_ipsec, OID_AUTO, ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
155 &if_ipsec_ring_size, IPSEC_IF_DEFAULT_RING_SIZE, &sysctl_if_ipsec_ring_size, "I", "");
156 SYSCTL_PROC(_net_ipsec, OID_AUTO, tx_fsw_ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
157 &if_ipsec_tx_fsw_ring_size, IPSEC_IF_DEFAULT_TX_FSW_RING_SIZE, &sysctl_if_ipsec_tx_fsw_ring_size, "I", "");
158 SYSCTL_PROC(_net_ipsec, OID_AUTO, rx_fsw_ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
159 &if_ipsec_rx_fsw_ring_size, IPSEC_IF_DEFAULT_RX_FSW_RING_SIZE, &sysctl_if_ipsec_rx_fsw_ring_size, "I", "");
160
161 static int if_ipsec_debug = 0;
162 SYSCTL_INT(_net_ipsec, OID_AUTO, debug, CTLFLAG_LOCKED | CTLFLAG_RW, &if_ipsec_debug, 0, "");
163
164 static errno_t
165 ipsec_register_nexus(void);
166
167 typedef struct ipsec_nx {
168 uuid_t if_provider;
169 uuid_t if_instance;
170 uuid_t fsw_provider;
171 uuid_t fsw_instance;
172 uuid_t fsw_device;
173 uuid_t fsw_agent;
174 } *ipsec_nx_t;
175
176 static nexus_controller_t ipsec_ncd;
177 static int ipsec_ncd_refcount;
178 static uuid_t ipsec_kpipe_uuid;
179
180 #endif // IPSEC_NEXUS
181
182 /* Control block allocated for each kernel control connection */
/* Control block allocated for each kernel control connection */
struct ipsec_pcb {
	/* Linkage on the global ipsec_head list (protected by ipsec_lock) */
	TAILQ_ENTRY(ipsec_pcb) ipsec_chain;
	/* Kernel control handle and the ifnet backing this connection */
	kern_ctl_ref ipsec_ctlref;
	ifnet_t ipsec_ifp;
	/* Interface unit number; reset to 0 when the control disconnects */
	u_int32_t ipsec_unit;
	u_int32_t ipsec_unique_id;
	// These external flags can be set with IPSEC_OPT_FLAGS
	u_int32_t ipsec_external_flags;
	// These internal flags are only used within this driver
	u_int32_t ipsec_internal_flags;
	/* Input fragment size; only meaningful when ipsec_frag_size_set is true */
	u_int32_t ipsec_input_frag_size;
	bool ipsec_frag_size_set;
	/* Non-zero when the client maintains interface stats externally */
	int ipsec_ext_ifdata_stats;
	mbuf_svc_class_t ipsec_output_service_class;
	char ipsec_if_xname[IFXNAMSIZ];
	char ipsec_unique_name[IFXNAMSIZ];
	// PCB lock protects state fields, like ipsec_kpipe_count
	decl_lck_rw_data(, ipsec_pcb_lock);
	// lock to protect ipsec_pcb_data_move & ipsec_pcb_drainers
	decl_lck_mtx_data(, ipsec_pcb_data_move_lock);
	u_int32_t ipsec_pcb_data_move; /* number of data moving contexts */
	u_int32_t ipsec_pcb_drainers; /* number of threads waiting to drain */
	u_int32_t ipsec_pcb_data_path_state; /* internal state of interface data path */
	/* DSCP mapping policy applied on output */
	ipsec_dscp_mapping_t ipsec_output_dscp_mapping;

#if IPSEC_NEXUS
	// Input chain lock protects the list of input mbufs
	// The input chain lock must be taken AFTER the PCB lock if both are held
	lck_mtx_t ipsec_input_chain_lock;
	lck_mtx_t ipsec_kpipe_encrypt_lock;
	lck_mtx_t ipsec_kpipe_decrypt_lock;
	struct mbuf * ipsec_input_chain;
	struct mbuf * ipsec_input_chain_last;
	u_int32_t ipsec_input_chain_count;
	/* Nexus provider/instance UUIDs for this interface */
	struct ipsec_nx ipsec_nx;
	/* Per-kpipe channel state; up to IPSEC_IF_MAX_RING_COUNT pipes */
	u_int32_t ipsec_kpipe_count;
	pid_t ipsec_kpipe_pid;
	uuid_t ipsec_kpipe_proc_uuid;
	uuid_t ipsec_kpipe_uuid[IPSEC_IF_MAX_RING_COUNT];
	void * ipsec_kpipe_rxring[IPSEC_IF_MAX_RING_COUNT];
	void * ipsec_kpipe_txring[IPSEC_IF_MAX_RING_COUNT];
	kern_pbufpool_t ipsec_kpipe_pp;
	u_int32_t ipsec_kpipe_tx_ring_size;
	u_int32_t ipsec_kpipe_rx_ring_size;

	/* netif nexus state (set in ipsec_netif_prepare, cleared on disconnect) */
	kern_nexus_t ipsec_netif_nexus;
	kern_pbufpool_t ipsec_netif_pp;
	void * ipsec_netif_rxring[IPSEC_NETIF_MAX_RX_RING_COUNT];
	void * ipsec_netif_txring[IPSEC_NETIF_MAX_TX_RING_COUNT];
	uint64_t ipsec_netif_txring_size;

	/* Ring/slot geometry negotiated at setup time */
	u_int32_t ipsec_slot_size;
	u_int32_t ipsec_netif_ring_size;
	u_int32_t ipsec_tx_fsw_ring_size;
	u_int32_t ipsec_rx_fsw_ring_size;
	bool ipsec_use_netif;
	bool ipsec_needs_netagent;
#endif // IPSEC_NEXUS
};
242
243 /* These are internal flags not exposed outside this file */
244 #define IPSEC_FLAGS_KPIPE_ALLOCATED 1
245
246 /* data movement refcounting functions */
247 static boolean_t ipsec_data_move_begin(struct ipsec_pcb *pcb);
248 static void ipsec_data_move_end(struct ipsec_pcb *pcb);
249 static void ipsec_wait_data_move_drain(struct ipsec_pcb *pcb);
250
251 /* Data path states */
252 #define IPSEC_PCB_DATA_PATH_READY 0x1
253
254 /* Macros to set/clear/test data path states */
255 #define IPSEC_SET_DATA_PATH_READY(_pcb) ((_pcb)->ipsec_pcb_data_path_state |= IPSEC_PCB_DATA_PATH_READY)
256 #define IPSEC_CLR_DATA_PATH_READY(_pcb) ((_pcb)->ipsec_pcb_data_path_state &= ~IPSEC_PCB_DATA_PATH_READY)
257 #define IPSEC_IS_DATA_PATH_READY(_pcb) (((_pcb)->ipsec_pcb_data_path_state & IPSEC_PCB_DATA_PATH_READY) != 0)
258
259 #if IPSEC_NEXUS
260 /* Macros to clear/set/test flags. */
261 static inline void
ipsec_flag_set(struct ipsec_pcb * pcb,uint32_t flag)262 ipsec_flag_set(struct ipsec_pcb *pcb, uint32_t flag)
263 {
264 pcb->ipsec_internal_flags |= flag;
265 }
266 static inline void
ipsec_flag_clr(struct ipsec_pcb * pcb,uint32_t flag)267 ipsec_flag_clr(struct ipsec_pcb *pcb, uint32_t flag)
268 {
269 pcb->ipsec_internal_flags &= ~flag;
270 }
271
272 static inline bool
ipsec_flag_isset(struct ipsec_pcb * pcb,uint32_t flag)273 ipsec_flag_isset(struct ipsec_pcb *pcb, uint32_t flag)
274 {
275 return !!(pcb->ipsec_internal_flags & flag);
276 }
277 #endif // IPSEC_NEXUS
278
279 TAILQ_HEAD(ipsec_list, ipsec_pcb) ipsec_head;
280
281 static ZONE_DEFINE(ipsec_pcb_zone, "net.if_ipsec",
282 sizeof(struct ipsec_pcb), ZC_ZFREE_CLEARMEM);
283
284 #define IPSECQ_MAXLEN 256
285
286 #if IPSEC_NEXUS
287 static int
288 sysctl_if_ipsec_ring_size SYSCTL_HANDLER_ARGS
289 {
290 #pragma unused(arg1, arg2)
291 int value = if_ipsec_ring_size;
292
293 int error = sysctl_handle_int(oidp, &value, 0, req);
294 if (error || !req->newptr) {
295 return error;
296 }
297
298 if (value < IPSEC_IF_MIN_RING_SIZE ||
299 value > IPSEC_IF_MAX_RING_SIZE) {
300 return EINVAL;
301 }
302
303 if_ipsec_ring_size = value;
304
305 return 0;
306 }
307
308 static int
309 sysctl_if_ipsec_tx_fsw_ring_size SYSCTL_HANDLER_ARGS
310 {
311 #pragma unused(arg1, arg2)
312 int value = if_ipsec_tx_fsw_ring_size;
313
314 int error = sysctl_handle_int(oidp, &value, 0, req);
315 if (error || !req->newptr) {
316 return error;
317 }
318
319 if (value < IPSEC_IF_MIN_RING_SIZE ||
320 value > IPSEC_IF_MAX_RING_SIZE) {
321 return EINVAL;
322 }
323
324 if_ipsec_tx_fsw_ring_size = value;
325
326 return 0;
327 }
328
329 static int
330 sysctl_if_ipsec_rx_fsw_ring_size SYSCTL_HANDLER_ARGS
331 {
332 #pragma unused(arg1, arg2)
333 int value = if_ipsec_rx_fsw_ring_size;
334
335 int error = sysctl_handle_int(oidp, &value, 0, req);
336 if (error || !req->newptr) {
337 return error;
338 }
339
340 if (value < IPSEC_IF_MIN_RING_SIZE ||
341 value > IPSEC_IF_MAX_RING_SIZE) {
342 return EINVAL;
343 }
344
345 if_ipsec_rx_fsw_ring_size = value;
346
347 return 0;
348 }
349
350
351 static inline bool
ipsec_in_wmm_mode(struct ipsec_pcb * pcb)352 ipsec_in_wmm_mode(struct ipsec_pcb *pcb)
353 {
354 return pcb->ipsec_kpipe_count == IPSEC_IF_WMM_RING_COUNT;
355 }
356
357 #endif // IPSEC_NEXUS
358
359 errno_t
ipsec_register_control(void)360 ipsec_register_control(void)
361 {
362 struct kern_ctl_reg kern_ctl;
363 errno_t result = 0;
364
365 #if IPSEC_NEXUS
366 ipsec_register_nexus();
367 #endif // IPSEC_NEXUS
368
369 TAILQ_INIT(&ipsec_head);
370
371 bzero(&kern_ctl, sizeof(kern_ctl));
372 strlcpy(kern_ctl.ctl_name, IPSEC_CONTROL_NAME, sizeof(kern_ctl.ctl_name));
373 kern_ctl.ctl_name[sizeof(kern_ctl.ctl_name) - 1] = 0;
374 kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED | CTL_FLAG_REG_SETUP; /* Require root */
375 kern_ctl.ctl_sendsize = 64 * 1024;
376 kern_ctl.ctl_recvsize = 64 * 1024;
377 kern_ctl.ctl_setup = ipsec_ctl_setup;
378 kern_ctl.ctl_bind = ipsec_ctl_bind;
379 kern_ctl.ctl_connect = ipsec_ctl_connect;
380 kern_ctl.ctl_disconnect = ipsec_ctl_disconnect;
381 kern_ctl.ctl_send = ipsec_ctl_send;
382 kern_ctl.ctl_setopt = ipsec_ctl_setopt;
383 kern_ctl.ctl_getopt = ipsec_ctl_getopt;
384
385 result = ctl_register(&kern_ctl, &ipsec_kctlref);
386 if (result != 0) {
387 os_log_error(OS_LOG_DEFAULT, "ipsec_register_control - ctl_register failed: %d\n", result);
388 return result;
389 }
390
391 /* Register the protocol plumbers */
392 if ((result = proto_register_plumber(PF_INET, IFNET_FAMILY_IPSEC,
393 ipsec_attach_proto, NULL)) != 0) {
394 os_log_error(OS_LOG_DEFAULT, "ipsec_register_control - proto_register_plumber(PF_INET, IFNET_FAMILY_IPSEC) failed: %d\n",
395 result);
396 ctl_deregister(ipsec_kctlref);
397 return result;
398 }
399
400 /* Register the protocol plumbers */
401 if ((result = proto_register_plumber(PF_INET6, IFNET_FAMILY_IPSEC,
402 ipsec_attach_proto, NULL)) != 0) {
403 proto_unregister_plumber(PF_INET, IFNET_FAMILY_IPSEC);
404 ctl_deregister(ipsec_kctlref);
405 os_log_error(OS_LOG_DEFAULT, "ipsec_register_control - proto_register_plumber(PF_INET6, IFNET_FAMILY_IPSEC) failed: %d\n",
406 result);
407 return result;
408 }
409
410 return 0;
411 }
412
413 /* Helpers */
414 int
ipsec_interface_isvalid(ifnet_t interface)415 ipsec_interface_isvalid(ifnet_t interface)
416 {
417 struct ipsec_pcb *pcb = NULL;
418
419 if (interface == NULL) {
420 return 0;
421 }
422
423 pcb = ifnet_softc(interface);
424
425 if (pcb == NULL) {
426 return 0;
427 }
428
429 /* When ctl disconnects, ipsec_unit is set to 0 */
430 if (pcb->ipsec_unit == 0) {
431 return 0;
432 }
433
434 return 1;
435 }
436
437 #if IPSEC_NEXUS
438 boolean_t
ipsec_interface_needs_netagent(ifnet_t interface)439 ipsec_interface_needs_netagent(ifnet_t interface)
440 {
441 struct ipsec_pcb *pcb = NULL;
442
443 if (interface == NULL) {
444 return FALSE;
445 }
446
447 pcb = ifnet_softc(interface);
448
449 if (pcb == NULL) {
450 return FALSE;
451 }
452
453 return pcb->ipsec_needs_netagent == true;
454 }
455 #endif // IPSEC_NEXUS
456
457 static errno_t
ipsec_ifnet_set_attrs(ifnet_t ifp)458 ipsec_ifnet_set_attrs(ifnet_t ifp)
459 {
460 /* Set flags and additional information. */
461 ifnet_set_mtu(ifp, 1500);
462 ifnet_set_flags(ifp, IFF_UP | IFF_MULTICAST | IFF_POINTOPOINT, 0xffff);
463
464 /* The interface must generate its own IPv6 LinkLocal address,
465 * if possible following the recommendation of RFC2472 to the 64bit interface ID
466 */
467 ifnet_set_eflags(ifp, IFEF_NOAUTOIPV6LL, IFEF_NOAUTOIPV6LL);
468
469 #if !IPSEC_NEXUS
470 /* Reset the stats in case as the interface may have been recycled */
471 struct ifnet_stats_param stats;
472 bzero(&stats, sizeof(struct ifnet_stats_param));
473 ifnet_set_stat(ifp, &stats);
474 #endif // !IPSEC_NEXUS
475
476 return 0;
477 }
478
479 #if IPSEC_NEXUS
480
481 static uuid_t ipsec_nx_dom_prov;
482
483 static errno_t
ipsec_nxdp_init(__unused kern_nexus_domain_provider_t domprov)484 ipsec_nxdp_init(__unused kern_nexus_domain_provider_t domprov)
485 {
486 return 0;
487 }
488
489 static void
ipsec_nxdp_fini(__unused kern_nexus_domain_provider_t domprov)490 ipsec_nxdp_fini(__unused kern_nexus_domain_provider_t domprov)
491 {
492 // Ignore
493 }
494
495 static errno_t
ipsec_register_nexus(void)496 ipsec_register_nexus(void)
497 {
498 const struct kern_nexus_domain_provider_init dp_init = {
499 .nxdpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
500 .nxdpi_flags = 0,
501 .nxdpi_init = ipsec_nxdp_init,
502 .nxdpi_fini = ipsec_nxdp_fini
503 };
504 errno_t err = 0;
505
506 /* ipsec_nxdp_init() is called before this function returns */
507 err = kern_nexus_register_domain_provider(NEXUS_TYPE_NET_IF,
508 (const uint8_t *) "com.apple.ipsec",
509 &dp_init, sizeof(dp_init),
510 &ipsec_nx_dom_prov);
511 if (err != 0) {
512 os_log_error(OS_LOG_DEFAULT, "%s: failed to register domain provider\n", __func__);
513 return err;
514 }
515 return 0;
516 }
517
518 static errno_t
ipsec_netif_prepare(kern_nexus_t nexus,ifnet_t ifp)519 ipsec_netif_prepare(kern_nexus_t nexus, ifnet_t ifp)
520 {
521 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
522 pcb->ipsec_netif_nexus = nexus;
523 return ipsec_ifnet_set_attrs(ifp);
524 }
525
526 static errno_t
ipsec_nexus_pre_connect(kern_nexus_provider_t nxprov,proc_t p,kern_nexus_t nexus,nexus_port_t nexus_port,kern_channel_t channel,void ** ch_ctx)527 ipsec_nexus_pre_connect(kern_nexus_provider_t nxprov,
528 proc_t p, kern_nexus_t nexus,
529 nexus_port_t nexus_port, kern_channel_t channel, void **ch_ctx)
530 {
531 #pragma unused(nxprov, p)
532 #pragma unused(nexus, nexus_port, channel, ch_ctx)
533 return 0;
534 }
535
536 static errno_t
ipsec_nexus_connected(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel)537 ipsec_nexus_connected(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
538 kern_channel_t channel)
539 {
540 #pragma unused(nxprov, channel)
541 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
542 boolean_t ok = ifnet_is_attached(pcb->ipsec_ifp, 1);
543 /* Mark the data path as ready */
544 if (ok) {
545 lck_mtx_lock(&pcb->ipsec_pcb_data_move_lock);
546 IPSEC_SET_DATA_PATH_READY(pcb);
547 lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
548 }
549 return ok ? 0 : ENXIO;
550 }
551
552 static void
ipsec_nexus_pre_disconnect(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel)553 ipsec_nexus_pre_disconnect(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
554 kern_channel_t channel)
555 {
556 #pragma unused(nxprov, channel)
557 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
558
559 VERIFY(pcb->ipsec_kpipe_count != 0);
560
561 /* Wait until all threads in the data paths are done. */
562 ipsec_wait_data_move_drain(pcb);
563 }
564
565 static void
ipsec_netif_pre_disconnect(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel)566 ipsec_netif_pre_disconnect(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
567 kern_channel_t channel)
568 {
569 #pragma unused(nxprov, channel)
570 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
571
572 /* Wait until all threads in the data paths are done. */
573 ipsec_wait_data_move_drain(pcb);
574 }
575
576 static void
ipsec_nexus_disconnected(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel)577 ipsec_nexus_disconnected(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
578 kern_channel_t channel)
579 {
580 #pragma unused(nxprov, channel)
581 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
582 if (pcb->ipsec_netif_nexus == nexus) {
583 pcb->ipsec_netif_nexus = NULL;
584 }
585 ifnet_decr_iorefcnt(pcb->ipsec_ifp);
586 }
587
588 static errno_t
ipsec_kpipe_ring_init(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel,kern_channel_ring_t ring,boolean_t is_tx_ring,void ** ring_ctx)589 ipsec_kpipe_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
590 kern_channel_t channel, kern_channel_ring_t ring, boolean_t is_tx_ring,
591 void **ring_ctx)
592 {
593 #pragma unused(nxprov)
594 #pragma unused(channel)
595 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
596 uint8_t ring_idx;
597
598 for (ring_idx = 0; ring_idx < pcb->ipsec_kpipe_count; ring_idx++) {
599 if (!uuid_compare(channel->ch_info->cinfo_nx_uuid, pcb->ipsec_kpipe_uuid[ring_idx])) {
600 break;
601 }
602 }
603
604 if (ring_idx == pcb->ipsec_kpipe_count) {
605 uuid_string_t uuidstr;
606 uuid_unparse(channel->ch_info->cinfo_nx_uuid, uuidstr);
607 os_log_error(OS_LOG_DEFAULT, "%s: %s cannot find channel %s\n", __func__, pcb->ipsec_if_xname, uuidstr);
608 return ENOENT;
609 }
610
611 *ring_ctx = (void *)(uintptr_t)ring_idx;
612
613 if (!is_tx_ring) {
614 VERIFY(pcb->ipsec_kpipe_rxring[ring_idx] == NULL);
615 pcb->ipsec_kpipe_rxring[ring_idx] = ring;
616 } else {
617 VERIFY(pcb->ipsec_kpipe_txring[ring_idx] == NULL);
618 pcb->ipsec_kpipe_txring[ring_idx] = ring;
619 }
620 return 0;
621 }
622
623 static void
ipsec_kpipe_ring_fini(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring)624 ipsec_kpipe_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
625 kern_channel_ring_t ring)
626 {
627 #pragma unused(nxprov)
628 bool found = false;
629 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
630
631 for (unsigned int i = 0; i < pcb->ipsec_kpipe_count; i++) {
632 if (pcb->ipsec_kpipe_rxring[i] == ring) {
633 pcb->ipsec_kpipe_rxring[i] = NULL;
634 found = true;
635 } else if (pcb->ipsec_kpipe_txring[i] == ring) {
636 pcb->ipsec_kpipe_txring[i] = NULL;
637 found = true;
638 }
639 }
640 VERIFY(found);
641 }
642
/*
 * kpipe TX sync: the user client has queued packets on the kpipe TX
 * ring. This handler does not move the packets itself; it only checks
 * that there is work pending and kicks the netif RX ring, which will
 * pull from the kpipe TX ring during its own sync.
 * Always returns 0 (a stopped data path is not an error to the caller).
 */
static errno_t
ipsec_kpipe_sync_tx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t tx_ring, uint32_t flags)
{
#pragma unused(nxprov)
#pragma unused(flags)
	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);

	/* Take a data-path reference; bail quietly if the path is stopped. */
	if (!ipsec_data_move_begin(pcb)) {
		os_log_info(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
		return 0;
	}

	lck_rw_lock_shared(&pcb->ipsec_pcb_lock);

	/* Without an allocated kpipe there is nothing to sync. */
	if (!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		ipsec_data_move_end(pcb);
		return 0;
	}

	VERIFY(pcb->ipsec_kpipe_count);

	kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
	if (tx_slot == NULL) {
		// Nothing to write, bail
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		ipsec_data_move_end(pcb);
		return 0;
	}

	// Signal the netif ring to read
	kern_channel_ring_t rx_ring = pcb->ipsec_netif_rxring[0];
	/* Drop the PCB lock before notifying to respect lock ordering. */
	lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);

	if (rx_ring != NULL) {
		kern_channel_notify(rx_ring, 0);
	}

	ipsec_data_move_end(pcb);
	return 0;
}
685
/*
 * ipsec_encrypt_mbuf - run an outbound mbuf through the IPsec transform
 * for `interface`.
 *
 * Takes ownership of `data`. Returns the encrypted mbuf chain on
 * success. Returns NULL both on failure (the mbuf has been freed) and
 * when the packet was tunneled into the other address family (the mbuf
 * was handed off / lost - see the TODO notes below), so callers cannot
 * distinguish the two cases from the return value alone.
 */
static mbuf_t
ipsec_encrypt_mbuf(ifnet_t interface,
    mbuf_t data)
{
	struct ipsec_output_state ipsec_state;
	int error = 0;
	uint32_t af;	/* outer address family; NOTE(review): assigned but not read in this function */

	// Make sure this packet isn't looping through the interface
	if (necp_get_last_interface_index_from_packet(data) == interface->if_index) {
		error = -1;
		goto ipsec_output_err;
	}

	// Mark the interface so NECP can evaluate tunnel policy
	necp_mark_packet_from_interface(data, interface);

	/* Dispatch on the IP version nibble of the cleartext header. */
	struct ip *ip = mtod(data, struct ip *);
	u_int ip_version = ip->ip_v;

	switch (ip_version) {
	case 4: {
		af = AF_INET;

		memset(&ipsec_state, 0, sizeof(ipsec_state));
		ipsec_state.m = data;
		ipsec_state.dst = (struct sockaddr *)&ip->ip_dst;
		memset(&ipsec_state.ro, 0, sizeof(ipsec_state.ro));

		error = ipsec4_interface_output(&ipsec_state, interface);
		if (error == 0 && ipsec_state.tunneled == 6) {
			// Tunneled in IPv6 - packet is gone
			// TODO: Don't lose mbuf
			data = NULL;
			goto done;
		}

		/* The transform may have replaced the mbuf chain. */
		data = ipsec_state.m;
		if (error || data == NULL) {
			if (error) {
				os_log_error(OS_LOG_DEFAULT, "ipsec_encrypt_mbuf: ipsec4_output error %d\n", error);
			}
			goto ipsec_output_err;
		}
		goto done;
	}
	case 6: {
		af = AF_INET6;

		/* Pull the IPv6 header into its own mbuf before transforming. */
		data = ipsec6_splithdr(data);
		if (data == NULL) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_encrypt_mbuf: ipsec6_splithdr returned NULL\n");
			goto ipsec_output_err;
		}

		struct ip6_hdr *ip6 = mtod(data, struct ip6_hdr *);

		memset(&ipsec_state, 0, sizeof(ipsec_state));
		ipsec_state.m = data;
		ipsec_state.dst = (struct sockaddr *)&ip6->ip6_dst;
		memset(&ipsec_state.ro, 0, sizeof(ipsec_state.ro));

		error = ipsec6_interface_output(&ipsec_state, interface, &ip6->ip6_nxt, ipsec_state.m);
		if (error == 0 && ipsec_state.tunneled == 4) {
			// Tunneled in IPv4 - packet is gone
			// TODO: Don't lose mbuf
			data = NULL;
			goto done;
		}
		/* The transform may have replaced the mbuf chain. */
		data = ipsec_state.m;
		if (error || data == NULL) {
			if (error) {
				os_log_error(OS_LOG_DEFAULT, "ipsec_encrypt_mbuf: ipsec6_output error %d\n", error);
			}
			goto ipsec_output_err;
		}
		goto done;
	}
	default: {
		os_log_error(OS_LOG_DEFAULT, "ipsec_encrypt_mbuf: Received unknown packet version %d\n", ip_version);
		error = -1;
		goto ipsec_output_err;
	}
	}

done:
	return data;

ipsec_output_err:
	if (data) {
		mbuf_freem(data);
	}
	return NULL;
}
780
781 static errno_t
ipsec_kpipe_sync_rx(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t rx_ring,uint32_t flags)782 ipsec_kpipe_sync_rx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
783 kern_channel_ring_t rx_ring, uint32_t flags)
784 {
785 #pragma unused(nxprov)
786 #pragma unused(flags)
787 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
788 struct kern_channel_ring_stat_increment rx_ring_stats;
789 uint8_t ring_idx = (uint8_t)(uintptr_t)kern_channel_ring_get_context(rx_ring);
790
791 if (!ipsec_data_move_begin(pcb)) {
792 os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
793 return 0;
794 }
795
796 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
797
798 if (!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
799 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
800 ipsec_data_move_end(pcb);
801 return 0;
802 }
803
804 VERIFY(pcb->ipsec_kpipe_count);
805 VERIFY(ring_idx <= pcb->ipsec_kpipe_count);
806
807 // Reclaim user-released slots
808 (void) kern_channel_reclaim(rx_ring);
809
810 uint32_t avail = kern_channel_available_slot_count(rx_ring);
811 if (avail == 0) {
812 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
813 os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d no room in rx_ring\n", __func__,
814 pcb->ipsec_if_xname, rx_ring->ckr_name, ring_idx);
815 ipsec_data_move_end(pcb);
816 return 0;
817 }
818
819 kern_channel_ring_t tx_ring = pcb->ipsec_netif_txring[ring_idx];
820 if (tx_ring == NULL) {
821 // Net-If TX ring not set up yet, nothing to read
822 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
823 os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d bad netif_txring 1\n", __func__,
824 pcb->ipsec_if_xname, rx_ring->ckr_name, ring_idx);
825 ipsec_data_move_end(pcb);
826 return 0;
827 }
828
829 struct netif_stats *nifs = &NX_NETIF_PRIVATE(pcb->ipsec_netif_nexus)->nif_stats;
830
831 // Unlock ipsec before entering ring
832 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
833
834 (void)kr_enter(tx_ring, TRUE);
835
836 // Lock again after entering and validate
837 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
838 if (tx_ring != pcb->ipsec_netif_txring[ring_idx]) {
839 // Ring no longer valid
840 // Unlock first, then exit ring
841 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
842 kr_exit(tx_ring);
843 os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d bad netif_txring 2\n", __func__,
844 pcb->ipsec_if_xname, rx_ring->ckr_name, ring_idx);
845 ipsec_data_move_end(pcb);
846 return 0;
847 }
848
849 struct kern_channel_ring_stat_increment tx_ring_stats;
850 bzero(&tx_ring_stats, sizeof(tx_ring_stats));
851 kern_channel_slot_t tx_pslot = NULL;
852 kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
853 if (tx_slot == NULL) {
854 // Nothing to read, don't bother signalling
855 // Unlock first, then exit ring
856 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
857 kr_exit(tx_ring);
858 ipsec_data_move_end(pcb);
859 return 0;
860 }
861
862 struct kern_pbufpool *rx_pp = rx_ring->ckr_pp;
863 VERIFY(rx_pp != NULL);
864 struct kern_pbufpool *tx_pp = tx_ring->ckr_pp;
865 VERIFY(tx_pp != NULL);
866 bzero(&rx_ring_stats, sizeof(rx_ring_stats));
867 kern_channel_slot_t rx_pslot = NULL;
868 kern_channel_slot_t rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);
869 kern_packet_t tx_chain_ph = 0;
870
871 while (rx_slot != NULL && tx_slot != NULL) {
872 size_t length = 0;
873 mbuf_t data = NULL;
874 errno_t error = 0;
875
876 // Allocate rx packet
877 kern_packet_t rx_ph = 0;
878 error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
879 if (__improbable(error != 0)) {
880 os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s: failed to allocate packet\n",
881 pcb->ipsec_ifp->if_xname);
882 break;
883 }
884
885 kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);
886
887 if (tx_ph == 0) {
888 // Advance TX ring
889 tx_pslot = tx_slot;
890 tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
891 kern_pbufpool_free(rx_pp, rx_ph);
892 continue;
893 }
894 (void) kern_channel_slot_detach_packet(tx_ring, tx_slot, tx_ph);
895 if (tx_chain_ph != 0) {
896 kern_packet_append(tx_ph, tx_chain_ph);
897 }
898 tx_chain_ph = tx_ph;
899
900 // Advance TX ring
901 tx_pslot = tx_slot;
902 tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
903
904 kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
905 VERIFY(tx_buf != NULL);
906 uint8_t *tx_baddr = kern_buflet_get_data_address(tx_buf);
907 VERIFY(tx_baddr != NULL);
908 tx_baddr += kern_buflet_get_data_offset(tx_buf);
909
910 bpf_tap_packet_out(pcb->ipsec_ifp, DLT_RAW, tx_ph, NULL, 0);
911
912 length = MIN(kern_packet_get_data_length(tx_ph),
913 pcb->ipsec_slot_size);
914
915 // Increment TX stats
916 tx_ring_stats.kcrsi_slots_transferred++;
917 tx_ring_stats.kcrsi_bytes_transferred += length;
918
919 if (length > 0) {
920 error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, &data);
921 if (error == 0) {
922 error = mbuf_copyback(data, 0, length, tx_baddr, MBUF_DONTWAIT);
923 if (error == 0) {
924 // Encrypt and send packet
925 lck_mtx_lock(&pcb->ipsec_kpipe_encrypt_lock);
926 data = ipsec_encrypt_mbuf(pcb->ipsec_ifp, data);
927 lck_mtx_unlock(&pcb->ipsec_kpipe_encrypt_lock);
928 } else {
929 os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s - mbuf_copyback(%zu) error %d\n", pcb->ipsec_ifp->if_xname, length, error);
930 STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
931 STATS_INC(nifs, NETIF_STATS_DROP);
932 mbuf_freem(data);
933 data = NULL;
934 }
935 } else {
936 os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s - mbuf_gethdr error %d\n", pcb->ipsec_ifp->if_xname, error);
937 STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
938 STATS_INC(nifs, NETIF_STATS_DROP);
939 }
940 } else {
941 os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s - 0 length packet\n", pcb->ipsec_ifp->if_xname);
942 STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
943 STATS_INC(nifs, NETIF_STATS_DROP);
944 }
945
946 if (data == NULL) {
947 os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s: no encrypted packet to send\n", pcb->ipsec_ifp->if_xname);
948 kern_pbufpool_free(rx_pp, rx_ph);
949 break;
950 }
951
952 length = mbuf_pkthdr_len(data);
953 if (length > PP_BUF_SIZE_DEF(rx_pp)) {
954 // Flush data
955 mbuf_freem(data);
956 kern_pbufpool_free(rx_pp, rx_ph);
957 os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s: encrypted packet length %zu > %u\n",
958 pcb->ipsec_ifp->if_xname, length, PP_BUF_SIZE_DEF(rx_pp));
959 continue;
960 }
961
962 // Fillout rx packet
963 kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
964 VERIFY(rx_buf != NULL);
965 void *rx_baddr = kern_buflet_get_data_address(rx_buf);
966 VERIFY(rx_baddr != NULL);
967
968 // Copy-in data from mbuf to buflet
969 mbuf_copydata(data, 0, length, (void *)rx_baddr);
970 kern_packet_clear_flow_uuid(rx_ph); // Zero flow id
971
972 // Finalize and attach the packet
973 error = kern_buflet_set_data_offset(rx_buf, 0);
974 VERIFY(error == 0);
975 error = kern_buflet_set_data_length(rx_buf, (uint16_t)length);
976 VERIFY(error == 0);
977 error = kern_packet_finalize(rx_ph);
978 VERIFY(error == 0);
979 error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
980 VERIFY(error == 0);
981
982 STATS_INC(nifs, NETIF_STATS_TX_PACKETS);
983 STATS_INC(nifs, NETIF_STATS_TX_COPY_DIRECT);
984
985 rx_ring_stats.kcrsi_slots_transferred++;
986 rx_ring_stats.kcrsi_bytes_transferred += length;
987
988 if (!pcb->ipsec_ext_ifdata_stats) {
989 ifnet_stat_increment_out(pcb->ipsec_ifp, 1, (uint16_t)length, 0);
990 }
991
992 mbuf_freem(data);
993
994 rx_pslot = rx_slot;
995 rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
996 }
997
998 if (rx_pslot) {
999 kern_channel_advance_slot(rx_ring, rx_pslot);
1000 kern_channel_increment_ring_net_stats(rx_ring, pcb->ipsec_ifp, &rx_ring_stats);
1001 }
1002
1003 if (tx_chain_ph != 0) {
1004 kern_pbufpool_free_chain(tx_pp, tx_chain_ph);
1005 }
1006
1007 if (tx_pslot) {
1008 kern_channel_advance_slot(tx_ring, tx_pslot);
1009 kern_channel_increment_ring_net_stats(tx_ring, pcb->ipsec_ifp, &tx_ring_stats);
1010 (void)kern_channel_reclaim(tx_ring);
1011 }
1012
1013 /* always reenable output */
1014 errno_t error = ifnet_enable_output(pcb->ipsec_ifp);
1015 if (error != 0) {
1016 os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx: ifnet_enable_output returned error %d\n", error);
1017 }
1018
1019 // Unlock first, then exit ring
1020 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
1021
1022 if (tx_pslot != NULL) {
1023 kern_channel_notify(tx_ring, 0);
1024 }
1025 kr_exit(tx_ring);
1026
1027 ipsec_data_move_end(pcb);
1028 return 0;
1029 }
1030
1031 static uint8_t
ipsec_find_tx_ring_by_svc(kern_packet_svc_class_t svc_class)1032 ipsec_find_tx_ring_by_svc(kern_packet_svc_class_t svc_class)
1033 {
1034 switch (svc_class) {
1035 case KPKT_SC_VO: {
1036 return 0;
1037 }
1038 case KPKT_SC_VI: {
1039 return 1;
1040 }
1041 case KPKT_SC_BE: {
1042 return 2;
1043 }
1044 case KPKT_SC_BK: {
1045 return 3;
1046 }
1047 default: {
1048 VERIFY(0);
1049 return 0;
1050 }
1051 }
1052 }
1053
1054 static errno_t
ipsec_netif_ring_init(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel,kern_channel_ring_t ring,boolean_t is_tx_ring,void ** ring_ctx)1055 ipsec_netif_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
1056 kern_channel_t channel, kern_channel_ring_t ring, boolean_t is_tx_ring,
1057 void **ring_ctx)
1058 {
1059 #pragma unused(nxprov)
1060 #pragma unused(channel)
1061 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
1062
1063 if (!is_tx_ring) {
1064 VERIFY(pcb->ipsec_netif_rxring[0] == NULL);
1065 pcb->ipsec_netif_rxring[0] = ring;
1066 } else {
1067 uint8_t ring_idx = 0;
1068 if (ipsec_in_wmm_mode(pcb)) {
1069 int err;
1070 kern_packet_svc_class_t svc_class;
1071 err = kern_channel_get_service_class(ring, &svc_class);
1072 VERIFY(err == 0);
1073 ring_idx = ipsec_find_tx_ring_by_svc(svc_class);
1074 VERIFY(ring_idx < IPSEC_IF_WMM_RING_COUNT);
1075 }
1076
1077 *ring_ctx = (void *)(uintptr_t)ring_idx;
1078
1079 VERIFY(pcb->ipsec_netif_txring[ring_idx] == NULL);
1080 pcb->ipsec_netif_txring[ring_idx] = ring;
1081 }
1082 return 0;
1083 }
1084
1085 static void
ipsec_netif_ring_fini(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring)1086 ipsec_netif_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
1087 kern_channel_ring_t ring)
1088 {
1089 #pragma unused(nxprov)
1090 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
1091 bool found = false;
1092
1093 for (int i = 0; i < IPSEC_NETIF_MAX_RX_RING_COUNT; i++) {
1094 if (pcb->ipsec_netif_rxring[i] == ring) {
1095 pcb->ipsec_netif_rxring[i] = NULL;
1096 VERIFY(!found);
1097 found = true;
1098 }
1099 }
1100 for (int i = 0; i < IPSEC_NETIF_MAX_TX_RING_COUNT; i++) {
1101 if (pcb->ipsec_netif_txring[i] == ring) {
1102 pcb->ipsec_netif_txring[i] = NULL;
1103 VERIFY(!found);
1104 found = true;
1105 }
1106 }
1107 VERIFY(found);
1108 }
1109
1110 static bool
ipsec_netif_check_policy(ifnet_t interface,mbuf_t data)1111 ipsec_netif_check_policy(ifnet_t interface, mbuf_t data)
1112 {
1113 necp_kernel_policy_result necp_result = 0;
1114 necp_kernel_policy_result_parameter necp_result_parameter = {};
1115 uint32_t necp_matched_policy_id = 0;
1116 struct ip_out_args args4 = { };
1117 struct ip6_out_args args6 = { };
1118
1119 // This packet has been marked with IP level policy, do not mark again.
1120 if (data && data->m_pkthdr.necp_mtag.necp_policy_id >= NECP_KERNEL_POLICY_ID_FIRST_VALID_IP) {
1121 return true;
1122 }
1123
1124 size_t length = mbuf_pkthdr_len(data);
1125 if (length < sizeof(struct ip)) {
1126 return false;
1127 }
1128
1129 struct ip *ip = mtod(data, struct ip *);
1130 u_int ip_version = ip->ip_v;
1131 switch (ip_version) {
1132 case 4: {
1133 if (interface != NULL) {
1134 args4.ipoa_flags |= IPOAF_BOUND_IF;
1135 args4.ipoa_boundif = interface->if_index;
1136 }
1137 necp_matched_policy_id = necp_ip_output_find_policy_match(data, IP_OUTARGS, &args4, NULL,
1138 &necp_result, &necp_result_parameter);
1139 break;
1140 }
1141 case 6: {
1142 if (interface != NULL) {
1143 args6.ip6oa_flags |= IP6OAF_BOUND_IF;
1144 args6.ip6oa_boundif = interface->if_index;
1145 }
1146 necp_matched_policy_id = necp_ip6_output_find_policy_match(data, IPV6_OUTARGS, &args6, NULL,
1147 &necp_result, &necp_result_parameter);
1148 break;
1149 }
1150 default: {
1151 return false;
1152 }
1153 }
1154
1155 if (necp_result == NECP_KERNEL_POLICY_RESULT_DROP ||
1156 necp_result == NECP_KERNEL_POLICY_RESULT_SOCKET_DIVERT) {
1157 /* Drop and flow divert packets should be blocked at the IP layer */
1158 return false;
1159 }
1160
1161 necp_mark_packet_from_ip(data, necp_matched_policy_id);
1162 return true;
1163 }
1164
/*
 * ipsec_netif_sync_tx - netif nexus TX sync callback.
 *
 * Drains the netif TX ring.  If kernel pipes are allocated, packets are
 * not processed here: the matching kpipe RX ring is simply notified so
 * the kpipe consumer pulls them (the ring index stored in the ring
 * context selects which kpipe ring).  Otherwise each TX packet is
 * copied into an mbuf, checked against NECP policy, and handed to
 * ipsec_output() for encryption via the BSD stack.  Detached TX
 * packets are collected on tx_chain_ph and freed in one bulk call.
 *
 * Called with no locks held; takes ipsec_pcb_lock shared for the
 * duration (dropped early on the kpipe-notify path).
 */
static errno_t
ipsec_netif_sync_tx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t tx_ring, uint32_t flags)
{
#pragma unused(nxprov)
#pragma unused(flags)
	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);

	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;

	// Refuse to run while the data path is stopped/being torn down.
	if (!ipsec_data_move_begin(pcb)) {
		os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
		return 0;
	}

	lck_rw_lock_shared(&pcb->ipsec_pcb_lock);

	struct kern_channel_ring_stat_increment tx_ring_stats;
	bzero(&tx_ring_stats, sizeof(tx_ring_stats));
	kern_channel_slot_t tx_pslot = NULL;
	kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
	kern_packet_t tx_chain_ph = 0;

	STATS_INC(nifs, NETIF_STATS_TX_SYNC);

	if (tx_slot == NULL) {
		// Nothing to write, don't bother signalling
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		ipsec_data_move_end(pcb);
		return 0;
	}

	if (pcb->ipsec_kpipe_count &&
	    ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
		// Select the corresponding kpipe rx ring
		uint8_t ring_idx = (uint8_t)(uintptr_t)kern_channel_ring_get_context(tx_ring);
		VERIFY(ring_idx < IPSEC_IF_MAX_RING_COUNT);
		kern_channel_ring_t rx_ring = pcb->ipsec_kpipe_rxring[ring_idx];

		// Unlock while calling notify
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);

		// Signal the kernel pipe ring to read
		if (rx_ring != NULL) {
			kern_channel_notify(rx_ring, 0);
		}

		ipsec_data_move_end(pcb);
		return 0;
	}

	// If we're here, we're injecting into the BSD stack
	while (tx_slot != NULL) {
		size_t length = 0;
		mbuf_t data = NULL;

		kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);

		if (tx_ph == 0) {
			// Advance TX ring
			tx_pslot = tx_slot;
			tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
			continue;
		}
		// Detach the packet and prepend it to the free-chain; the whole
		// chain is released in one kern_pbufpool_free_chain() below.
		(void) kern_channel_slot_detach_packet(tx_ring, tx_slot, tx_ph);
		if (tx_chain_ph != 0) {
			kern_packet_append(tx_ph, tx_chain_ph);
		}
		tx_chain_ph = tx_ph;

		// Advance TX ring
		tx_pslot = tx_slot;
		tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);

		kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
		VERIFY(tx_buf != NULL);
		uint8_t *tx_baddr = kern_buflet_get_data_address(tx_buf);
		VERIFY(tx_baddr != 0);
		tx_baddr += kern_buflet_get_data_offset(tx_buf);

		bpf_tap_packet_out(pcb->ipsec_ifp, DLT_RAW, tx_ph, NULL, 0);

		// Cap the copy at the configured slot size.
		length = MIN(kern_packet_get_data_length(tx_ph),
		    pcb->ipsec_slot_size);

		if (length > 0) {
			errno_t error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, &data);
			if (error == 0) {
				error = mbuf_copyback(data, 0, length, tx_baddr, MBUF_DONTWAIT);
				if (error == 0) {
					// Mark packet from policy
					uint32_t policy_id = kern_packet_get_policy_id(tx_ph);
					necp_mark_packet_from_ip(data, policy_id);

					// Check policy with NECP
					if (!ipsec_netif_check_policy(pcb->ipsec_ifp, data)) {
						os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - failed policy check\n", pcb->ipsec_ifp->if_xname);
						STATS_INC(nifs, NETIF_STATS_DROP);
						mbuf_freem(data);
						data = NULL;
					} else {
						// Send through encryption; ipsec_output consumes the mbuf.
						error = ipsec_output(pcb->ipsec_ifp, data);
						if (error != 0) {
							os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - ipsec_output error %d\n", pcb->ipsec_ifp->if_xname, error);
						}
					}
				} else {
					os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - mbuf_copyback(%zu) error %d\n", pcb->ipsec_ifp->if_xname, length, error);
					STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
					STATS_INC(nifs, NETIF_STATS_DROP);
					mbuf_freem(data);
					data = NULL;
				}
			} else {
				os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - mbuf_gethdr error %d\n", pcb->ipsec_ifp->if_xname, error);
				STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
				STATS_INC(nifs, NETIF_STATS_DROP);
			}
		} else {
			os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - 0 length packet\n", pcb->ipsec_ifp->if_xname);
			STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
			STATS_INC(nifs, NETIF_STATS_DROP);
		}

		// data == NULL means the mbuf could not be built or was dropped;
		// stop draining the ring for this sync pass.
		if (data == NULL) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s: no encrypted packet to send\n", pcb->ipsec_ifp->if_xname);
			break;
		}

		STATS_INC(nifs, NETIF_STATS_TX_PACKETS);
		STATS_INC(nifs, NETIF_STATS_TX_COPY_MBUF);

		tx_ring_stats.kcrsi_slots_transferred++;
		tx_ring_stats.kcrsi_bytes_transferred += length;
	}

	// Free all detached TX packets in one call.
	if (tx_chain_ph != 0) {
		kern_pbufpool_free_chain(tx_ring->ckr_pp, tx_chain_ph);
	}

	if (tx_pslot) {
		kern_channel_advance_slot(tx_ring, tx_pslot);
		kern_channel_increment_ring_net_stats(tx_ring, pcb->ipsec_ifp, &tx_ring_stats);
		(void)kern_channel_reclaim(tx_ring);
	}

	lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
	ipsec_data_move_end(pcb);

	return 0;
}
1317
/*
 * ipsec_netif_tx_doorbell_one - service a TX doorbell for one netif TX ring.
 *
 * Refills/syncs the ring, then (with the ring entered and the pcb lock
 * held shared) revalidates that the ring is still registered at
 * ring_idx.  In kpipe mode it additionally applies flow control
 * (disables ifnet output when the ring is effectively full) and
 * notifies the matching kpipe RX ring so user space reads the packets.
 *
 * Returns 0 on success, ENXIO if the ring was torn down underneath us.
 */
static errno_t
ipsec_netif_tx_doorbell_one(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t ring, uint32_t flags, uint8_t ring_idx)
{
#pragma unused(nxprov)
	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
	boolean_t more = false;
	errno_t rc = 0;

	VERIFY((flags & KERN_NEXUS_TXDOORBELLF_ASYNC_REFILL) == 0);

	/*
	 * Refill and sync the ring; we may be racing against another thread doing
	 * an RX sync that also wants to do kr_enter(), and so use the blocking
	 * variant here.
	 */
	rc = kern_channel_tx_refill_canblock(ring, UINT32_MAX, UINT32_MAX, true, &more);
	if (rc != 0 && rc != EAGAIN && rc != EBUSY) {
		os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s tx refill failed %d\n", __func__,
		    pcb->ipsec_if_xname, ring->ckr_name, rc);
	}

	// Enter the ring before taking the pcb lock, then revalidate the
	// ring registration, which may have changed while unlocked.
	(void) kr_enter(ring, TRUE);
	lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
	if (ring != pcb->ipsec_netif_txring[ring_idx]) {
		// ring no longer valid
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		kr_exit(ring);
		os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d bad netif_txring 3\n", __func__,
		    pcb->ipsec_if_xname, ring->ckr_name, ring_idx);
		return ENXIO;
	}

	if (pcb->ipsec_kpipe_count) {
		uint32_t tx_available = kern_channel_available_slot_count(ring);
		if (pcb->ipsec_netif_txring_size > 0 &&
		    tx_available >= pcb->ipsec_netif_txring_size - 1) {
			// No room left in tx ring, disable output for now
			errno_t error = ifnet_disable_output(pcb->ipsec_ifp);
			if (error != 0) {
				os_log_error(OS_LOG_DEFAULT, "ipsec_netif_tx_doorbell: ifnet_disable_output returned error %d\n", error);
			}
		}
	}

	if (pcb->ipsec_kpipe_count) {
		kern_channel_ring_t rx_ring = pcb->ipsec_kpipe_rxring[ring_idx];

		// Unlock while calling notify
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		// Signal the kernel pipe ring to read
		if (rx_ring != NULL) {
			kern_channel_notify(rx_ring, 0);
		}
	} else {
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
	}

	kr_exit(ring);

	return 0;
}
1380
1381 static errno_t
ipsec_netif_tx_doorbell(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring,__unused uint32_t flags)1382 ipsec_netif_tx_doorbell(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
1383 kern_channel_ring_t ring, __unused uint32_t flags)
1384 {
1385 errno_t ret = 0;
1386 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
1387
1388 if (!ipsec_data_move_begin(pcb)) {
1389 os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
1390 return 0;
1391 }
1392
1393 if (ipsec_in_wmm_mode(pcb)) {
1394 for (uint8_t i = 0; i < IPSEC_IF_WMM_RING_COUNT; i++) {
1395 kern_channel_ring_t nring = pcb->ipsec_netif_txring[i];
1396 ret = ipsec_netif_tx_doorbell_one(nxprov, nexus, nring, flags, i);
1397 if (ret) {
1398 break;
1399 }
1400 }
1401 } else {
1402 ret = ipsec_netif_tx_doorbell_one(nxprov, nexus, ring, flags, 0);
1403 }
1404
1405 ipsec_data_move_end(pcb);
1406 return ret;
1407 }
1408
/*
 * ipsec_netif_sync_rx - netif nexus RX sync callback.
 *
 * Fills the netif RX ring from two sources, in order:
 *
 *  1. The legacy input chain (pcb->ipsec_input_chain): already-decrypted
 *     mbufs queued by the BSD input path.  Packets too large for an RX
 *     buflet (or the configured input fragment size) are IP-fragmented
 *     and re-queued onto the chain instead of being delivered directly.
 *
 *  2. The kpipe TX rings: ESP packets written by the kernel-pipe client
 *     are decrypted in-line (esp4/esp6 input under the kpipe decrypt
 *     lock) and copied into RX slots.
 *
 * Runs with ipsec_pcb_lock held shared; the lock is dropped around
 * kr_enter()/kern_channel_notify() to respect lock ordering, and the
 * kpipe ring registration is revalidated after each re-acquisition.
 */
static errno_t
ipsec_netif_sync_rx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t rx_ring, uint32_t flags)
{
#pragma unused(nxprov)
#pragma unused(flags)
	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
	struct kern_channel_ring_stat_increment rx_ring_stats;

	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;

	// Refuse to run while the data path is stopped/being torn down.
	if (!ipsec_data_move_begin(pcb)) {
		os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
		return 0;
	}

	lck_rw_lock_shared(&pcb->ipsec_pcb_lock);

	// Reclaim user-released slots
	(void) kern_channel_reclaim(rx_ring);

	STATS_INC(nifs, NETIF_STATS_RX_SYNC);

	uint32_t avail = kern_channel_available_slot_count(rx_ring);
	if (avail == 0) {
		// No free RX slots; nothing can be delivered this pass.
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		ipsec_data_move_end(pcb);
		return 0;
	}

	struct kern_pbufpool *rx_pp = rx_ring->ckr_pp;
	VERIFY(rx_pp != NULL);
	bzero(&rx_ring_stats, sizeof(rx_ring_stats));
	kern_channel_slot_t rx_pslot = NULL;
	kern_channel_slot_t rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);

	// Source 1: drain the legacy (BSD-path) input mbuf chain.
	while (rx_slot != NULL) {
		// Check for a waiting packet
		lck_mtx_lock(&pcb->ipsec_input_chain_lock);
		mbuf_t data = pcb->ipsec_input_chain;
		if (data == NULL) {
			lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
			break;
		}

		// Allocate rx packet
		kern_packet_t rx_ph = 0;
		errno_t error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
		if (__improbable(error != 0)) {
			STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_PKT);
			STATS_INC(nifs, NETIF_STATS_DROP);
			lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
			break;
		}

		// Advance waiting packets
		if (pcb->ipsec_input_chain_count > 0) {
			pcb->ipsec_input_chain_count--;
		}
		pcb->ipsec_input_chain = data->m_nextpkt;
		data->m_nextpkt = NULL;
		if (pcb->ipsec_input_chain == NULL) {
			pcb->ipsec_input_chain_last = NULL;
		}
		lck_mtx_unlock(&pcb->ipsec_input_chain_lock);

		size_t length = mbuf_pkthdr_len(data);

		if (length < sizeof(struct ip)) {
			// Flush data
			mbuf_freem(data);
			kern_pbufpool_free(rx_pp, rx_ph);
			STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
			STATS_INC(nifs, NETIF_STATS_DROP);
			os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: legacy decrypted packet length cannot hold IP %zu < %zu\n",
			    pcb->ipsec_ifp->if_xname, length, sizeof(struct ip));
			continue;
		}

		// Determine the address family from the IP version nibble.
		uint32_t af = 0;
		struct ip *ip = mtod(data, struct ip *);
		u_int ip_version = ip->ip_v;
		switch (ip_version) {
		case 4: {
			af = AF_INET;
			break;
		}
		case 6: {
			af = AF_INET6;
			break;
		}
		default: {
			// Unknown version: af stays 0; the fragmentation path
			// below (if taken) will drop the packet.
			os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: legacy unknown ip version %u\n",
			    pcb->ipsec_ifp->if_xname, ip_version);
			break;
		}
		}

		if (length > PP_BUF_SIZE_DEF(rx_pp) ||
		    (pcb->ipsec_frag_size_set && length > pcb->ipsec_input_frag_size)) {
			// We need to fragment to send up into the netif

			u_int32_t fragment_mtu = PP_BUF_SIZE_DEF(rx_pp);
			if (pcb->ipsec_frag_size_set &&
			    pcb->ipsec_input_frag_size < PP_BUF_SIZE_DEF(rx_pp)) {
				fragment_mtu = pcb->ipsec_input_frag_size;
			}

			mbuf_t fragment_chain = NULL;
			switch (af) {
			case AF_INET: {
				// ip_fragment expects the length in host order
				ip->ip_len = ntohs(ip->ip_len);

				// ip_fragment will modify the original data, don't free
				int fragment_error = ip_fragment(data, pcb->ipsec_ifp, fragment_mtu, TRUE);
				if (fragment_error == 0 && data != NULL) {
					fragment_chain = data;
				} else {
					STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
					STATS_INC(nifs, NETIF_STATS_DROP);
					os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: failed to fragment IPv4 packet of length %zu (%d)\n",
					    pcb->ipsec_ifp->if_xname, length, fragment_error);
				}
				break;
			}
			case AF_INET6: {
				if (length < sizeof(struct ip6_hdr)) {
					mbuf_freem(data);
					STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
					STATS_INC(nifs, NETIF_STATS_DROP);
					os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: failed to fragment IPv6 packet of length %zu < %zu\n",
					    pcb->ipsec_ifp->if_xname, length, sizeof(struct ip6_hdr));
				} else {
					// ip6_do_fragmentation will free the original data on success only
					struct ip6_hdr *ip6 = mtod(data, struct ip6_hdr *);

					int fragment_error = ip6_do_fragmentation(&data, 0, pcb->ipsec_ifp, sizeof(struct ip6_hdr),
					    ip6, NULL, fragment_mtu, ip6->ip6_nxt, htonl(ip6_randomid()));
					if (fragment_error == 0 && data != NULL) {
						fragment_chain = data;
					} else {
						mbuf_freem(data);
						STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
						STATS_INC(nifs, NETIF_STATS_DROP);
						os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: failed to fragment IPv6 packet of length %zu (%d)\n",
						    pcb->ipsec_ifp->if_xname, length, fragment_error);
					}
				}
				break;
			}
			default: {
				// Cannot fragment unknown families
				mbuf_freem(data);
				STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
				STATS_INC(nifs, NETIF_STATS_DROP);
				os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: uknown legacy decrypted packet length %zu > %u\n",
				    pcb->ipsec_ifp->if_xname, length, PP_BUF_SIZE_DEF(rx_pp));
				break;
			}
			}

			if (fragment_chain != NULL) {
				// Add fragments to chain before continuing
				lck_mtx_lock(&pcb->ipsec_input_chain_lock);
				if (pcb->ipsec_input_chain != NULL) {
					pcb->ipsec_input_chain_last->m_nextpkt = fragment_chain;
				} else {
					pcb->ipsec_input_chain = fragment_chain;
				}
				pcb->ipsec_input_chain_count++;
				// Walk to the tail of the fragment list, counting each one.
				while (fragment_chain->m_nextpkt) {
					VERIFY(fragment_chain != fragment_chain->m_nextpkt);
					fragment_chain = fragment_chain->m_nextpkt;
					pcb->ipsec_input_chain_count++;
				}
				pcb->ipsec_input_chain_last = fragment_chain;
				lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
			}

			// Make sure to free unused rx packet
			kern_pbufpool_free(rx_pp, rx_ph);

			continue;
		}

		mbuf_pkthdr_setrcvif(data, pcb->ipsec_ifp);

		// Fillout rx packet
		kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
		VERIFY(rx_buf != NULL);
		void *rx_baddr = kern_buflet_get_data_address(rx_buf);
		VERIFY(rx_baddr != NULL);

		// Copy-in data from mbuf to buflet
		mbuf_copydata(data, 0, length, (void *)rx_baddr);
		kern_packet_clear_flow_uuid(rx_ph); // Zero flow id

		// Finalize and attach the packet
		error = kern_buflet_set_data_offset(rx_buf, 0);
		VERIFY(error == 0);
		error = kern_buflet_set_data_length(rx_buf, (uint16_t)length);
		VERIFY(error == 0);
		error = kern_packet_set_headroom(rx_ph, 0);
		VERIFY(error == 0);
		error = kern_packet_finalize(rx_ph);
		VERIFY(error == 0);
		error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
		VERIFY(error == 0);

		STATS_INC(nifs, NETIF_STATS_RX_PACKETS);
		STATS_INC(nifs, NETIF_STATS_RX_COPY_MBUF);
		bpf_tap_packet_in(pcb->ipsec_ifp, DLT_RAW, rx_ph, NULL, 0);

		rx_ring_stats.kcrsi_slots_transferred++;
		rx_ring_stats.kcrsi_bytes_transferred += length;

		if (!pcb->ipsec_ext_ifdata_stats) {
			ifnet_stat_increment_in(pcb->ipsec_ifp, 1, (uint16_t)length, 0);
		}

		mbuf_freem(data);

		// Advance ring
		rx_pslot = rx_slot;
		rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
	}

	// Source 2: pull ESP packets from each kpipe TX ring and decrypt
	// them in-line into the remaining RX slots.
	for (uint8_t ring_idx = 0; ring_idx < pcb->ipsec_kpipe_count; ring_idx++) {
		struct kern_channel_ring_stat_increment tx_ring_stats;
		bzero(&tx_ring_stats, sizeof(tx_ring_stats));
		kern_channel_ring_t tx_ring = pcb->ipsec_kpipe_txring[ring_idx];
		kern_channel_slot_t tx_pslot = NULL;
		kern_channel_slot_t tx_slot = NULL;
		if (tx_ring == NULL) {
			// Net-If TX ring not set up yet, nothing to read
			goto done;
		}
		// Unlock ipsec before entering ring
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);

		(void)kr_enter(tx_ring, TRUE);

		// Lock again after entering and validate
		lck_rw_lock_shared(&pcb->ipsec_pcb_lock);

		if (tx_ring != pcb->ipsec_kpipe_txring[ring_idx]) {
			// Ring registration changed while the lock was dropped.
			goto done;
		}

		tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
		if (tx_slot == NULL) {
			// Nothing to read, don't bother signalling
			goto done;
		}

		while (rx_slot != NULL && tx_slot != NULL) {
			size_t length = 0;
			mbuf_t data = NULL;
			errno_t error = 0;
			uint32_t af;

			// Allocate rx packet
			kern_packet_t rx_ph = 0;
			error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
			if (__improbable(error != 0)) {
				STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_PKT);
				STATS_INC(nifs, NETIF_STATS_DROP);
				break;
			}

			kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);

			// Advance TX ring
			tx_pslot = tx_slot;
			tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);

			if (tx_ph == 0) {
				// Empty slot; return the unused RX packet.
				kern_pbufpool_free(rx_pp, rx_ph);
				continue;
			}

			kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
			VERIFY(tx_buf != NULL);
			uint8_t *tx_baddr = kern_buflet_get_data_address(tx_buf);
			VERIFY(tx_baddr != 0);
			tx_baddr += kern_buflet_get_data_offset(tx_buf);

			length = MIN(kern_packet_get_data_length(tx_ph),
			    pcb->ipsec_slot_size);

			// Increment TX stats
			tx_ring_stats.kcrsi_slots_transferred++;
			tx_ring_stats.kcrsi_bytes_transferred += length;

			if (length >= sizeof(struct ip)) {
				error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, &data);
				if (error == 0) {
					error = mbuf_copyback(data, 0, length, tx_baddr, MBUF_DONTWAIT);
					if (error == 0) {
						// Check for wake packet flag
						uuid_t flow_uuid;
						kern_packet_get_flow_uuid(tx_ph, &flow_uuid);
						u_int8_t *id_8 = (u_int8_t *)(uintptr_t)flow_uuid;
						if ((id_8[0] & IPSEC_KPIPE_FLAG_WAKE_PKT) == IPSEC_KPIPE_FLAG_WAKE_PKT) {
							os_log_info(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: wake packet flag is set\n",
							    pcb->ipsec_ifp->if_xname);
							data->m_pkthdr.pkt_flags |= PKTF_WAKE_PKT;
						}

						// Decrypt in-line under the kpipe decrypt lock.
						lck_mtx_lock(&pcb->ipsec_kpipe_decrypt_lock);
						struct ip *ip = mtod(data, struct ip *);
						u_int ip_version = ip->ip_v;
						switch (ip_version) {
						case 4: {
							af = AF_INET;
							// esp4_input expects host-order fields and
							// ip_len excluding the IP header.
							ip->ip_len = ntohs(ip->ip_len) - sizeof(struct ip);
							ip->ip_off = ntohs(ip->ip_off);

							if (length < ip->ip_len) {
								os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: IPv4 packet length too short (%zu < %u)\n",
								    pcb->ipsec_ifp->if_xname, length, ip->ip_len);
								STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
								STATS_INC(nifs, NETIF_STATS_DROP);
								mbuf_freem(data);
								data = NULL;
							} else {
								data = esp4_input_extended(data, sizeof(struct ip), pcb->ipsec_ifp);
							}
							break;
						}
						case 6: {
							if (length < sizeof(struct ip6_hdr)) {
								os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: IPv6 packet length too short for header %zu\n",
								    pcb->ipsec_ifp->if_xname, length);
								STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
								STATS_INC(nifs, NETIF_STATS_DROP);
								mbuf_freem(data);
								data = NULL;
							} else {
								af = AF_INET6;
								struct ip6_hdr *ip6 = mtod(data, struct ip6_hdr *);
								const size_t ip6_len = sizeof(*ip6) + ntohs(ip6->ip6_plen);
								if (length < ip6_len) {
									os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: IPv6 packet length too short (%zu < %zu)\n",
									    pcb->ipsec_ifp->if_xname, length, ip6_len);
									STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
									STATS_INC(nifs, NETIF_STATS_DROP);
									mbuf_freem(data);
									data = NULL;
								} else {
									int offset = sizeof(struct ip6_hdr);
									// esp6_input_extended updates data in place.
									esp6_input_extended(&data, &offset, ip6->ip6_nxt, pcb->ipsec_ifp);
								}
							}
							break;
						}
						default: {
							os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: unknown ip version %u\n",
							    pcb->ipsec_ifp->if_xname, ip_version);
							STATS_INC(nifs, NETIF_STATS_DROP);
							mbuf_freem(data);
							data = NULL;
							break;
						}
						}
						lck_mtx_unlock(&pcb->ipsec_kpipe_decrypt_lock);
					} else {
						os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s - mbuf_copyback(%zu) error %d\n", pcb->ipsec_ifp->if_xname, length, error);
						STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
						STATS_INC(nifs, NETIF_STATS_DROP);
						mbuf_freem(data);
						data = NULL;
					}
				} else {
					os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s - mbuf_gethdr error %d\n", pcb->ipsec_ifp->if_xname, error);
					STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
					STATS_INC(nifs, NETIF_STATS_DROP);
				}
			} else {
				os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s - bad packet length %zu\n", pcb->ipsec_ifp->if_xname, length);
				STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
				STATS_INC(nifs, NETIF_STATS_DROP);
			}

			if (data == NULL) {
				// Failed to get decrypted data
				kern_pbufpool_free(rx_pp, rx_ph);
				continue;
			}

			length = mbuf_pkthdr_len(data);
			if (length > PP_BUF_SIZE_DEF(rx_pp)) {
				// Flush data
				mbuf_freem(data);
				kern_pbufpool_free(rx_pp, rx_ph);
				STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
				STATS_INC(nifs, NETIF_STATS_DROP);
				os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: decrypted packet length %zu > %u\n",
				    pcb->ipsec_ifp->if_xname, length, PP_BUF_SIZE_DEF(rx_pp));
				continue;
			}

			mbuf_pkthdr_setrcvif(data, pcb->ipsec_ifp);

			// Fillout rx packet
			kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
			VERIFY(rx_buf != NULL);
			void *rx_baddr = kern_buflet_get_data_address(rx_buf);
			VERIFY(rx_baddr != NULL);

			// Copy-in data from mbuf to buflet
			mbuf_copydata(data, 0, length, (void *)rx_baddr);
			kern_packet_clear_flow_uuid(rx_ph); // Zero flow id

			// Finalize and attach the packet
			error = kern_buflet_set_data_offset(rx_buf, 0);
			VERIFY(error == 0);
			error = kern_buflet_set_data_length(rx_buf, (uint16_t)length);
			VERIFY(error == 0);
			error = kern_packet_set_link_header_offset(rx_ph, 0);
			VERIFY(error == 0);
			error = kern_packet_set_network_header_offset(rx_ph, 0);
			VERIFY(error == 0);
			error = kern_packet_finalize(rx_ph);
			VERIFY(error == 0);
			error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
			VERIFY(error == 0);

			STATS_INC(nifs, NETIF_STATS_RX_PACKETS);
			STATS_INC(nifs, NETIF_STATS_RX_COPY_DIRECT);
			bpf_tap_packet_in(pcb->ipsec_ifp, DLT_RAW, rx_ph, NULL, 0);

			rx_ring_stats.kcrsi_slots_transferred++;
			rx_ring_stats.kcrsi_bytes_transferred += length;

			if (!pcb->ipsec_ext_ifdata_stats) {
				ifnet_stat_increment_in(pcb->ipsec_ifp, 1, (uint16_t)length, 0);
			}

			mbuf_freem(data);

			rx_pslot = rx_slot;
			rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
		}

done:
		if (tx_pslot) {
			kern_channel_advance_slot(tx_ring, tx_pslot);
			kern_channel_increment_ring_net_stats(tx_ring, pcb->ipsec_ifp, &tx_ring_stats);
			(void)kern_channel_reclaim(tx_ring);
		}

		// Unlock first, then exit ring
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		if (tx_ring != NULL) {
			if (tx_pslot != NULL) {
				kern_channel_notify(tx_ring, 0);
			}
			kr_exit(tx_ring);
		}

		// Re-take the pcb lock for the next iteration / the exit path.
		lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
	}

	if (rx_pslot) {
		kern_channel_advance_slot(rx_ring, rx_pslot);
		kern_channel_increment_ring_net_stats(rx_ring, pcb->ipsec_ifp, &rx_ring_stats);
	}


	lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);

	ipsec_data_move_end(pcb);
	return 0;
}
1885
1886 static errno_t
ipsec_nexus_ifattach(struct ipsec_pcb * pcb,struct ifnet_init_eparams * init_params,struct ifnet ** ifp)1887 ipsec_nexus_ifattach(struct ipsec_pcb *pcb,
1888 struct ifnet_init_eparams *init_params,
1889 struct ifnet **ifp)
1890 {
1891 errno_t err;
1892 nexus_controller_t controller = kern_nexus_shared_controller();
1893 struct kern_nexus_net_init net_init;
1894 struct kern_pbufpool_init pp_init;
1895
1896 nexus_name_t provider_name;
1897 snprintf((char *)provider_name, sizeof(provider_name),
1898 "com.apple.netif.%s", pcb->ipsec_if_xname);
1899
1900 struct kern_nexus_provider_init prov_init = {
1901 .nxpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
1902 .nxpi_flags = NXPIF_VIRTUAL_DEVICE,
1903 .nxpi_pre_connect = ipsec_nexus_pre_connect,
1904 .nxpi_connected = ipsec_nexus_connected,
1905 .nxpi_pre_disconnect = ipsec_netif_pre_disconnect,
1906 .nxpi_disconnected = ipsec_nexus_disconnected,
1907 .nxpi_ring_init = ipsec_netif_ring_init,
1908 .nxpi_ring_fini = ipsec_netif_ring_fini,
1909 .nxpi_slot_init = NULL,
1910 .nxpi_slot_fini = NULL,
1911 .nxpi_sync_tx = ipsec_netif_sync_tx,
1912 .nxpi_sync_rx = ipsec_netif_sync_rx,
1913 .nxpi_tx_doorbell = ipsec_netif_tx_doorbell,
1914 };
1915
1916 nexus_attr_t nxa = NULL;
1917 err = kern_nexus_attr_create(&nxa);
1918 IPSEC_IF_VERIFY(err == 0);
1919 if (err != 0) {
1920 os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_attr_create failed: %d\n",
1921 __func__, err);
1922 goto failed;
1923 }
1924
1925 uint64_t slot_buffer_size = pcb->ipsec_slot_size;
1926 err = kern_nexus_attr_set(nxa, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
1927 VERIFY(err == 0);
1928
1929 // Reset ring size for netif nexus to limit memory usage
1930 uint64_t ring_size = pcb->ipsec_netif_ring_size;
1931 err = kern_nexus_attr_set(nxa, NEXUS_ATTR_TX_SLOTS, ring_size);
1932 VERIFY(err == 0);
1933 err = kern_nexus_attr_set(nxa, NEXUS_ATTR_RX_SLOTS, ring_size);
1934 VERIFY(err == 0);
1935
1936 assert(err == 0);
1937
1938 if (ipsec_in_wmm_mode(pcb)) {
1939 os_log(OS_LOG_DEFAULT, "%s: %s enabling wmm mode\n",
1940 __func__, pcb->ipsec_if_xname);
1941
1942 init_params->output_sched_model = IFNET_SCHED_MODEL_DRIVER_MANAGED;
1943
1944 err = kern_nexus_attr_set(nxa, NEXUS_ATTR_TX_RINGS,
1945 IPSEC_NETIF_WMM_TX_RING_COUNT);
1946 VERIFY(err == 0);
1947 err = kern_nexus_attr_set(nxa, NEXUS_ATTR_RX_RINGS,
1948 IPSEC_NETIF_WMM_RX_RING_COUNT);
1949 VERIFY(err == 0);
1950
1951 err = kern_nexus_attr_set(nxa, NEXUS_ATTR_QMAP, NEXUS_QMAP_TYPE_WMM);
1952 VERIFY(err == 0);
1953 }
1954
1955 pcb->ipsec_netif_txring_size = ring_size;
1956
1957 bzero(&pp_init, sizeof(pp_init));
1958 pp_init.kbi_version = KERN_PBUFPOOL_CURRENT_VERSION;
1959 pp_init.kbi_flags |= KBIF_VIRTUAL_DEVICE;
1960 // Note: we need more packets than can be held in the tx and rx rings because
1961 // packets can also be in the AQM queue(s)
1962 pp_init.kbi_packets = pcb->ipsec_netif_ring_size * (2 * pcb->ipsec_kpipe_count + 1);
1963 pp_init.kbi_bufsize = pcb->ipsec_slot_size;
1964 pp_init.kbi_buf_seg_size = IPSEC_IF_DEFAULT_BUF_SEG_SIZE;
1965 pp_init.kbi_max_frags = 1;
1966 (void) snprintf((char *)pp_init.kbi_name, sizeof(pp_init.kbi_name),
1967 "%s", provider_name);
1968 pp_init.kbi_ctx = NULL;
1969 pp_init.kbi_ctx_retain = NULL;
1970 pp_init.kbi_ctx_release = NULL;
1971
1972 err = kern_pbufpool_create(&pp_init, &pcb->ipsec_netif_pp, NULL);
1973 if (err != 0) {
1974 os_log_error(OS_LOG_DEFAULT, "%s pbufbool create failed, error %d\n", __func__, err);
1975 goto failed;
1976 }
1977
1978 err = kern_nexus_controller_register_provider(controller,
1979 ipsec_nx_dom_prov,
1980 provider_name,
1981 &prov_init,
1982 sizeof(prov_init),
1983 nxa,
1984 &pcb->ipsec_nx.if_provider);
1985 IPSEC_IF_VERIFY(err == 0);
1986 if (err != 0) {
1987 os_log_error(OS_LOG_DEFAULT, "%s register provider failed, error %d\n",
1988 __func__, err);
1989 goto failed;
1990 }
1991
1992 bzero(&net_init, sizeof(net_init));
1993 net_init.nxneti_version = KERN_NEXUS_NET_CURRENT_VERSION;
1994 net_init.nxneti_flags = 0;
1995 net_init.nxneti_eparams = init_params;
1996 net_init.nxneti_lladdr = NULL;
1997 net_init.nxneti_prepare = ipsec_netif_prepare;
1998 net_init.nxneti_rx_pbufpool = pcb->ipsec_netif_pp;
1999 net_init.nxneti_tx_pbufpool = pcb->ipsec_netif_pp;
2000 err = kern_nexus_controller_alloc_net_provider_instance(controller,
2001 pcb->ipsec_nx.if_provider,
2002 pcb,
2003 NULL,
2004 &pcb->ipsec_nx.if_instance,
2005 &net_init,
2006 ifp);
2007 IPSEC_IF_VERIFY(err == 0);
2008 if (err != 0) {
2009 os_log_error(OS_LOG_DEFAULT, "%s alloc_net_provider_instance failed, %d\n",
2010 __func__, err);
2011 kern_nexus_controller_deregister_provider(controller,
2012 pcb->ipsec_nx.if_provider);
2013 uuid_clear(pcb->ipsec_nx.if_provider);
2014 goto failed;
2015 }
2016
2017 failed:
2018 if (nxa) {
2019 kern_nexus_attr_destroy(nxa);
2020 }
2021 if (err && pcb->ipsec_netif_pp != NULL) {
2022 kern_pbufpool_destroy(pcb->ipsec_netif_pp);
2023 pcb->ipsec_netif_pp = NULL;
2024 }
2025 return err;
2026 }
2027
2028 static void
ipsec_detach_provider_and_instance(uuid_t provider,uuid_t instance)2029 ipsec_detach_provider_and_instance(uuid_t provider, uuid_t instance)
2030 {
2031 nexus_controller_t controller = kern_nexus_shared_controller();
2032 errno_t err;
2033
2034 if (!uuid_is_null(instance)) {
2035 err = kern_nexus_controller_free_provider_instance(controller,
2036 instance);
2037 if (err != 0) {
2038 os_log_error(OS_LOG_DEFAULT, "%s free_provider_instance failed %d\n",
2039 __func__, err);
2040 }
2041 uuid_clear(instance);
2042 }
2043 if (!uuid_is_null(provider)) {
2044 err = kern_nexus_controller_deregister_provider(controller,
2045 provider);
2046 if (err != 0) {
2047 os_log_error(OS_LOG_DEFAULT, "%s deregister_provider %d\n", __func__, err);
2048 }
2049 uuid_clear(provider);
2050 }
2051 return;
2052 }
2053
2054 static void
ipsec_nexus_detach(struct ipsec_pcb * pcb)2055 ipsec_nexus_detach(struct ipsec_pcb *pcb)
2056 {
2057 ipsec_nx_t nx = &pcb->ipsec_nx;
2058 nexus_controller_t controller = kern_nexus_shared_controller();
2059 errno_t err;
2060
2061 if (!uuid_is_null(nx->fsw_device)) {
2062 err = kern_nexus_ifdetach(controller,
2063 nx->fsw_instance,
2064 nx->fsw_device);
2065 if (err != 0) {
2066 os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_ifdetach ms device failed %d\n",
2067 __func__, err);
2068 }
2069 }
2070
2071 ipsec_detach_provider_and_instance(nx->fsw_provider,
2072 nx->fsw_instance);
2073 ipsec_detach_provider_and_instance(nx->if_provider,
2074 nx->if_instance);
2075
2076 if (pcb->ipsec_netif_pp != NULL) {
2077 kern_pbufpool_destroy(pcb->ipsec_netif_pp);
2078 pcb->ipsec_netif_pp = NULL;
2079 }
2080 memset(nx, 0, sizeof(*nx));
2081 }
2082
/*
 * ipsec_create_fs_provider_and_instance - register a flowswitch nexus
 * provider named "com.apple.<type_name>.<ifname>" and allocate one
 * instance of it.
 *
 * On success the provider and instance UUIDs are returned through
 * 'provider' and 'instance'. On failure everything created so far is
 * released (the provider registration is undone if instance allocation
 * fails) and the errno is returned.
 */
static errno_t
ipsec_create_fs_provider_and_instance(struct ipsec_pcb *pcb,
    const char *type_name,
    const char *ifname,
    uuid_t *provider, uuid_t *instance)
{
	nexus_attr_t attr = NULL;
	nexus_controller_t controller = kern_nexus_shared_controller();
	uuid_t dom_prov;
	errno_t err;
	struct kern_nexus_init init;
	nexus_name_t provider_name;

	err = kern_nexus_get_default_domain_provider(NEXUS_TYPE_FLOW_SWITCH,
	    &dom_prov);
	IPSEC_IF_VERIFY(err == 0);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s can't get %s provider, error %d\n",
		    __func__, type_name, err);
		goto failed;
	}

	err = kern_nexus_attr_create(&attr);
	IPSEC_IF_VERIFY(err == 0);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_attr_create failed: %d\n",
		    __func__, err);
		goto failed;
	}

	uint64_t slot_buffer_size = pcb->ipsec_slot_size;
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
	VERIFY(err == 0);

	// Reset ring size for flowswitch nexus to limit memory usage. Larger RX than netif.
	uint64_t tx_ring_size = pcb->ipsec_tx_fsw_ring_size;
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_TX_SLOTS, tx_ring_size);
	VERIFY(err == 0);
	uint64_t rx_ring_size = pcb->ipsec_rx_fsw_ring_size;
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_RX_SLOTS, rx_ring_size);
	VERIFY(err == 0);
	/*
	 * Configure flowswitch to use super-packet (multi-buflet).
	 * This allows flowswitch to perform intra-stack packet aggregation.
	 */
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_MAX_FRAGS,
	    NX_FSW_TCP_RX_AGG_ENABLED() ? NX_PBUF_FRAGS_MAX : 1);
	VERIFY(err == 0);

	snprintf((char *)provider_name, sizeof(provider_name),
	    "com.apple.%s.%s", type_name, ifname);
	err = kern_nexus_controller_register_provider(controller,
	    dom_prov,
	    provider_name,
	    NULL,
	    0,
	    attr,
	    provider);
	// Done with the attributes whether or not registration succeeded
	kern_nexus_attr_destroy(attr);
	attr = NULL;
	IPSEC_IF_VERIFY(err == 0);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s register %s provider failed, error %d\n",
		    __func__, type_name, err);
		goto failed;
	}
	bzero(&init, sizeof(init));
	init.nxi_version = KERN_NEXUS_CURRENT_VERSION;
	err = kern_nexus_controller_alloc_provider_instance(controller,
	    *provider,
	    NULL, NULL,
	    instance, &init);
	IPSEC_IF_VERIFY(err == 0);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s alloc_provider_instance %s failed, %d\n",
		    __func__, type_name, err);
		// Undo the registration so the caller sees no partial state
		kern_nexus_controller_deregister_provider(controller,
		    *provider);
		uuid_clear(*provider);
	}
failed:
	return err;
}
2166
/*
 * ipsec_flowswitch_attach - create a flowswitch nexus for the interface,
 * attach it to the netif device port, and record the flowswitch's agent
 * UUID in the pcb for later use.
 *
 * On failure, all nexus state is detached and the ifnet is detached; if
 * ifnet_detach itself fails the system panics rather than leave the
 * interface half-attached.
 */
static errno_t
ipsec_flowswitch_attach(struct ipsec_pcb *pcb)
{
	nexus_controller_t controller = kern_nexus_shared_controller();
	errno_t err = 0;
	ipsec_nx_t nx = &pcb->ipsec_nx;

	// Allocate flowswitch
	err = ipsec_create_fs_provider_and_instance(pcb,
	    "flowswitch",
	    pcb->ipsec_ifp->if_xname,
	    &nx->fsw_provider,
	    &nx->fsw_instance);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s: failed to create bridge provider and instance\n",
		    __func__);
		goto failed;
	}

	// Attach flowswitch to device port
	err = kern_nexus_ifattach(controller, nx->fsw_instance,
	    NULL, nx->if_instance,
	    FALSE, &nx->fsw_device);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s kern_nexus_ifattach ms device %d\n", __func__, err);
		goto failed;
	}

	// Extract the agent UUID and save for later
	struct kern_nexus *flowswitch_nx = nx_find(nx->fsw_instance, false);
	if (flowswitch_nx != NULL) {
		struct nx_flowswitch *flowswitch = NX_FSW_PRIVATE(flowswitch_nx);
		if (flowswitch != NULL) {
			// Copy the agent UUID under the flowswitch read lock
			FSW_RLOCK(flowswitch);
			uuid_copy(nx->fsw_agent, flowswitch->fsw_agent_uuid);
			FSW_UNLOCK(flowswitch);
		} else {
			os_log_error(OS_LOG_DEFAULT, "ipsec_flowswitch_attach - flowswitch is NULL\n");
		}
		// Drop the reference taken by nx_find()
		nx_release(flowswitch_nx);
	} else {
		os_log_error(OS_LOG_DEFAULT, "ipsec_flowswitch_attach - unable to find flowswitch nexus\n");
	}

	return 0;

failed:
	ipsec_nexus_detach(pcb);

	errno_t detach_error = 0;
	if ((detach_error = ifnet_detach(pcb->ipsec_ifp)) != 0) {
		panic("ipsec_flowswitch_attach - ifnet_detach failed: %d", detach_error);
		/* NOT REACHED */
	}

	return err;
}
2224
2225 #pragma mark Kernel Pipe Nexus
2226
/*
 * ipsec_register_kernel_pipe_nexus - take a reference on the shared
 * kernel-pipe nexus controller, creating the controller and registering
 * the "com.apple.nexus.ipsec.kpipe" provider on first use.
 *
 * Ring sizes fall back from the pcb's kpipe-specific settings to the
 * pcb's netif ring size and finally to the if_ipsec_ring_size default.
 * On failure the controller is destroyed and the refcount reset to 0.
 * Serialized by ipsec_lock.
 */
static errno_t
ipsec_register_kernel_pipe_nexus(struct ipsec_pcb *pcb)
{
	nexus_attr_t nxa = NULL;
	errno_t result;

	lck_mtx_lock(&ipsec_lock);
	if (ipsec_ncd_refcount++) {
		// Controller already set up by an earlier caller; just add a ref
		lck_mtx_unlock(&ipsec_lock);
		return 0;
	}

	result = kern_nexus_controller_create(&ipsec_ncd);
	if (result) {
		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_controller_create failed: %d\n",
		    __FUNCTION__, result);
		goto done;
	}

	uuid_t dom_prov;
	result = kern_nexus_get_default_domain_provider(
		NEXUS_TYPE_KERNEL_PIPE, &dom_prov);
	if (result) {
		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_get_default_domain_provider failed: %d\n",
		    __FUNCTION__, result);
		goto done;
	}

	// Datapath callbacks for kpipe channels on ipsec interfaces
	struct kern_nexus_provider_init prov_init = {
		.nxpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
		.nxpi_flags = NXPIF_VIRTUAL_DEVICE,
		.nxpi_pre_connect = ipsec_nexus_pre_connect,
		.nxpi_connected = ipsec_nexus_connected,
		.nxpi_pre_disconnect = ipsec_nexus_pre_disconnect,
		.nxpi_disconnected = ipsec_nexus_disconnected,
		.nxpi_ring_init = ipsec_kpipe_ring_init,
		.nxpi_ring_fini = ipsec_kpipe_ring_fini,
		.nxpi_slot_init = NULL,
		.nxpi_slot_fini = NULL,
		.nxpi_sync_tx = ipsec_kpipe_sync_tx,
		.nxpi_sync_rx = ipsec_kpipe_sync_rx,
		.nxpi_tx_doorbell = NULL,
	};

	result = kern_nexus_attr_create(&nxa);
	if (result) {
		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_attr_create failed: %d\n",
		    __FUNCTION__, result);
		goto done;
	}

	uint64_t slot_buffer_size = IPSEC_IF_DEFAULT_SLOT_SIZE;
	result = kern_nexus_attr_set(nxa, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
	VERIFY(result == 0);

	// Reset ring size for kernel pipe nexus to limit memory usage
	// Note: It's better to have less on slots on the kpipe TX ring than the netif
	// so back pressure is applied at the AQM layer
	uint64_t ring_size =
	    pcb->ipsec_kpipe_tx_ring_size != 0 ? pcb->ipsec_kpipe_tx_ring_size :
	    pcb->ipsec_netif_ring_size != 0 ? pcb->ipsec_netif_ring_size :
	    if_ipsec_ring_size;
	result = kern_nexus_attr_set(nxa, NEXUS_ATTR_TX_SLOTS, ring_size);
	VERIFY(result == 0);

	ring_size =
	    pcb->ipsec_kpipe_rx_ring_size != 0 ? pcb->ipsec_kpipe_rx_ring_size :
	    pcb->ipsec_netif_ring_size != 0 ? pcb->ipsec_netif_ring_size :
	    if_ipsec_ring_size;
	result = kern_nexus_attr_set(nxa, NEXUS_ATTR_RX_SLOTS, ring_size);
	VERIFY(result == 0);

	result = kern_nexus_controller_register_provider(ipsec_ncd,
	    dom_prov,
	    (const uint8_t *)"com.apple.nexus.ipsec.kpipe",
	    &prov_init,
	    sizeof(prov_init),
	    nxa,
	    &ipsec_kpipe_uuid);
	if (result) {
		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_controller_register_provider failed: %d\n",
		    __FUNCTION__, result);
		goto done;
	}

done:
	// Attributes are no longer needed once registration has been attempted
	if (nxa) {
		kern_nexus_attr_destroy(nxa);
	}

	// On any failure, fully unwind so a later caller can retry from scratch
	if (result) {
		if (ipsec_ncd) {
			kern_nexus_controller_destroy(ipsec_ncd);
			ipsec_ncd = NULL;
		}
		ipsec_ncd_refcount = 0;
	}

	lck_mtx_unlock(&ipsec_lock);

	return result;
}
2329
2330 static void
ipsec_unregister_kernel_pipe_nexus(void)2331 ipsec_unregister_kernel_pipe_nexus(void)
2332 {
2333 lck_mtx_lock(&ipsec_lock);
2334
2335 VERIFY(ipsec_ncd_refcount > 0);
2336
2337 if (--ipsec_ncd_refcount == 0) {
2338 kern_nexus_controller_destroy(ipsec_ncd);
2339 ipsec_ncd = NULL;
2340 }
2341
2342 lck_mtx_unlock(&ipsec_lock);
2343 }
2344
/* This structure only holds onto kpipe channels that need to be
 * freed in the future, but are cleared from the pcb under lock
 */
struct ipsec_detached_channels {
	int count;                             /* number of valid entries in uuids[] */
	kern_pbufpool_t pp;                    /* kpipe packet pool to destroy, if any */
	uuid_t uuids[IPSEC_IF_MAX_RING_COUNT]; /* detached kpipe channel instance UUIDs */
};
2353
/*
 * ipsec_detach_channels - move the pcb's kpipe channel UUIDs and packet
 * pool into 'dc' so they can be freed later by ipsec_free_channels()
 * without holding the pcb lock. Clears IPSEC_FLAGS_KPIPE_ALLOCATED.
 *
 * Caller must hold the pcb lock exclusively.
 */
static void
ipsec_detach_channels(struct ipsec_pcb *pcb, struct ipsec_detached_channels *dc)
{
	LCK_RW_ASSERT(&pcb->ipsec_pcb_lock, LCK_RW_TYPE_EXCLUSIVE);

	// No kpipes were ever allocated: verify all UUID slots are empty
	if (!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
		for (int i = 0; i < IPSEC_IF_MAX_RING_COUNT; i++) {
			VERIFY(uuid_is_null(pcb->ipsec_kpipe_uuid[i]));
		}
		dc->count = 0;
		return;
	}

	dc->count = pcb->ipsec_kpipe_count;

	VERIFY(dc->count >= 0);
	VERIFY(dc->count <= IPSEC_IF_MAX_RING_COUNT);

	// Transfer the allocated UUIDs out of the pcb...
	for (int i = 0; i < dc->count; i++) {
		VERIFY(!uuid_is_null(pcb->ipsec_kpipe_uuid[i]));
		uuid_copy(dc->uuids[i], pcb->ipsec_kpipe_uuid[i]);
		uuid_clear(pcb->ipsec_kpipe_uuid[i]);
	}
	// ...and check the unused slots really were empty
	for (int i = dc->count; i < IPSEC_IF_MAX_RING_COUNT; i++) {
		VERIFY(uuid_is_null(pcb->ipsec_kpipe_uuid[i]));
	}

	// A packet pool exists if and only if there are channels
	if (dc->count) {
		VERIFY(pcb->ipsec_kpipe_pp);
	} else {
		VERIFY(!pcb->ipsec_kpipe_pp);
	}

	dc->pp = pcb->ipsec_kpipe_pp;

	pcb->ipsec_kpipe_pp = NULL;

	ipsec_flag_clr(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED);
}
2393
2394 static void
ipsec_free_channels(struct ipsec_detached_channels * dc)2395 ipsec_free_channels(struct ipsec_detached_channels *dc)
2396 {
2397 if (!dc->count) {
2398 return;
2399 }
2400
2401 for (int i = 0; i < dc->count; i++) {
2402 errno_t result;
2403 result = kern_nexus_controller_free_provider_instance(ipsec_ncd, dc->uuids[i]);
2404 VERIFY(!result);
2405 }
2406
2407 VERIFY(dc->pp);
2408 kern_pbufpool_destroy(dc->pp);
2409
2410 ipsec_unregister_kernel_pipe_nexus();
2411
2412 memset(dc, 0, sizeof(*dc));
2413 }
2414
2415 static errno_t
ipsec_enable_channel(struct ipsec_pcb * pcb,struct proc * proc)2416 ipsec_enable_channel(struct ipsec_pcb *pcb, struct proc *proc)
2417 {
2418 struct kern_nexus_init init;
2419 struct kern_pbufpool_init pp_init;
2420 errno_t result;
2421
2422 kauth_cred_t cred = kauth_cred_get();
2423 result = priv_check_cred(cred, PRIV_SKYWALK_REGISTER_KERNEL_PIPE, 0);
2424 if (result) {
2425 return result;
2426 }
2427
2428 VERIFY(pcb->ipsec_kpipe_count);
2429 VERIFY(!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED));
2430
2431 result = ipsec_register_kernel_pipe_nexus(pcb);
2432
2433 lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
2434
2435 if (result) {
2436 os_log_error(OS_LOG_DEFAULT, "%s: %s failed to register kernel pipe nexus\n",
2437 __func__, pcb->ipsec_if_xname);
2438 goto done;
2439 }
2440
2441 VERIFY(ipsec_ncd);
2442
2443 bzero(&pp_init, sizeof(pp_init));
2444 pp_init.kbi_version = KERN_PBUFPOOL_CURRENT_VERSION;
2445 pp_init.kbi_flags |= KBIF_VIRTUAL_DEVICE;
2446 // Note: We only needs are many packets as can be held in the tx and rx rings
2447 pp_init.kbi_packets = pcb->ipsec_netif_ring_size * 2 * pcb->ipsec_kpipe_count;
2448 pp_init.kbi_bufsize = pcb->ipsec_slot_size;
2449 pp_init.kbi_buf_seg_size = IPSEC_IF_DEFAULT_BUF_SEG_SIZE;
2450 pp_init.kbi_max_frags = 1;
2451 pp_init.kbi_flags |= KBIF_QUANTUM;
2452 (void) snprintf((char *)pp_init.kbi_name, sizeof(pp_init.kbi_name),
2453 "com.apple.kpipe.%s", pcb->ipsec_if_xname);
2454 pp_init.kbi_ctx = NULL;
2455 pp_init.kbi_ctx_retain = NULL;
2456 pp_init.kbi_ctx_release = NULL;
2457
2458 result = kern_pbufpool_create(&pp_init, &pcb->ipsec_kpipe_pp,
2459 NULL);
2460 if (result != 0) {
2461 os_log_error(OS_LOG_DEFAULT, "%s: %s pbufbool create failed, error %d\n",
2462 __func__, pcb->ipsec_if_xname, result);
2463 goto done;
2464 }
2465
2466 bzero(&init, sizeof(init));
2467 init.nxi_version = KERN_NEXUS_CURRENT_VERSION;
2468 init.nxi_tx_pbufpool = pcb->ipsec_kpipe_pp;
2469
2470 for (unsigned int i = 0; i < pcb->ipsec_kpipe_count; i++) {
2471 VERIFY(uuid_is_null(pcb->ipsec_kpipe_uuid[i]));
2472 result = kern_nexus_controller_alloc_provider_instance(ipsec_ncd,
2473 ipsec_kpipe_uuid, pcb, NULL, &pcb->ipsec_kpipe_uuid[i], &init);
2474
2475 if (result == 0) {
2476 nexus_port_t port = NEXUS_PORT_KERNEL_PIPE_CLIENT;
2477 const bool has_proc_uuid = !uuid_is_null(pcb->ipsec_kpipe_proc_uuid);
2478 pid_t pid = pcb->ipsec_kpipe_pid;
2479 if (!pid && !has_proc_uuid) {
2480 pid = proc_pid(proc);
2481 }
2482 result = kern_nexus_controller_bind_provider_instance(ipsec_ncd,
2483 pcb->ipsec_kpipe_uuid[i], &port,
2484 pid, has_proc_uuid ? pcb->ipsec_kpipe_proc_uuid : NULL, NULL,
2485 0, has_proc_uuid ? NEXUS_BIND_EXEC_UUID:NEXUS_BIND_PID);
2486 }
2487
2488 if (result) {
2489 /* Unwind all of them on error */
2490 for (int j = 0; j < IPSEC_IF_MAX_RING_COUNT; j++) {
2491 if (!uuid_is_null(pcb->ipsec_kpipe_uuid[j])) {
2492 kern_nexus_controller_free_provider_instance(ipsec_ncd,
2493 pcb->ipsec_kpipe_uuid[j]);
2494 uuid_clear(pcb->ipsec_kpipe_uuid[j]);
2495 }
2496 }
2497 goto done;
2498 }
2499 }
2500
2501 done:
2502 lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
2503
2504 if (result) {
2505 if (pcb->ipsec_kpipe_pp != NULL) {
2506 kern_pbufpool_destroy(pcb->ipsec_kpipe_pp);
2507 pcb->ipsec_kpipe_pp = NULL;
2508 }
2509 ipsec_unregister_kernel_pipe_nexus();
2510 } else {
2511 ipsec_flag_set(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED);
2512 }
2513
2514 return result;
2515 }
2516
2517 #endif // IPSEC_NEXUS
2518
2519
2520 /* Kernel control functions */
2521
2522 static inline int
ipsec_find_by_unit(u_int32_t unit)2523 ipsec_find_by_unit(u_int32_t unit)
2524 {
2525 struct ipsec_pcb *next_pcb = NULL;
2526 int found = 0;
2527
2528 TAILQ_FOREACH(next_pcb, &ipsec_head, ipsec_chain) {
2529 if (next_pcb->ipsec_unit == unit) {
2530 found = 1;
2531 break;
2532 }
2533 }
2534
2535 return found;
2536 }
2537
/*
 * ipsec_free_pcb - destroy a pcb's locks, unlink it from the global pcb
 * list and free it. 'locked' indicates whether the caller already holds
 * ipsec_lock (the global list lock).
 */
static inline void
ipsec_free_pcb(struct ipsec_pcb *pcb, bool locked)
{
#if IPSEC_NEXUS
	// Release any mbufs still queued on the input chain before the
	// chain lock is destroyed
	mbuf_freem_list(pcb->ipsec_input_chain);
	pcb->ipsec_input_chain_count = 0;
	lck_mtx_destroy(&pcb->ipsec_input_chain_lock, &ipsec_lck_grp);
	lck_mtx_destroy(&pcb->ipsec_kpipe_encrypt_lock, &ipsec_lck_grp);
	lck_mtx_destroy(&pcb->ipsec_kpipe_decrypt_lock, &ipsec_lck_grp);
#endif // IPSEC_NEXUS
	lck_mtx_destroy(&pcb->ipsec_pcb_data_move_lock, &ipsec_lck_grp);
	lck_rw_destroy(&pcb->ipsec_pcb_lock, &ipsec_lck_grp);
	if (!locked) {
		lck_mtx_lock(&ipsec_lock);
	}
	TAILQ_REMOVE(&ipsec_head, pcb, ipsec_chain);
	if (!locked) {
		lck_mtx_unlock(&ipsec_lock);
	}
	zfree(ipsec_pcb_zone, pcb);
}
2559
/*
 * ipsec_ctl_setup - allocate a pcb for a new kernel-control unit.
 *
 * When *unit is 0, picks the first free control unit; otherwise verifies
 * the requested unit is unused. Also chooses a unique interface id
 * (handling wraparound by scanning for a gap) and inserts the new pcb
 * into the global list, which is kept sorted by unique id.
 *
 * Returns EINVAL on bad arguments and EBUSY when the unit space is
 * exhausted or the requested unit is taken.
 */
static errno_t
ipsec_ctl_setup(u_int32_t *unit, void **unitinfo)
{
	if (unit == NULL || unitinfo == NULL) {
		return EINVAL;
	}

	lck_mtx_lock(&ipsec_lock);

	/* Find next available unit */
	if (*unit == 0) {
		*unit = 1;
		while (*unit != ctl_maxunit) {
			if (ipsec_find_by_unit(*unit)) {
				(*unit)++;
			} else {
				break;
			}
		}
		if (*unit == ctl_maxunit) {
			lck_mtx_unlock(&ipsec_lock);
			return EBUSY;
		}
	} else if (ipsec_find_by_unit(*unit)) {
		lck_mtx_unlock(&ipsec_lock);
		return EBUSY;
	}

	/* Find some open interface id */
	u_int32_t chosen_unique_id = 1;
	struct ipsec_pcb *next_pcb = TAILQ_LAST(&ipsec_head, ipsec_list);
	if (next_pcb != NULL) {
		/* List was not empty, add one to the last item */
		chosen_unique_id = next_pcb->ipsec_unique_id + 1;
		next_pcb = NULL;

		/*
		 * If this wrapped the id number, start looking at
		 * the front of the list for an unused id.
		 */
		if (chosen_unique_id == 0) {
			/* Find the next unused ID */
			chosen_unique_id = 1;
			TAILQ_FOREACH(next_pcb, &ipsec_head, ipsec_chain) {
				if (next_pcb->ipsec_unique_id > chosen_unique_id) {
					/* We found a gap */
					break;
				}

				chosen_unique_id = next_pcb->ipsec_unique_id + 1;
			}
		}
	}

	struct ipsec_pcb *pcb = zalloc_flags(ipsec_pcb_zone, Z_WAITOK | Z_ZERO);

	*unitinfo = pcb;
	pcb->ipsec_unit = *unit;
	pcb->ipsec_unique_id = chosen_unique_id;

	// next_pcb is non-NULL only when the id search found a gap mid-list;
	// inserting before it keeps the list sorted by unique id
	if (next_pcb != NULL) {
		TAILQ_INSERT_BEFORE(next_pcb, pcb, ipsec_chain);
	} else {
		TAILQ_INSERT_TAIL(&ipsec_head, pcb, ipsec_chain);
	}

	lck_mtx_unlock(&ipsec_lock);

	return 0;
}
2630
/*
 * ipsec_ctl_bind - initialize the pcb for a kernel-control bind.
 *
 * Creates the pcb via ipsec_ctl_setup() when one does not exist yet,
 * records the control ref and unit, sets the default output service
 * class and (with IPSEC_NEXUS) default slot/ring sizing, and initializes
 * all pcb locks. Returns EINVAL if no pcb could be obtained.
 */
static errno_t
ipsec_ctl_bind(kern_ctl_ref kctlref,
    struct sockaddr_ctl *sac,
    void **unitinfo)
{
	if (*unitinfo == NULL) {
		u_int32_t unit = 0;
		(void)ipsec_ctl_setup(&unit, unitinfo);
	}

	struct ipsec_pcb *pcb = (struct ipsec_pcb *)*unitinfo;
	if (pcb == NULL) {
		return EINVAL;
	}

	/* Setup the protocol control block */
	pcb->ipsec_ctlref = kctlref;
	pcb->ipsec_unit = sac->sc_unit;
	pcb->ipsec_output_service_class = MBUF_SC_OAM;

#if IPSEC_NEXUS
	// Defaults; may be overridden by setsockopt before connect
	pcb->ipsec_use_netif = false;
	pcb->ipsec_slot_size = IPSEC_IF_DEFAULT_SLOT_SIZE;
	pcb->ipsec_netif_ring_size = if_ipsec_ring_size;
	pcb->ipsec_tx_fsw_ring_size = if_ipsec_tx_fsw_ring_size;
	pcb->ipsec_rx_fsw_ring_size = if_ipsec_rx_fsw_ring_size;
#endif // IPSEC_NEXUS

	lck_rw_init(&pcb->ipsec_pcb_lock, &ipsec_lck_grp, &ipsec_lck_attr);
	lck_mtx_init(&pcb->ipsec_pcb_data_move_lock, &ipsec_lck_grp, &ipsec_lck_attr);
#if IPSEC_NEXUS
	pcb->ipsec_input_chain_count = 0;
	lck_mtx_init(&pcb->ipsec_input_chain_lock, &ipsec_lck_grp, &ipsec_lck_attr);
	lck_mtx_init(&pcb->ipsec_kpipe_encrypt_lock, &ipsec_lck_grp, &ipsec_lck_attr);
	lck_mtx_init(&pcb->ipsec_kpipe_decrypt_lock, &ipsec_lck_grp, &ipsec_lck_attr);
#endif // IPSEC_NEXUS

	return 0;
}
2670
/*
 * ipsec_ctl_connect - create and attach the ipsec interface when a
 * client connects to the kernel control.
 *
 * Ensures the pcb is set up (calling ipsec_ctl_bind() if needed), then
 * creates the ifnet either as a Skywalk-native netif (with optional
 * kpipe channels and a flowswitch) or as a classic ifnet with an
 * ipsec_start output path. Attaches bpf and marks the interface running.
 *
 * On most failures the pcb is freed and *unitinfo cleared; after the
 * nexus ifattach succeeds, teardown is instead deferred to
 * ipsec_detached().
 */
static errno_t
ipsec_ctl_connect(kern_ctl_ref kctlref,
    struct sockaddr_ctl *sac,
    void **unitinfo)
{
	struct ifnet_init_eparams ipsec_init = {};
	errno_t result = 0;

	if (*unitinfo == NULL) {
		(void)ipsec_ctl_bind(kctlref, sac, unitinfo);
	}

	struct ipsec_pcb *pcb = *unitinfo;
	if (pcb == NULL) {
		return EINVAL;
	}

	/* Handle case where ipsec_ctl_setup() was called, but ipsec_ctl_bind() was not */
	if (pcb->ipsec_ctlref == NULL) {
		(void)ipsec_ctl_bind(kctlref, sac, unitinfo);
	}

	// Control units are 1-based; interface names are 0-based
	snprintf(pcb->ipsec_if_xname, sizeof(pcb->ipsec_if_xname), "ipsec%d", pcb->ipsec_unit - 1);
	snprintf(pcb->ipsec_unique_name, sizeof(pcb->ipsec_unique_name), "ipsecid%d", pcb->ipsec_unique_id - 1);
	os_log(OS_LOG_DEFAULT, "ipsec_ctl_connect: creating interface %s (id %s)\n", pcb->ipsec_if_xname, pcb->ipsec_unique_name);

	/* Create the interface */
	bzero(&ipsec_init, sizeof(ipsec_init));
	ipsec_init.ver = IFNET_INIT_CURRENT_VERSION;
	ipsec_init.len = sizeof(ipsec_init);

#if IPSEC_NEXUS
	if (pcb->ipsec_use_netif) {
		ipsec_init.flags = (IFNET_INIT_SKYWALK_NATIVE | IFNET_INIT_NX_NOAUTO);
	} else
#endif // IPSEC_NEXUS
	{
		// Classic path: the interface needs an output start routine
		ipsec_init.flags = IFNET_INIT_NX_NOAUTO;
		ipsec_init.start = ipsec_start;
	}
	ipsec_init.name = "ipsec";
	ipsec_init.unit = pcb->ipsec_unit - 1;
	ipsec_init.uniqueid = pcb->ipsec_unique_name;
	ipsec_init.uniqueid_len = (uint32_t)strlen(pcb->ipsec_unique_name);
	ipsec_init.family = IFNET_FAMILY_IPSEC;
	ipsec_init.type = IFT_OTHER;
	ipsec_init.demux = ipsec_demux;
	ipsec_init.add_proto = ipsec_add_proto;
	ipsec_init.del_proto = ipsec_del_proto;
	ipsec_init.softc = pcb;
	ipsec_init.ioctl = ipsec_ioctl;
	ipsec_init.free = ipsec_detached;

#if IPSEC_NEXUS
	/* We don't support kpipes without a netif */
	if (pcb->ipsec_kpipe_count && !pcb->ipsec_use_netif) {
		result = ENOTSUP;
		os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - kpipe requires netif: failed %d\n", result);
		ipsec_free_pcb(pcb, false);
		*unitinfo = NULL;
		return result;
	}

	if (if_ipsec_debug != 0) {
		printf("%s: %s%d use_netif %d kpipe_count %d slot_size %u ring_size %u "
		    "kpipe_tx_ring_size %u kpipe_rx_ring_size %u\n",
		    __func__,
		    ipsec_init.name, ipsec_init.unit,
		    pcb->ipsec_use_netif,
		    pcb->ipsec_kpipe_count,
		    pcb->ipsec_slot_size,
		    pcb->ipsec_netif_ring_size,
		    pcb->ipsec_kpipe_tx_ring_size,
		    pcb->ipsec_kpipe_rx_ring_size);
	}
	if (pcb->ipsec_use_netif) {
		if (pcb->ipsec_kpipe_count) {
			result = ipsec_enable_channel(pcb, current_proc());
			if (result) {
				os_log_error(OS_LOG_DEFAULT, "%s: %s failed to enable channels\n",
				    __func__, pcb->ipsec_if_xname);
				ipsec_free_pcb(pcb, false);
				*unitinfo = NULL;
				return result;
			}
		}

		result = ipsec_nexus_ifattach(pcb, &ipsec_init, &pcb->ipsec_ifp);
		if (result != 0) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - ipsec_nexus_ifattach failed: %d\n", result);
			ipsec_free_pcb(pcb, false);
			*unitinfo = NULL;
			return result;
		}

		result = ipsec_flowswitch_attach(pcb);
		if (result != 0) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - ipsec_flowswitch_attach failed: %d\n", result);
			// Do not call ipsec_free_pcb(). We will be attached already, and will be freed later
			// in ipsec_detached().
			*unitinfo = NULL;
			return result;
		}

		/* Attach to bpf */
		bpfattach(pcb->ipsec_ifp, DLT_RAW, 0);
	} else
#endif // IPSEC_NEXUS
	{
		result = ifnet_allocate_extended(&ipsec_init, &pcb->ipsec_ifp);
		if (result != 0) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - ifnet_allocate failed: %d\n", result);
			ipsec_free_pcb(pcb, false);
			*unitinfo = NULL;
			return result;
		}
		ipsec_ifnet_set_attrs(pcb->ipsec_ifp);

		/* Attach the interface */
		result = ifnet_attach(pcb->ipsec_ifp, NULL);
		if (result != 0) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - ifnet_attach failed: %d\n", result);
			ifnet_release(pcb->ipsec_ifp);
			ipsec_free_pcb(pcb, false);
			*unitinfo = NULL;
			return result;
		}

		/* Attach to bpf */
		bpfattach(pcb->ipsec_ifp, DLT_NULL, 0);
	}

#if IPSEC_NEXUS
	/*
	 * Mark the data path as ready.
	 * If kpipe nexus is being used then the data path is marked ready only when a kpipe channel is connected.
	 */
	if (pcb->ipsec_kpipe_count == 0) {
		lck_mtx_lock(&pcb->ipsec_pcb_data_move_lock);
		IPSEC_SET_DATA_PATH_READY(pcb);
		lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
	}
#endif

	/* The interface's resources are allocated, mark it as running */
	ifnet_set_flags(pcb->ipsec_ifp, IFF_RUNNING, IFF_RUNNING);

	return 0;
}
2820
2821 static errno_t
ipsec_detach_ip(ifnet_t interface,protocol_family_t protocol,socket_t pf_socket)2822 ipsec_detach_ip(ifnet_t interface,
2823 protocol_family_t protocol,
2824 socket_t pf_socket)
2825 {
2826 errno_t result = EPROTONOSUPPORT;
2827
2828 /* Attempt a detach */
2829 if (protocol == PF_INET) {
2830 struct ifreq ifr;
2831
2832 bzero(&ifr, sizeof(ifr));
2833 snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s%d",
2834 ifnet_name(interface), ifnet_unit(interface));
2835
2836 result = sock_ioctl(pf_socket, SIOCPROTODETACH, &ifr);
2837 } else if (protocol == PF_INET6) {
2838 struct in6_ifreq ifr6;
2839
2840 bzero(&ifr6, sizeof(ifr6));
2841 snprintf(ifr6.ifr_name, sizeof(ifr6.ifr_name), "%s%d",
2842 ifnet_name(interface), ifnet_unit(interface));
2843
2844 result = sock_ioctl(pf_socket, SIOCPROTODETACH_IN6, &ifr6);
2845 }
2846
2847 return result;
2848 }
2849
2850 static void
ipsec_remove_address(ifnet_t interface,protocol_family_t protocol,ifaddr_t address,socket_t pf_socket)2851 ipsec_remove_address(ifnet_t interface,
2852 protocol_family_t protocol,
2853 ifaddr_t address,
2854 socket_t pf_socket)
2855 {
2856 errno_t result = 0;
2857
2858 /* Attempt a detach */
2859 if (protocol == PF_INET) {
2860 struct ifreq ifr;
2861
2862 bzero(&ifr, sizeof(ifr));
2863 snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s%d",
2864 ifnet_name(interface), ifnet_unit(interface));
2865 result = ifaddr_address(address, &ifr.ifr_addr, sizeof(ifr.ifr_addr));
2866 if (result != 0) {
2867 os_log_error(OS_LOG_DEFAULT, "ipsec_remove_address - ifaddr_address failed: %d", result);
2868 } else {
2869 result = sock_ioctl(pf_socket, SIOCDIFADDR, &ifr);
2870 if (result != 0) {
2871 os_log_error(OS_LOG_DEFAULT, "ipsec_remove_address - SIOCDIFADDR failed: %d", result);
2872 }
2873 }
2874 } else if (protocol == PF_INET6) {
2875 struct in6_ifreq ifr6;
2876
2877 bzero(&ifr6, sizeof(ifr6));
2878 snprintf(ifr6.ifr_name, sizeof(ifr6.ifr_name), "%s%d",
2879 ifnet_name(interface), ifnet_unit(interface));
2880 result = ifaddr_address(address, (struct sockaddr*)&ifr6.ifr_addr,
2881 sizeof(ifr6.ifr_addr));
2882 if (result != 0) {
2883 os_log_error(OS_LOG_DEFAULT, "ipsec_remove_address - ifaddr_address failed (v6): %d",
2884 result);
2885 } else {
2886 result = sock_ioctl(pf_socket, SIOCDIFADDR_IN6, &ifr6);
2887 if (result != 0) {
2888 os_log_error(OS_LOG_DEFAULT, "ipsec_remove_address - SIOCDIFADDR_IN6 failed: %d",
2889 result);
2890 }
2891 }
2892 }
2893 }
2894
2895 static void
ipsec_cleanup_family(ifnet_t interface,protocol_family_t protocol)2896 ipsec_cleanup_family(ifnet_t interface,
2897 protocol_family_t protocol)
2898 {
2899 errno_t result = 0;
2900 socket_t pf_socket = NULL;
2901 ifaddr_t *addresses = NULL;
2902 int i;
2903
2904 if (protocol != PF_INET && protocol != PF_INET6) {
2905 os_log_error(OS_LOG_DEFAULT, "ipsec_cleanup_family - invalid protocol family %d\n", protocol);
2906 return;
2907 }
2908
2909 /* Create a socket for removing addresses and detaching the protocol */
2910 result = sock_socket(protocol, SOCK_DGRAM, 0, NULL, NULL, &pf_socket);
2911 if (result != 0) {
2912 if (result != EAFNOSUPPORT) {
2913 os_log_error(OS_LOG_DEFAULT, "ipsec_cleanup_family - failed to create %s socket: %d\n",
2914 protocol == PF_INET ? "IP" : "IPv6", result);
2915 }
2916 goto cleanup;
2917 }
2918
2919 /* always set SS_PRIV, we want to close and detach regardless */
2920 sock_setpriv(pf_socket, 1);
2921
2922 result = ipsec_detach_ip(interface, protocol, pf_socket);
2923 if (result == 0 || result == ENXIO) {
2924 /* We are done! We either detached or weren't attached. */
2925 goto cleanup;
2926 } else if (result != EBUSY) {
2927 /* Uh, not really sure what happened here... */
2928 os_log_error(OS_LOG_DEFAULT, "ipsec_cleanup_family - ipsec_detach_ip failed: %d\n", result);
2929 goto cleanup;
2930 }
2931
2932 /*
2933 * At this point, we received an EBUSY error. This means there are
2934 * addresses attached. We should detach them and then try again.
2935 */
2936 result = ifnet_get_address_list_family(interface, &addresses, (sa_family_t)protocol);
2937 if (result != 0) {
2938 os_log_error(OS_LOG_DEFAULT, "fnet_get_address_list_family(%s%d, 0xblah, %s) - failed: %d\n",
2939 ifnet_name(interface), ifnet_unit(interface),
2940 protocol == PF_INET ? "PF_INET" : "PF_INET6", result);
2941 goto cleanup;
2942 }
2943
2944 for (i = 0; addresses[i] != 0; i++) {
2945 ipsec_remove_address(interface, protocol, addresses[i], pf_socket);
2946 }
2947 ifnet_free_address_list(addresses);
2948 addresses = NULL;
2949
2950 /*
2951 * The addresses should be gone, we should try the remove again.
2952 */
2953 result = ipsec_detach_ip(interface, protocol, pf_socket);
2954 if (result != 0 && result != ENXIO) {
2955 os_log_error(OS_LOG_DEFAULT, "ipsec_cleanup_family - ipsec_detach_ip failed: %d\n", result);
2956 }
2957
2958 cleanup:
2959 if (pf_socket != NULL) {
2960 sock_close(pf_socket);
2961 }
2962
2963 if (addresses != NULL) {
2964 ifnet_free_address_list(addresses);
2965 }
2966 }
2967
/*
 * Kernel control disconnect handler: tears down the ipsec interface
 * associated with this control unit. Drains in-flight data-path threads,
 * stops the nexus (if any), removes addresses/protocols, and detaches the
 * ifnet. Always returns 0 once pcb is non-NULL.
 */
static errno_t
ipsec_ctl_disconnect(__unused kern_ctl_ref kctlref,
    __unused u_int32_t unit,
    void *unitinfo)
{
	struct ipsec_pcb *pcb = unitinfo;
	ifnet_t ifp = NULL;
	errno_t result = 0;

	if (pcb == NULL) {
		return EINVAL;
	}

	/* Wait until all threads in the data paths are done. */
	ipsec_wait_data_move_drain(pcb);

#if IPSEC_NEXUS
	// Tell the nexus to stop all rings
	if (pcb->ipsec_netif_nexus != NULL) {
		kern_nexus_stop(pcb->ipsec_netif_nexus);
	}
#endif // IPSEC_NEXUS

	lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);

#if IPSEC_NEXUS
	if (if_ipsec_debug != 0) {
		printf("ipsec_ctl_disconnect: detaching interface %s (id %s)\n",
		    pcb->ipsec_if_xname, pcb->ipsec_unique_name);
	}

	/* Channels are detached under the pcb lock but freed only after it
	 * is dropped (see ipsec_free_channels() calls below). */
	struct ipsec_detached_channels dc;
	ipsec_detach_channels(pcb, &dc);
#endif // IPSEC_NEXUS

	/* Sever the back-reference to the kernel control. */
	pcb->ipsec_ctlref = NULL;

	ifp = pcb->ipsec_ifp;
	if (ifp != NULL) {
#if IPSEC_NEXUS
		if (pcb->ipsec_netif_nexus != NULL) {
			/*
			 * Quiesce the interface and flush any pending outbound packets.
			 */
			if_down(ifp);

			/*
			 * Suspend data movement and wait for IO threads to exit.
			 * We can't rely on the logic in dlil_quiesce_and_detach_nexuses() to
			 * do this because ipsec nexuses are attached/detached separately.
			 */
			ifnet_datamov_suspend_and_drain(ifp);
			if ((result = ifnet_detach(ifp)) != 0) {
				panic("ipsec_ctl_disconnect - ifnet_detach failed: %d", result);
				/* NOT REACHED */
			}

			/*
			 * We want to do everything in our power to ensure that the interface
			 * really goes away when the socket is closed. We must remove IP/IPv6
			 * addresses and detach the protocols. Finally, we can remove and
			 * release the interface.
			 */
			key_delsp_for_ipsec_if(ifp);

			ipsec_cleanup_family(ifp, AF_INET);
			ipsec_cleanup_family(ifp, AF_INET6);

			lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);

			ipsec_free_channels(&dc);

			ipsec_nexus_detach(pcb);

			/* Decrement refcnt added by ifnet_datamov_suspend_and_drain(). */
			ifnet_datamov_resume(ifp);
		} else
#endif // IPSEC_NEXUS
		{
			/* Non-netif (or non-nexus) path: drop the pcb lock before the
			 * potentially blocking cleanup calls below. */
			lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);

#if IPSEC_NEXUS
			ipsec_free_channels(&dc);
#endif // IPSEC_NEXUS

			/*
			 * We want to do everything in our power to ensure that the interface
			 * really goes away when the socket is closed. We must remove IP/IPv6
			 * addresses and detach the protocols. Finally, we can remove and
			 * release the interface.
			 */
			key_delsp_for_ipsec_if(ifp);

			ipsec_cleanup_family(ifp, AF_INET);
			ipsec_cleanup_family(ifp, AF_INET6);

			/*
			 * Detach now; ipsec_detach() will be called asynchronously once
			 * the I/O reference count drops to 0. There we will invoke
			 * ifnet_release().
			 */
			if ((result = ifnet_detach(ifp)) != 0) {
				os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_disconnect - ifnet_detach failed: %d\n", result);
			}
		}
	} else {
		// Bound, but not connected
		lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
		ipsec_free_pcb(pcb, false);
	}

	return 0;
}
3081
3082 static errno_t
ipsec_ctl_send(__unused kern_ctl_ref kctlref,__unused u_int32_t unit,__unused void * unitinfo,mbuf_t m,__unused int flags)3083 ipsec_ctl_send(__unused kern_ctl_ref kctlref,
3084 __unused u_int32_t unit,
3085 __unused void *unitinfo,
3086 mbuf_t m,
3087 __unused int flags)
3088 {
3089 /* Receive messages from the control socket. Currently unused. */
3090 mbuf_freem(m);
3091 return 0;
3092 }
3093
3094 static errno_t
ipsec_ctl_setopt(__unused kern_ctl_ref kctlref,__unused u_int32_t unit,void * unitinfo,int opt,void * data,size_t len)3095 ipsec_ctl_setopt(__unused kern_ctl_ref kctlref,
3096 __unused u_int32_t unit,
3097 void *unitinfo,
3098 int opt,
3099 void *data,
3100 size_t len)
3101 {
3102 errno_t result = 0;
3103 struct ipsec_pcb *pcb = unitinfo;
3104 if (pcb == NULL) {
3105 return EINVAL;
3106 }
3107
3108 /* check for privileges for privileged options */
3109 switch (opt) {
3110 case IPSEC_OPT_FLAGS:
3111 case IPSEC_OPT_EXT_IFDATA_STATS:
3112 case IPSEC_OPT_SET_DELEGATE_INTERFACE:
3113 case IPSEC_OPT_OUTPUT_TRAFFIC_CLASS:
3114 case IPSEC_OPT_OUTPUT_DSCP_MAPPING:
3115 if (kauth_cred_issuser(kauth_cred_get()) == 0) {
3116 return EPERM;
3117 }
3118 break;
3119 }
3120
3121 switch (opt) {
3122 case IPSEC_OPT_FLAGS: {
3123 if (len != sizeof(u_int32_t)) {
3124 result = EMSGSIZE;
3125 } else {
3126 pcb->ipsec_external_flags = *(u_int32_t *)data;
3127 }
3128 break;
3129 }
3130
3131 case IPSEC_OPT_EXT_IFDATA_STATS: {
3132 if (len != sizeof(int)) {
3133 result = EMSGSIZE;
3134 break;
3135 }
3136 if (pcb->ipsec_ifp == NULL) {
3137 // Only can set after connecting
3138 result = EINVAL;
3139 break;
3140 }
3141 pcb->ipsec_ext_ifdata_stats = (*(int *)data) ? 1 : 0;
3142 break;
3143 }
3144
3145 case IPSEC_OPT_INC_IFDATA_STATS_IN:
3146 case IPSEC_OPT_INC_IFDATA_STATS_OUT: {
3147 struct ipsec_stats_param *utsp = (struct ipsec_stats_param *)data;
3148
3149 if (utsp == NULL || len < sizeof(struct ipsec_stats_param)) {
3150 result = EINVAL;
3151 break;
3152 }
3153 if (pcb->ipsec_ifp == NULL) {
3154 // Only can set after connecting
3155 result = EINVAL;
3156 break;
3157 }
3158 if (!pcb->ipsec_ext_ifdata_stats) {
3159 result = EINVAL;
3160 break;
3161 }
3162 if (opt == IPSEC_OPT_INC_IFDATA_STATS_IN) {
3163 ifnet_stat_increment_in(pcb->ipsec_ifp, (uint32_t)utsp->utsp_packets,
3164 (uint32_t)utsp->utsp_bytes, (uint32_t)utsp->utsp_errors);
3165 } else {
3166 ifnet_stat_increment_out(pcb->ipsec_ifp, (uint32_t)utsp->utsp_packets,
3167 (uint32_t)utsp->utsp_bytes, (uint32_t)utsp->utsp_errors);
3168 }
3169 break;
3170 }
3171
3172 case IPSEC_OPT_SET_DELEGATE_INTERFACE: {
3173 ifnet_t del_ifp = NULL;
3174 char name[IFNAMSIZ];
3175
3176 if (len > IFNAMSIZ - 1) {
3177 result = EMSGSIZE;
3178 break;
3179 }
3180 if (pcb->ipsec_ifp == NULL) {
3181 // Only can set after connecting
3182 result = EINVAL;
3183 break;
3184 }
3185 if (len != 0) { /* if len==0, del_ifp will be NULL causing the delegate to be removed */
3186 bcopy(data, name, len);
3187 name[len] = 0;
3188 result = ifnet_find_by_name(name, &del_ifp);
3189 }
3190 if (result == 0) {
3191 os_log_error(OS_LOG_DEFAULT, "%s IPSEC_OPT_SET_DELEGATE_INTERFACE %s to %s\n",
3192 __func__, pcb->ipsec_ifp->if_xname,
3193 del_ifp ? del_ifp->if_xname : "NULL");
3194
3195 result = ifnet_set_delegate(pcb->ipsec_ifp, del_ifp);
3196 if (del_ifp) {
3197 ifnet_release(del_ifp);
3198 }
3199 }
3200 break;
3201 }
3202
3203 case IPSEC_OPT_OUTPUT_TRAFFIC_CLASS: {
3204 if (len != sizeof(int)) {
3205 result = EMSGSIZE;
3206 break;
3207 }
3208 if (pcb->ipsec_ifp == NULL) {
3209 // Only can set after connecting
3210 result = EINVAL;
3211 break;
3212 }
3213 mbuf_svc_class_t output_service_class = so_tc2msc(*(int *)data);
3214 if (output_service_class == MBUF_SC_UNSPEC) {
3215 pcb->ipsec_output_service_class = MBUF_SC_OAM;
3216 } else {
3217 pcb->ipsec_output_service_class = output_service_class;
3218 }
3219 os_log_error(OS_LOG_DEFAULT, "%s IPSEC_OPT_OUTPUT_TRAFFIC_CLASS %s svc %d\n",
3220 __func__, pcb->ipsec_ifp->if_xname,
3221 pcb->ipsec_output_service_class);
3222 break;
3223 }
3224
3225 #if IPSEC_NEXUS
3226 case IPSEC_OPT_ENABLE_CHANNEL: {
3227 if (len != sizeof(int)) {
3228 result = EMSGSIZE;
3229 break;
3230 }
3231 if (pcb->ipsec_ifp != NULL) {
3232 // Only can set before connecting
3233 result = EINVAL;
3234 break;
3235 }
3236 if ((*(int *)data) != 0 &&
3237 (*(int *)data) != 1 &&
3238 (*(int *)data) != IPSEC_IF_WMM_RING_COUNT) {
3239 result = EINVAL;
3240 break;
3241 }
3242 lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
3243 pcb->ipsec_kpipe_count = *(int *)data;
3244 lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
3245 break;
3246 }
3247
3248 case IPSEC_OPT_CHANNEL_BIND_PID: {
3249 if (len != sizeof(pid_t)) {
3250 result = EMSGSIZE;
3251 break;
3252 }
3253 if (pcb->ipsec_ifp != NULL) {
3254 // Only can set before connecting
3255 result = EINVAL;
3256 break;
3257 }
3258 lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
3259 pcb->ipsec_kpipe_pid = *(pid_t *)data;
3260 lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
3261 break;
3262 }
3263
3264 case IPSEC_OPT_CHANNEL_BIND_UUID: {
3265 if (len != sizeof(uuid_t)) {
3266 result = EMSGSIZE;
3267 break;
3268 }
3269 if (pcb->ipsec_ifp != NULL) {
3270 // Only can set before connecting
3271 result = EINVAL;
3272 break;
3273 }
3274 lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
3275 uuid_copy(pcb->ipsec_kpipe_proc_uuid, *((uuid_t *)data));
3276 lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
3277 break;
3278 }
3279
3280 case IPSEC_OPT_ENABLE_FLOWSWITCH: {
3281 if (len != sizeof(int)) {
3282 result = EMSGSIZE;
3283 break;
3284 }
3285 if (pcb->ipsec_ifp == NULL) {
3286 // Only can set after connecting
3287 result = EINVAL;
3288 break;
3289 }
3290 if (!if_is_fsw_transport_netagent_enabled()) {
3291 result = ENOTSUP;
3292 break;
3293 }
3294 if (uuid_is_null(pcb->ipsec_nx.fsw_agent)) {
3295 result = ENOENT;
3296 break;
3297 }
3298
3299 uint32_t flags = netagent_get_flags(pcb->ipsec_nx.fsw_agent);
3300
3301 if (*(int *)data) {
3302 flags |= (NETAGENT_FLAG_NEXUS_PROVIDER |
3303 NETAGENT_FLAG_NEXUS_LISTENER);
3304 result = netagent_set_flags(pcb->ipsec_nx.fsw_agent, flags);
3305 pcb->ipsec_needs_netagent = true;
3306 } else {
3307 pcb->ipsec_needs_netagent = false;
3308 flags &= ~(NETAGENT_FLAG_NEXUS_PROVIDER |
3309 NETAGENT_FLAG_NEXUS_LISTENER);
3310 result = netagent_set_flags(pcb->ipsec_nx.fsw_agent, flags);
3311 }
3312 break;
3313 }
3314
3315 case IPSEC_OPT_INPUT_FRAG_SIZE: {
3316 if (len != sizeof(u_int32_t)) {
3317 result = EMSGSIZE;
3318 break;
3319 }
3320 u_int32_t input_frag_size = *(u_int32_t *)data;
3321 if (input_frag_size <= sizeof(struct ip6_hdr)) {
3322 pcb->ipsec_frag_size_set = FALSE;
3323 pcb->ipsec_input_frag_size = 0;
3324 } else {
3325 pcb->ipsec_frag_size_set = TRUE;
3326 pcb->ipsec_input_frag_size = input_frag_size;
3327 }
3328 break;
3329 }
3330 case IPSEC_OPT_ENABLE_NETIF: {
3331 if (len != sizeof(int)) {
3332 result = EMSGSIZE;
3333 break;
3334 }
3335 if (pcb->ipsec_ifp != NULL) {
3336 // Only can set before connecting
3337 result = EINVAL;
3338 break;
3339 }
3340 lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
3341 pcb->ipsec_use_netif = !!(*(int *)data);
3342 lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
3343 break;
3344 }
3345 case IPSEC_OPT_SLOT_SIZE: {
3346 if (len != sizeof(u_int32_t)) {
3347 result = EMSGSIZE;
3348 break;
3349 }
3350 if (pcb->ipsec_ifp != NULL) {
3351 // Only can set before connecting
3352 result = EINVAL;
3353 break;
3354 }
3355 u_int32_t slot_size = *(u_int32_t *)data;
3356 if (slot_size < IPSEC_IF_MIN_SLOT_SIZE ||
3357 slot_size > IPSEC_IF_MAX_SLOT_SIZE) {
3358 return EINVAL;
3359 }
3360 pcb->ipsec_slot_size = slot_size;
3361 if (if_ipsec_debug != 0) {
3362 printf("%s: IPSEC_OPT_SLOT_SIZE %u\n", __func__, slot_size);
3363 }
3364 break;
3365 }
3366 case IPSEC_OPT_NETIF_RING_SIZE: {
3367 if (len != sizeof(u_int32_t)) {
3368 result = EMSGSIZE;
3369 break;
3370 }
3371 if (pcb->ipsec_ifp != NULL) {
3372 // Only can set before connecting
3373 result = EINVAL;
3374 break;
3375 }
3376 u_int32_t ring_size = *(u_int32_t *)data;
3377 if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
3378 ring_size > IPSEC_IF_MAX_RING_SIZE) {
3379 return EINVAL;
3380 }
3381 pcb->ipsec_netif_ring_size = ring_size;
3382 if (if_ipsec_debug != 0) {
3383 printf("%s: IPSEC_OPT_NETIF_RING_SIZE %u\n", __func__, ring_size);
3384 }
3385 break;
3386 }
3387 case IPSEC_OPT_TX_FSW_RING_SIZE: {
3388 if (len != sizeof(u_int32_t)) {
3389 result = EMSGSIZE;
3390 break;
3391 }
3392 if (pcb->ipsec_ifp != NULL) {
3393 // Only can set before connecting
3394 result = EINVAL;
3395 break;
3396 }
3397 u_int32_t ring_size = *(u_int32_t *)data;
3398 if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
3399 ring_size > IPSEC_IF_MAX_RING_SIZE) {
3400 return EINVAL;
3401 }
3402 pcb->ipsec_tx_fsw_ring_size = ring_size;
3403 if (if_ipsec_debug != 0) {
3404 printf("%s: IPSEC_OPT_TX_FSW_RING_SIZE %u\n", __func__, ring_size);
3405 }
3406 break;
3407 }
3408 case IPSEC_OPT_RX_FSW_RING_SIZE: {
3409 if (len != sizeof(u_int32_t)) {
3410 result = EMSGSIZE;
3411 break;
3412 }
3413 if (pcb->ipsec_ifp != NULL) {
3414 // Only can set before connecting
3415 result = EINVAL;
3416 break;
3417 }
3418 u_int32_t ring_size = *(u_int32_t *)data;
3419 if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
3420 ring_size > IPSEC_IF_MAX_RING_SIZE) {
3421 return EINVAL;
3422 }
3423 pcb->ipsec_rx_fsw_ring_size = ring_size;
3424 if (if_ipsec_debug != 0) {
3425 printf("%s: IPSEC_OPT_TX_FSW_RING_SIZE %u\n", __func__, ring_size);
3426 }
3427 break;
3428 }
3429 case IPSEC_OPT_KPIPE_TX_RING_SIZE: {
3430 if (len != sizeof(u_int32_t)) {
3431 result = EMSGSIZE;
3432 break;
3433 }
3434 if (pcb->ipsec_ifp != NULL) {
3435 // Only can set before connecting
3436 result = EINVAL;
3437 break;
3438 }
3439 u_int32_t ring_size = *(u_int32_t *)data;
3440 if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
3441 ring_size > IPSEC_IF_MAX_RING_SIZE) {
3442 return EINVAL;
3443 }
3444 pcb->ipsec_kpipe_tx_ring_size = ring_size;
3445 if (if_ipsec_debug != 0) {
3446 printf("%s: IPSEC_OPT_KPIPE_TX_RING_SIZE %u\n", __func__, ring_size);
3447 }
3448 break;
3449 }
3450 case IPSEC_OPT_KPIPE_RX_RING_SIZE: {
3451 if (len != sizeof(u_int32_t)) {
3452 result = EMSGSIZE;
3453 break;
3454 }
3455 if (pcb->ipsec_ifp != NULL) {
3456 // Only can set before connecting
3457 result = EINVAL;
3458 break;
3459 }
3460 u_int32_t ring_size = *(u_int32_t *)data;
3461 if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
3462 ring_size > IPSEC_IF_MAX_RING_SIZE) {
3463 return EINVAL;
3464 }
3465 pcb->ipsec_kpipe_rx_ring_size = ring_size;
3466 if (if_ipsec_debug != 0) {
3467 printf("%s: IPSEC_OPT_KPIPE_RX_RING_SIZE %u\n", __func__, ring_size);
3468 }
3469 break;
3470 }
3471 case IPSEC_OPT_OUTPUT_DSCP_MAPPING: {
3472 if (len != sizeof(int)) {
3473 result = EMSGSIZE;
3474 break;
3475 }
3476 if (pcb->ipsec_ifp == NULL) {
3477 // Only can set after connecting
3478 result = EINVAL;
3479 break;
3480 }
3481
3482 ipsec_dscp_mapping_t output_dscp_mapping = (ipsec_dscp_mapping_t)(*(int *)data);
3483 if (output_dscp_mapping > IPSEC_DSCP_MAPPING_LEGACY) {
3484 return EINVAL;
3485 }
3486
3487 pcb->ipsec_output_dscp_mapping = output_dscp_mapping;
3488
3489 os_log(OS_LOG_DEFAULT, "%s IPSEC_OPT_OUTPUT_DSCP_MAPPING %s DSCP %d\n",
3490 __func__, pcb->ipsec_ifp->if_xname,
3491 pcb->ipsec_output_dscp_mapping);
3492 break;
3493 }
3494
3495 #endif // IPSEC_NEXUS
3496
3497 default: {
3498 result = ENOPROTOOPT;
3499 break;
3500 }
3501 }
3502
3503 return result;
3504 }
3505
/*
 * Kernel control getsockopt handler for the ipsec interface.
 *
 * Copies the requested option value out of the pcb into *data. *len must
 * match the option's expected size exactly (except IPSEC_OPT_IFNAME,
 * which treats *len as a buffer size and updates it). Returns 0 on
 * success or an errno value.
 */
static errno_t
ipsec_ctl_getopt(__unused kern_ctl_ref kctlref,
    __unused u_int32_t unit,
    void *unitinfo,
    int opt,
    void *data,
    size_t *len)
{
	errno_t result = 0;
	struct ipsec_pcb *pcb = unitinfo;
	if (pcb == NULL) {
		return EINVAL;
	}

	switch (opt) {
	case IPSEC_OPT_FLAGS: {
		if (*len != sizeof(u_int32_t)) {
			result = EMSGSIZE;
		} else {
			*(u_int32_t *)data = pcb->ipsec_external_flags;
		}
		break;
	}

	case IPSEC_OPT_EXT_IFDATA_STATS: {
		if (*len != sizeof(int)) {
			result = EMSGSIZE;
		} else {
			*(int *)data = (pcb->ipsec_ext_ifdata_stats) ? 1 : 0;
		}
		break;
	}

	case IPSEC_OPT_IFNAME: {
		/* Caller's buffer must hold the name plus NUL terminator */
		if (*len < MIN(strlen(pcb->ipsec_if_xname) + 1, sizeof(pcb->ipsec_if_xname))) {
			result = EMSGSIZE;
		} else {
			if (pcb->ipsec_ifp == NULL) {
				// Only can get after connecting
				result = EINVAL;
				break;
			}
			/* Report the copied length including the NUL */
			*len = scnprintf(data, *len, "%s", pcb->ipsec_if_xname) + 1;
		}
		break;
	}

	case IPSEC_OPT_OUTPUT_TRAFFIC_CLASS: {
		if (*len != sizeof(int)) {
			result = EMSGSIZE;
		} else {
			*(int *)data = so_svc2tc(pcb->ipsec_output_service_class);
		}
		break;
	}

#if IPSEC_NEXUS

	case IPSEC_OPT_ENABLE_CHANNEL: {
		if (*len != sizeof(int)) {
			result = EMSGSIZE;
		} else {
			lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
			*(int *)data = pcb->ipsec_kpipe_count;
			lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		}
		break;
	}

	case IPSEC_OPT_CHANNEL_BIND_PID: {
		if (*len != sizeof(pid_t)) {
			result = EMSGSIZE;
		} else {
			lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
			*(pid_t *)data = pcb->ipsec_kpipe_pid;
			lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		}
		break;
	}

	case IPSEC_OPT_CHANNEL_BIND_UUID: {
		if (*len != sizeof(uuid_t)) {
			result = EMSGSIZE;
		} else {
			lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
			uuid_copy(*((uuid_t *)data), pcb->ipsec_kpipe_proc_uuid);
			lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		}
		break;
	}

	case IPSEC_OPT_ENABLE_FLOWSWITCH: {
		if (*len != sizeof(int)) {
			result = EMSGSIZE;
		} else {
			/* NOTE(review): reads ipsec_ifp without the pcb lock and without
			 * a NULL check — presumably if_check_netagent tolerates this;
			 * verify against its implementation. */
			*(int *)data = if_check_netagent(pcb->ipsec_ifp, pcb->ipsec_nx.fsw_agent);
		}
		break;
	}

	case IPSEC_OPT_ENABLE_NETIF: {
		if (*len != sizeof(int)) {
			result = EMSGSIZE;
		} else {
			lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
			*(int *)data = !!pcb->ipsec_use_netif;
			lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		}
		break;
	}

	case IPSEC_OPT_GET_CHANNEL_UUID: {
		lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
		if (!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
			/* No kpipe channels allocated for this pcb */
			result = ENXIO;
		} else if (*len != sizeof(uuid_t) * pcb->ipsec_kpipe_count) {
			result = EMSGSIZE;
		} else {
			/* Copy out one UUID per allocated kpipe channel */
			for (unsigned int i = 0; i < pcb->ipsec_kpipe_count; i++) {
				uuid_copy(((uuid_t *)data)[i], pcb->ipsec_kpipe_uuid[i]);
			}
		}
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		break;
	}

	case IPSEC_OPT_INPUT_FRAG_SIZE: {
		if (*len != sizeof(u_int32_t)) {
			result = EMSGSIZE;
		} else {
			*(u_int32_t *)data = pcb->ipsec_input_frag_size;
		}
		break;
	}
	case IPSEC_OPT_SLOT_SIZE: {
		if (*len != sizeof(u_int32_t)) {
			result = EMSGSIZE;
		} else {
			*(u_int32_t *)data = pcb->ipsec_slot_size;
		}
		break;
	}
	case IPSEC_OPT_NETIF_RING_SIZE: {
		if (*len != sizeof(u_int32_t)) {
			result = EMSGSIZE;
		} else {
			*(u_int32_t *)data = pcb->ipsec_netif_ring_size;
		}
		break;
	}
	case IPSEC_OPT_TX_FSW_RING_SIZE: {
		if (*len != sizeof(u_int32_t)) {
			result = EMSGSIZE;
		} else {
			*(u_int32_t *)data = pcb->ipsec_tx_fsw_ring_size;
		}
		break;
	}
	case IPSEC_OPT_RX_FSW_RING_SIZE: {
		if (*len != sizeof(u_int32_t)) {
			result = EMSGSIZE;
		} else {
			*(u_int32_t *)data = pcb->ipsec_rx_fsw_ring_size;
		}
		break;
	}
	case IPSEC_OPT_KPIPE_TX_RING_SIZE: {
		if (*len != sizeof(u_int32_t)) {
			result = EMSGSIZE;
		} else {
			*(u_int32_t *)data = pcb->ipsec_kpipe_tx_ring_size;
		}
		break;
	}
	case IPSEC_OPT_KPIPE_RX_RING_SIZE: {
		if (*len != sizeof(u_int32_t)) {
			result = EMSGSIZE;
		} else {
			*(u_int32_t *)data = pcb->ipsec_kpipe_rx_ring_size;
		}
		break;
	}

#endif // IPSEC_NEXUS

	default: {
		result = ENOPROTOOPT;
		break;
	}
	}

	return result;
}
3699
/* Network Interface functions */

/*
 * Legacy (non-nexus) output path: encrypts one outbound packet and hands
 * it to ip_output()/ip6_output(). On success the mbuf is consumed; on
 * error it is freed here. Applies flow-advisory backpressure by disabling
 * interface output when the IP layer reports flow control.
 */
static errno_t
ipsec_output(ifnet_t interface,
    mbuf_t data)
{
	struct ipsec_pcb *pcb = ifnet_softc(interface);
	struct ipsec_output_state ipsec_state;
	struct route ro;
	struct route_in6 ro6;
	size_t length;
	struct ip *ip = NULL;
	struct ip6_hdr *ip6 = NULL;
	struct ip_out_args ipoa;
	struct ip6_out_args ip6oa;
	int error = 0;
	u_int ip_version = 0;
	int flags = 0;
	struct flowadv *adv = NULL;

	// Make sure this packet isn't looping through the interface
	if (necp_get_last_interface_index_from_packet(data) == interface->if_index) {
		error = EINVAL;
		goto ipsec_output_err;
	}

	// Mark the interface so NECP can evaluate tunnel policy
	necp_mark_packet_from_interface(data, interface);

	/* Need at least an IPv4 header in the first mbuf to read the version */
	if (data->m_len < sizeof(*ip)) {
		os_log_error(OS_LOG_DEFAULT, "ipsec_output: first mbuf length shorter than IP header length: %d.\n", data->m_len);
		IPSEC_STAT_INCREMENT(ipsecstat.out_inval);
		error = EINVAL;
		goto ipsec_output_err;
	}

	ip = mtod(data, struct ip *);
	ip_version = ip->ip_v;

	switch (ip_version) {
	case 4: {
		u_int8_t ip_hlen = 0;
#ifdef _IP_VHL
		ip_hlen = _IP_VHL_HL(ip->ip_vhl) << 2;
#else
		ip_hlen = (uint8_t)(ip->ip_hl << 2);
#endif
		if (ip_hlen < sizeof(*ip)) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_output: Bad ip header length %d.\n", ip_hlen);
			IPSEC_STAT_INCREMENT(ipsecstat.out_inval);
			error = EINVAL;
			goto ipsec_output_err;
		}
#if IPSEC_NEXUS
		if (!pcb->ipsec_use_netif)
#endif // IPSEC_NEXUS
		{
			/* BPF tap only on the legacy path; netif mode taps elsewhere */
			int af = AF_INET;
			bpf_tap_out(pcb->ipsec_ifp, DLT_NULL, data, &af, sizeof(af));
		}

		/* Apply encryption */
		memset(&ipsec_state, 0, sizeof(ipsec_state));
		ipsec_state.m = data;
		ipsec_state.dst = (struct sockaddr *)&ip->ip_dst;
		memset(&ipsec_state.ro, 0, sizeof(ipsec_state.ro));
		ipsec_state.dscp_mapping = pcb->ipsec_output_dscp_mapping;

		error = ipsec4_interface_output(&ipsec_state, interface);
		/* Tunneled in IPv6 - packet is gone */
		if (error == 0 && ipsec_state.tunneled == 6) {
			goto done;
		}

		/* ipsec4_interface_output may have replaced the mbuf chain */
		data = ipsec_state.m;
		if (error || data == NULL) {
			if (error) {
				os_log_error(OS_LOG_DEFAULT, "ipsec_output: ipsec4_output error %d.\n", error);
			}
			goto ipsec_output_err;
		}

		/* Set traffic class, set flow */
		m_set_service_class(data, pcb->ipsec_output_service_class);
		data->m_pkthdr.pkt_flowsrc = FLOWSRC_IFNET;
#if SKYWALK
		data->m_pkthdr.pkt_mpriv_srcid = interface->if_flowhash;
#else /* !SKYWALK */
		data->m_pkthdr.pkt_flowid = interface->if_flowhash;
#endif /* !SKYWALK */
		data->m_pkthdr.pkt_proto = ip->ip_p;
		data->m_pkthdr.pkt_flags = (PKTF_FLOW_ID | PKTF_FLOW_ADV | PKTF_FLOW_LOCALSRC);

		/* Flip endian-ness for ip_output */
		ip = mtod(data, struct ip *);
		NTOHS(ip->ip_len);
		NTOHS(ip->ip_off);

		/* Increment statistics */
		length = mbuf_pkthdr_len(data);
		ifnet_stat_increment_out(interface, 1, (uint16_t)length, 0);

		/* Send to ip_output */
		memset(&ro, 0, sizeof(ro));

		flags = (IP_OUTARGS |   /* Passing out args to specify interface */
		    IP_NOIPSEC);        /* To ensure the packet doesn't go through ipsec twice */

		memset(&ipoa, 0, sizeof(ipoa));
		ipoa.ipoa_flowadv.code = 0;
		ipoa.ipoa_flags = IPOAF_SELECT_SRCIF | IPOAF_BOUND_SRCADDR;
		if (ipsec_state.outgoing_if) {
			ipoa.ipoa_boundif = ipsec_state.outgoing_if;
			ipoa.ipoa_flags |= IPOAF_BOUND_IF;
		}
		ipsec_set_ipoa_for_interface(pcb->ipsec_ifp, &ipoa);

		adv = &ipoa.ipoa_flowadv;

		/* ip_output consumes the mbuf regardless of outcome */
		(void)ip_output(data, NULL, &ro, flags, NULL, &ipoa);
		data = NULL;

		if (adv->code == FADV_FLOW_CONTROLLED || adv->code == FADV_SUSPENDED) {
			/* Propagate flow control back to the interface queue */
			error = ENOBUFS;
			ifnet_disable_output(interface);
		}

		goto done;
	}
	case 6: {
		if (data->m_len < sizeof(*ip6)) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_output: first mbuf length shorter than IPv6 header length: %d.\n", data->m_len);
			IPSEC_STAT_INCREMENT(ipsec6stat.out_inval);
			error = EINVAL;
			goto ipsec_output_err;
		}
#if IPSEC_NEXUS
		if (!pcb->ipsec_use_netif)
#endif // IPSEC_NEXUS
		{
			int af = AF_INET6;
			bpf_tap_out(pcb->ipsec_ifp, DLT_NULL, data, &af, sizeof(af));
		}

		/* Isolate the IPv6 header in its own mbuf; frees data on failure */
		data = ipsec6_splithdr(data);
		if (data == NULL) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_output: ipsec6_splithdr returned NULL\n");
			goto ipsec_output_err;
		}

		ip6 = mtod(data, struct ip6_hdr *);

		memset(&ipsec_state, 0, sizeof(ipsec_state));
		ipsec_state.m = data;
		ipsec_state.dst = (struct sockaddr *)&ip6->ip6_dst;
		memset(&ipsec_state.ro, 0, sizeof(ipsec_state.ro));
		ipsec_state.dscp_mapping = pcb->ipsec_output_dscp_mapping;

		error = ipsec6_interface_output(&ipsec_state, interface, &ip6->ip6_nxt, ipsec_state.m);
		if (error == 0 && ipsec_state.tunneled == 4) {        /* tunneled in IPv4 - packet is gone */
			goto done;
		}
		data = ipsec_state.m;
		if (error || data == NULL) {
			if (error) {
				os_log_error(OS_LOG_DEFAULT, "ipsec_output: ipsec6_output error %d\n", error);
			}
			goto ipsec_output_err;
		}

		/* Set traffic class, set flow */
		m_set_service_class(data, pcb->ipsec_output_service_class);
		data->m_pkthdr.pkt_flowsrc = FLOWSRC_IFNET;
#if SKYWALK
		data->m_pkthdr.pkt_mpriv_srcid = interface->if_flowhash;
#else /* !SKYWALK */
		data->m_pkthdr.pkt_flowid = interface->if_flowhash;
#endif /* !SKYWALK */
		data->m_pkthdr.pkt_proto = ip6->ip6_nxt;
		data->m_pkthdr.pkt_flags = (PKTF_FLOW_ID | PKTF_FLOW_ADV | PKTF_FLOW_LOCALSRC);

		/* Increment statistics */
		length = mbuf_pkthdr_len(data);
		ifnet_stat_increment_out(interface, 1, (uint16_t)length, 0);

		/* Send to ip6_output */
		memset(&ro6, 0, sizeof(ro6));

		flags = IPV6_OUTARGS;

		memset(&ip6oa, 0, sizeof(ip6oa));
		ip6oa.ip6oa_flowadv.code = 0;
		ip6oa.ip6oa_flags = IP6OAF_SELECT_SRCIF | IP6OAF_BOUND_SRCADDR;
		if (ipsec_state.outgoing_if) {
			ip6oa.ip6oa_boundif = ipsec_state.outgoing_if;
			ip6oa.ip6oa_flags |= IP6OAF_BOUND_IF;
			ip6_output_setsrcifscope(data, ipsec_state.outgoing_if, NULL);
			ip6_output_setdstifscope(data, ipsec_state.outgoing_if, NULL);
		} else {
			ip6_output_setsrcifscope(data, IFSCOPE_UNKNOWN, NULL);
			ip6_output_setdstifscope(data, IFSCOPE_UNKNOWN, NULL);
		}
		ipsec_set_ip6oa_for_interface(pcb->ipsec_ifp, &ip6oa);

		adv = &ip6oa.ip6oa_flowadv;

		/* ip6_output consumes the mbuf regardless of outcome */
		(void) ip6_output(data, NULL, &ro6, flags, NULL, NULL, &ip6oa);
		data = NULL;

		if (adv->code == FADV_FLOW_CONTROLLED || adv->code == FADV_SUSPENDED) {
			error = ENOBUFS;
			ifnet_disable_output(interface);
		}

		goto done;
	}
	default: {
		os_log_error(OS_LOG_DEFAULT, "ipsec_output: Received unknown packet version %d.\n", ip_version);
		error = EINVAL;
		goto ipsec_output_err;
	}
	}

done:
	return error;

ipsec_output_err:
	if (data) {
		mbuf_freem(data);
	}
	goto done;
}
3931
3932 static void
ipsec_start(ifnet_t interface)3933 ipsec_start(ifnet_t interface)
3934 {
3935 mbuf_t data;
3936 struct ipsec_pcb *pcb = ifnet_softc(interface);
3937
3938 VERIFY(pcb != NULL);
3939 for (;;) {
3940 if (ifnet_dequeue(interface, &data) != 0) {
3941 break;
3942 }
3943 if (ipsec_output(interface, data) != 0) {
3944 break;
3945 }
3946 }
3947 }
3948
3949 /* Network Interface functions */
3950 static errno_t
ipsec_demux(__unused ifnet_t interface,mbuf_t data,__unused char * frame_header,protocol_family_t * protocol)3951 ipsec_demux(__unused ifnet_t interface,
3952 mbuf_t data,
3953 __unused char *frame_header,
3954 protocol_family_t *protocol)
3955 {
3956 struct ip *ip;
3957 u_int ip_version;
3958
3959 while (data != NULL && mbuf_len(data) < 1) {
3960 data = mbuf_next(data);
3961 }
3962
3963 if (data == NULL) {
3964 return ENOENT;
3965 }
3966
3967 ip = mtod(data, struct ip *);
3968 ip_version = ip->ip_v;
3969
3970 switch (ip_version) {
3971 case 4:
3972 *protocol = PF_INET;
3973 return 0;
3974 case 6:
3975 *protocol = PF_INET6;
3976 return 0;
3977 default:
3978 *protocol = PF_UNSPEC;
3979 break;
3980 }
3981
3982 return 0;
3983 }
3984
3985 static errno_t
ipsec_add_proto(__unused ifnet_t interface,protocol_family_t protocol,__unused const struct ifnet_demux_desc * demux_array,__unused u_int32_t demux_count)3986 ipsec_add_proto(__unused ifnet_t interface,
3987 protocol_family_t protocol,
3988 __unused const struct ifnet_demux_desc *demux_array,
3989 __unused u_int32_t demux_count)
3990 {
3991 switch (protocol) {
3992 case PF_INET:
3993 return 0;
3994 case PF_INET6:
3995 return 0;
3996 default:
3997 break;
3998 }
3999
4000 return ENOPROTOOPT;
4001 }
4002
/*
 * ipsec_del_proto - protocol detach callback.
 *
 * No per-protocol state is kept, so detaching is always a no-op
 * that reports success.
 */
static errno_t
ipsec_del_proto(__unused ifnet_t interface,
    __unused protocol_family_t protocol)
{
	return 0;
}
4009
/*
 * ipsec_ioctl - interface ioctl handler.
 *
 * Handles SIOCSIFMTU (set MTU; when the netif/nexus data path is in
 * use the new MTU must fit in a channel slot), SIOCSIFFLAGS (a no-op
 * here; ifioctl() does the work), and SIOCSIFSUBFAMILY (retag the
 * interface subfamily).  Any other command returns EOPNOTSUPP.
 */
static errno_t
ipsec_ioctl(ifnet_t interface,
    u_long command,
    void *data)
{
#if IPSEC_NEXUS
	struct ipsec_pcb *pcb = ifnet_softc(interface);
#endif
	errno_t result = 0;

	switch (command) {
	case SIOCSIFMTU: {
#if IPSEC_NEXUS
		if (pcb->ipsec_use_netif) {
			// Make sure we can fit packets in the channel buffers
			if (((uint64_t)((struct ifreq*)data)->ifr_mtu) > pcb->ipsec_slot_size) {
				result = EINVAL;
			} else {
				ifnet_set_mtu(interface, (uint32_t)((struct ifreq*)data)->ifr_mtu);
			}
		} else
#endif // IPSEC_NEXUS
		{
			ifnet_set_mtu(interface, ((struct ifreq*)data)->ifr_mtu);
		}
		break;
	}

	case SIOCSIFFLAGS:
		/* ifioctl() takes care of it */
		break;

	case SIOCSIFSUBFAMILY: {
		uint32_t subfamily;

		subfamily = ((struct ifreq*)data)->ifr_type.ift_subfamily;
		switch (subfamily) {
		case IFRTYPE_SUBFAMILY_BLUETOOTH:
			interface->if_subfamily = IFNET_SUBFAMILY_BLUETOOTH;
			break;
		case IFRTYPE_SUBFAMILY_WIFI:
			interface->if_subfamily = IFNET_SUBFAMILY_WIFI;
			break;
		case IFRTYPE_SUBFAMILY_QUICKRELAY:
			interface->if_subfamily = IFNET_SUBFAMILY_QUICKRELAY;
			break;
		case IFRTYPE_SUBFAMILY_DEFAULT:
			interface->if_subfamily = IFNET_SUBFAMILY_DEFAULT;
			break;
		default:
			/* Reject subfamilies this driver does not support. */
			result = EINVAL;
			break;
		}
		break;
	}

	default:
		result = EOPNOTSUPP;
	}

	return result;
}
4072
/*
 * ipsec_detached - ifnet detach completion callback.
 *
 * Releases the ifnet reference held by the pcb, then frees the pcb
 * and disposes of the ifnet while holding ipsec_lock.
 */
static void
ipsec_detached(ifnet_t interface)
{
	struct ipsec_pcb *pcb = ifnet_softc(interface);

	(void)ifnet_release(interface);
	lck_mtx_lock(&ipsec_lock);
	ipsec_free_pcb(pcb, true);
	(void)ifnet_dispose(interface);
	lck_mtx_unlock(&ipsec_lock);
}
4084
4085 /* Protocol Handlers */
4086
/*
 * ipsec_proto_input - hand an inbound packet to the protocol stack.
 *
 * Stamps the receive interface on the packet, taps bpf/pktap (only on
 * the non-netif data path), then passes the packet to proto_input().
 * Interface input statistics are updated according to whether
 * proto_input() accepted the packet.  Always returns 0; on failure
 * the packet is freed here.
 */
static errno_t
ipsec_proto_input(ifnet_t interface,
    protocol_family_t protocol,
    mbuf_t m,
    __unused char *frame_header)
{
	mbuf_pkthdr_setrcvif(m, interface);

#if IPSEC_NEXUS
	struct ipsec_pcb *pcb = ifnet_softc(interface);
	if (!pcb->ipsec_use_netif)
#endif // IPSEC_NEXUS
	{
		/* DLT_NULL framing: prepend the address family for bpf. */
		uint32_t af = 0;
		struct ip *ip = mtod(m, struct ip *);
		if (ip->ip_v == 4) {
			af = AF_INET;
		} else if (ip->ip_v == 6) {
			af = AF_INET6;
		}
		bpf_tap_in(interface, DLT_NULL, m, &af, sizeof(af));
		pktap_input(interface, protocol, m, NULL);
	}

	/* Snapshot the length before proto_input() may consume the mbuf. */
	int32_t pktlen = m->m_pkthdr.len;
	if (proto_input(protocol, m) != 0) {
		ifnet_stat_increment_in(interface, 0, 0, 1);
		m_freem(m);
	} else {
		ifnet_stat_increment_in(interface, 1, pktlen, 0);
	}

	return 0;
}
4121
4122 static errno_t
ipsec_proto_pre_output(__unused ifnet_t interface,protocol_family_t protocol,__unused mbuf_t * packet,__unused const struct sockaddr * dest,__unused void * route,__unused char * frame_type,__unused char * link_layer_dest)4123 ipsec_proto_pre_output(__unused ifnet_t interface,
4124 protocol_family_t protocol,
4125 __unused mbuf_t *packet,
4126 __unused const struct sockaddr *dest,
4127 __unused void *route,
4128 __unused char *frame_type,
4129 __unused char *link_layer_dest)
4130 {
4131 *(protocol_family_t *)(void *)frame_type = protocol;
4132 return 0;
4133 }
4134
4135 static errno_t
ipsec_attach_proto(ifnet_t interface,protocol_family_t protocol)4136 ipsec_attach_proto(ifnet_t interface,
4137 protocol_family_t protocol)
4138 {
4139 struct ifnet_attach_proto_param proto;
4140 errno_t result;
4141
4142 bzero(&proto, sizeof(proto));
4143 proto.input = ipsec_proto_input;
4144 proto.pre_output = ipsec_proto_pre_output;
4145
4146 result = ifnet_attach_protocol(interface, protocol, &proto);
4147 if (result != 0 && result != EEXIST) {
4148 os_log_error(OS_LOG_DEFAULT, "ipsec_attach_inet - ifnet_attach_protocol %d failed: %d\n",
4149 protocol, result);
4150 }
4151
4152 return result;
4153 }
4154
/*
 * ipsec_inject_inbound_packet - inject a decrypted packet (or chain)
 * into the interface's inbound path.
 *
 * Netif/nexus path: appends the packet chain to the pcb's pending
 * input chain (bounded by if_ipsec_max_pending_input) and notifies
 * the rx ring so the channel picks it up.  Returns ENXIO if the data
 * path is stopped, ENOSPC if the pending chain is full.
 *
 * Legacy path: classifies the packet with ipsec_demux() and delivers
 * it directly through ipsec_proto_input().
 */
errno_t
ipsec_inject_inbound_packet(ifnet_t interface,
    mbuf_t packet)
{
#if IPSEC_NEXUS
	struct ipsec_pcb *pcb = ifnet_softc(interface);

	if (pcb->ipsec_use_netif) {
		/* Take a data-path reference; fails once the pcb is draining. */
		if (!ipsec_data_move_begin(pcb)) {
			os_log_info(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__,
			    if_name(pcb->ipsec_ifp));
			return ENXIO;
		}

		lck_rw_lock_shared(&pcb->ipsec_pcb_lock);

		lck_mtx_lock(&pcb->ipsec_input_chain_lock);

		/* Back-pressure: refuse when too many packets are queued. */
		if (pcb->ipsec_input_chain_count > (u_int32_t)if_ipsec_max_pending_input) {
			lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
			lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
			ipsec_data_move_end(pcb);
			return ENOSPC;
		}

		/* Link the new chain onto the tail of the pending input chain. */
		if (pcb->ipsec_input_chain != NULL) {
			pcb->ipsec_input_chain_last->m_nextpkt = packet;
		} else {
			pcb->ipsec_input_chain = packet;
		}
		pcb->ipsec_input_chain_count++;
		/* Walk to the end of the injected chain, counting each packet. */
		while (packet->m_nextpkt) {
			VERIFY(packet != packet->m_nextpkt);
			packet = packet->m_nextpkt;
			pcb->ipsec_input_chain_count++;
		}
		pcb->ipsec_input_chain_last = packet;
		lck_mtx_unlock(&pcb->ipsec_input_chain_lock);

		/* Snapshot the ring under the pcb lock, notify after dropping it. */
		kern_channel_ring_t rx_ring = pcb->ipsec_netif_rxring[0];
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);

		if (rx_ring != NULL) {
			kern_channel_notify(rx_ring, 0);
		}

		ipsec_data_move_end(pcb);
		return 0;
	} else
#endif // IPSEC_NEXUS
	{
		errno_t error;
		protocol_family_t protocol;
		if ((error = ipsec_demux(interface, packet, NULL, &protocol)) != 0) {
			return error;
		}

		return ipsec_proto_input(interface, protocol, packet, NULL);
	}
}
4215
/*
 * ipsec_set_pkthdr_for_interface - stamp flow metadata on an
 * outbound packet for this ipsec interface.
 *
 * Sets the configured service class, flow source/id, the inner
 * protocol (from the IPv4 or IPv6 header, per 'family'), and the
 * flow-related pkt_flags.  No-op if packet or interface is NULL or
 * the interface has no pcb.
 */
void
ipsec_set_pkthdr_for_interface(ifnet_t interface, mbuf_t packet, int family,
    uint32_t flowid)
{
#pragma unused (flowid)
	if (packet != NULL && interface != NULL) {
		struct ipsec_pcb *pcb = ifnet_softc(interface);
		if (pcb != NULL) {
			/* Set traffic class, set flow */
			m_set_service_class(packet, pcb->ipsec_output_service_class);
			packet->m_pkthdr.pkt_flowsrc = FLOWSRC_IFNET;
#if SKYWALK
			/* With Skywalk the caller-supplied flowid is used directly. */
			packet->m_pkthdr.pkt_mpriv_srcid = interface->if_flowhash;
			packet->m_pkthdr.pkt_flowid = flowid;
#else /* !SKYWALK */
			packet->m_pkthdr.pkt_flowid = interface->if_flowhash;
#endif /* !SKYWALK */
			if (family == AF_INET) {
				struct ip *ip = mtod(packet, struct ip *);
				packet->m_pkthdr.pkt_proto = ip->ip_p;
			} else if (family == AF_INET6) {
				struct ip6_hdr *ip6 = mtod(packet, struct ip6_hdr *);
				packet->m_pkthdr.pkt_proto = ip6->ip6_nxt;
			}
			packet->m_pkthdr.pkt_flags = (PKTF_FLOW_ID | PKTF_FLOW_ADV | PKTF_FLOW_LOCALSRC);
		}
	}
}
4244
4245 void
ipsec_set_ipoa_for_interface(ifnet_t interface,struct ip_out_args * ipoa)4246 ipsec_set_ipoa_for_interface(ifnet_t interface, struct ip_out_args *ipoa)
4247 {
4248 struct ipsec_pcb *pcb;
4249
4250 if (interface == NULL || ipoa == NULL) {
4251 return;
4252 }
4253 pcb = ifnet_softc(interface);
4254
4255 if (net_qos_policy_restricted == 0) {
4256 ipoa->ipoa_flags |= IPOAF_QOSMARKING_ALLOWED;
4257 ipoa->ipoa_sotc = so_svc2tc(pcb->ipsec_output_service_class);
4258 } else if (pcb->ipsec_output_service_class != MBUF_SC_VO ||
4259 net_qos_policy_restrict_avapps != 0) {
4260 ipoa->ipoa_flags &= ~IPOAF_QOSMARKING_ALLOWED;
4261 } else {
4262 ipoa->ipoa_flags |= IP6OAF_QOSMARKING_ALLOWED;
4263 ipoa->ipoa_sotc = SO_TC_VO;
4264 }
4265 }
4266
4267 void
ipsec_set_ip6oa_for_interface(ifnet_t interface,struct ip6_out_args * ip6oa)4268 ipsec_set_ip6oa_for_interface(ifnet_t interface, struct ip6_out_args *ip6oa)
4269 {
4270 struct ipsec_pcb *pcb;
4271
4272 if (interface == NULL || ip6oa == NULL) {
4273 return;
4274 }
4275 pcb = ifnet_softc(interface);
4276
4277 if (net_qos_policy_restricted == 0) {
4278 ip6oa->ip6oa_flags |= IPOAF_QOSMARKING_ALLOWED;
4279 ip6oa->ip6oa_sotc = so_svc2tc(pcb->ipsec_output_service_class);
4280 } else if (pcb->ipsec_output_service_class != MBUF_SC_VO ||
4281 net_qos_policy_restrict_avapps != 0) {
4282 ip6oa->ip6oa_flags &= ~IPOAF_QOSMARKING_ALLOWED;
4283 } else {
4284 ip6oa->ip6oa_flags |= IP6OAF_QOSMARKING_ALLOWED;
4285 ip6oa->ip6oa_sotc = SO_TC_VO;
4286 }
4287 }
4288
4289 static boolean_t
ipsec_data_move_begin(struct ipsec_pcb * pcb)4290 ipsec_data_move_begin(struct ipsec_pcb *pcb)
4291 {
4292 boolean_t ret = 0;
4293
4294 lck_mtx_lock_spin(&pcb->ipsec_pcb_data_move_lock);
4295 if ((ret = IPSEC_IS_DATA_PATH_READY(pcb))) {
4296 pcb->ipsec_pcb_data_move++;
4297 }
4298 lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
4299
4300 return ret;
4301 }
4302
4303 static void
ipsec_data_move_end(struct ipsec_pcb * pcb)4304 ipsec_data_move_end(struct ipsec_pcb *pcb)
4305 {
4306 lck_mtx_lock_spin(&pcb->ipsec_pcb_data_move_lock);
4307 VERIFY(pcb->ipsec_pcb_data_move > 0);
4308 /*
4309 * if there's no more thread moving data, wakeup any
4310 * drainers that's blocked waiting for this.
4311 */
4312 if (--pcb->ipsec_pcb_data_move == 0 && pcb->ipsec_pcb_drainers > 0) {
4313 wakeup(&(pcb->ipsec_pcb_data_move));
4314 }
4315 lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
4316 }
4317
/*
 * ipsec_data_move_drain - wait for all data-path threads to leave.
 *
 * The caller must have already cleared the data-path-ready flag so
 * that no new threads can enter via ipsec_data_move_begin().  Sleeps
 * until the data-move reference count drops to zero.
 */
static void
ipsec_data_move_drain(struct ipsec_pcb *pcb)
{
	lck_mtx_lock(&pcb->ipsec_pcb_data_move_lock);
	/* data path must already be marked as not ready */
	VERIFY(!IPSEC_IS_DATA_PATH_READY(pcb));
	pcb->ipsec_pcb_drainers++;
	/* ipsec_data_move_end() wakes us when the count reaches zero. */
	while (pcb->ipsec_pcb_data_move != 0) {
		(void)msleep(&(pcb->ipsec_pcb_data_move), &pcb->ipsec_pcb_data_move_lock,
		    (PZERO - 1), __func__, NULL);
	}
	VERIFY(!IPSEC_IS_DATA_PATH_READY(pcb));
	VERIFY(pcb->ipsec_pcb_drainers > 0);
	pcb->ipsec_pcb_drainers--;
	lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
}
4334
/*
 * ipsec_wait_data_move_drain - stop the data path and wait for it
 * to quiesce.
 *
 * Clears the data-path-ready flag (so ipsec_data_move_begin() fails
 * for new entrants), then blocks until every in-flight data-path
 * thread has called ipsec_data_move_end().
 */
static void
ipsec_wait_data_move_drain(struct ipsec_pcb *pcb)
{
	/*
	 * Mark the data path as not usable.
	 */
	lck_mtx_lock(&pcb->ipsec_pcb_data_move_lock);
	IPSEC_CLR_DATA_PATH_READY(pcb);
	lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);

	/* Wait until all threads in the data paths are done. */
	ipsec_data_move_drain(pcb);
}
4348