1 /*
2 * Copyright (c) 2012-2023 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29
30 #include <sys/systm.h>
31 #include <sys/kern_control.h>
32 #include <net/kpi_protocol.h>
33 #include <net/kpi_interface.h>
34 #include <sys/socket.h>
35 #include <sys/socketvar.h>
36 #include <net/if.h>
37 #include <net/if_types.h>
38 #include <net/bpf.h>
39 #include <net/if_ipsec.h>
40 #include <sys/mbuf.h>
41 #include <sys/sockio.h>
42 #include <netinet/in.h>
43 #include <netinet/ip6.h>
44 #include <netinet6/in6_var.h>
45 #include <netinet6/ip6_var.h>
46 #include <sys/kauth.h>
47 #include <netinet6/ipsec.h>
48 #include <netinet6/ipsec6.h>
49 #include <netinet6/esp.h>
50 #include <netinet6/esp6.h>
51 #include <netinet/ip.h>
52 #include <net/flowadv.h>
53 #include <net/necp.h>
54 #include <netkey/key.h>
55 #include <net/pktap.h>
56 #include <kern/zalloc.h>
57 #include <os/log.h>
58
59 #if SKYWALK
60 #include <skywalk/os_skywalk_private.h>
61 #include <skywalk/nexus/flowswitch/nx_flowswitch.h>
62 #include <skywalk/nexus/netif/nx_netif.h>
63 #define IPSEC_NEXUS 1
64 #else // SKYWALK
65 #define IPSEC_NEXUS 0
66 #endif // SKYWALK
67
68 extern int net_qos_policy_restricted;
69 extern int net_qos_policy_restrict_avapps;
70
71 /* Kernel Control functions */
72 static errno_t ipsec_ctl_setup(u_int32_t *unit, void **unitinfo);
73 static errno_t ipsec_ctl_bind(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
74 void **unitinfo);
75 static errno_t ipsec_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
76 void **unitinfo);
77 static errno_t ipsec_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t unit,
78 void *unitinfo);
79 static errno_t ipsec_ctl_send(kern_ctl_ref kctlref, u_int32_t unit,
80 void *unitinfo, mbuf_t m, int flags);
81 static errno_t ipsec_ctl_getopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo,
82 int opt, void *data, size_t *len);
83 static errno_t ipsec_ctl_setopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo,
84 int opt, void *data, size_t len);
85
86 /* Network Interface functions */
87 static void ipsec_start(ifnet_t interface);
88 static errno_t ipsec_output(ifnet_t interface, mbuf_t data);
89 static errno_t ipsec_demux(ifnet_t interface, mbuf_t data, char *frame_header,
90 protocol_family_t *protocol);
91 static errno_t ipsec_add_proto(ifnet_t interface, protocol_family_t protocol,
92 const struct ifnet_demux_desc *demux_array,
93 u_int32_t demux_count);
94 static errno_t ipsec_del_proto(ifnet_t interface, protocol_family_t protocol);
95 static errno_t ipsec_ioctl(ifnet_t interface, u_long cmd, void *data);
96 static void ipsec_detached(ifnet_t interface);
97
98 /* Protocol handlers */
99 static errno_t ipsec_attach_proto(ifnet_t interface, protocol_family_t proto);
100 static errno_t ipsec_proto_input(ifnet_t interface, protocol_family_t protocol,
101 mbuf_t m, char *frame_header);
102 static errno_t ipsec_proto_pre_output(ifnet_t interface, protocol_family_t protocol,
103 mbuf_t *packet, const struct sockaddr *dest, void *route,
104 char *frame_type, char *link_layer_dest);
105
106 static kern_ctl_ref ipsec_kctlref;
107 static LCK_ATTR_DECLARE(ipsec_lck_attr, 0, 0);
108 static LCK_GRP_DECLARE(ipsec_lck_grp, "ipsec");
109 static LCK_MTX_DECLARE_ATTR(ipsec_lock, &ipsec_lck_grp, &ipsec_lck_attr);
110
111 #if IPSEC_NEXUS
112
113 SYSCTL_DECL(_net_ipsec);
114 SYSCTL_NODE(_net, OID_AUTO, ipsec, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "IPsec");
115 static int if_ipsec_verify_interface_creation = 0;
116 SYSCTL_INT(_net_ipsec, OID_AUTO, verify_interface_creation, CTLFLAG_RW | CTLFLAG_LOCKED, &if_ipsec_verify_interface_creation, 0, "");
117
118 #define IPSEC_IF_VERIFY(_e) if (__improbable(if_ipsec_verify_interface_creation)) { VERIFY(_e); }
119
120 #define IPSEC_IF_DEFAULT_SLOT_SIZE 2048
121 #define IPSEC_IF_DEFAULT_RING_SIZE 64
122 #define IPSEC_IF_DEFAULT_TX_FSW_RING_SIZE 64
123 #define IPSEC_IF_DEFAULT_RX_FSW_RING_SIZE 128
124 #define IPSEC_IF_DEFAULT_BUF_SEG_SIZE skmem_usr_buf_seg_size
125
126 #define IPSEC_IF_WMM_RING_COUNT NEXUS_NUM_WMM_QUEUES
127 #define IPSEC_IF_MAX_RING_COUNT IPSEC_IF_WMM_RING_COUNT
128 #define IPSEC_NETIF_WMM_TX_RING_COUNT IPSEC_IF_WMM_RING_COUNT
129 #define IPSEC_NETIF_WMM_RX_RING_COUNT 1
130 #define IPSEC_NETIF_MAX_TX_RING_COUNT IPSEC_NETIF_WMM_TX_RING_COUNT
131 #define IPSEC_NETIF_MAX_RX_RING_COUNT IPSEC_NETIF_WMM_RX_RING_COUNT
132
133 #define IPSEC_IF_MIN_RING_SIZE 8
134 #define IPSEC_IF_MAX_RING_SIZE 1024
135
136 #define IPSEC_IF_MIN_SLOT_SIZE 1024
137 #define IPSEC_IF_MAX_SLOT_SIZE (16 * 1024)
138
139 #define IPSEC_DEFAULT_MAX_PENDING_INPUT_COUNT 512
140
141 #define IPSEC_KPIPE_FLAG_WAKE_PKT 0x01
142
143 static uint32_t ipsec_kpipe_mbuf;
144
145 static int if_ipsec_max_pending_input = IPSEC_DEFAULT_MAX_PENDING_INPUT_COUNT;
146
147 static int sysctl_if_ipsec_ring_size SYSCTL_HANDLER_ARGS;
148 static int sysctl_if_ipsec_tx_fsw_ring_size SYSCTL_HANDLER_ARGS;
149 static int sysctl_if_ipsec_rx_fsw_ring_size SYSCTL_HANDLER_ARGS;
150
151 static int if_ipsec_ring_size = IPSEC_IF_DEFAULT_RING_SIZE;
152 static int if_ipsec_tx_fsw_ring_size = IPSEC_IF_DEFAULT_TX_FSW_RING_SIZE;
153 static int if_ipsec_rx_fsw_ring_size = IPSEC_IF_DEFAULT_RX_FSW_RING_SIZE;
154
155 SYSCTL_INT(_net_ipsec, OID_AUTO, max_pending_input, CTLFLAG_LOCKED | CTLFLAG_RW, &if_ipsec_max_pending_input, 0, "");
156 SYSCTL_PROC(_net_ipsec, OID_AUTO, ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
157 &if_ipsec_ring_size, IPSEC_IF_DEFAULT_RING_SIZE, &sysctl_if_ipsec_ring_size, "I", "");
158 SYSCTL_PROC(_net_ipsec, OID_AUTO, tx_fsw_ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
159 &if_ipsec_tx_fsw_ring_size, IPSEC_IF_DEFAULT_TX_FSW_RING_SIZE, &sysctl_if_ipsec_tx_fsw_ring_size, "I", "");
160 SYSCTL_PROC(_net_ipsec, OID_AUTO, rx_fsw_ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
161 &if_ipsec_rx_fsw_ring_size, IPSEC_IF_DEFAULT_RX_FSW_RING_SIZE, &sysctl_if_ipsec_rx_fsw_ring_size, "I", "");
162
163 static int if_ipsec_debug = 0;
164 SYSCTL_INT(_net_ipsec, OID_AUTO, debug, CTLFLAG_LOCKED | CTLFLAG_RW, &if_ipsec_debug, 0, "");
165
166 static errno_t
167 ipsec_register_nexus(void);
168
/*
 * Nexus identifiers backing one ipsec interface: the netif provider and
 * instance, plus the fsw_* entries (presumably the attached flowswitch —
 * confirm against the nexus setup code outside this chunk).
 */
typedef struct ipsec_nx {
	uuid_t if_provider;
	uuid_t if_instance;
	uuid_t fsw_provider;
	uuid_t fsw_instance;
	uuid_t fsw_device;
	uuid_t fsw_agent;
} *ipsec_nx_t;

/* Shared nexus controller handle, its reference count, and the kpipe
 * provider UUID. NOTE(review): serialization is not visible in this
 * chunk — presumably guarded by ipsec_lock; confirm at the use sites. */
static nexus_controller_t ipsec_ncd;
static int ipsec_ncd_refcount;
static uuid_t ipsec_kpipe_uuid;
181
182 #endif // IPSEC_NEXUS
183
184 /* Control block allocated for each kernel control connection */
struct ipsec_pcb {
	TAILQ_ENTRY(ipsec_pcb) ipsec_chain;     // linkage on the global ipsec_head list
	kern_ctl_ref ipsec_ctlref;              // kernel-control handle for this client
	ifnet_t ipsec_ifp;                      // attached network interface
	u_int32_t ipsec_unit;                   // ctl unit; 0 once disconnected (see ipsec_interface_isvalid)
	u_int32_t ipsec_unique_id;
	// These external flags can be set with IPSEC_OPT_FLAGS
	u_int32_t ipsec_external_flags;
	// These internal flags are only used within this driver
	u_int32_t ipsec_internal_flags;
	u_int32_t ipsec_input_frag_size;
	bool ipsec_frag_size_set;               // true once ipsec_input_frag_size has been configured
	int ipsec_ext_ifdata_stats;
	mbuf_svc_class_t ipsec_output_service_class;
	char ipsec_if_xname[IFXNAMSIZ];
	char ipsec_unique_name[IFXNAMSIZ];
	// PCB lock protects state fields, like ipsec_kpipe_count
	decl_lck_rw_data(, ipsec_pcb_lock);
	// lock to protect ipsec_pcb_data_move & ipsec_pcb_drainers
	decl_lck_mtx_data(, ipsec_pcb_data_move_lock);
	u_int32_t ipsec_pcb_data_move; /* number of data moving contexts */
	u_int32_t ipsec_pcb_drainers; /* number of threads waiting to drain */
	u_int32_t ipsec_pcb_data_path_state; /* internal state of interface data path */
	ipsec_dscp_mapping_t ipsec_output_dscp_mapping;

#if IPSEC_NEXUS
	lck_mtx_t ipsec_input_chain_lock;
	lck_mtx_t ipsec_kpipe_encrypt_lock;     // serializes ipsec_encrypt_mbuf calls from kpipe sync
	lck_mtx_t ipsec_kpipe_decrypt_lock;
	struct mbuf * ipsec_input_chain;        // head of pending input mbuf list
	struct mbuf * ipsec_input_chain_last;   // tail of pending input mbuf list
	u_int32_t ipsec_input_chain_count;
	// Input chain lock protects the list of input mbufs
	// The input chain lock must be taken AFTER the PCB lock if both are held
	struct ipsec_nx ipsec_nx;
	u_int32_t ipsec_kpipe_count;            // number of kpipe rings in use (see ipsec_in_wmm_mode)
	pid_t ipsec_kpipe_pid;
	uuid_t ipsec_kpipe_proc_uuid;
	uuid_t ipsec_kpipe_uuid[IPSEC_IF_MAX_RING_COUNT];
	void * ipsec_kpipe_rxring[IPSEC_IF_MAX_RING_COUNT];  // per-index rx ring, set in ipsec_kpipe_ring_init
	void * ipsec_kpipe_txring[IPSEC_IF_MAX_RING_COUNT];  // per-index tx ring, set in ipsec_kpipe_ring_init
	kern_pbufpool_t ipsec_kpipe_pp;
	u_int32_t ipsec_kpipe_tx_ring_size;
	u_int32_t ipsec_kpipe_rx_ring_size;

	kern_nexus_t ipsec_netif_nexus;         // set in ipsec_netif_prepare, cleared on disconnect
	kern_pbufpool_t ipsec_netif_pp;
	void * ipsec_netif_rxring[IPSEC_NETIF_MAX_RX_RING_COUNT];
	void * ipsec_netif_txring[IPSEC_NETIF_MAX_TX_RING_COUNT];
	uint64_t ipsec_netif_txring_size;

	u_int32_t ipsec_slot_size;              // per-slot buffer size for nexus rings
	u_int32_t ipsec_netif_ring_size;
	u_int32_t ipsec_tx_fsw_ring_size;
	u_int32_t ipsec_rx_fsw_ring_size;
	bool ipsec_use_netif;
	bool ipsec_needs_netagent;              // reported via ipsec_interface_needs_netagent()
#endif // IPSEC_NEXUS
};
244
/* These are internal flags not exposed outside this file */
#define IPSEC_FLAGS_KPIPE_ALLOCATED 1

/* data movement refcounting functions */
static boolean_t ipsec_data_move_begin(struct ipsec_pcb *pcb);
static void ipsec_data_move_end(struct ipsec_pcb *pcb);
static void ipsec_wait_data_move_drain(struct ipsec_pcb *pcb);

/* Data path states */
#define IPSEC_PCB_DATA_PATH_READY 0x1

/* Macros to set/clear/test data path states.
 * Set/clear are done under ipsec_pcb_data_move_lock (see
 * ipsec_nexus_connected). */
#define IPSEC_SET_DATA_PATH_READY(_pcb) ((_pcb)->ipsec_pcb_data_path_state |= IPSEC_PCB_DATA_PATH_READY)
#define IPSEC_CLR_DATA_PATH_READY(_pcb) ((_pcb)->ipsec_pcb_data_path_state &= ~IPSEC_PCB_DATA_PATH_READY)
#define IPSEC_IS_DATA_PATH_READY(_pcb) (((_pcb)->ipsec_pcb_data_path_state & IPSEC_PCB_DATA_PATH_READY) != 0)
260
261 #if IPSEC_NEXUS
262 /* Macros to clear/set/test flags. */
263 static inline void
ipsec_flag_set(struct ipsec_pcb * pcb,uint32_t flag)264 ipsec_flag_set(struct ipsec_pcb *pcb, uint32_t flag)
265 {
266 pcb->ipsec_internal_flags |= flag;
267 }
268 static inline void
ipsec_flag_clr(struct ipsec_pcb * pcb,uint32_t flag)269 ipsec_flag_clr(struct ipsec_pcb *pcb, uint32_t flag)
270 {
271 pcb->ipsec_internal_flags &= ~flag;
272 }
273
274 static inline bool
ipsec_flag_isset(struct ipsec_pcb * pcb,uint32_t flag)275 ipsec_flag_isset(struct ipsec_pcb *pcb, uint32_t flag)
276 {
277 return !!(pcb->ipsec_internal_flags & flag);
278 }
279 #endif // IPSEC_NEXUS
280
/* Global list of all ipsec PCBs (initialized in ipsec_register_control). */
TAILQ_HEAD(ipsec_list, ipsec_pcb) ipsec_head;

/* Typed allocation zone for struct ipsec_pcb. */
static KALLOC_TYPE_DEFINE(ipsec_pcb_zone, struct ipsec_pcb, NET_KT_DEFAULT);

#define IPSECQ_MAXLEN 256
286
287 #if IPSEC_NEXUS
288 static int
289 sysctl_if_ipsec_ring_size SYSCTL_HANDLER_ARGS
290 {
291 #pragma unused(arg1, arg2)
292 int value = if_ipsec_ring_size;
293
294 int error = sysctl_handle_int(oidp, &value, 0, req);
295 if (error || !req->newptr) {
296 return error;
297 }
298
299 if (value < IPSEC_IF_MIN_RING_SIZE ||
300 value > IPSEC_IF_MAX_RING_SIZE) {
301 return EINVAL;
302 }
303
304 if_ipsec_ring_size = value;
305
306 return 0;
307 }
308
309 static int
310 sysctl_if_ipsec_tx_fsw_ring_size SYSCTL_HANDLER_ARGS
311 {
312 #pragma unused(arg1, arg2)
313 int value = if_ipsec_tx_fsw_ring_size;
314
315 int error = sysctl_handle_int(oidp, &value, 0, req);
316 if (error || !req->newptr) {
317 return error;
318 }
319
320 if (value < IPSEC_IF_MIN_RING_SIZE ||
321 value > IPSEC_IF_MAX_RING_SIZE) {
322 return EINVAL;
323 }
324
325 if_ipsec_tx_fsw_ring_size = value;
326
327 return 0;
328 }
329
330 static int
331 sysctl_if_ipsec_rx_fsw_ring_size SYSCTL_HANDLER_ARGS
332 {
333 #pragma unused(arg1, arg2)
334 int value = if_ipsec_rx_fsw_ring_size;
335
336 int error = sysctl_handle_int(oidp, &value, 0, req);
337 if (error || !req->newptr) {
338 return error;
339 }
340
341 if (value < IPSEC_IF_MIN_RING_SIZE ||
342 value > IPSEC_IF_MAX_RING_SIZE) {
343 return EINVAL;
344 }
345
346 if_ipsec_rx_fsw_ring_size = value;
347
348 return 0;
349 }
350
351
352 static inline bool
ipsec_in_wmm_mode(struct ipsec_pcb * pcb)353 ipsec_in_wmm_mode(struct ipsec_pcb *pcb)
354 {
355 return pcb->ipsec_kpipe_count == IPSEC_IF_WMM_RING_COUNT;
356 }
357
358 #endif // IPSEC_NEXUS
359
360 errno_t
ipsec_register_control(void)361 ipsec_register_control(void)
362 {
363 struct kern_ctl_reg kern_ctl;
364 errno_t result = 0;
365
366 #if (DEVELOPMENT || DEBUG)
367 (void)PE_parse_boot_argn("ipsec_kpipe_mbuf", &ipsec_kpipe_mbuf,
368 sizeof(ipsec_kpipe_mbuf));
369 #endif /* DEVELOPMENT || DEBUG */
370
371 #if IPSEC_NEXUS
372 ipsec_register_nexus();
373 #endif // IPSEC_NEXUS
374
375 TAILQ_INIT(&ipsec_head);
376
377 bzero(&kern_ctl, sizeof(kern_ctl));
378 strlcpy(kern_ctl.ctl_name, IPSEC_CONTROL_NAME, sizeof(kern_ctl.ctl_name));
379 kern_ctl.ctl_name[sizeof(kern_ctl.ctl_name) - 1] = 0;
380 kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED | CTL_FLAG_REG_SETUP; /* Require root */
381 kern_ctl.ctl_sendsize = 64 * 1024;
382 kern_ctl.ctl_recvsize = 64 * 1024;
383 kern_ctl.ctl_setup = ipsec_ctl_setup;
384 kern_ctl.ctl_bind = ipsec_ctl_bind;
385 kern_ctl.ctl_connect = ipsec_ctl_connect;
386 kern_ctl.ctl_disconnect = ipsec_ctl_disconnect;
387 kern_ctl.ctl_send = ipsec_ctl_send;
388 kern_ctl.ctl_setopt = ipsec_ctl_setopt;
389 kern_ctl.ctl_getopt = ipsec_ctl_getopt;
390
391 result = ctl_register(&kern_ctl, &ipsec_kctlref);
392 if (result != 0) {
393 os_log_error(OS_LOG_DEFAULT, "ipsec_register_control - ctl_register failed: %d\n", result);
394 return result;
395 }
396
397 /* Register the protocol plumbers */
398 if ((result = proto_register_plumber(PF_INET, IFNET_FAMILY_IPSEC,
399 ipsec_attach_proto, NULL)) != 0) {
400 os_log_error(OS_LOG_DEFAULT, "ipsec_register_control - proto_register_plumber(PF_INET, IFNET_FAMILY_IPSEC) failed: %d\n",
401 result);
402 ctl_deregister(ipsec_kctlref);
403 return result;
404 }
405
406 /* Register the protocol plumbers */
407 if ((result = proto_register_plumber(PF_INET6, IFNET_FAMILY_IPSEC,
408 ipsec_attach_proto, NULL)) != 0) {
409 proto_unregister_plumber(PF_INET, IFNET_FAMILY_IPSEC);
410 ctl_deregister(ipsec_kctlref);
411 os_log_error(OS_LOG_DEFAULT, "ipsec_register_control - proto_register_plumber(PF_INET6, IFNET_FAMILY_IPSEC) failed: %d\n",
412 result);
413 return result;
414 }
415
416 return 0;
417 }
418
419 /* Helpers */
420 int
ipsec_interface_isvalid(ifnet_t interface)421 ipsec_interface_isvalid(ifnet_t interface)
422 {
423 struct ipsec_pcb *pcb = NULL;
424
425 if (interface == NULL) {
426 return 0;
427 }
428
429 pcb = ifnet_softc(interface);
430
431 if (pcb == NULL) {
432 return 0;
433 }
434
435 /* When ctl disconnects, ipsec_unit is set to 0 */
436 if (pcb->ipsec_unit == 0) {
437 return 0;
438 }
439
440 return 1;
441 }
442
443 #if IPSEC_NEXUS
444 boolean_t
ipsec_interface_needs_netagent(ifnet_t interface)445 ipsec_interface_needs_netagent(ifnet_t interface)
446 {
447 struct ipsec_pcb *pcb = NULL;
448
449 if (interface == NULL) {
450 return FALSE;
451 }
452
453 pcb = ifnet_softc(interface);
454
455 if (pcb == NULL) {
456 return FALSE;
457 }
458
459 return pcb->ipsec_needs_netagent == true;
460 }
461 #endif // IPSEC_NEXUS
462
463 static errno_t
ipsec_ifnet_set_attrs(ifnet_t ifp)464 ipsec_ifnet_set_attrs(ifnet_t ifp)
465 {
466 /* Set flags and additional information. */
467 ifnet_set_mtu(ifp, 1500);
468 ifnet_set_flags(ifp, IFF_UP | IFF_MULTICAST | IFF_POINTOPOINT, 0xffff);
469
470 /* The interface must generate its own IPv6 LinkLocal address,
471 * if possible following the recommendation of RFC2472 to the 64bit interface ID
472 */
473 ifnet_set_eflags(ifp, IFEF_NOAUTOIPV6LL, IFEF_NOAUTOIPV6LL);
474
475 #if !IPSEC_NEXUS
476 /* Reset the stats in case as the interface may have been recycled */
477 struct ifnet_stats_param stats;
478 bzero(&stats, sizeof(struct ifnet_stats_param));
479 ifnet_set_stat(ifp, &stats);
480 #endif // !IPSEC_NEXUS
481
482 return 0;
483 }
484
485 #if IPSEC_NEXUS
486
487 static uuid_t ipsec_nx_dom_prov;
488
489 static errno_t
ipsec_nxdp_init(__unused kern_nexus_domain_provider_t domprov)490 ipsec_nxdp_init(__unused kern_nexus_domain_provider_t domprov)
491 {
492 return 0;
493 }
494
495 static void
ipsec_nxdp_fini(__unused kern_nexus_domain_provider_t domprov)496 ipsec_nxdp_fini(__unused kern_nexus_domain_provider_t domprov)
497 {
498 // Ignore
499 }
500
501 static errno_t
ipsec_register_nexus(void)502 ipsec_register_nexus(void)
503 {
504 const struct kern_nexus_domain_provider_init dp_init = {
505 .nxdpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
506 .nxdpi_flags = 0,
507 .nxdpi_init = ipsec_nxdp_init,
508 .nxdpi_fini = ipsec_nxdp_fini
509 };
510 errno_t err = 0;
511
512 /* ipsec_nxdp_init() is called before this function returns */
513 err = kern_nexus_register_domain_provider(NEXUS_TYPE_NET_IF,
514 (const uint8_t *) "com.apple.ipsec",
515 &dp_init, sizeof(dp_init),
516 &ipsec_nx_dom_prov);
517 if (err != 0) {
518 os_log_error(OS_LOG_DEFAULT, "%s: failed to register domain provider\n", __func__);
519 return err;
520 }
521 return 0;
522 }
523
524 static errno_t
ipsec_netif_prepare(kern_nexus_t nexus,ifnet_t ifp)525 ipsec_netif_prepare(kern_nexus_t nexus, ifnet_t ifp)
526 {
527 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
528 pcb->ipsec_netif_nexus = nexus;
529 return ipsec_ifnet_set_attrs(ifp);
530 }
531
532 static errno_t
ipsec_nexus_pre_connect(kern_nexus_provider_t nxprov,proc_t p,kern_nexus_t nexus,nexus_port_t nexus_port,kern_channel_t channel,void ** ch_ctx)533 ipsec_nexus_pre_connect(kern_nexus_provider_t nxprov,
534 proc_t p, kern_nexus_t nexus,
535 nexus_port_t nexus_port, kern_channel_t channel, void **ch_ctx)
536 {
537 #pragma unused(nxprov, p)
538 #pragma unused(nexus, nexus_port, channel, ch_ctx)
539 return 0;
540 }
541
/*
 * Channel connected callback.  ifnet_is_attached(..., 1) — the second
 * argument presumably takes an ifnet io reference; it is paired with the
 * ifnet_decr_iorefcnt() in ipsec_nexus_disconnected().  On success the
 * data path is marked ready so ipsec_data_move_begin() admits new work;
 * on failure ENXIO is returned and no reference is held.
 */
static errno_t
ipsec_nexus_connected(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_t channel)
{
#pragma unused(nxprov, channel)
	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
	boolean_t ok = ifnet_is_attached(pcb->ipsec_ifp, 1);
	/* Mark the data path as ready */
	if (ok) {
		lck_mtx_lock(&pcb->ipsec_pcb_data_move_lock);
		IPSEC_SET_DATA_PATH_READY(pcb);
		lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
	}
	return ok ? 0 : ENXIO;
}
557
/*
 * kpipe channel pre-disconnect callback: block until every in-flight
 * data-path context has drained so teardown can proceed safely.
 */
static void
ipsec_nexus_pre_disconnect(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_t channel)
{
#pragma unused(nxprov, channel)
	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);

	/* A kpipe channel can only disconnect while kpipes exist. */
	VERIFY(pcb->ipsec_kpipe_count != 0);

	/* Wait until all threads in the data paths are done. */
	ipsec_wait_data_move_drain(pcb);
}
570
/*
 * netif channel pre-disconnect callback: block until every in-flight
 * data-path context has drained before the channel goes away.
 */
static void
ipsec_netif_pre_disconnect(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_t channel)
{
#pragma unused(nxprov, channel)
	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);

	/* Wait until all threads in the data paths are done. */
	ipsec_wait_data_move_drain(pcb);
}
581
/*
 * Channel disconnected callback: clear the cached netif nexus pointer if
 * it refers to this nexus, then release the ifnet io reference taken in
 * ipsec_nexus_connected().
 */
static void
ipsec_nexus_disconnected(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_t channel)
{
#pragma unused(nxprov, channel)
	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
	if (pcb->ipsec_netif_nexus == nexus) {
		pcb->ipsec_netif_nexus = NULL;
	}
	ifnet_decr_iorefcnt(pcb->ipsec_ifp);
}
593
/*
 * kpipe ring init callback.  Matches the channel's nexus UUID against the
 * PCB's per-index kpipe UUIDs to recover the ring index, stashes that
 * index as the ring context, and records the ring pointer in the rx or tx
 * slot for that index.  Returns ENOENT if the channel is unknown.
 */
static errno_t
ipsec_kpipe_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_t channel, kern_channel_ring_t ring, boolean_t is_tx_ring,
    void **ring_ctx)
{
#pragma unused(nxprov)
#pragma unused(channel)
	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
	uint8_t ring_idx;

	/* Find which kpipe this channel belongs to (uuid_compare == 0 on match). */
	for (ring_idx = 0; ring_idx < pcb->ipsec_kpipe_count; ring_idx++) {
		if (!uuid_compare(channel->ch_info->cinfo_nx_uuid, pcb->ipsec_kpipe_uuid[ring_idx])) {
			break;
		}
	}

	if (ring_idx == pcb->ipsec_kpipe_count) {
		uuid_string_t uuidstr;
		uuid_unparse(channel->ch_info->cinfo_nx_uuid, uuidstr);
		os_log_error(OS_LOG_DEFAULT, "%s: %s cannot find channel %s\n", __func__, pcb->ipsec_if_xname, uuidstr);
		return ENOENT;
	}

	/* The ring index travels with the ring as its context pointer. */
	*ring_ctx = (void *)(uintptr_t)ring_idx;

	if (!is_tx_ring) {
		VERIFY(pcb->ipsec_kpipe_rxring[ring_idx] == NULL);
		pcb->ipsec_kpipe_rxring[ring_idx] = ring;
	} else {
		VERIFY(pcb->ipsec_kpipe_txring[ring_idx] == NULL);
		pcb->ipsec_kpipe_txring[ring_idx] = ring;
	}
	return 0;
}
628
629 static void
ipsec_kpipe_ring_fini(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring)630 ipsec_kpipe_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
631 kern_channel_ring_t ring)
632 {
633 #pragma unused(nxprov)
634 bool found = false;
635 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
636
637 for (unsigned int i = 0; i < pcb->ipsec_kpipe_count; i++) {
638 if (pcb->ipsec_kpipe_rxring[i] == ring) {
639 pcb->ipsec_kpipe_rxring[i] = NULL;
640 found = true;
641 } else if (pcb->ipsec_kpipe_txring[i] == ring) {
642 pcb->ipsec_kpipe_txring[i] = NULL;
643 found = true;
644 }
645 }
646 VERIFY(found);
647 }
648
/*
 * kpipe TX sync callback.  Does no packet work itself: if the kpipe TX
 * ring has at least one queued slot, it notifies netif RX ring 0 so the
 * netif side can pick the packets up.  The PCB rwlock is held shared only
 * while inspecting kpipe/netif state and is dropped before notifying;
 * the data-move begin/end pair brackets the whole callback.
 */
static errno_t
ipsec_kpipe_sync_tx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t tx_ring, uint32_t flags)
{
#pragma unused(nxprov)
#pragma unused(flags)
	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);

	/* Refuse new work while the interface data path is draining. */
	if (!ipsec_data_move_begin(pcb)) {
		os_log_info(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
		return 0;
	}

	lck_rw_lock_shared(&pcb->ipsec_pcb_lock);

	/* The kpipe may have been torn down since this sync was scheduled. */
	if (!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		ipsec_data_move_end(pcb);
		return 0;
	}

	VERIFY(pcb->ipsec_kpipe_count);

	kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
	if (tx_slot == NULL) {
		// Nothing to write, bail
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		ipsec_data_move_end(pcb);
		return 0;
	}

	// Signal the netif ring to read
	kern_channel_ring_t rx_ring = pcb->ipsec_netif_rxring[0];
	lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);

	if (rx_ring != NULL) {
		kern_channel_notify(rx_ring, 0);
	}

	ipsec_data_move_end(pcb);
	return 0;
}
691
692 static mbuf_t
ipsec_encrypt_mbuf(ifnet_t interface,mbuf_t data)693 ipsec_encrypt_mbuf(ifnet_t interface,
694 mbuf_t data)
695 {
696 struct ipsec_output_state ipsec_state;
697 int error = 0;
698 uint32_t af;
699
700 // Make sure this packet isn't looping through the interface
701 if (necp_get_last_interface_index_from_packet(data) == interface->if_index) {
702 error = -1;
703 goto ipsec_output_err;
704 }
705
706 // Mark the interface so NECP can evaluate tunnel policy
707 necp_mark_packet_from_interface(data, interface);
708
709 struct ip *ip = mtod(data, struct ip *);
710 u_int ip_version = ip->ip_v;
711
712 switch (ip_version) {
713 case 4: {
714 af = AF_INET;
715
716 memset(&ipsec_state, 0, sizeof(ipsec_state));
717 ipsec_state.m = data;
718 ipsec_state.dst = (struct sockaddr *)&ip->ip_dst;
719 memset(&ipsec_state.ro, 0, sizeof(ipsec_state.ro));
720
721 error = ipsec4_interface_output(&ipsec_state, interface);
722 if (error == 0 && ipsec_state.tunneled == 6) {
723 // Tunneled in IPv6 - packet is gone
724 // TODO: Don't lose mbuf
725 data = NULL;
726 goto done;
727 }
728
729 data = ipsec_state.m;
730 if (error || data == NULL) {
731 if (error) {
732 os_log_error(OS_LOG_DEFAULT, "ipsec_encrypt_mbuf: ipsec4_output error %d\n", error);
733 }
734 goto ipsec_output_err;
735 }
736 goto done;
737 }
738 case 6: {
739 af = AF_INET6;
740
741 data = ipsec6_splithdr(data);
742 if (data == NULL) {
743 os_log_error(OS_LOG_DEFAULT, "ipsec_encrypt_mbuf: ipsec6_splithdr returned NULL\n");
744 goto ipsec_output_err;
745 }
746
747 struct ip6_hdr *ip6 = mtod(data, struct ip6_hdr *);
748
749 memset(&ipsec_state, 0, sizeof(ipsec_state));
750 ipsec_state.m = data;
751 ipsec_state.dst = (struct sockaddr *)&ip6->ip6_dst;
752 memset(&ipsec_state.ro, 0, sizeof(ipsec_state.ro));
753
754 error = ipsec6_interface_output(&ipsec_state, interface, &ip6->ip6_nxt, ipsec_state.m);
755 if (error == 0 && ipsec_state.tunneled == 4) {
756 // Tunneled in IPv4 - packet is gone
757 // TODO: Don't lose mbuf
758 data = NULL;
759 goto done;
760 }
761 data = ipsec_state.m;
762 if (error || data == NULL) {
763 if (error) {
764 os_log_error(OS_LOG_DEFAULT, "ipsec_encrypt_mbuf: ipsec6_output error %d\n", error);
765 }
766 goto ipsec_output_err;
767 }
768 goto done;
769 }
770 default: {
771 os_log_error(OS_LOG_DEFAULT, "ipsec_encrypt_mbuf: Received unknown packet version %d\n", ip_version);
772 error = -1;
773 goto ipsec_output_err;
774 }
775 }
776
777 done:
778 return data;
779
780 ipsec_output_err:
781 if (data) {
782 mbuf_freem(data);
783 }
784 return NULL;
785 }
786
787 static errno_t
ipsec_kpipe_sync_rx_mbuf(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t rx_ring,uint32_t flags)788 ipsec_kpipe_sync_rx_mbuf(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
789 kern_channel_ring_t rx_ring, uint32_t flags)
790 {
791 #pragma unused(nxprov)
792 #pragma unused(flags)
793 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
794 struct kern_channel_ring_stat_increment rx_ring_stats;
795 uint8_t ring_idx = (uint8_t)(uintptr_t)kern_channel_ring_get_context(rx_ring);
796
797 if (!ipsec_data_move_begin(pcb)) {
798 os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
799 return 0;
800 }
801
802 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
803
804 if (!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
805 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
806 ipsec_data_move_end(pcb);
807 return 0;
808 }
809
810 VERIFY(pcb->ipsec_kpipe_count);
811 VERIFY(ring_idx <= pcb->ipsec_kpipe_count);
812
813 // Reclaim user-released slots
814 (void) kern_channel_reclaim(rx_ring);
815
816 uint32_t avail = kern_channel_available_slot_count(rx_ring);
817 if (avail == 0) {
818 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
819 os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d no room in rx_ring\n", __func__,
820 pcb->ipsec_if_xname, rx_ring->ckr_name, ring_idx);
821 ipsec_data_move_end(pcb);
822 return 0;
823 }
824
825 kern_channel_ring_t tx_ring = pcb->ipsec_netif_txring[ring_idx];
826 if (tx_ring == NULL) {
827 // Net-If TX ring not set up yet, nothing to read
828 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
829 os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d bad netif_txring 1\n", __func__,
830 pcb->ipsec_if_xname, rx_ring->ckr_name, ring_idx);
831 ipsec_data_move_end(pcb);
832 return 0;
833 }
834
835 struct netif_stats *nifs = &NX_NETIF_PRIVATE(pcb->ipsec_netif_nexus)->nif_stats;
836
837 // Unlock ipsec before entering ring
838 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
839
840 (void)kr_enter(tx_ring, TRUE);
841
842 // Lock again after entering and validate
843 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
844 if (tx_ring != pcb->ipsec_netif_txring[ring_idx]) {
845 // Ring no longer valid
846 // Unlock first, then exit ring
847 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
848 kr_exit(tx_ring);
849 os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d bad netif_txring 2\n", __func__,
850 pcb->ipsec_if_xname, rx_ring->ckr_name, ring_idx);
851 ipsec_data_move_end(pcb);
852 return 0;
853 }
854
855 struct kern_channel_ring_stat_increment tx_ring_stats;
856 bzero(&tx_ring_stats, sizeof(tx_ring_stats));
857 kern_channel_slot_t tx_pslot = NULL;
858 kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
859 if (tx_slot == NULL) {
860 // Nothing to read, don't bother signalling
861 // Unlock first, then exit ring
862 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
863 kr_exit(tx_ring);
864 ipsec_data_move_end(pcb);
865 return 0;
866 }
867
868 struct kern_pbufpool *rx_pp = rx_ring->ckr_pp;
869 VERIFY(rx_pp != NULL);
870 struct kern_pbufpool *tx_pp = tx_ring->ckr_pp;
871 VERIFY(tx_pp != NULL);
872 bzero(&rx_ring_stats, sizeof(rx_ring_stats));
873 kern_channel_slot_t rx_pslot = NULL;
874 kern_channel_slot_t rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);
875 kern_packet_t tx_chain_ph = 0;
876
877 while (rx_slot != NULL && tx_slot != NULL) {
878 size_t length = 0;
879 mbuf_t data = NULL;
880 errno_t error = 0;
881
882 // Allocate rx packet
883 kern_packet_t rx_ph = 0;
884 error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
885 if (__improbable(error != 0)) {
886 os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s: failed to allocate packet\n",
887 pcb->ipsec_ifp->if_xname);
888 break;
889 }
890
891 kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);
892
893 if (tx_ph == 0) {
894 // Advance TX ring
895 tx_pslot = tx_slot;
896 tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
897 kern_pbufpool_free(rx_pp, rx_ph);
898 continue;
899 }
900 (void) kern_channel_slot_detach_packet(tx_ring, tx_slot, tx_ph);
901 if (tx_chain_ph != 0) {
902 kern_packet_append(tx_ph, tx_chain_ph);
903 }
904 tx_chain_ph = tx_ph;
905
906 // Advance TX ring
907 tx_pslot = tx_slot;
908 tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
909
910 kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
911 VERIFY(tx_buf != NULL);
912 uint8_t *tx_baddr = kern_buflet_get_data_address(tx_buf);
913 VERIFY(tx_baddr != NULL);
914 tx_baddr += kern_buflet_get_data_offset(tx_buf);
915
916 bpf_tap_packet_out(pcb->ipsec_ifp, DLT_RAW, tx_ph, NULL, 0);
917
918 length = MIN(kern_packet_get_data_length(tx_ph),
919 pcb->ipsec_slot_size);
920
921 // Increment TX stats
922 tx_ring_stats.kcrsi_slots_transferred++;
923 tx_ring_stats.kcrsi_bytes_transferred += length;
924
925 if (length > 0) {
926 error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, &data);
927 if (error == 0) {
928 error = mbuf_copyback(data, 0, length, tx_baddr, MBUF_DONTWAIT);
929 if (error == 0) {
930 // Encrypt and send packet
931 lck_mtx_lock(&pcb->ipsec_kpipe_encrypt_lock);
932 data = ipsec_encrypt_mbuf(pcb->ipsec_ifp, data);
933 lck_mtx_unlock(&pcb->ipsec_kpipe_encrypt_lock);
934 } else {
935 os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s - mbuf_copyback(%zu) error %d\n", pcb->ipsec_ifp->if_xname, length, error);
936 STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
937 STATS_INC(nifs, NETIF_STATS_DROP);
938 mbuf_freem(data);
939 data = NULL;
940 }
941 } else {
942 os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s - mbuf_gethdr error %d\n", pcb->ipsec_ifp->if_xname, error);
943 STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
944 STATS_INC(nifs, NETIF_STATS_DROP);
945 }
946 } else {
947 os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s - 0 length packet\n", pcb->ipsec_ifp->if_xname);
948 STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
949 STATS_INC(nifs, NETIF_STATS_DROP);
950 }
951
952 if (data == NULL) {
953 os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s: no encrypted packet to send\n", pcb->ipsec_ifp->if_xname);
954 kern_pbufpool_free(rx_pp, rx_ph);
955 break;
956 }
957
958 length = mbuf_pkthdr_len(data);
959 if (length > PP_BUF_SIZE_DEF(rx_pp)) {
960 // Flush data
961 mbuf_freem(data);
962 kern_pbufpool_free(rx_pp, rx_ph);
963 os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s: encrypted packet length %zu > %u\n",
964 pcb->ipsec_ifp->if_xname, length, PP_BUF_SIZE_DEF(rx_pp));
965 continue;
966 }
967
968 // Fillout rx packet
969 kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
970 VERIFY(rx_buf != NULL);
971 void *rx_baddr = kern_buflet_get_data_address(rx_buf);
972 VERIFY(rx_baddr != NULL);
973
974 // Copy-in data from mbuf to buflet
975 mbuf_copydata(data, 0, length, (void *)rx_baddr);
976 kern_packet_clear_flow_uuid(rx_ph); // Zero flow id
977
978 // Finalize and attach the packet
979 error = kern_buflet_set_data_offset(rx_buf, 0);
980 VERIFY(error == 0);
981 error = kern_buflet_set_data_length(rx_buf, (uint16_t)length);
982 VERIFY(error == 0);
983 error = kern_packet_finalize(rx_ph);
984 VERIFY(error == 0);
985 error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
986 VERIFY(error == 0);
987
988 STATS_INC(nifs, NETIF_STATS_TX_PACKETS);
989 STATS_INC(nifs, NETIF_STATS_TX_COPY_DIRECT);
990
991 rx_ring_stats.kcrsi_slots_transferred++;
992 rx_ring_stats.kcrsi_bytes_transferred += length;
993
994 if (!pcb->ipsec_ext_ifdata_stats) {
995 ifnet_stat_increment_out(pcb->ipsec_ifp, 1, (uint16_t)length, 0);
996 }
997
998 mbuf_freem(data);
999
1000 rx_pslot = rx_slot;
1001 rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
1002 }
1003
1004 if (rx_pslot) {
1005 kern_channel_advance_slot(rx_ring, rx_pslot);
1006 kern_channel_increment_ring_net_stats(rx_ring, pcb->ipsec_ifp, &rx_ring_stats);
1007 }
1008
1009 if (tx_chain_ph != 0) {
1010 kern_pbufpool_free_chain(tx_pp, tx_chain_ph);
1011 }
1012
1013 if (tx_pslot) {
1014 kern_channel_advance_slot(tx_ring, tx_pslot);
1015 kern_channel_increment_ring_net_stats(tx_ring, pcb->ipsec_ifp, &tx_ring_stats);
1016 (void)kern_channel_reclaim(tx_ring);
1017 }
1018
1019 /* always reenable output */
1020 errno_t error = ifnet_enable_output(pcb->ipsec_ifp);
1021 if (error != 0) {
1022 os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx: ifnet_enable_output returned error %d\n", error);
1023 }
1024
1025 // Unlock first, then exit ring
1026 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
1027
1028 if (tx_pslot != NULL) {
1029 kern_channel_notify(tx_ring, 0);
1030 }
1031 kr_exit(tx_ring);
1032
1033 ipsec_data_move_end(pcb);
1034 return 0;
1035 }
1036
1037 static errno_t
ipsec_encrypt_kpipe_pkt(ifnet_t interface,kern_packet_t sph,kern_packet_t dph)1038 ipsec_encrypt_kpipe_pkt(ifnet_t interface, kern_packet_t sph,
1039 kern_packet_t dph)
1040 {
1041 uint8_t *sbaddr = NULL;
1042 int err = 0;
1043 uint32_t slen = 0;
1044
1045 VERIFY(interface != NULL);
1046 VERIFY(sph != 0);
1047 VERIFY(dph != 0);
1048
1049 kern_buflet_t sbuf = __packet_get_next_buflet(sph, NULL);
1050 VERIFY(sbuf != NULL);
1051 slen = __buflet_get_data_length(sbuf);
1052
1053 if (__improbable(slen < sizeof(struct ip))) {
1054 os_log_error(OS_LOG_DEFAULT, "ipsec encrypt kpipe pkt: source "
1055 "buffer shorter than ip header, %u\n", slen);
1056 return EINVAL;
1057 }
1058
1059 MD_BUFLET_ADDR(SK_PTR_ADDR_KPKT(sph), sbaddr);
1060 struct ip *ip = (struct ip *)(void *)sbaddr;
1061 ASSERT(IP_HDR_ALIGNED_P(ip));
1062
1063 u_int ip_vers = ip->ip_v;
1064 switch (ip_vers) {
1065 case IPVERSION: {
1066 err = ipsec4_interface_kpipe_output(interface, sph, dph);
1067 if (__improbable(err != 0)) {
1068 os_log_error(OS_LOG_DEFAULT, "ipsec4 interface kpipe "
1069 "output error %d\n", err);
1070 return err;
1071 }
1072 break;
1073 }
1074 case 6: {
1075 err = ipsec6_interface_kpipe_output(interface, sph, dph);
1076 if (__improbable(err != 0)) {
1077 os_log_error(OS_LOG_DEFAULT, "ipsec6 interface kpipe "
1078 "output error %d\n", err);
1079 return err;
1080 }
1081 break;
1082 }
1083 default: {
1084 os_log_error(OS_LOG_DEFAULT, "received unknown packet version: %d\n",
1085 ip_vers);
1086 return EINVAL;
1087 }
1088 }
1089
1090 return err;
1091 }
1092
/*
 * ipsec_kpipe_sync_rx_packet
 * Kernel-pipe RX sync for the packet-to-packet (zero mbuf copy) data path.
 * Drains the corresponding netif TX ring, encrypts each TX packet directly
 * into a freshly allocated RX pool packet via ipsec_encrypt_kpipe_pkt(),
 * and attaches the result to the kpipe RX ring for the user channel.
 *
 * Lock order: ipsec_pcb_lock (shared) must be dropped before kr_enter() on
 * the netif TX ring and re-taken afterwards; the ring pointer is then
 * re-validated in case it was torn down in between.
 *
 * Always returns 0; allocation/encryption failures only cut the batch short
 * or drop individual packets (accounted in netif stats).
 */
static errno_t
ipsec_kpipe_sync_rx_packet(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t rx_ring, uint32_t flags)
{
#pragma unused(nxprov)
#pragma unused(flags)
	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
	struct kern_channel_ring_stat_increment rx_ring_stats;
	// Ring context carries the ring index chosen at ring init time
	uint8_t ring_idx = (uint8_t)(uintptr_t)kern_channel_ring_get_context(rx_ring);

	if (!ipsec_data_move_begin(pcb)) {
		os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
		return 0;
	}

	lck_rw_lock_shared(&pcb->ipsec_pcb_lock);

	if (!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		ipsec_data_move_end(pcb);
		return 0;
	}

	VERIFY(pcb->ipsec_kpipe_count);
	VERIFY(ring_idx <= pcb->ipsec_kpipe_count);

	// Reclaim user-released slots
	(void) kern_channel_reclaim(rx_ring);

	uint32_t avail = kern_channel_available_slot_count(rx_ring);
	if (avail == 0) {
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d no room in rx_ring\n", __func__,
		    pcb->ipsec_if_xname, rx_ring->ckr_name, ring_idx);
		ipsec_data_move_end(pcb);
		return 0;
	}

	kern_channel_ring_t tx_ring = pcb->ipsec_netif_txring[ring_idx];
	if (tx_ring == NULL) {
		// Net-If TX ring not set up yet, nothing to read
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d bad netif_txring 1\n", __func__,
		    pcb->ipsec_if_xname, rx_ring->ckr_name, ring_idx);
		ipsec_data_move_end(pcb);
		return 0;
	}

	struct netif_stats *nifs = &NX_NETIF_PRIVATE(pcb->ipsec_netif_nexus)->nif_stats;

	// Unlock ipsec before entering ring
	lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);

	(void)kr_enter(tx_ring, TRUE);

	// Lock again after entering and validate
	lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
	if (tx_ring != pcb->ipsec_netif_txring[ring_idx]) {
		// Ring no longer valid
		// Unlock first, then exit ring
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		kr_exit(tx_ring);
		os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d bad netif_txring 2\n", __func__,
		    pcb->ipsec_if_xname, rx_ring->ckr_name, ring_idx);
		ipsec_data_move_end(pcb);
		return 0;
	}

	struct kern_channel_ring_stat_increment tx_ring_stats;
	bzero(&tx_ring_stats, sizeof(tx_ring_stats));
	kern_channel_slot_t tx_pslot = NULL;
	kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
	if (tx_slot == NULL) {
		// Nothing to read, don't bother signalling
		// Unlock first, then exit ring
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		kr_exit(tx_ring);
		ipsec_data_move_end(pcb);
		return 0;
	}

	struct kern_pbufpool *rx_pp = rx_ring->ckr_pp;
	VERIFY(rx_pp != NULL);
	struct kern_pbufpool *tx_pp = tx_ring->ckr_pp;
	VERIFY(tx_pp != NULL);
	bzero(&rx_ring_stats, sizeof(rx_ring_stats));
	kern_channel_slot_t rx_pslot = NULL;
	kern_channel_slot_t rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);
	// Detached TX packets are linked into this chain and freed in one batch below
	kern_packet_t tx_chain_ph = 0;

	while (rx_slot != NULL && tx_slot != NULL) {
		size_t tx_pkt_length = 0;
		errno_t error = 0;

		// Allocate rx packet
		kern_packet_t rx_ph = 0;
		error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
		if (__improbable(error != 0)) {
			os_log_info(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s: "
			    "failed to allocate packet\n", pcb->ipsec_ifp->if_xname);
			STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
			STATS_INC(nifs, NETIF_STATS_DROP);
			break;
		}

		kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);
		if (__improbable(tx_ph == 0)) {
			// Advance TX ring
			tx_pslot = tx_slot;
			tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
			// Return the unused rx packet to its pool
			kern_pbufpool_free(rx_pp, rx_ph);
			continue;
		}

		(void) kern_channel_slot_detach_packet(tx_ring, tx_slot, tx_ph);
		if (tx_chain_ph != 0) {
			kern_packet_append(tx_ph, tx_chain_ph);
		}
		tx_chain_ph = tx_ph;

		// Advance TX ring
		tx_pslot = tx_slot;
		tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);

		bpf_tap_packet_out(pcb->ipsec_ifp, DLT_RAW, tx_ph, NULL, 0);

		// Drop empty or oversized (> slot size) packets before encrypting
		tx_pkt_length = kern_packet_get_data_length(tx_ph);
		if (tx_pkt_length == 0 || tx_pkt_length > pcb->ipsec_slot_size) {
			os_log_info(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s: "
			    "packet length %zu", pcb->ipsec_ifp->if_xname,
			    tx_pkt_length);
			kern_pbufpool_free(rx_pp, rx_ph);
			STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
			STATS_INC(nifs, NETIF_STATS_DROP);
			continue;
		}

		// Increment TX stats
		tx_ring_stats.kcrsi_slots_transferred++;
		tx_ring_stats.kcrsi_bytes_transferred += tx_pkt_length;

		// Encrypt packet (serialized by the kpipe encrypt mutex)
		lck_mtx_lock(&pcb->ipsec_kpipe_encrypt_lock);
		error = ipsec_encrypt_kpipe_pkt(pcb->ipsec_ifp, tx_ph, rx_ph);
		lck_mtx_unlock(&pcb->ipsec_kpipe_encrypt_lock);
		if (__improbable(error != 0)) {
			os_log_info(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s: "
			    "failed to encrypt packet", pcb->ipsec_ifp->if_xname);
			kern_pbufpool_free(rx_pp, rx_ph);
			STATS_INC(nifs, NETIF_STATS_DROP);
			continue;
		}

		kern_packet_clear_flow_uuid(rx_ph); // Zero flow id
		// Finalize and attach the packet
		kern_buflet_t rx_buf = __packet_get_next_buflet(rx_ph, NULL);
		error = kern_buflet_set_data_offset(rx_buf, 0);
		VERIFY(error == 0);
		error = kern_packet_finalize(rx_ph);
		VERIFY(error == 0);
		error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
		VERIFY(error == 0);

		STATS_INC(nifs, NETIF_STATS_TX_PACKETS);
		STATS_INC(nifs, NETIF_STATS_TX_COPY_DIRECT);

		rx_ring_stats.kcrsi_slots_transferred++;
		rx_ring_stats.kcrsi_bytes_transferred += kern_packet_get_data_length(rx_ph);

		if (!pcb->ipsec_ext_ifdata_stats) {
			ifnet_stat_increment_out(pcb->ipsec_ifp, 1,
			    kern_packet_get_data_length(rx_ph), 0);
		}

		rx_pslot = rx_slot;
		rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
	}

	if (rx_pslot) {
		kern_channel_advance_slot(rx_ring, rx_pslot);
		kern_channel_increment_ring_net_stats(rx_ring, pcb->ipsec_ifp, &rx_ring_stats);
	}

	// Free the whole chain of detached (already consumed) TX packets at once
	if (tx_chain_ph != 0) {
		kern_pbufpool_free_chain(tx_pp, tx_chain_ph);
	}

	if (tx_pslot) {
		kern_channel_advance_slot(tx_ring, tx_pslot);
		kern_channel_increment_ring_net_stats(tx_ring, pcb->ipsec_ifp, &tx_ring_stats);
		(void)kern_channel_reclaim(tx_ring);
	}

	/* always reenable output */
	errno_t error = ifnet_enable_output(pcb->ipsec_ifp);
	if (error != 0) {
		os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx: ifnet_enable_output returned error %d\n", error);
	}

	// Unlock first, then exit ring
	lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);

	if (tx_pslot != NULL) {
		kern_channel_notify(tx_ring, 0);
	}
	kr_exit(tx_ring);

	ipsec_data_move_end(pcb);
	return 0;
}
1303
1304 static errno_t
ipsec_kpipe_sync_rx(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t rx_ring,uint32_t flags)1305 ipsec_kpipe_sync_rx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
1306 kern_channel_ring_t rx_ring, uint32_t flags)
1307 {
1308 if (__improbable(ipsec_kpipe_mbuf == 1)) {
1309 return ipsec_kpipe_sync_rx_mbuf(nxprov, nexus, rx_ring, flags);
1310 } else {
1311 return ipsec_kpipe_sync_rx_packet(nxprov, nexus, rx_ring, flags);
1312 }
1313 }
1314
1315 static uint8_t
ipsec_find_tx_ring_by_svc(kern_packet_svc_class_t svc_class)1316 ipsec_find_tx_ring_by_svc(kern_packet_svc_class_t svc_class)
1317 {
1318 switch (svc_class) {
1319 case KPKT_SC_VO: {
1320 return 0;
1321 }
1322 case KPKT_SC_VI: {
1323 return 1;
1324 }
1325 case KPKT_SC_BE: {
1326 return 2;
1327 }
1328 case KPKT_SC_BK: {
1329 return 3;
1330 }
1331 default: {
1332 VERIFY(0);
1333 return 0;
1334 }
1335 }
1336 }
1337
1338 static errno_t
ipsec_netif_ring_init(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel,kern_channel_ring_t ring,boolean_t is_tx_ring,void ** ring_ctx)1339 ipsec_netif_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
1340 kern_channel_t channel, kern_channel_ring_t ring, boolean_t is_tx_ring,
1341 void **ring_ctx)
1342 {
1343 #pragma unused(nxprov)
1344 #pragma unused(channel)
1345 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
1346
1347 if (!is_tx_ring) {
1348 VERIFY(pcb->ipsec_netif_rxring[0] == NULL);
1349 pcb->ipsec_netif_rxring[0] = ring;
1350 } else {
1351 uint8_t ring_idx = 0;
1352 if (ipsec_in_wmm_mode(pcb)) {
1353 int err;
1354 kern_packet_svc_class_t svc_class;
1355 err = kern_channel_get_service_class(ring, &svc_class);
1356 VERIFY(err == 0);
1357 ring_idx = ipsec_find_tx_ring_by_svc(svc_class);
1358 VERIFY(ring_idx < IPSEC_IF_WMM_RING_COUNT);
1359 }
1360
1361 *ring_ctx = (void *)(uintptr_t)ring_idx;
1362
1363 VERIFY(pcb->ipsec_netif_txring[ring_idx] == NULL);
1364 pcb->ipsec_netif_txring[ring_idx] = ring;
1365 }
1366 return 0;
1367 }
1368
1369 static void
ipsec_netif_ring_fini(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring)1370 ipsec_netif_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
1371 kern_channel_ring_t ring)
1372 {
1373 #pragma unused(nxprov)
1374 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
1375 bool found = false;
1376
1377 for (int i = 0; i < IPSEC_NETIF_MAX_RX_RING_COUNT; i++) {
1378 if (pcb->ipsec_netif_rxring[i] == ring) {
1379 pcb->ipsec_netif_rxring[i] = NULL;
1380 VERIFY(!found);
1381 found = true;
1382 }
1383 }
1384 for (int i = 0; i < IPSEC_NETIF_MAX_TX_RING_COUNT; i++) {
1385 if (pcb->ipsec_netif_txring[i] == ring) {
1386 pcb->ipsec_netif_txring[i] = NULL;
1387 VERIFY(!found);
1388 found = true;
1389 }
1390 }
1391 VERIFY(found);
1392 }
1393
1394 static bool
ipsec_netif_check_policy(ifnet_t interface,mbuf_t data)1395 ipsec_netif_check_policy(ifnet_t interface, mbuf_t data)
1396 {
1397 necp_kernel_policy_result necp_result = 0;
1398 necp_kernel_policy_result_parameter necp_result_parameter = {};
1399 uint32_t necp_matched_policy_id = 0;
1400 struct ip_out_args args4 = { };
1401 struct ip6_out_args args6 = { };
1402
1403 // This packet has been marked with IP level policy, do not mark again.
1404 if (data && data->m_pkthdr.necp_mtag.necp_policy_id >= NECP_KERNEL_POLICY_ID_FIRST_VALID_IP) {
1405 return true;
1406 }
1407
1408 size_t length = mbuf_pkthdr_len(data);
1409 if (length < sizeof(struct ip)) {
1410 return false;
1411 }
1412
1413 struct ip *ip = mtod(data, struct ip *);
1414 u_int ip_version = ip->ip_v;
1415 switch (ip_version) {
1416 case 4: {
1417 if (interface != NULL) {
1418 args4.ipoa_flags |= IPOAF_BOUND_IF;
1419 args4.ipoa_boundif = interface->if_index;
1420 }
1421 necp_matched_policy_id = necp_ip_output_find_policy_match(data, IP_OUTARGS, &args4, NULL,
1422 &necp_result, &necp_result_parameter);
1423 break;
1424 }
1425 case 6: {
1426 if (interface != NULL) {
1427 args6.ip6oa_flags |= IP6OAF_BOUND_IF;
1428 args6.ip6oa_boundif = interface->if_index;
1429 }
1430 necp_matched_policy_id = necp_ip6_output_find_policy_match(data, IPV6_OUTARGS, &args6, NULL,
1431 &necp_result, &necp_result_parameter);
1432 break;
1433 }
1434 default: {
1435 return false;
1436 }
1437 }
1438
1439 if (necp_result == NECP_KERNEL_POLICY_RESULT_DROP ||
1440 necp_result == NECP_KERNEL_POLICY_RESULT_SOCKET_DIVERT) {
1441 /* Drop and flow divert packets should be blocked at the IP layer */
1442 return false;
1443 }
1444
1445 necp_mark_packet_from_ip(data, necp_matched_policy_id);
1446 return true;
1447 }
1448
/*
 * ipsec_netif_sync_tx
 * Netif TX sync callback. When a kernel pipe is attached, TX packets are
 * consumed by the kpipe RX sync instead, so this only signals the matching
 * kpipe RX ring. Otherwise each TX packet is copied into an mbuf, policy-
 * checked with NECP, and handed to ipsec_output() for encryption and
 * injection into the BSD stack.
 *
 * Runs with ipsec_pcb_lock held shared. Always returns 0.
 */
static errno_t
ipsec_netif_sync_tx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t tx_ring, uint32_t flags)
{
#pragma unused(nxprov)
#pragma unused(flags)
	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);

	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;

	if (!ipsec_data_move_begin(pcb)) {
		os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
		return 0;
	}

	lck_rw_lock_shared(&pcb->ipsec_pcb_lock);

	struct kern_channel_ring_stat_increment tx_ring_stats;
	bzero(&tx_ring_stats, sizeof(tx_ring_stats));
	kern_channel_slot_t tx_pslot = NULL;
	kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
	// Detached TX packets are chained here and freed in one batch at the end
	kern_packet_t tx_chain_ph = 0;

	STATS_INC(nifs, NETIF_STATS_TX_SYNC);

	if (tx_slot == NULL) {
		// Nothing to write, don't bother signalling
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		ipsec_data_move_end(pcb);
		return 0;
	}

	if (pcb->ipsec_kpipe_count &&
	    ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
		// Select the corresponding kpipe rx ring
		uint8_t ring_idx = (uint8_t)(uintptr_t)kern_channel_ring_get_context(tx_ring);
		VERIFY(ring_idx < IPSEC_IF_MAX_RING_COUNT);
		kern_channel_ring_t rx_ring = pcb->ipsec_kpipe_rxring[ring_idx];

		// Unlock while calling notify
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);

		// Signal the kernel pipe ring to read
		if (rx_ring != NULL) {
			kern_channel_notify(rx_ring, 0);
		}

		ipsec_data_move_end(pcb);
		return 0;
	}

	// If we're here, we're injecting into the BSD stack
	while (tx_slot != NULL) {
		size_t length = 0;
		mbuf_t data = NULL;

		kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);

		if (tx_ph == 0) {
			// Advance TX ring
			tx_pslot = tx_slot;
			tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
			continue;
		}
		(void) kern_channel_slot_detach_packet(tx_ring, tx_slot, tx_ph);
		if (tx_chain_ph != 0) {
			kern_packet_append(tx_ph, tx_chain_ph);
		}
		tx_chain_ph = tx_ph;

		// Advance TX ring
		tx_pslot = tx_slot;
		tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);

		kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
		VERIFY(tx_buf != NULL);
		uint8_t *tx_baddr = kern_buflet_get_data_address(tx_buf);
		VERIFY(tx_baddr != 0);
		tx_baddr += kern_buflet_get_data_offset(tx_buf);

		bpf_tap_packet_out(pcb->ipsec_ifp, DLT_RAW, tx_ph, NULL, 0);

		// Clamp the copy to one slot's worth of data
		length = MIN(kern_packet_get_data_length(tx_ph),
		    pcb->ipsec_slot_size);

		if (length > 0) {
			errno_t error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, &data);
			if (error == 0) {
				error = mbuf_copyback(data, 0, length, tx_baddr, MBUF_DONTWAIT);
				if (error == 0) {
					// Mark packet from policy
					uint32_t policy_id = kern_packet_get_policy_id(tx_ph);
					necp_mark_packet_from_ip(data, policy_id);

					// Check policy with NECP
					if (!ipsec_netif_check_policy(pcb->ipsec_ifp, data)) {
						os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - failed policy check\n", pcb->ipsec_ifp->if_xname);
						STATS_INC(nifs, NETIF_STATS_DROP);
						mbuf_freem(data);
						data = NULL;
					} else {
						// Send through encryption
						// ipsec_output consumes the mbuf in all cases
						error = ipsec_output(pcb->ipsec_ifp, data);
						if (error != 0) {
							os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - ipsec_output error %d\n", pcb->ipsec_ifp->if_xname, error);
						}
					}
				} else {
					os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - mbuf_copyback(%zu) error %d\n", pcb->ipsec_ifp->if_xname, length, error);
					STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
					STATS_INC(nifs, NETIF_STATS_DROP);
					mbuf_freem(data);
					data = NULL;
				}
			} else {
				os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - mbuf_gethdr error %d\n", pcb->ipsec_ifp->if_xname, error);
				STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
				STATS_INC(nifs, NETIF_STATS_DROP);
			}
		} else {
			os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - 0 length packet\n", pcb->ipsec_ifp->if_xname);
			STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
			STATS_INC(nifs, NETIF_STATS_DROP);
		}

		// data == NULL here means a drop path was taken; stop the batch
		if (data == NULL) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s: no encrypted packet to send\n", pcb->ipsec_ifp->if_xname);
			break;
		}

		STATS_INC(nifs, NETIF_STATS_TX_PACKETS);
		STATS_INC(nifs, NETIF_STATS_TX_COPY_MBUF);

		tx_ring_stats.kcrsi_slots_transferred++;
		tx_ring_stats.kcrsi_bytes_transferred += length;
	}

	// Free the whole chain of detached (already copied) TX packets at once
	if (tx_chain_ph != 0) {
		kern_pbufpool_free_chain(tx_ring->ckr_pp, tx_chain_ph);
	}

	if (tx_pslot) {
		kern_channel_advance_slot(tx_ring, tx_pslot);
		kern_channel_increment_ring_net_stats(tx_ring, pcb->ipsec_ifp, &tx_ring_stats);
		(void)kern_channel_reclaim(tx_ring);
	}

	lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
	ipsec_data_move_end(pcb);

	return 0;
}
1601
/*
 * ipsec_netif_tx_doorbell_one
 * Service a TX doorbell for a single netif TX ring: refill/sync the ring,
 * apply flow control (disable ifnet output when the ring is effectively
 * full), and kick the matching kpipe RX ring so the user channel reads.
 *
 * The ring pointer is re-validated against the pcb after kr_enter(), since
 * the ring may be torn down while we wait. Returns ENXIO in that case,
 * 0 otherwise.
 */
static errno_t
ipsec_netif_tx_doorbell_one(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t ring, uint32_t flags, uint8_t ring_idx)
{
#pragma unused(nxprov)
	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
	boolean_t more = false;
	errno_t rc = 0;

	VERIFY((flags & KERN_NEXUS_TXDOORBELLF_ASYNC_REFILL) == 0);

	/*
	 * Refill and sync the ring; we may be racing against another thread doing
	 * an RX sync that also wants to do kr_enter(), and so use the blocking
	 * variant here.
	 */
	rc = kern_channel_tx_refill_canblock(ring, UINT32_MAX, UINT32_MAX, true, &more);
	if (rc != 0 && rc != EAGAIN && rc != EBUSY) {
		os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s tx refill failed %d\n", __func__,
		    pcb->ipsec_if_xname, ring->ckr_name, rc);
	}

	(void) kr_enter(ring, TRUE);
	lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
	if (ring != pcb->ipsec_netif_txring[ring_idx]) {
		// ring no longer valid
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		kr_exit(ring);
		os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d bad netif_txring 3\n", __func__,
		    pcb->ipsec_if_xname, ring->ckr_name, ring_idx);
		return ENXIO;
	}

	if (pcb->ipsec_kpipe_count) {
		uint32_t tx_available = kern_channel_available_slot_count(ring);
		// All (or all but one) slots free means the kpipe side has not
		// drained anything yet; throttle the ifnet until it does.
		if (pcb->ipsec_netif_txring_size > 0 &&
		    tx_available >= pcb->ipsec_netif_txring_size - 1) {
			// No room left in tx ring, disable output for now
			errno_t error = ifnet_disable_output(pcb->ipsec_ifp);
			if (error != 0) {
				os_log_error(OS_LOG_DEFAULT, "ipsec_netif_tx_doorbell: ifnet_disable_output returned error %d\n", error);
			}
		}
	}

	if (pcb->ipsec_kpipe_count) {
		kern_channel_ring_t rx_ring = pcb->ipsec_kpipe_rxring[ring_idx];

		// Unlock while calling notify
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		// Signal the kernel pipe ring to read
		if (rx_ring != NULL) {
			kern_channel_notify(rx_ring, 0);
		}
	} else {
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
	}

	kr_exit(ring);

	return 0;
}
1664
1665 static errno_t
ipsec_netif_tx_doorbell(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring,__unused uint32_t flags)1666 ipsec_netif_tx_doorbell(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
1667 kern_channel_ring_t ring, __unused uint32_t flags)
1668 {
1669 errno_t ret = 0;
1670 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
1671
1672 if (!ipsec_data_move_begin(pcb)) {
1673 os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
1674 return 0;
1675 }
1676
1677 if (ipsec_in_wmm_mode(pcb)) {
1678 for (uint8_t i = 0; i < IPSEC_IF_WMM_RING_COUNT; i++) {
1679 kern_channel_ring_t nring = pcb->ipsec_netif_txring[i];
1680 ret = ipsec_netif_tx_doorbell_one(nxprov, nexus, nring, flags, i);
1681 if (ret) {
1682 break;
1683 }
1684 }
1685 } else {
1686 ret = ipsec_netif_tx_doorbell_one(nxprov, nexus, ring, flags, 0);
1687 }
1688
1689 ipsec_data_move_end(pcb);
1690 return ret;
1691 }
1692
1693 static errno_t
ipsec_netif_sync_rx_mbuf(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t rx_ring,uint32_t flags)1694 ipsec_netif_sync_rx_mbuf(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
1695 kern_channel_ring_t rx_ring, uint32_t flags)
1696 {
1697 #pragma unused(nxprov)
1698 #pragma unused(flags)
1699 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
1700 struct kern_channel_ring_stat_increment rx_ring_stats;
1701
1702 struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
1703
1704 if (!ipsec_data_move_begin(pcb)) {
1705 os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
1706 return 0;
1707 }
1708
1709 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
1710
1711 // Reclaim user-released slots
1712 (void) kern_channel_reclaim(rx_ring);
1713
1714 STATS_INC(nifs, NETIF_STATS_RX_SYNC);
1715
1716 uint32_t avail = kern_channel_available_slot_count(rx_ring);
1717 if (avail == 0) {
1718 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
1719 ipsec_data_move_end(pcb);
1720 return 0;
1721 }
1722
1723 struct kern_pbufpool *rx_pp = rx_ring->ckr_pp;
1724 VERIFY(rx_pp != NULL);
1725 bzero(&rx_ring_stats, sizeof(rx_ring_stats));
1726 kern_channel_slot_t rx_pslot = NULL;
1727 kern_channel_slot_t rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);
1728
1729 while (rx_slot != NULL) {
1730 // Check for a waiting packet
1731 lck_mtx_lock(&pcb->ipsec_input_chain_lock);
1732 mbuf_t data = pcb->ipsec_input_chain;
1733 if (data == NULL) {
1734 lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
1735 break;
1736 }
1737
1738 // Allocate rx packet
1739 kern_packet_t rx_ph = 0;
1740 errno_t error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
1741 if (__improbable(error != 0)) {
1742 STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_PKT);
1743 STATS_INC(nifs, NETIF_STATS_DROP);
1744 lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
1745 break;
1746 }
1747
1748 // Advance waiting packets
1749 if (pcb->ipsec_input_chain_count > 0) {
1750 pcb->ipsec_input_chain_count--;
1751 }
1752 pcb->ipsec_input_chain = data->m_nextpkt;
1753 data->m_nextpkt = NULL;
1754 if (pcb->ipsec_input_chain == NULL) {
1755 pcb->ipsec_input_chain_last = NULL;
1756 }
1757 lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
1758
1759 size_t length = mbuf_pkthdr_len(data);
1760
1761 if (length < sizeof(struct ip)) {
1762 // Flush data
1763 mbuf_freem(data);
1764 kern_pbufpool_free(rx_pp, rx_ph);
1765 STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
1766 STATS_INC(nifs, NETIF_STATS_DROP);
1767 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: legacy decrypted packet length cannot hold IP %zu < %zu\n",
1768 pcb->ipsec_ifp->if_xname, length, sizeof(struct ip));
1769 continue;
1770 }
1771
1772 uint32_t af = 0;
1773 struct ip *ip = mtod(data, struct ip *);
1774 u_int ip_version = ip->ip_v;
1775 switch (ip_version) {
1776 case 4: {
1777 af = AF_INET;
1778 break;
1779 }
1780 case 6: {
1781 af = AF_INET6;
1782 break;
1783 }
1784 default: {
1785 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: legacy unknown ip version %u\n",
1786 pcb->ipsec_ifp->if_xname, ip_version);
1787 break;
1788 }
1789 }
1790
1791 if (length > PP_BUF_SIZE_DEF(rx_pp) ||
1792 (pcb->ipsec_frag_size_set && length > pcb->ipsec_input_frag_size)) {
1793 // We need to fragment to send up into the netif
1794
1795 u_int32_t fragment_mtu = PP_BUF_SIZE_DEF(rx_pp);
1796 if (pcb->ipsec_frag_size_set &&
1797 pcb->ipsec_input_frag_size < PP_BUF_SIZE_DEF(rx_pp)) {
1798 fragment_mtu = pcb->ipsec_input_frag_size;
1799 }
1800
1801 mbuf_t fragment_chain = NULL;
1802 switch (af) {
1803 case AF_INET: {
1804 // ip_fragment expects the length in host order
1805 ip->ip_len = ntohs(ip->ip_len);
1806
1807 // ip_fragment will modify the original data, don't free
1808 int fragment_error = ip_fragment(data, pcb->ipsec_ifp, fragment_mtu, TRUE);
1809 if (fragment_error == 0 && data != NULL) {
1810 fragment_chain = data;
1811 } else {
1812 STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
1813 STATS_INC(nifs, NETIF_STATS_DROP);
1814 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: failed to fragment IPv4 packet of length %zu (%d)\n",
1815 pcb->ipsec_ifp->if_xname, length, fragment_error);
1816 }
1817 break;
1818 }
1819 case AF_INET6: {
1820 if (length < sizeof(struct ip6_hdr)) {
1821 mbuf_freem(data);
1822 STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
1823 STATS_INC(nifs, NETIF_STATS_DROP);
1824 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: failed to fragment IPv6 packet of length %zu < %zu\n",
1825 pcb->ipsec_ifp->if_xname, length, sizeof(struct ip6_hdr));
1826 } else {
1827 // ip6_do_fragmentation will free the original data on success only
1828 struct ip6_hdr *ip6 = mtod(data, struct ip6_hdr *);
1829
1830 int fragment_error = ip6_do_fragmentation(&data, 0, pcb->ipsec_ifp, sizeof(struct ip6_hdr),
1831 ip6, NULL, fragment_mtu, ip6->ip6_nxt, htonl(ip6_randomid()));
1832 if (fragment_error == 0 && data != NULL) {
1833 fragment_chain = data;
1834 } else {
1835 mbuf_freem(data);
1836 STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
1837 STATS_INC(nifs, NETIF_STATS_DROP);
1838 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: failed to fragment IPv6 packet of length %zu (%d)\n",
1839 pcb->ipsec_ifp->if_xname, length, fragment_error);
1840 }
1841 }
1842 break;
1843 }
1844 default: {
1845 // Cannot fragment unknown families
1846 mbuf_freem(data);
1847 STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
1848 STATS_INC(nifs, NETIF_STATS_DROP);
1849 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: uknown legacy decrypted packet length %zu > %u\n",
1850 pcb->ipsec_ifp->if_xname, length, PP_BUF_SIZE_DEF(rx_pp));
1851 break;
1852 }
1853 }
1854
1855 if (fragment_chain != NULL) {
1856 // Add fragments to chain before continuing
1857 lck_mtx_lock(&pcb->ipsec_input_chain_lock);
1858 if (pcb->ipsec_input_chain != NULL) {
1859 pcb->ipsec_input_chain_last->m_nextpkt = fragment_chain;
1860 } else {
1861 pcb->ipsec_input_chain = fragment_chain;
1862 }
1863 pcb->ipsec_input_chain_count++;
1864 while (fragment_chain->m_nextpkt) {
1865 VERIFY(fragment_chain != fragment_chain->m_nextpkt);
1866 fragment_chain = fragment_chain->m_nextpkt;
1867 pcb->ipsec_input_chain_count++;
1868 }
1869 pcb->ipsec_input_chain_last = fragment_chain;
1870 lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
1871 }
1872
1873 // Make sure to free unused rx packet
1874 kern_pbufpool_free(rx_pp, rx_ph);
1875
1876 continue;
1877 }
1878
1879 mbuf_pkthdr_setrcvif(data, pcb->ipsec_ifp);
1880
1881 // Fillout rx packet
1882 kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
1883 VERIFY(rx_buf != NULL);
1884 void *rx_baddr = kern_buflet_get_data_address(rx_buf);
1885 VERIFY(rx_baddr != NULL);
1886
1887 // Copy-in data from mbuf to buflet
1888 mbuf_copydata(data, 0, length, (void *)rx_baddr);
1889 kern_packet_clear_flow_uuid(rx_ph); // Zero flow id
1890
1891 // Finalize and attach the packet
1892 error = kern_buflet_set_data_offset(rx_buf, 0);
1893 VERIFY(error == 0);
1894 error = kern_buflet_set_data_length(rx_buf, (uint16_t)length);
1895 VERIFY(error == 0);
1896 error = kern_packet_set_headroom(rx_ph, 0);
1897 VERIFY(error == 0);
1898 error = kern_packet_finalize(rx_ph);
1899 VERIFY(error == 0);
1900 error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
1901 VERIFY(error == 0);
1902
1903 STATS_INC(nifs, NETIF_STATS_RX_PACKETS);
1904 STATS_INC(nifs, NETIF_STATS_RX_COPY_MBUF);
1905 bpf_tap_packet_in(pcb->ipsec_ifp, DLT_RAW, rx_ph, NULL, 0);
1906
1907 rx_ring_stats.kcrsi_slots_transferred++;
1908 rx_ring_stats.kcrsi_bytes_transferred += length;
1909
1910 if (!pcb->ipsec_ext_ifdata_stats) {
1911 ifnet_stat_increment_in(pcb->ipsec_ifp, 1, (uint16_t)length, 0);
1912 }
1913
1914 mbuf_freem(data);
1915
1916 // Advance ring
1917 rx_pslot = rx_slot;
1918 rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
1919 }
1920
1921 for (uint8_t ring_idx = 0; ring_idx < pcb->ipsec_kpipe_count; ring_idx++) {
1922 struct kern_channel_ring_stat_increment tx_ring_stats;
1923 bzero(&tx_ring_stats, sizeof(tx_ring_stats));
1924 kern_channel_ring_t tx_ring = pcb->ipsec_kpipe_txring[ring_idx];
1925 kern_channel_slot_t tx_pslot = NULL;
1926 kern_channel_slot_t tx_slot = NULL;
1927 if (tx_ring == NULL) {
1928 // Net-If TX ring not set up yet, nothing to read
1929 goto done;
1930 }
1931 // Unlock ipsec before entering ring
1932 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
1933
1934 (void)kr_enter(tx_ring, TRUE);
1935
1936 // Lock again after entering and validate
1937 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
1938
1939 if (tx_ring != pcb->ipsec_kpipe_txring[ring_idx]) {
1940 goto done;
1941 }
1942
1943 tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
1944 if (tx_slot == NULL) {
1945 // Nothing to read, don't bother signalling
1946 goto done;
1947 }
1948
1949 while (rx_slot != NULL && tx_slot != NULL) {
1950 size_t length = 0;
1951 mbuf_t data = NULL;
1952 errno_t error = 0;
1953 uint32_t af;
1954
1955 // Allocate rx packet
1956 kern_packet_t rx_ph = 0;
1957 error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
1958 if (__improbable(error != 0)) {
1959 STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_PKT);
1960 STATS_INC(nifs, NETIF_STATS_DROP);
1961 break;
1962 }
1963
1964 kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);
1965
1966 // Advance TX ring
1967 tx_pslot = tx_slot;
1968 tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
1969
1970 if (tx_ph == 0) {
1971 kern_pbufpool_free(rx_pp, rx_ph);
1972 continue;
1973 }
1974
1975 kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
1976 VERIFY(tx_buf != NULL);
1977 uint8_t *tx_baddr = kern_buflet_get_data_address(tx_buf);
1978 VERIFY(tx_baddr != 0);
1979 tx_baddr += kern_buflet_get_data_offset(tx_buf);
1980
1981 length = MIN(kern_packet_get_data_length(tx_ph),
1982 pcb->ipsec_slot_size);
1983
1984 // Increment TX stats
1985 tx_ring_stats.kcrsi_slots_transferred++;
1986 tx_ring_stats.kcrsi_bytes_transferred += length;
1987
1988 if (length >= sizeof(struct ip)) {
1989 error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, &data);
1990 if (error == 0) {
1991 error = mbuf_copyback(data, 0, length, tx_baddr, MBUF_DONTWAIT);
1992 if (error == 0) {
1993 // Check for wake packet flag
1994 uuid_t flow_uuid;
1995 kern_packet_get_flow_uuid(tx_ph, &flow_uuid);
1996 u_int8_t *id_8 = (u_int8_t *)(uintptr_t)flow_uuid;
1997 if ((id_8[0] & IPSEC_KPIPE_FLAG_WAKE_PKT) == IPSEC_KPIPE_FLAG_WAKE_PKT) {
1998 os_log_info(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: wake packet flag is set\n",
1999 pcb->ipsec_ifp->if_xname);
2000 data->m_pkthdr.pkt_flags |= PKTF_WAKE_PKT;
2001 }
2002
2003 lck_mtx_lock(&pcb->ipsec_kpipe_decrypt_lock);
2004 struct ip *ip = mtod(data, struct ip *);
2005 u_int ip_version = ip->ip_v;
2006 switch (ip_version) {
2007 case 4: {
2008 af = AF_INET;
2009 ip->ip_len = ntohs(ip->ip_len) - sizeof(struct ip);
2010 ip->ip_off = ntohs(ip->ip_off);
2011
2012 if (length < ip->ip_len) {
2013 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: IPv4 packet length too short (%zu < %u)\n",
2014 pcb->ipsec_ifp->if_xname, length, ip->ip_len);
2015 STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
2016 STATS_INC(nifs, NETIF_STATS_DROP);
2017 mbuf_freem(data);
2018 data = NULL;
2019 } else {
2020 data = esp4_input_extended(data, sizeof(struct ip), pcb->ipsec_ifp);
2021 }
2022 break;
2023 }
2024 case 6: {
2025 if (length < sizeof(struct ip6_hdr)) {
2026 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: IPv6 packet length too short for header %zu\n",
2027 pcb->ipsec_ifp->if_xname, length);
2028 STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
2029 STATS_INC(nifs, NETIF_STATS_DROP);
2030 mbuf_freem(data);
2031 data = NULL;
2032 } else {
2033 af = AF_INET6;
2034 struct ip6_hdr *ip6 = mtod(data, struct ip6_hdr *);
2035 const size_t ip6_len = sizeof(*ip6) + ntohs(ip6->ip6_plen);
2036 if (length < ip6_len) {
2037 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: IPv6 packet length too short (%zu < %zu)\n",
2038 pcb->ipsec_ifp->if_xname, length, ip6_len);
2039 STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
2040 STATS_INC(nifs, NETIF_STATS_DROP);
2041 mbuf_freem(data);
2042 data = NULL;
2043 } else {
2044 int offset = sizeof(struct ip6_hdr);
2045 esp6_input_extended(&data, &offset, ip6->ip6_nxt, pcb->ipsec_ifp);
2046 }
2047 }
2048 break;
2049 }
2050 default: {
2051 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: unknown ip version %u\n",
2052 pcb->ipsec_ifp->if_xname, ip_version);
2053 STATS_INC(nifs, NETIF_STATS_DROP);
2054 mbuf_freem(data);
2055 data = NULL;
2056 break;
2057 }
2058 }
2059 lck_mtx_unlock(&pcb->ipsec_kpipe_decrypt_lock);
2060 } else {
2061 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s - mbuf_copyback(%zu) error %d\n", pcb->ipsec_ifp->if_xname, length, error);
2062 STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
2063 STATS_INC(nifs, NETIF_STATS_DROP);
2064 mbuf_freem(data);
2065 data = NULL;
2066 }
2067 } else {
2068 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s - mbuf_gethdr error %d\n", pcb->ipsec_ifp->if_xname, error);
2069 STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
2070 STATS_INC(nifs, NETIF_STATS_DROP);
2071 }
2072 } else {
2073 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s - bad packet length %zu\n", pcb->ipsec_ifp->if_xname, length);
2074 STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
2075 STATS_INC(nifs, NETIF_STATS_DROP);
2076 }
2077
2078 if (data == NULL) {
2079 // Failed to get decrypted data data
2080 kern_pbufpool_free(rx_pp, rx_ph);
2081 continue;
2082 }
2083
2084 length = mbuf_pkthdr_len(data);
2085 if (length > PP_BUF_SIZE_DEF(rx_pp)) {
2086 // Flush data
2087 mbuf_freem(data);
2088 kern_pbufpool_free(rx_pp, rx_ph);
2089 STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
2090 STATS_INC(nifs, NETIF_STATS_DROP);
2091 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: decrypted packet length %zu > %u\n",
2092 pcb->ipsec_ifp->if_xname, length, PP_BUF_SIZE_DEF(rx_pp));
2093 continue;
2094 }
2095
2096 mbuf_pkthdr_setrcvif(data, pcb->ipsec_ifp);
2097
2098 // Fillout rx packet
2099 kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
2100 VERIFY(rx_buf != NULL);
2101 void *rx_baddr = kern_buflet_get_data_address(rx_buf);
2102 VERIFY(rx_baddr != NULL);
2103
2104 // Copy-in data from mbuf to buflet
2105 mbuf_copydata(data, 0, length, (void *)rx_baddr);
2106 kern_packet_clear_flow_uuid(rx_ph); // Zero flow id
2107
2108 // Finalize and attach the packet
2109 error = kern_buflet_set_data_offset(rx_buf, 0);
2110 VERIFY(error == 0);
2111 error = kern_buflet_set_data_length(rx_buf, (uint16_t)length);
2112 VERIFY(error == 0);
2113 error = kern_packet_set_link_header_offset(rx_ph, 0);
2114 VERIFY(error == 0);
2115 error = kern_packet_set_network_header_offset(rx_ph, 0);
2116 VERIFY(error == 0);
2117 error = kern_packet_finalize(rx_ph);
2118 VERIFY(error == 0);
2119 error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
2120 VERIFY(error == 0);
2121
2122 STATS_INC(nifs, NETIF_STATS_RX_PACKETS);
2123 STATS_INC(nifs, NETIF_STATS_RX_COPY_DIRECT);
2124 bpf_tap_packet_in(pcb->ipsec_ifp, DLT_RAW, rx_ph, NULL, 0);
2125
2126 rx_ring_stats.kcrsi_slots_transferred++;
2127 rx_ring_stats.kcrsi_bytes_transferred += length;
2128
2129 if (!pcb->ipsec_ext_ifdata_stats) {
2130 ifnet_stat_increment_in(pcb->ipsec_ifp, 1, (uint16_t)length, 0);
2131 }
2132
2133 mbuf_freem(data);
2134
2135 rx_pslot = rx_slot;
2136 rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
2137 }
2138
2139 done:
2140 if (tx_pslot) {
2141 kern_channel_advance_slot(tx_ring, tx_pslot);
2142 kern_channel_increment_ring_net_stats(tx_ring, pcb->ipsec_ifp, &tx_ring_stats);
2143 (void)kern_channel_reclaim(tx_ring);
2144 }
2145
2146 // Unlock first, then exit ring
2147 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
2148 if (tx_ring != NULL) {
2149 if (tx_pslot != NULL) {
2150 kern_channel_notify(tx_ring, 0);
2151 }
2152 kr_exit(tx_ring);
2153 }
2154
2155 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
2156 }
2157
2158 if (rx_pslot) {
2159 kern_channel_advance_slot(rx_ring, rx_pslot);
2160 kern_channel_increment_ring_net_stats(rx_ring, pcb->ipsec_ifp, &rx_ring_stats);
2161 }
2162
2163
2164 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
2165
2166 ipsec_data_move_end(pcb);
2167 return 0;
2168 }
2169
/*
 * ipsec_transform_kpipe_pkt_to_netif_pkt
 *
 * Convert one encrypted kpipe TX packet (kpipe_ph) into a decrypted netif
 * RX packet (netif_ph): validate lengths, copy the outer IP header from the
 * kpipe buflet into the netif buflet, then call esp_kpipe_input() under the
 * kpipe decrypt lock (presumably performing ESP decryption into the netif
 * packet — esp_kpipe_input's contract is not visible here; verify against
 * esp6.h), propagate the wake-packet flag, and finalize the netif packet.
 *
 * Returns 0 on success, or an errno (EMSGSIZE/EBADMSG or whatever
 * esp_kpipe_input returns) on failure; every failure path increments
 * NETIF_STATS_DROP.  The caller retains ownership of both packets.
 */
static errno_t
ipsec_transform_kpipe_pkt_to_netif_pkt(struct ipsec_pcb *pcb,
    struct kern_channel_ring_stat_increment *tx_ring_stats,
    struct netif_stats *nifs, kern_packet_t kpipe_ph, kern_packet_t netif_ph)
{
	kern_buflet_t kpipe_buf = NULL, netif_buf = NULL;
	uint8_t *kpipe_baddr = NULL, *netif_baddr = NULL;
	uuid_t flow_uuid;
	size_t iphlen = 0;
	uint32_t kpipe_buf_len = 0, netif_buf_lim = 0;
	int err = 0;

	VERIFY(kpipe_ph != 0);
	VERIFY(netif_ph != 0);
	VERIFY(pcb != NULL);
	VERIFY(tx_ring_stats != NULL);
	VERIFY(nifs != NULL);

	// Locate the source (kpipe) payload: base address + data offset, and its length
	kpipe_buf = kern_packet_get_next_buflet(kpipe_ph, NULL);
	VERIFY(kpipe_buf != NULL);
	kpipe_baddr = kern_buflet_get_data_address(kpipe_buf);
	VERIFY(kpipe_baddr != NULL);
	kpipe_baddr += kern_buflet_get_data_offset(kpipe_buf);
	kpipe_buf_len = kern_buflet_get_data_length(kpipe_buf);

	// Locate the destination (netif) buffer and compute the writable limit
	// remaining past its current data offset
	netif_buf = kern_packet_get_next_buflet(netif_ph, NULL);
	VERIFY(netif_buf != NULL);
	netif_baddr = kern_buflet_get_data_address(netif_buf);
	VERIFY(netif_baddr != NULL);
	netif_baddr += kern_buflet_get_data_offset(netif_buf);
	netif_buf_lim = __buflet_get_data_limit(netif_buf);
	netif_buf_lim -= __buflet_get_data_offset(netif_buf);

	// Reject packets larger than the configured slot size
	if (kpipe_buf_len > pcb->ipsec_slot_size) {
		os_log_info(OS_LOG_DEFAULT,
		    "ipsec_transform_kpipe_pkt_to_netif_pkt %s: kpipe buffer length "
		    "%u > pcb ipsec slot size %u", pcb->ipsec_ifp->if_xname,
		    kpipe_buf_len, pcb->ipsec_slot_size);
		STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
		err = EMSGSIZE;
		goto bad;
	}

	// TX accounting happens even if the packet is later dropped below
	tx_ring_stats->kcrsi_slots_transferred++;
	tx_ring_stats->kcrsi_bytes_transferred += kpipe_buf_len;

	// Must at least hold an IPv4 header before we can read ip_v
	if (__improbable(kpipe_buf_len < sizeof(struct ip))) {
		os_log_info(OS_LOG_DEFAULT, "ipsec_transform_kpipe_pkt_to_netif_pkt %s - bad "
		    "packet length %u\n", pcb->ipsec_ifp->if_xname, kpipe_buf_len);
		STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
		err = EBADMSG;
		goto bad;
	}

	struct ip *ip = (struct ip *)(void *)kpipe_baddr;
	ASSERT(IP_HDR_ALIGNED_P(ip));

	// Determine the outer IP header length from the version nibble
	u_int ip_vers = ip->ip_v;
	switch (ip_vers) {
	case IPVERSION: {
#ifdef _IP_VHL
		iphlen = IP_VHL_HL(ip->ip_vhl) << 2;
#else /* _IP_VHL */
		iphlen = ip->ip_hl << 2;  // header length field is in 32-bit words
#endif /* _IP_VHL */
		break;
	}
	case 6: {
		// IPv6 has a fixed-size base header; extension headers (if any)
		// are left for esp_kpipe_input to deal with
		iphlen = sizeof(struct ip6_hdr);
		break;
	}
	default: {
		os_log_info(OS_LOG_DEFAULT, "ipsec_transform_kpipe_pkt_to_netif_pkt %s - bad "
		    "ip version %u\n", pcb->ipsec_ifp->if_xname, ip_vers);
		err = EBADMSG;
		goto bad;
	}
	}

	// Source must actually contain the full IP header...
	if (__improbable(kpipe_buf_len < iphlen)) {
		os_log_info(OS_LOG_DEFAULT, "ipsec_transform_kpipe_pkt_to_netif_pkt %s - bad "
		    "packet length %u\n", pcb->ipsec_ifp->if_xname, kpipe_buf_len);
		STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
		err = EBADMSG;
		goto bad;
	}

	// ...and the destination must have room for it
	if (__improbable(netif_buf_lim < iphlen)) {
		os_log_info(OS_LOG_DEFAULT, "ipsec_transform_kpipe_pkt_to_netif_pkt %s - netif "
		    "buffer length %u too short\n", pcb->ipsec_ifp->if_xname, netif_buf_lim);
		STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
		err = EBADMSG;
		goto bad;
	}

	// Seed the netif packet with the outer IP header; esp_kpipe_input
	// appends/produces the decrypted payload
	memcpy(netif_baddr, kpipe_baddr, iphlen);
	__buflet_set_data_length(netif_buf, (uint16_t)iphlen);

	// Serialize decryption with the other kpipe input paths
	lck_mtx_lock(&pcb->ipsec_kpipe_decrypt_lock);
	err = esp_kpipe_input(pcb->ipsec_ifp, kpipe_ph, netif_ph);
	lck_mtx_unlock(&pcb->ipsec_kpipe_decrypt_lock);

	if (__improbable((err != 0))) {
		goto bad;
	}

	// The low bit of the flow UUID's first byte carries the wake-packet flag
	kern_packet_get_flow_uuid(kpipe_ph, &flow_uuid);
	uint8_t *id_8 = (uint8_t *)(uintptr_t)flow_uuid;
	if (__improbable((id_8[0] & IPSEC_KPIPE_FLAG_WAKE_PKT) == IPSEC_KPIPE_FLAG_WAKE_PKT)) {
		os_log_info(OS_LOG_DEFAULT, "ipsec_transform_kpipe_pkt_to_netif_pkt %s: wake packet "
		    "flag is set\n", pcb->ipsec_ifp->if_xname);
		__packet_set_wake_flag(netif_ph);
	}

	// Finalize: clear flow id, zero offsets, and seal the packet for the ring
	kern_packet_clear_flow_uuid(netif_ph);
	err = kern_buflet_set_data_offset(netif_buf, 0);
	VERIFY(err == 0);
	err = kern_packet_set_link_header_offset(netif_ph, 0);
	VERIFY(err == 0);
	err = kern_packet_set_network_header_offset(netif_ph, 0);
	VERIFY(err == 0);
	err = kern_packet_finalize(netif_ph);
	VERIFY(err == 0);

	return 0;
bad:
	STATS_INC(nifs, NETIF_STATS_DROP);
	return err;
}
2299
2300
2301 static errno_t
ipsec_netif_sync_rx_packet(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t rx_ring,uint32_t flags)2302 ipsec_netif_sync_rx_packet(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
2303 kern_channel_ring_t rx_ring, uint32_t flags)
2304 {
2305 #pragma unused(nxprov)
2306 #pragma unused(flags)
2307 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
2308 struct kern_channel_ring_stat_increment rx_ring_stats;
2309
2310 struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
2311
2312 if (!ipsec_data_move_begin(pcb)) {
2313 os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
2314 return 0;
2315 }
2316
2317 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
2318
2319 // Reclaim user-released slots
2320 (void) kern_channel_reclaim(rx_ring);
2321
2322 STATS_INC(nifs, NETIF_STATS_RX_SYNC);
2323
2324 uint32_t avail = kern_channel_available_slot_count(rx_ring);
2325 if (avail == 0) {
2326 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
2327 ipsec_data_move_end(pcb);
2328 return 0;
2329 }
2330
2331 struct kern_pbufpool *rx_pp = rx_ring->ckr_pp;
2332 VERIFY(rx_pp != NULL);
2333 bzero(&rx_ring_stats, sizeof(rx_ring_stats));
2334 kern_channel_slot_t rx_pslot = NULL;
2335 kern_channel_slot_t rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);
2336
2337 while (rx_slot != NULL) {
2338 // Check for a waiting packet
2339 lck_mtx_lock(&pcb->ipsec_input_chain_lock);
2340 mbuf_t data = pcb->ipsec_input_chain;
2341 if (data == NULL) {
2342 lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
2343 break;
2344 }
2345
2346 // Allocate rx packet
2347 kern_packet_t rx_ph = 0;
2348 errno_t error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
2349 if (__improbable(error != 0)) {
2350 STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_PKT);
2351 STATS_INC(nifs, NETIF_STATS_DROP);
2352 lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
2353 break;
2354 }
2355
2356 // Advance waiting packets
2357 if (pcb->ipsec_input_chain_count > 0) {
2358 pcb->ipsec_input_chain_count--;
2359 }
2360 pcb->ipsec_input_chain = data->m_nextpkt;
2361 data->m_nextpkt = NULL;
2362 if (pcb->ipsec_input_chain == NULL) {
2363 pcb->ipsec_input_chain_last = NULL;
2364 }
2365 lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
2366
2367 size_t length = mbuf_pkthdr_len(data);
2368
2369 if (length < sizeof(struct ip)) {
2370 // Flush data
2371 mbuf_freem(data);
2372 kern_pbufpool_free(rx_pp, rx_ph);
2373 STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
2374 STATS_INC(nifs, NETIF_STATS_DROP);
2375 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: legacy decrypted packet length cannot hold IP %zu < %zu\n",
2376 pcb->ipsec_ifp->if_xname, length, sizeof(struct ip));
2377 continue;
2378 }
2379
2380 uint32_t af = 0;
2381 struct ip *ip = mtod(data, struct ip *);
2382 u_int ip_version = ip->ip_v;
2383 switch (ip_version) {
2384 case 4: {
2385 af = AF_INET;
2386 break;
2387 }
2388 case 6: {
2389 af = AF_INET6;
2390 break;
2391 }
2392 default: {
2393 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: legacy unknown ip version %u\n",
2394 pcb->ipsec_ifp->if_xname, ip_version);
2395 break;
2396 }
2397 }
2398
2399 if (length > PP_BUF_SIZE_DEF(rx_pp) ||
2400 (pcb->ipsec_frag_size_set && length > pcb->ipsec_input_frag_size)) {
2401 // We need to fragment to send up into the netif
2402
2403 u_int32_t fragment_mtu = PP_BUF_SIZE_DEF(rx_pp);
2404 if (pcb->ipsec_frag_size_set &&
2405 pcb->ipsec_input_frag_size < PP_BUF_SIZE_DEF(rx_pp)) {
2406 fragment_mtu = pcb->ipsec_input_frag_size;
2407 }
2408
2409 mbuf_t fragment_chain = NULL;
2410 switch (af) {
2411 case AF_INET: {
2412 // ip_fragment expects the length in host order
2413 ip->ip_len = ntohs(ip->ip_len);
2414
2415 // ip_fragment will modify the original data, don't free
2416 int fragment_error = ip_fragment(data, pcb->ipsec_ifp, fragment_mtu, TRUE);
2417 if (fragment_error == 0 && data != NULL) {
2418 fragment_chain = data;
2419 } else {
2420 STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
2421 STATS_INC(nifs, NETIF_STATS_DROP);
2422 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: failed to fragment IPv4 packet of length %zu (%d)\n",
2423 pcb->ipsec_ifp->if_xname, length, fragment_error);
2424 }
2425 break;
2426 }
2427 case AF_INET6: {
2428 if (length < sizeof(struct ip6_hdr)) {
2429 mbuf_freem(data);
2430 STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
2431 STATS_INC(nifs, NETIF_STATS_DROP);
2432 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: failed to fragment IPv6 packet of length %zu < %zu\n",
2433 pcb->ipsec_ifp->if_xname, length, sizeof(struct ip6_hdr));
2434 } else {
2435 // ip6_do_fragmentation will free the original data on success only
2436 struct ip6_hdr *ip6 = mtod(data, struct ip6_hdr *);
2437
2438 int fragment_error = ip6_do_fragmentation(&data, 0, pcb->ipsec_ifp, sizeof(struct ip6_hdr),
2439 ip6, NULL, fragment_mtu, ip6->ip6_nxt, htonl(ip6_randomid()));
2440 if (fragment_error == 0 && data != NULL) {
2441 fragment_chain = data;
2442 } else {
2443 mbuf_freem(data);
2444 STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
2445 STATS_INC(nifs, NETIF_STATS_DROP);
2446 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: failed to fragment IPv6 packet of length %zu (%d)\n",
2447 pcb->ipsec_ifp->if_xname, length, fragment_error);
2448 }
2449 }
2450 break;
2451 }
2452 default: {
2453 // Cannot fragment unknown families
2454 mbuf_freem(data);
2455 STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
2456 STATS_INC(nifs, NETIF_STATS_DROP);
2457 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: uknown legacy decrypted packet length %zu > %u\n",
2458 pcb->ipsec_ifp->if_xname, length, PP_BUF_SIZE_DEF(rx_pp));
2459 break;
2460 }
2461 }
2462
2463 if (fragment_chain != NULL) {
2464 // Add fragments to chain before continuing
2465 lck_mtx_lock(&pcb->ipsec_input_chain_lock);
2466 if (pcb->ipsec_input_chain != NULL) {
2467 pcb->ipsec_input_chain_last->m_nextpkt = fragment_chain;
2468 } else {
2469 pcb->ipsec_input_chain = fragment_chain;
2470 }
2471 pcb->ipsec_input_chain_count++;
2472 while (fragment_chain->m_nextpkt) {
2473 VERIFY(fragment_chain != fragment_chain->m_nextpkt);
2474 fragment_chain = fragment_chain->m_nextpkt;
2475 pcb->ipsec_input_chain_count++;
2476 }
2477 pcb->ipsec_input_chain_last = fragment_chain;
2478 lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
2479 }
2480
2481 // Make sure to free unused rx packet
2482 kern_pbufpool_free(rx_pp, rx_ph);
2483
2484 continue;
2485 }
2486
2487 mbuf_pkthdr_setrcvif(data, pcb->ipsec_ifp);
2488
2489 // Fillout rx packet
2490 kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
2491 VERIFY(rx_buf != NULL);
2492 void *rx_baddr = kern_buflet_get_data_address(rx_buf);
2493 VERIFY(rx_baddr != NULL);
2494
2495 // Copy-in data from mbuf to buflet
2496 mbuf_copydata(data, 0, length, (void *)rx_baddr);
2497 kern_packet_clear_flow_uuid(rx_ph); // Zero flow id
2498
2499 // Finalize and attach the packet
2500 error = kern_buflet_set_data_offset(rx_buf, 0);
2501 VERIFY(error == 0);
2502 error = kern_buflet_set_data_length(rx_buf, (uint16_t)length);
2503 VERIFY(error == 0);
2504 error = kern_packet_set_headroom(rx_ph, 0);
2505 VERIFY(error == 0);
2506 error = kern_packet_finalize(rx_ph);
2507 VERIFY(error == 0);
2508 error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
2509 VERIFY(error == 0);
2510
2511 STATS_INC(nifs, NETIF_STATS_RX_PACKETS);
2512 STATS_INC(nifs, NETIF_STATS_RX_COPY_MBUF);
2513 bpf_tap_packet_in(pcb->ipsec_ifp, DLT_RAW, rx_ph, NULL, 0);
2514
2515 rx_ring_stats.kcrsi_slots_transferred++;
2516 rx_ring_stats.kcrsi_bytes_transferred += length;
2517
2518 if (!pcb->ipsec_ext_ifdata_stats) {
2519 ifnet_stat_increment_in(pcb->ipsec_ifp, 1, (uint16_t)length, 0);
2520 }
2521
2522 mbuf_freem(data);
2523
2524 // Advance ring
2525 rx_pslot = rx_slot;
2526 rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
2527 }
2528
2529 for (uint8_t ring_idx = 0; ring_idx < pcb->ipsec_kpipe_count; ring_idx++) {
2530 struct kern_channel_ring_stat_increment tx_ring_stats = {};
2531 kern_channel_slot_t tx_pslot = NULL;
2532 kern_channel_slot_t tx_slot = NULL;
2533
2534 kern_channel_ring_t tx_ring = pcb->ipsec_kpipe_txring[ring_idx];
2535 if (tx_ring == NULL) {
2536 // Net-If TX ring not set up yet, nothing to read
2537 goto done;
2538 }
2539
2540 // Unlock ipsec before entering ring
2541 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
2542
2543 (void)kr_enter(tx_ring, TRUE);
2544
2545 // Lock again after entering and validate
2546 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
2547
2548 if (tx_ring != pcb->ipsec_kpipe_txring[ring_idx]) {
2549 goto done;
2550 }
2551
2552 tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
2553 if (tx_slot == NULL) {
2554 // Nothing to read, don't bother signalling
2555 goto done;
2556 }
2557
2558 while (rx_slot != NULL && tx_slot != NULL) {
2559 errno_t error = 0;
2560
2561 // Allocate rx packet
2562 kern_packet_t rx_ph = 0;
2563 error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
2564 if (__improbable(error != 0)) {
2565 STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_PKT);
2566 STATS_INC(nifs, NETIF_STATS_DROP);
2567 break;
2568 }
2569
2570 kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);
2571 tx_pslot = tx_slot;
2572 tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
2573 if (tx_ph == 0) {
2574 kern_pbufpool_free(rx_pp, rx_ph);
2575 continue;
2576 }
2577
2578 error = ipsec_transform_kpipe_pkt_to_netif_pkt(pcb,
2579 &tx_ring_stats, nifs, tx_ph, rx_ph);
2580 if (error != 0) {
2581 // Failed to get decrypted packet
2582 kern_pbufpool_free(rx_pp, rx_ph);
2583 continue;
2584 }
2585
2586 error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
2587 VERIFY(error == 0);
2588
2589 STATS_INC(nifs, NETIF_STATS_RX_PACKETS);
2590 STATS_INC(nifs, NETIF_STATS_RX_COPY_DIRECT);
2591
2592 bpf_tap_packet_in(pcb->ipsec_ifp, DLT_RAW, rx_ph, NULL, 0);
2593
2594 rx_ring_stats.kcrsi_slots_transferred++;
2595 rx_ring_stats.kcrsi_bytes_transferred += kern_packet_get_data_length(rx_ph);
2596
2597 if (!pcb->ipsec_ext_ifdata_stats) {
2598 ifnet_stat_increment_in(pcb->ipsec_ifp, 1,
2599 kern_packet_get_data_length(rx_ph), 0);
2600 }
2601
2602 rx_pslot = rx_slot;
2603 rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
2604 }
2605
2606 done:
2607 if (tx_pslot) {
2608 kern_channel_advance_slot(tx_ring, tx_pslot);
2609 kern_channel_increment_ring_net_stats(tx_ring, pcb->ipsec_ifp, &tx_ring_stats);
2610 (void)kern_channel_reclaim(tx_ring);
2611 }
2612
2613 // Unlock first, then exit ring
2614 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
2615 if (tx_ring != NULL) {
2616 if (tx_pslot != NULL) {
2617 kern_channel_notify(tx_ring, 0);
2618 }
2619 kr_exit(tx_ring);
2620 }
2621
2622 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
2623 }
2624
2625 if (rx_pslot) {
2626 kern_channel_advance_slot(rx_ring, rx_pslot);
2627 kern_channel_increment_ring_net_stats(rx_ring, pcb->ipsec_ifp, &rx_ring_stats);
2628 }
2629
2630
2631 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
2632
2633 ipsec_data_move_end(pcb);
2634 return 0;
2635 }
2636
2637 static errno_t
ipsec_netif_sync_rx(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t rx_ring,uint32_t flags)2638 ipsec_netif_sync_rx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
2639 kern_channel_ring_t rx_ring, uint32_t flags)
2640 {
2641 if (__improbable(ipsec_kpipe_mbuf == 1)) {
2642 return ipsec_netif_sync_rx_mbuf(nxprov, nexus, rx_ring, flags);
2643 } else {
2644 return ipsec_netif_sync_rx_packet(nxprov, nexus, rx_ring, flags);
2645 }
2646 }
2647
2648 static errno_t
ipsec_nexus_ifattach(struct ipsec_pcb * pcb,struct ifnet_init_eparams * init_params,struct ifnet ** ifp)2649 ipsec_nexus_ifattach(struct ipsec_pcb *pcb,
2650 struct ifnet_init_eparams *init_params,
2651 struct ifnet **ifp)
2652 {
2653 errno_t err;
2654 nexus_controller_t controller = kern_nexus_shared_controller();
2655 struct kern_nexus_net_init net_init;
2656 struct kern_pbufpool_init pp_init;
2657
2658 nexus_name_t provider_name;
2659 snprintf((char *)provider_name, sizeof(provider_name),
2660 "com.apple.netif.%s", pcb->ipsec_if_xname);
2661
2662 struct kern_nexus_provider_init prov_init = {
2663 .nxpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
2664 .nxpi_flags = NXPIF_VIRTUAL_DEVICE,
2665 .nxpi_pre_connect = ipsec_nexus_pre_connect,
2666 .nxpi_connected = ipsec_nexus_connected,
2667 .nxpi_pre_disconnect = ipsec_netif_pre_disconnect,
2668 .nxpi_disconnected = ipsec_nexus_disconnected,
2669 .nxpi_ring_init = ipsec_netif_ring_init,
2670 .nxpi_ring_fini = ipsec_netif_ring_fini,
2671 .nxpi_slot_init = NULL,
2672 .nxpi_slot_fini = NULL,
2673 .nxpi_sync_tx = ipsec_netif_sync_tx,
2674 .nxpi_sync_rx = ipsec_netif_sync_rx,
2675 .nxpi_tx_doorbell = ipsec_netif_tx_doorbell,
2676 };
2677
2678 nexus_attr_t nxa = NULL;
2679 err = kern_nexus_attr_create(&nxa);
2680 IPSEC_IF_VERIFY(err == 0);
2681 if (err != 0) {
2682 os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_attr_create failed: %d\n",
2683 __func__, err);
2684 goto failed;
2685 }
2686
2687 uint64_t slot_buffer_size = pcb->ipsec_slot_size;
2688 err = kern_nexus_attr_set(nxa, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
2689 VERIFY(err == 0);
2690
2691 // Reset ring size for netif nexus to limit memory usage
2692 uint64_t ring_size = pcb->ipsec_netif_ring_size;
2693 err = kern_nexus_attr_set(nxa, NEXUS_ATTR_TX_SLOTS, ring_size);
2694 VERIFY(err == 0);
2695 err = kern_nexus_attr_set(nxa, NEXUS_ATTR_RX_SLOTS, ring_size);
2696 VERIFY(err == 0);
2697
2698 assert(err == 0);
2699
2700 if (ipsec_in_wmm_mode(pcb)) {
2701 os_log(OS_LOG_DEFAULT, "%s: %s enabling wmm mode\n",
2702 __func__, pcb->ipsec_if_xname);
2703
2704 init_params->output_sched_model = IFNET_SCHED_MODEL_DRIVER_MANAGED;
2705
2706 err = kern_nexus_attr_set(nxa, NEXUS_ATTR_TX_RINGS,
2707 IPSEC_NETIF_WMM_TX_RING_COUNT);
2708 VERIFY(err == 0);
2709 err = kern_nexus_attr_set(nxa, NEXUS_ATTR_RX_RINGS,
2710 IPSEC_NETIF_WMM_RX_RING_COUNT);
2711 VERIFY(err == 0);
2712
2713 err = kern_nexus_attr_set(nxa, NEXUS_ATTR_QMAP, NEXUS_QMAP_TYPE_WMM);
2714 VERIFY(err == 0);
2715 }
2716
2717 pcb->ipsec_netif_txring_size = ring_size;
2718
2719 bzero(&pp_init, sizeof(pp_init));
2720 pp_init.kbi_version = KERN_PBUFPOOL_CURRENT_VERSION;
2721 pp_init.kbi_flags |= KBIF_VIRTUAL_DEVICE;
2722 // Note: we need more packets than can be held in the tx and rx rings because
2723 // packets can also be in the AQM queue(s)
2724 pp_init.kbi_packets = pcb->ipsec_netif_ring_size * (2 * pcb->ipsec_kpipe_count + 1);
2725 pp_init.kbi_bufsize = pcb->ipsec_slot_size;
2726 pp_init.kbi_buf_seg_size = IPSEC_IF_DEFAULT_BUF_SEG_SIZE;
2727 pp_init.kbi_max_frags = 1;
2728 (void) snprintf((char *)pp_init.kbi_name, sizeof(pp_init.kbi_name),
2729 "%s", provider_name);
2730 pp_init.kbi_ctx = NULL;
2731 pp_init.kbi_ctx_retain = NULL;
2732 pp_init.kbi_ctx_release = NULL;
2733
2734 err = kern_pbufpool_create(&pp_init, &pcb->ipsec_netif_pp, NULL);
2735 if (err != 0) {
2736 os_log_error(OS_LOG_DEFAULT, "%s pbufbool create failed, error %d\n", __func__, err);
2737 goto failed;
2738 }
2739
2740 err = kern_nexus_controller_register_provider(controller,
2741 ipsec_nx_dom_prov,
2742 provider_name,
2743 &prov_init,
2744 sizeof(prov_init),
2745 nxa,
2746 &pcb->ipsec_nx.if_provider);
2747 IPSEC_IF_VERIFY(err == 0);
2748 if (err != 0) {
2749 os_log_error(OS_LOG_DEFAULT, "%s register provider failed, error %d\n",
2750 __func__, err);
2751 goto failed;
2752 }
2753
2754 bzero(&net_init, sizeof(net_init));
2755 net_init.nxneti_version = KERN_NEXUS_NET_CURRENT_VERSION;
2756 net_init.nxneti_flags = 0;
2757 net_init.nxneti_eparams = init_params;
2758 net_init.nxneti_lladdr = NULL;
2759 net_init.nxneti_prepare = ipsec_netif_prepare;
2760 net_init.nxneti_rx_pbufpool = pcb->ipsec_netif_pp;
2761 net_init.nxneti_tx_pbufpool = pcb->ipsec_netif_pp;
2762 err = kern_nexus_controller_alloc_net_provider_instance(controller,
2763 pcb->ipsec_nx.if_provider,
2764 pcb,
2765 NULL,
2766 &pcb->ipsec_nx.if_instance,
2767 &net_init,
2768 ifp);
2769 IPSEC_IF_VERIFY(err == 0);
2770 if (err != 0) {
2771 os_log_error(OS_LOG_DEFAULT, "%s alloc_net_provider_instance failed, %d\n",
2772 __func__, err);
2773 kern_nexus_controller_deregister_provider(controller,
2774 pcb->ipsec_nx.if_provider);
2775 uuid_clear(pcb->ipsec_nx.if_provider);
2776 goto failed;
2777 }
2778
2779 failed:
2780 if (nxa) {
2781 kern_nexus_attr_destroy(nxa);
2782 }
2783 if (err && pcb->ipsec_netif_pp != NULL) {
2784 kern_pbufpool_destroy(pcb->ipsec_netif_pp);
2785 pcb->ipsec_netif_pp = NULL;
2786 }
2787 return err;
2788 }
2789
2790 static void
ipsec_detach_provider_and_instance(uuid_t provider,uuid_t instance)2791 ipsec_detach_provider_and_instance(uuid_t provider, uuid_t instance)
2792 {
2793 nexus_controller_t controller = kern_nexus_shared_controller();
2794 errno_t err;
2795
2796 if (!uuid_is_null(instance)) {
2797 err = kern_nexus_controller_free_provider_instance(controller,
2798 instance);
2799 if (err != 0) {
2800 os_log_error(OS_LOG_DEFAULT, "%s free_provider_instance failed %d\n",
2801 __func__, err);
2802 }
2803 uuid_clear(instance);
2804 }
2805 if (!uuid_is_null(provider)) {
2806 err = kern_nexus_controller_deregister_provider(controller,
2807 provider);
2808 if (err != 0) {
2809 os_log_error(OS_LOG_DEFAULT, "%s deregister_provider %d\n", __func__, err);
2810 }
2811 uuid_clear(provider);
2812 }
2813 return;
2814 }
2815
/*
 * ipsec_nexus_detach
 *
 * Tear down all nexus state for an ipsec pcb, in dependency order:
 *   1. detach the interface from the flowswitch device (if attached),
 *   2. free/deregister the flowswitch provider and instance,
 *   3. free/deregister the netif provider and instance,
 *   4. destroy the netif packet buffer pool,
 * then zero the whole ipsec_nx structure.  All steps are best-effort;
 * failures are logged and teardown continues.
 */
static void
ipsec_nexus_detach(struct ipsec_pcb *pcb)
{
	ipsec_nx_t nx = &pcb->ipsec_nx;
	nexus_controller_t controller = kern_nexus_shared_controller();
	errno_t err;

	// Detach from the flowswitch device before tearing down providers
	if (!uuid_is_null(nx->fsw_device)) {
		err = kern_nexus_ifdetach(controller,
		    nx->fsw_instance,
		    nx->fsw_device);
		if (err != 0) {
			os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_ifdetach ms device failed %d\n",
			    __func__, err);
		}
	}

	ipsec_detach_provider_and_instance(nx->fsw_provider,
	    nx->fsw_instance);
	ipsec_detach_provider_and_instance(nx->if_provider,
	    nx->if_instance);

	// Destroy the pool only after the netif instance that used it is gone
	if (pcb->ipsec_netif_pp != NULL) {
		kern_pbufpool_destroy(pcb->ipsec_netif_pp);
		pcb->ipsec_netif_pp = NULL;
	}
	memset(nx, 0, sizeof(*nx));
}
2844
/*
 * Register a flowswitch nexus provider named "com.apple.<type>.<ifname>"
 * and allocate one instance of it.
 *
 * On success, *provider and *instance receive the new UUIDs.  On failure
 * an errno is returned and any partially-created state (registered
 * provider without an instance) is rolled back so the caller sees either
 * both UUIDs valid or neither.  Ring and slot attributes come from the
 * pcb's configured sizes.
 */
static errno_t
ipsec_create_fs_provider_and_instance(struct ipsec_pcb *pcb,
    const char *type_name,
    const char *ifname,
    uuid_t *provider, uuid_t *instance)
{
	nexus_attr_t attr = NULL;
	nexus_controller_t controller = kern_nexus_shared_controller();
	uuid_t dom_prov;
	errno_t err;
	struct kern_nexus_init init;
	nexus_name_t provider_name;

	err = kern_nexus_get_default_domain_provider(NEXUS_TYPE_FLOW_SWITCH,
	    &dom_prov);
	IPSEC_IF_VERIFY(err == 0);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s can't get %s provider, error %d\n",
		    __func__, type_name, err);
		goto failed;
	}

	err = kern_nexus_attr_create(&attr);
	IPSEC_IF_VERIFY(err == 0);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_attr_create failed: %d\n",
		    __func__, err);
		goto failed;
	}

	uint64_t slot_buffer_size = pcb->ipsec_slot_size;
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
	VERIFY(err == 0);

	// Reset ring size for flowswitch nexus to limit memory usage. Larger RX than netif.
	uint64_t tx_ring_size = pcb->ipsec_tx_fsw_ring_size;
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_TX_SLOTS, tx_ring_size);
	VERIFY(err == 0);
	uint64_t rx_ring_size = pcb->ipsec_rx_fsw_ring_size;
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_RX_SLOTS, rx_ring_size);
	VERIFY(err == 0);
	/*
	 * Configure flowswitch to use super-packet (multi-buflet).
	 * This allows flowswitch to perform intra-stack packet aggregation.
	 */
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_MAX_FRAGS,
	    NX_FSW_TCP_RX_AGG_ENABLED() ? NX_PBUF_FRAGS_MAX : 1);
	VERIFY(err == 0);

	snprintf((char *)provider_name, sizeof(provider_name),
	    "com.apple.%s.%s", type_name, ifname);
	err = kern_nexus_controller_register_provider(controller,
	    dom_prov,
	    provider_name,
	    NULL,
	    0,
	    attr,
	    provider);
	/* The attribute object is not needed past registration; release it
	 * unconditionally so the failure paths below need not track it. */
	kern_nexus_attr_destroy(attr);
	attr = NULL;
	IPSEC_IF_VERIFY(err == 0);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s register %s provider failed, error %d\n",
		    __func__, type_name, err);
		goto failed;
	}
	bzero(&init, sizeof(init));
	init.nxi_version = KERN_NEXUS_CURRENT_VERSION;
	err = kern_nexus_controller_alloc_provider_instance(controller,
	    *provider,
	    NULL, NULL,
	    instance, &init);
	IPSEC_IF_VERIFY(err == 0);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s alloc_provider_instance %s failed, %d\n",
		    __func__, type_name, err);
		/* Roll back the registration so no half-built state escapes. */
		kern_nexus_controller_deregister_provider(controller,
		    *provider);
		uuid_clear(*provider);
	}
failed:
	return err;
}
2928
/*
 * Create the flowswitch nexus for an ipsec interface and attach it to the
 * netif device port.  On success the flowswitch agent UUID is copied into
 * the pcb's nexus state for later use.  On failure, all nexus state is
 * torn down and the ifnet is detached; a failing ifnet_detach() at that
 * point is unrecoverable and panics.
 */
static errno_t
ipsec_flowswitch_attach(struct ipsec_pcb *pcb)
{
	nexus_controller_t controller = kern_nexus_shared_controller();
	errno_t err = 0;
	ipsec_nx_t nx = &pcb->ipsec_nx;

	// Allocate flowswitch
	err = ipsec_create_fs_provider_and_instance(pcb,
	    "flowswitch",
	    pcb->ipsec_ifp->if_xname,
	    &nx->fsw_provider,
	    &nx->fsw_instance);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s: failed to create bridge provider and instance\n",
		    __func__);
		goto failed;
	}

	// Attach flowswitch to device port
	err = kern_nexus_ifattach(controller, nx->fsw_instance,
	    NULL, nx->if_instance,
	    FALSE, &nx->fsw_device);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s kern_nexus_ifattach ms device %d\n", __func__, err);
		goto failed;
	}

	// Extract the agent UUID and save for later
	struct kern_nexus *flowswitch_nx = nx_find(nx->fsw_instance, false);
	if (flowswitch_nx != NULL) {
		struct nx_flowswitch *flowswitch = NX_FSW_PRIVATE(flowswitch_nx);
		if (flowswitch != NULL) {
			/* Copy the agent UUID under the flowswitch read lock. */
			FSW_RLOCK(flowswitch);
			uuid_copy(nx->fsw_agent, flowswitch->fsw_agent_uuid);
			FSW_UNLOCK(flowswitch);
		} else {
			os_log_error(OS_LOG_DEFAULT, "ipsec_flowswitch_attach - flowswitch is NULL\n");
		}
		/* Drop the reference taken by nx_find(). */
		nx_release(flowswitch_nx);
	} else {
		os_log_error(OS_LOG_DEFAULT, "ipsec_flowswitch_attach - unable to find flowswitch nexus\n");
	}

	return 0;

failed:
	/* Undo any nexus state built above before detaching the ifnet. */
	ipsec_nexus_detach(pcb);

	errno_t detach_error = 0;
	if ((detach_error = ifnet_detach(pcb->ipsec_ifp)) != 0) {
		panic("ipsec_flowswitch_attach - ifnet_detach failed: %d", detach_error);
		/* NOT REACHED */
	}

	return err;
}
2986
2987 #pragma mark Kernel Pipe Nexus
2988
/*
 * Create (on first use) the process-wide kernel pipe nexus controller and
 * register the kpipe provider.  The controller is a refcounted singleton
 * guarded by ipsec_lock: only the first caller performs the setup; later
 * callers just bump ipsec_ncd_refcount and return.  On any failure the
 * partially-built controller is destroyed and the refcount is reset to 0.
 * Callers pair this with ipsec_unregister_kernel_pipe_nexus().
 */
static errno_t
ipsec_register_kernel_pipe_nexus(struct ipsec_pcb *pcb)
{
	nexus_attr_t nxa = NULL;
	errno_t result;

	lck_mtx_lock(&ipsec_lock);
	/* Already set up by a previous caller: just take a reference. */
	if (ipsec_ncd_refcount++) {
		lck_mtx_unlock(&ipsec_lock);
		return 0;
	}

	result = kern_nexus_controller_create(&ipsec_ncd);
	if (result) {
		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_controller_create failed: %d\n",
		    __FUNCTION__, result);
		goto done;
	}

	uuid_t dom_prov;
	result = kern_nexus_get_default_domain_provider(
		NEXUS_TYPE_KERNEL_PIPE, &dom_prov);
	if (result) {
		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_get_default_domain_provider failed: %d\n",
		    __FUNCTION__, result);
		goto done;
	}

	/* Callback table wiring the kpipe data path to this driver. */
	struct kern_nexus_provider_init prov_init = {
		.nxpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
		.nxpi_flags = NXPIF_VIRTUAL_DEVICE,
		.nxpi_pre_connect = ipsec_nexus_pre_connect,
		.nxpi_connected = ipsec_nexus_connected,
		.nxpi_pre_disconnect = ipsec_nexus_pre_disconnect,
		.nxpi_disconnected = ipsec_nexus_disconnected,
		.nxpi_ring_init = ipsec_kpipe_ring_init,
		.nxpi_ring_fini = ipsec_kpipe_ring_fini,
		.nxpi_slot_init = NULL,
		.nxpi_slot_fini = NULL,
		.nxpi_sync_tx = ipsec_kpipe_sync_tx,
		.nxpi_sync_rx = ipsec_kpipe_sync_rx,
		.nxpi_tx_doorbell = NULL,
	};

	result = kern_nexus_attr_create(&nxa);
	if (result) {
		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_attr_create failed: %d\n",
		    __FUNCTION__, result);
		goto done;
	}

	uint64_t slot_buffer_size = IPSEC_IF_DEFAULT_SLOT_SIZE;
	result = kern_nexus_attr_set(nxa, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
	VERIFY(result == 0);

	// Reset ring size for kernel pipe nexus to limit memory usage
	// Note: It's better to have fewer slots on the kpipe TX ring than the netif
	// so back pressure is applied at the AQM layer
	uint64_t ring_size =
	    pcb->ipsec_kpipe_tx_ring_size != 0 ? pcb->ipsec_kpipe_tx_ring_size :
	    pcb->ipsec_netif_ring_size != 0 ? pcb->ipsec_netif_ring_size :
	    if_ipsec_ring_size;
	result = kern_nexus_attr_set(nxa, NEXUS_ATTR_TX_SLOTS, ring_size);
	VERIFY(result == 0);

	ring_size =
	    pcb->ipsec_kpipe_rx_ring_size != 0 ? pcb->ipsec_kpipe_rx_ring_size :
	    pcb->ipsec_netif_ring_size != 0 ? pcb->ipsec_netif_ring_size :
	    if_ipsec_ring_size;
	result = kern_nexus_attr_set(nxa, NEXUS_ATTR_RX_SLOTS, ring_size);
	VERIFY(result == 0);

	result = kern_nexus_controller_register_provider(ipsec_ncd,
	    dom_prov,
	    (const uint8_t *)"com.apple.nexus.ipsec.kpipe",
	    &prov_init,
	    sizeof(prov_init),
	    nxa,
	    &ipsec_kpipe_uuid);
	if (result) {
		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_controller_register_provider failed: %d\n",
		    __FUNCTION__, result);
		goto done;
	}

done:
	if (nxa) {
		kern_nexus_attr_destroy(nxa);
	}

	/* Failure: tear down and drop the reference we took above. */
	if (result) {
		if (ipsec_ncd) {
			kern_nexus_controller_destroy(ipsec_ncd);
			ipsec_ncd = NULL;
		}
		ipsec_ncd_refcount = 0;
	}

	lck_mtx_unlock(&ipsec_lock);

	return result;
}
3091
3092 static void
ipsec_unregister_kernel_pipe_nexus(void)3093 ipsec_unregister_kernel_pipe_nexus(void)
3094 {
3095 lck_mtx_lock(&ipsec_lock);
3096
3097 VERIFY(ipsec_ncd_refcount > 0);
3098
3099 if (--ipsec_ncd_refcount == 0) {
3100 kern_nexus_controller_destroy(ipsec_ncd);
3101 ipsec_ncd = NULL;
3102 }
3103
3104 lck_mtx_unlock(&ipsec_lock);
3105 }
3106
/* This structure only holds onto kpipe channels that need to be
 * freed in the future, but are cleared from the pcb under lock
 */
struct ipsec_detached_channels {
	int count;                             /* number of valid entries in uuids[] */
	kern_pbufpool_t pp;                    /* kpipe buffer pool to destroy with the channels */
	uuid_t uuids[IPSEC_IF_MAX_RING_COUNT]; /* detached kpipe instance UUIDs */
};
3115
/*
 * Move the pcb's kpipe channel UUIDs and buffer pool into `dc' so they
 * can be freed later (via ipsec_free_channels()) without holding the pcb
 * lock.  Must be called with the pcb lock held exclusively.  Clears
 * IPSEC_FLAGS_KPIPE_ALLOCATED; if it was never set, verifies that no
 * channel state exists and returns with dc->count == 0.
 */
static void
ipsec_detach_channels(struct ipsec_pcb *pcb, struct ipsec_detached_channels *dc)
{
	LCK_RW_ASSERT(&pcb->ipsec_pcb_lock, LCK_RW_TYPE_EXCLUSIVE);

	if (!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
		/* Nothing allocated: every slot must be empty. */
		for (int i = 0; i < IPSEC_IF_MAX_RING_COUNT; i++) {
			VERIFY(uuid_is_null(pcb->ipsec_kpipe_uuid[i]));
		}
		dc->count = 0;
		return;
	}

	dc->count = pcb->ipsec_kpipe_count;

	VERIFY(dc->count >= 0);
	VERIFY(dc->count <= IPSEC_IF_MAX_RING_COUNT);

	/* Transfer the first `count' UUIDs out of the pcb... */
	for (int i = 0; i < dc->count; i++) {
		VERIFY(!uuid_is_null(pcb->ipsec_kpipe_uuid[i]));
		uuid_copy(dc->uuids[i], pcb->ipsec_kpipe_uuid[i]);
		uuid_clear(pcb->ipsec_kpipe_uuid[i]);
	}
	/* ...and check the remaining slots were never populated. */
	for (int i = dc->count; i < IPSEC_IF_MAX_RING_COUNT; i++) {
		VERIFY(uuid_is_null(pcb->ipsec_kpipe_uuid[i]));
	}

	/* The pool exists iff there is at least one channel. */
	if (dc->count) {
		VERIFY(pcb->ipsec_kpipe_pp);
	} else {
		VERIFY(!pcb->ipsec_kpipe_pp);
	}

	dc->pp = pcb->ipsec_kpipe_pp;

	pcb->ipsec_kpipe_pp = NULL;

	ipsec_flag_clr(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED);
}
3155
/*
 * Free channels previously captured by ipsec_detach_channels().  May be
 * called without the pcb lock.  A zero count means there is nothing to
 * do.  Also destroys the transferred buffer pool and drops the kernel
 * pipe nexus reference taken when the channels were enabled.
 */
static void
ipsec_free_channels(struct ipsec_detached_channels *dc)
{
	if (!dc->count) {
		return;
	}

	for (int i = 0; i < dc->count; i++) {
		errno_t result;
		result = kern_nexus_controller_free_provider_instance(ipsec_ncd, dc->uuids[i]);
		VERIFY(!result);
	}

	VERIFY(dc->pp);
	kern_pbufpool_destroy(dc->pp);

	ipsec_unregister_kernel_pipe_nexus();

	/* Scrub so a stale dc cannot be freed twice. */
	memset(dc, 0, sizeof(*dc));
}
3176
/*
 * Allocate and bind the pcb's kernel pipe channels (one per configured
 * kpipe ring).  Requires PRIV_SKYWALK_REGISTER_KERNEL_PIPE.  Each channel
 * is bound either to a specific pid or to an executable UUID, defaulting
 * to the calling process when neither was configured.  On success sets
 * IPSEC_FLAGS_KPIPE_ALLOCATED; on failure all partially-created channels,
 * the buffer pool, and the nexus reference are released.
 */
static errno_t
ipsec_enable_channel(struct ipsec_pcb *pcb, struct proc *proc)
{
	struct kern_nexus_init init;
	struct kern_pbufpool_init pp_init;
	errno_t result;

	/* Privilege check before any state is created. */
	kauth_cred_t cred = kauth_cred_get();
	result = priv_check_cred(cred, PRIV_SKYWALK_REGISTER_KERNEL_PIPE, 0);
	if (result) {
		return result;
	}

	VERIFY(pcb->ipsec_kpipe_count);
	VERIFY(!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED));

	result = ipsec_register_kernel_pipe_nexus(pcb);

	lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);

	if (result) {
		os_log_error(OS_LOG_DEFAULT, "%s: %s failed to register kernel pipe nexus\n",
		    __func__, pcb->ipsec_if_xname);
		goto done;
	}

	VERIFY(ipsec_ncd);

	bzero(&pp_init, sizeof(pp_init));
	pp_init.kbi_version = KERN_PBUFPOOL_CURRENT_VERSION;
	pp_init.kbi_flags |= KBIF_VIRTUAL_DEVICE;
	// Note: We only need as many packets as can be held in the tx and rx rings
	pp_init.kbi_packets = pcb->ipsec_netif_ring_size * 2 * pcb->ipsec_kpipe_count;
	pp_init.kbi_bufsize = pcb->ipsec_slot_size;
	pp_init.kbi_buf_seg_size = IPSEC_IF_DEFAULT_BUF_SEG_SIZE;
	pp_init.kbi_max_frags = 1;
	pp_init.kbi_flags |= KBIF_QUANTUM;
	(void) snprintf((char *)pp_init.kbi_name, sizeof(pp_init.kbi_name),
	    "com.apple.kpipe.%s", pcb->ipsec_if_xname);
	pp_init.kbi_ctx = NULL;
	pp_init.kbi_ctx_retain = NULL;
	pp_init.kbi_ctx_release = NULL;

	result = kern_pbufpool_create(&pp_init, &pcb->ipsec_kpipe_pp,
	    NULL);
	if (result != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s: %s pbufbool create failed, error %d\n",
		    __func__, pcb->ipsec_if_xname, result);
		goto done;
	}

	bzero(&init, sizeof(init));
	init.nxi_version = KERN_NEXUS_CURRENT_VERSION;
	init.nxi_tx_pbufpool = pcb->ipsec_kpipe_pp;

	for (unsigned int i = 0; i < pcb->ipsec_kpipe_count; i++) {
		VERIFY(uuid_is_null(pcb->ipsec_kpipe_uuid[i]));
		result = kern_nexus_controller_alloc_provider_instance(ipsec_ncd,
		    ipsec_kpipe_uuid, pcb, NULL, &pcb->ipsec_kpipe_uuid[i], &init);

		if (result == 0) {
			/* Bind by exec UUID if configured, else by pid
			 * (defaulting to the calling process). */
			nexus_port_t port = NEXUS_PORT_KERNEL_PIPE_CLIENT;
			const bool has_proc_uuid = !uuid_is_null(pcb->ipsec_kpipe_proc_uuid);
			pid_t pid = pcb->ipsec_kpipe_pid;
			if (!pid && !has_proc_uuid) {
				pid = proc_pid(proc);
			}
			result = kern_nexus_controller_bind_provider_instance(ipsec_ncd,
			    pcb->ipsec_kpipe_uuid[i], &port,
			    pid, has_proc_uuid ? pcb->ipsec_kpipe_proc_uuid : NULL, NULL,
			    0, has_proc_uuid ? NEXUS_BIND_EXEC_UUID:NEXUS_BIND_PID);
		}

		if (result) {
			/* Unwind all of them on error */
			for (int j = 0; j < IPSEC_IF_MAX_RING_COUNT; j++) {
				if (!uuid_is_null(pcb->ipsec_kpipe_uuid[j])) {
					kern_nexus_controller_free_provider_instance(ipsec_ncd,
					    pcb->ipsec_kpipe_uuid[j]);
					uuid_clear(pcb->ipsec_kpipe_uuid[j]);
				}
			}
			goto done;
		}
	}

done:
	lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);

	if (result) {
		/* Failure: release the pool and the nexus reference. */
		if (pcb->ipsec_kpipe_pp != NULL) {
			kern_pbufpool_destroy(pcb->ipsec_kpipe_pp);
			pcb->ipsec_kpipe_pp = NULL;
		}
		ipsec_unregister_kernel_pipe_nexus();
	} else {
		ipsec_flag_set(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED);
	}

	return result;
}
3278
3279 #endif // IPSEC_NEXUS
3280
3281
3282 /* Kernel control functions */
3283
3284 static inline int
ipsec_find_by_unit(u_int32_t unit)3285 ipsec_find_by_unit(u_int32_t unit)
3286 {
3287 struct ipsec_pcb *next_pcb = NULL;
3288 int found = 0;
3289
3290 TAILQ_FOREACH(next_pcb, &ipsec_head, ipsec_chain) {
3291 if (next_pcb->ipsec_unit == unit) {
3292 found = 1;
3293 break;
3294 }
3295 }
3296
3297 return found;
3298 }
3299
/*
 * Destroy a pcb's locks and queued input, unlink it from the global list,
 * and free it.  `locked' tells whether the caller already holds
 * ipsec_lock (required for the TAILQ_REMOVE); if not, it is taken here.
 */
static inline void
ipsec_free_pcb(struct ipsec_pcb *pcb, bool locked)
{
#if IPSEC_NEXUS
	/* Release any mbufs still queued on the input chain. */
	mbuf_freem_list(pcb->ipsec_input_chain);
	pcb->ipsec_input_chain_count = 0;
	lck_mtx_destroy(&pcb->ipsec_input_chain_lock, &ipsec_lck_grp);
	lck_mtx_destroy(&pcb->ipsec_kpipe_encrypt_lock, &ipsec_lck_grp);
	lck_mtx_destroy(&pcb->ipsec_kpipe_decrypt_lock, &ipsec_lck_grp);
#endif // IPSEC_NEXUS
	lck_mtx_destroy(&pcb->ipsec_pcb_data_move_lock, &ipsec_lck_grp);
	lck_rw_destroy(&pcb->ipsec_pcb_lock, &ipsec_lck_grp);
	if (!locked) {
		lck_mtx_lock(&ipsec_lock);
	}
	TAILQ_REMOVE(&ipsec_head, pcb, ipsec_chain);
	if (!locked) {
		lck_mtx_unlock(&ipsec_lock);
	}
	zfree(ipsec_pcb_zone, pcb);
}
3321
/*
 * Allocate a new pcb for a control connection.  Picks the requested unit
 * (or the lowest free one when *unit == 0) and a unique interface id,
 * then inserts the pcb into the global list sorted by unique id.
 * Returns EBUSY if the unit space is exhausted or the requested unit is
 * taken; on success *unitinfo points at the new pcb.
 */
static errno_t
ipsec_ctl_setup(u_int32_t *unit, void **unitinfo)
{
	if (unit == NULL || unitinfo == NULL) {
		return EINVAL;
	}

	lck_mtx_lock(&ipsec_lock);

	/* Find next available unit */
	if (*unit == 0) {
		*unit = 1;
		while (*unit != ctl_maxunit) {
			if (ipsec_find_by_unit(*unit)) {
				(*unit)++;
			} else {
				break;
			}
		}
		if (*unit == ctl_maxunit) {
			lck_mtx_unlock(&ipsec_lock);
			return EBUSY;
		}
	} else if (ipsec_find_by_unit(*unit)) {
		lck_mtx_unlock(&ipsec_lock);
		return EBUSY;
	}

	/* Find some open interface id */
	u_int32_t chosen_unique_id = 1;
	struct ipsec_pcb *next_pcb = TAILQ_LAST(&ipsec_head, ipsec_list);
	if (next_pcb != NULL) {
		/* List was not empty, add one to the last item */
		chosen_unique_id = next_pcb->ipsec_unique_id + 1;
		next_pcb = NULL;

		/*
		 * If this wrapped the id number, start looking at
		 * the front of the list for an unused id.
		 */
		if (chosen_unique_id == 0) {
			/* Find the next unused ID */
			chosen_unique_id = 1;
			TAILQ_FOREACH(next_pcb, &ipsec_head, ipsec_chain) {
				if (next_pcb->ipsec_unique_id > chosen_unique_id) {
					/* We found a gap */
					break;
				}

				chosen_unique_id = next_pcb->ipsec_unique_id + 1;
			}
		}
		/* Note: next_pcb is now either NULL (append at tail) or the
		 * element before which the new pcb keeps the list sorted. */
	}

	struct ipsec_pcb *pcb = zalloc_flags(ipsec_pcb_zone, Z_WAITOK | Z_ZERO);

	*unitinfo = pcb;
	pcb->ipsec_unit = *unit;
	pcb->ipsec_unique_id = chosen_unique_id;

	if (next_pcb != NULL) {
		TAILQ_INSERT_BEFORE(next_pcb, pcb, ipsec_chain);
	} else {
		TAILQ_INSERT_TAIL(&ipsec_head, pcb, ipsec_chain);
	}

	lck_mtx_unlock(&ipsec_lock);

	return 0;
}
3392
/*
 * Kernel-control bind handler.  Creates the pcb if ipsec_ctl_setup() has
 * not run yet, then initializes the control reference, default output
 * service class, nexus defaults, and all pcb locks.  Returns EINVAL if no
 * pcb could be obtained.
 */
static errno_t
ipsec_ctl_bind(kern_ctl_ref kctlref,
    struct sockaddr_ctl *sac,
    void **unitinfo)
{
	if (*unitinfo == NULL) {
		u_int32_t unit = 0;
		(void)ipsec_ctl_setup(&unit, unitinfo);
	}

	struct ipsec_pcb *pcb = (struct ipsec_pcb *)*unitinfo;
	if (pcb == NULL) {
		return EINVAL;
	}

	/* Setup the protocol control block */
	pcb->ipsec_ctlref = kctlref;
	pcb->ipsec_unit = sac->sc_unit;
	pcb->ipsec_output_service_class = MBUF_SC_OAM;

#if IPSEC_NEXUS
	/* Defaults; may be overridden by setsockopt before connect. */
	pcb->ipsec_use_netif = false;
	pcb->ipsec_slot_size = IPSEC_IF_DEFAULT_SLOT_SIZE;
	pcb->ipsec_netif_ring_size = if_ipsec_ring_size;
	pcb->ipsec_tx_fsw_ring_size = if_ipsec_tx_fsw_ring_size;
	pcb->ipsec_rx_fsw_ring_size = if_ipsec_rx_fsw_ring_size;
#endif // IPSEC_NEXUS

	lck_rw_init(&pcb->ipsec_pcb_lock, &ipsec_lck_grp, &ipsec_lck_attr);
	lck_mtx_init(&pcb->ipsec_pcb_data_move_lock, &ipsec_lck_grp, &ipsec_lck_attr);
#if IPSEC_NEXUS
	pcb->ipsec_input_chain_count = 0;
	lck_mtx_init(&pcb->ipsec_input_chain_lock, &ipsec_lck_grp, &ipsec_lck_attr);
	lck_mtx_init(&pcb->ipsec_kpipe_encrypt_lock, &ipsec_lck_grp, &ipsec_lck_attr);
	lck_mtx_init(&pcb->ipsec_kpipe_decrypt_lock, &ipsec_lck_grp, &ipsec_lck_attr);
#endif // IPSEC_NEXUS

	return 0;
}
3432
/*
 * Kernel-control connect handler: creates the ipsecN interface for this
 * pcb.  Depending on configuration this is either a Skywalk-native netif
 * (with optional kpipe channels and a flowswitch) or a classic ifnet.
 * On most failures the pcb is freed and *unitinfo cleared; after a
 * successful ifnet attach, teardown is instead deferred to
 * ipsec_detached().
 */
static errno_t
ipsec_ctl_connect(kern_ctl_ref kctlref,
    struct sockaddr_ctl *sac,
    void **unitinfo)
{
	struct ifnet_init_eparams ipsec_init = {};
	errno_t result = 0;

	if (*unitinfo == NULL) {
		(void)ipsec_ctl_bind(kctlref, sac, unitinfo);
	}

	struct ipsec_pcb *pcb = *unitinfo;
	if (pcb == NULL) {
		return EINVAL;
	}

	/* Handle case where ipsec_ctl_setup() was called, but ipsec_ctl_bind() was not */
	if (pcb->ipsec_ctlref == NULL) {
		(void)ipsec_ctl_bind(kctlref, sac, unitinfo);
	}

	snprintf(pcb->ipsec_if_xname, sizeof(pcb->ipsec_if_xname), "ipsec%d", pcb->ipsec_unit - 1);
	snprintf(pcb->ipsec_unique_name, sizeof(pcb->ipsec_unique_name), "ipsecid%d", pcb->ipsec_unique_id - 1);
	os_log(OS_LOG_DEFAULT, "ipsec_ctl_connect: creating interface %s (id %s)\n", pcb->ipsec_if_xname, pcb->ipsec_unique_name);

	/* Create the interface */
	bzero(&ipsec_init, sizeof(ipsec_init));
	ipsec_init.ver = IFNET_INIT_CURRENT_VERSION;
	ipsec_init.len = sizeof(ipsec_init);

#if IPSEC_NEXUS
	if (pcb->ipsec_use_netif) {
		ipsec_init.flags = (IFNET_INIT_SKYWALK_NATIVE | IFNET_INIT_NX_NOAUTO);
	} else
#endif // IPSEC_NEXUS
	{
		/* Legacy path uses the driver start callback. */
		ipsec_init.flags = IFNET_INIT_NX_NOAUTO;
		ipsec_init.start = ipsec_start;
	}
	ipsec_init.name = "ipsec";
	ipsec_init.unit = pcb->ipsec_unit - 1;
	ipsec_init.uniqueid = pcb->ipsec_unique_name;
	ipsec_init.uniqueid_len = (uint32_t)strlen(pcb->ipsec_unique_name);
	ipsec_init.family = IFNET_FAMILY_IPSEC;
	ipsec_init.type = IFT_OTHER;
	ipsec_init.demux = ipsec_demux;
	ipsec_init.add_proto = ipsec_add_proto;
	ipsec_init.del_proto = ipsec_del_proto;
	ipsec_init.softc = pcb;
	ipsec_init.ioctl = ipsec_ioctl;
	ipsec_init.free = ipsec_detached;

#if IPSEC_NEXUS
	/* We don't support kpipes without a netif */
	if (pcb->ipsec_kpipe_count && !pcb->ipsec_use_netif) {
		result = ENOTSUP;
		os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - kpipe requires netif: failed %d\n", result);
		ipsec_free_pcb(pcb, false);
		*unitinfo = NULL;
		return result;
	}

	if (if_ipsec_debug != 0) {
		printf("%s: %s%d use_netif %d kpipe_count %d slot_size %u ring_size %u "
		    "kpipe_tx_ring_size %u kpipe_rx_ring_size %u\n",
		    __func__,
		    ipsec_init.name, ipsec_init.unit,
		    pcb->ipsec_use_netif,
		    pcb->ipsec_kpipe_count,
		    pcb->ipsec_slot_size,
		    pcb->ipsec_netif_ring_size,
		    pcb->ipsec_kpipe_tx_ring_size,
		    pcb->ipsec_kpipe_rx_ring_size);
	}
	if (pcb->ipsec_use_netif) {
		if (pcb->ipsec_kpipe_count) {
			result = ipsec_enable_channel(pcb, current_proc());
			if (result) {
				os_log_error(OS_LOG_DEFAULT, "%s: %s failed to enable channels\n",
				    __func__, pcb->ipsec_if_xname);
				ipsec_free_pcb(pcb, false);
				*unitinfo = NULL;
				return result;
			}
		}

		result = ipsec_nexus_ifattach(pcb, &ipsec_init, &pcb->ipsec_ifp);
		if (result != 0) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - ipsec_nexus_ifattach failed: %d\n", result);
			ipsec_free_pcb(pcb, false);
			*unitinfo = NULL;
			return result;
		}

		result = ipsec_flowswitch_attach(pcb);
		if (result != 0) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - ipsec_flowswitch_attach failed: %d\n", result);
			// Do not call ipsec_free_pcb(). We will be attached already, and will be freed later
			// in ipsec_detached().
			*unitinfo = NULL;
			return result;
		}

		/* Attach to bpf */
		bpfattach(pcb->ipsec_ifp, DLT_RAW, 0);
	} else
#endif // IPSEC_NEXUS
	{
		result = ifnet_allocate_extended(&ipsec_init, &pcb->ipsec_ifp);
		if (result != 0) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - ifnet_allocate failed: %d\n", result);
			ipsec_free_pcb(pcb, false);
			*unitinfo = NULL;
			return result;
		}
		ipsec_ifnet_set_attrs(pcb->ipsec_ifp);

		/* Attach the interface */
		result = ifnet_attach(pcb->ipsec_ifp, NULL);
		if (result != 0) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - ifnet_attach failed: %d\n", result);
			ifnet_release(pcb->ipsec_ifp);
			ipsec_free_pcb(pcb, false);
			*unitinfo = NULL;
			return result;
		}

		/* Attach to bpf */
		bpfattach(pcb->ipsec_ifp, DLT_NULL, 0);
	}

#if IPSEC_NEXUS
	/*
	 * Mark the data path as ready.
	 * If kpipe nexus is being used then the data path is marked ready only when a kpipe channel is connected.
	 */
	if (pcb->ipsec_kpipe_count == 0) {
		lck_mtx_lock(&pcb->ipsec_pcb_data_move_lock);
		IPSEC_SET_DATA_PATH_READY(pcb);
		lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
	}
#endif

	/* The interface's resources are allocated; mark it as running. */
	ifnet_set_flags(pcb->ipsec_ifp, IFF_RUNNING, IFF_RUNNING);

	return 0;
}
3582
3583 static errno_t
ipsec_detach_ip(ifnet_t interface,protocol_family_t protocol,socket_t pf_socket)3584 ipsec_detach_ip(ifnet_t interface,
3585 protocol_family_t protocol,
3586 socket_t pf_socket)
3587 {
3588 errno_t result = EPROTONOSUPPORT;
3589
3590 /* Attempt a detach */
3591 if (protocol == PF_INET) {
3592 struct ifreq ifr;
3593
3594 bzero(&ifr, sizeof(ifr));
3595 snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s%d",
3596 ifnet_name(interface), ifnet_unit(interface));
3597
3598 result = sock_ioctl(pf_socket, SIOCPROTODETACH, &ifr);
3599 } else if (protocol == PF_INET6) {
3600 struct in6_ifreq ifr6;
3601
3602 bzero(&ifr6, sizeof(ifr6));
3603 snprintf(ifr6.ifr_name, sizeof(ifr6.ifr_name), "%s%d",
3604 ifnet_name(interface), ifnet_unit(interface));
3605
3606 result = sock_ioctl(pf_socket, SIOCPROTODETACH_IN6, &ifr6);
3607 }
3608
3609 return result;
3610 }
3611
/*
 * Remove a single address from the interface via SIOCDIFADDR (v4) or
 * SIOCDIFADDR_IN6 (v6) on `pf_socket'.  Failures are logged but not
 * returned; this is a best-effort step of interface teardown.
 */
static void
ipsec_remove_address(ifnet_t interface,
    protocol_family_t protocol,
    ifaddr_t address,
    socket_t pf_socket)
{
	errno_t result = 0;

	/* Attempt a detach */
	if (protocol == PF_INET) {
		struct ifreq ifr;

		bzero(&ifr, sizeof(ifr));
		snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s%d",
		    ifnet_name(interface), ifnet_unit(interface));
		result = ifaddr_address(address, &ifr.ifr_addr, sizeof(ifr.ifr_addr));
		if (result != 0) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_remove_address - ifaddr_address failed: %d", result);
		} else {
			result = sock_ioctl(pf_socket, SIOCDIFADDR, &ifr);
			if (result != 0) {
				os_log_error(OS_LOG_DEFAULT, "ipsec_remove_address - SIOCDIFADDR failed: %d", result);
			}
		}
	} else if (protocol == PF_INET6) {
		struct in6_ifreq ifr6;

		bzero(&ifr6, sizeof(ifr6));
		snprintf(ifr6.ifr_name, sizeof(ifr6.ifr_name), "%s%d",
		    ifnet_name(interface), ifnet_unit(interface));
		result = ifaddr_address(address, (struct sockaddr*)&ifr6.ifr_addr,
		    sizeof(ifr6.ifr_addr));
		if (result != 0) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_remove_address - ifaddr_address failed (v6): %d",
			    result);
		} else {
			result = sock_ioctl(pf_socket, SIOCDIFADDR_IN6, &ifr6);
			if (result != 0) {
				os_log_error(OS_LOG_DEFAULT, "ipsec_remove_address - SIOCDIFADDR_IN6 failed: %d",
				    result);
			}
		}
	}
}
3656
3657 static void
ipsec_cleanup_family(ifnet_t interface,protocol_family_t protocol)3658 ipsec_cleanup_family(ifnet_t interface,
3659 protocol_family_t protocol)
3660 {
3661 errno_t result = 0;
3662 socket_t pf_socket = NULL;
3663 ifaddr_t *addresses = NULL;
3664 int i;
3665
3666 if (protocol != PF_INET && protocol != PF_INET6) {
3667 os_log_error(OS_LOG_DEFAULT, "ipsec_cleanup_family - invalid protocol family %d\n", protocol);
3668 return;
3669 }
3670
3671 /* Create a socket for removing addresses and detaching the protocol */
3672 result = sock_socket(protocol, SOCK_DGRAM, 0, NULL, NULL, &pf_socket);
3673 if (result != 0) {
3674 if (result != EAFNOSUPPORT) {
3675 os_log_error(OS_LOG_DEFAULT, "ipsec_cleanup_family - failed to create %s socket: %d\n",
3676 protocol == PF_INET ? "IP" : "IPv6", result);
3677 }
3678 goto cleanup;
3679 }
3680
3681 /* always set SS_PRIV, we want to close and detach regardless */
3682 sock_setpriv(pf_socket, 1);
3683
3684 result = ipsec_detach_ip(interface, protocol, pf_socket);
3685 if (result == 0 || result == ENXIO) {
3686 /* We are done! We either detached or weren't attached. */
3687 goto cleanup;
3688 } else if (result != EBUSY) {
3689 /* Uh, not really sure what happened here... */
3690 os_log_error(OS_LOG_DEFAULT, "ipsec_cleanup_family - ipsec_detach_ip failed: %d\n", result);
3691 goto cleanup;
3692 }
3693
3694 /*
3695 * At this point, we received an EBUSY error. This means there are
3696 * addresses attached. We should detach them and then try again.
3697 */
3698 result = ifnet_get_address_list_family(interface, &addresses, (sa_family_t)protocol);
3699 if (result != 0) {
3700 os_log_error(OS_LOG_DEFAULT, "fnet_get_address_list_family(%s%d, 0xblah, %s) - failed: %d\n",
3701 ifnet_name(interface), ifnet_unit(interface),
3702 protocol == PF_INET ? "PF_INET" : "PF_INET6", result);
3703 goto cleanup;
3704 }
3705
3706 for (i = 0; addresses[i] != 0; i++) {
3707 ipsec_remove_address(interface, protocol, addresses[i], pf_socket);
3708 }
3709 ifnet_free_address_list(addresses);
3710 addresses = NULL;
3711
3712 /*
3713 * The addresses should be gone, we should try the remove again.
3714 */
3715 result = ipsec_detach_ip(interface, protocol, pf_socket);
3716 if (result != 0 && result != ENXIO) {
3717 os_log_error(OS_LOG_DEFAULT, "ipsec_cleanup_family - ipsec_detach_ip failed: %d\n", result);
3718 }
3719
3720 cleanup:
3721 if (pf_socket != NULL) {
3722 sock_close(pf_socket);
3723 }
3724
3725 if (addresses != NULL) {
3726 ifnet_free_address_list(addresses);
3727 }
3728 }
3729
/*
 * Kernel-control disconnect handler: tears the interface down.  Drains
 * the data path, detaches kpipe channels under the pcb lock, then walks
 * one of three paths: (1) Skywalk netif — quiesce, detach the ifnet
 * (panic on failure), strip addresses/protocols, free channels and nexus
 * state; (2) classic ifnet — strip addresses/protocols and detach, with
 * final cleanup deferred to ipsec_detached(); (3) bound-but-never-
 * connected — just free the pcb.  Always returns 0.
 */
static errno_t
ipsec_ctl_disconnect(__unused kern_ctl_ref kctlref,
    __unused u_int32_t unit,
    void *unitinfo)
{
	struct ipsec_pcb *pcb = unitinfo;
	ifnet_t ifp = NULL;
	errno_t result = 0;

	if (pcb == NULL) {
		return EINVAL;
	}

	/* Wait until all threads in the data paths are done. */
	ipsec_wait_data_move_drain(pcb);

#if IPSEC_NEXUS
	// Tell the nexus to stop all rings
	if (pcb->ipsec_netif_nexus != NULL) {
		kern_nexus_stop(pcb->ipsec_netif_nexus);
	}
#endif // IPSEC_NEXUS

	lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);

#if IPSEC_NEXUS
	if (if_ipsec_debug != 0) {
		printf("ipsec_ctl_disconnect: detaching interface %s (id %s)\n",
		    pcb->ipsec_if_xname, pcb->ipsec_unique_name);
	}

	/* Capture kpipe channels under the lock; freed later without it. */
	struct ipsec_detached_channels dc;
	ipsec_detach_channels(pcb, &dc);
#endif // IPSEC_NEXUS

	pcb->ipsec_ctlref = NULL;

	ifp = pcb->ipsec_ifp;
	if (ifp != NULL) {
#if IPSEC_NEXUS
		if (pcb->ipsec_netif_nexus != NULL) {
			/*
			 * Quiesce the interface and flush any pending outbound packets.
			 */
			if_down(ifp);

			/*
			 * Suspend data movement and wait for IO threads to exit.
			 * We can't rely on the logic in dlil_quiesce_and_detach_nexuses() to
			 * do this because ipsec nexuses are attached/detached separately.
			 */
			ifnet_datamov_suspend_and_drain(ifp);
			if ((result = ifnet_detach(ifp)) != 0) {
				panic("ipsec_ctl_disconnect - ifnet_detach failed: %d", result);
				/* NOT REACHED */
			}

			/*
			 * We want to do everything in our power to ensure that the interface
			 * really goes away when the socket is closed. We must remove IP/IPv6
			 * addresses and detach the protocols. Finally, we can remove and
			 * release the interface.
			 */
			key_delsp_for_ipsec_if(ifp);

			ipsec_cleanup_family(ifp, AF_INET);
			ipsec_cleanup_family(ifp, AF_INET6);

			lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);

			ipsec_free_channels(&dc);

			ipsec_nexus_detach(pcb);

			/* Decrement refcnt added by ifnet_datamov_suspend_and_drain(). */
			ifnet_datamov_resume(ifp);
		} else
#endif // IPSEC_NEXUS
		{
			lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);

#if IPSEC_NEXUS
			ipsec_free_channels(&dc);
#endif // IPSEC_NEXUS

			/*
			 * We want to do everything in our power to ensure that the interface
			 * really goes away when the socket is closed. We must remove IP/IPv6
			 * addresses and detach the protocols. Finally, we can remove and
			 * release the interface.
			 */
			key_delsp_for_ipsec_if(ifp);

			ipsec_cleanup_family(ifp, AF_INET);
			ipsec_cleanup_family(ifp, AF_INET6);

			/*
			 * Detach now; ipsec_detach() will be called asynchronously once
			 * the I/O reference count drops to 0. There we will invoke
			 * ifnet_release().
			 */
			if ((result = ifnet_detach(ifp)) != 0) {
				os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_disconnect - ifnet_detach failed: %d\n", result);
			}
		}
	} else {
		// Bound, but not connected
		lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
		ipsec_free_pcb(pcb, false);
	}

	return 0;
}
3843
3844 static errno_t
ipsec_ctl_send(__unused kern_ctl_ref kctlref,__unused u_int32_t unit,__unused void * unitinfo,mbuf_t m,__unused int flags)3845 ipsec_ctl_send(__unused kern_ctl_ref kctlref,
3846 __unused u_int32_t unit,
3847 __unused void *unitinfo,
3848 mbuf_t m,
3849 __unused int flags)
3850 {
3851 /* Receive messages from the control socket. Currently unused. */
3852 mbuf_freem(m);
3853 return 0;
3854 }
3855
3856 static errno_t
ipsec_ctl_setopt(__unused kern_ctl_ref kctlref,__unused u_int32_t unit,void * unitinfo,int opt,void * data,size_t len)3857 ipsec_ctl_setopt(__unused kern_ctl_ref kctlref,
3858 __unused u_int32_t unit,
3859 void *unitinfo,
3860 int opt,
3861 void *data,
3862 size_t len)
3863 {
3864 errno_t result = 0;
3865 struct ipsec_pcb *pcb = unitinfo;
3866 if (pcb == NULL) {
3867 return EINVAL;
3868 }
3869
3870 /* check for privileges for privileged options */
3871 switch (opt) {
3872 case IPSEC_OPT_FLAGS:
3873 case IPSEC_OPT_EXT_IFDATA_STATS:
3874 case IPSEC_OPT_SET_DELEGATE_INTERFACE:
3875 case IPSEC_OPT_OUTPUT_TRAFFIC_CLASS:
3876 case IPSEC_OPT_OUTPUT_DSCP_MAPPING:
3877 if (kauth_cred_issuser(kauth_cred_get()) == 0) {
3878 return EPERM;
3879 }
3880 break;
3881 }
3882
3883 switch (opt) {
3884 case IPSEC_OPT_FLAGS: {
3885 if (len != sizeof(u_int32_t)) {
3886 result = EMSGSIZE;
3887 } else {
3888 pcb->ipsec_external_flags = *(u_int32_t *)data;
3889 }
3890 break;
3891 }
3892
3893 case IPSEC_OPT_EXT_IFDATA_STATS: {
3894 if (len != sizeof(int)) {
3895 result = EMSGSIZE;
3896 break;
3897 }
3898 if (pcb->ipsec_ifp == NULL) {
3899 // Only can set after connecting
3900 result = EINVAL;
3901 break;
3902 }
3903 pcb->ipsec_ext_ifdata_stats = (*(int *)data) ? 1 : 0;
3904 break;
3905 }
3906
3907 case IPSEC_OPT_INC_IFDATA_STATS_IN:
3908 case IPSEC_OPT_INC_IFDATA_STATS_OUT: {
3909 struct ipsec_stats_param *utsp = (struct ipsec_stats_param *)data;
3910
3911 if (utsp == NULL || len < sizeof(struct ipsec_stats_param)) {
3912 result = EINVAL;
3913 break;
3914 }
3915 if (pcb->ipsec_ifp == NULL) {
3916 // Only can set after connecting
3917 result = EINVAL;
3918 break;
3919 }
3920 if (!pcb->ipsec_ext_ifdata_stats) {
3921 result = EINVAL;
3922 break;
3923 }
3924 if (opt == IPSEC_OPT_INC_IFDATA_STATS_IN) {
3925 ifnet_stat_increment_in(pcb->ipsec_ifp, (uint32_t)utsp->utsp_packets,
3926 (uint32_t)utsp->utsp_bytes, (uint32_t)utsp->utsp_errors);
3927 } else {
3928 ifnet_stat_increment_out(pcb->ipsec_ifp, (uint32_t)utsp->utsp_packets,
3929 (uint32_t)utsp->utsp_bytes, (uint32_t)utsp->utsp_errors);
3930 }
3931 break;
3932 }
3933
3934 case IPSEC_OPT_SET_DELEGATE_INTERFACE: {
3935 ifnet_t del_ifp = NULL;
3936 char name[IFNAMSIZ];
3937
3938 if (len > IFNAMSIZ - 1) {
3939 result = EMSGSIZE;
3940 break;
3941 }
3942 if (pcb->ipsec_ifp == NULL) {
3943 // Only can set after connecting
3944 result = EINVAL;
3945 break;
3946 }
3947 if (len != 0) { /* if len==0, del_ifp will be NULL causing the delegate to be removed */
3948 bcopy(data, name, len);
3949 name[len] = 0;
3950 result = ifnet_find_by_name(name, &del_ifp);
3951 }
3952 if (result == 0) {
3953 os_log_error(OS_LOG_DEFAULT, "%s IPSEC_OPT_SET_DELEGATE_INTERFACE %s to %s\n",
3954 __func__, pcb->ipsec_ifp->if_xname,
3955 del_ifp ? del_ifp->if_xname : "NULL");
3956
3957 result = ifnet_set_delegate(pcb->ipsec_ifp, del_ifp);
3958 if (del_ifp) {
3959 ifnet_release(del_ifp);
3960 }
3961 }
3962 break;
3963 }
3964
3965 case IPSEC_OPT_OUTPUT_TRAFFIC_CLASS: {
3966 if (len != sizeof(int)) {
3967 result = EMSGSIZE;
3968 break;
3969 }
3970 if (pcb->ipsec_ifp == NULL) {
3971 // Only can set after connecting
3972 result = EINVAL;
3973 break;
3974 }
3975 mbuf_svc_class_t output_service_class = so_tc2msc(*(int *)data);
3976 if (output_service_class == MBUF_SC_UNSPEC) {
3977 pcb->ipsec_output_service_class = MBUF_SC_OAM;
3978 } else {
3979 pcb->ipsec_output_service_class = output_service_class;
3980 }
3981 os_log_error(OS_LOG_DEFAULT, "%s IPSEC_OPT_OUTPUT_TRAFFIC_CLASS %s svc %d\n",
3982 __func__, pcb->ipsec_ifp->if_xname,
3983 pcb->ipsec_output_service_class);
3984 break;
3985 }
3986
3987 #if IPSEC_NEXUS
3988 case IPSEC_OPT_ENABLE_CHANNEL: {
3989 if (len != sizeof(int)) {
3990 result = EMSGSIZE;
3991 break;
3992 }
3993 if (pcb->ipsec_ifp != NULL) {
3994 // Only can set before connecting
3995 result = EINVAL;
3996 break;
3997 }
3998 if ((*(int *)data) != 0 &&
3999 (*(int *)data) != 1 &&
4000 (*(int *)data) != IPSEC_IF_WMM_RING_COUNT) {
4001 result = EINVAL;
4002 break;
4003 }
4004 lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
4005 pcb->ipsec_kpipe_count = *(int *)data;
4006 lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
4007 break;
4008 }
4009
4010 case IPSEC_OPT_CHANNEL_BIND_PID: {
4011 if (len != sizeof(pid_t)) {
4012 result = EMSGSIZE;
4013 break;
4014 }
4015 if (pcb->ipsec_ifp != NULL) {
4016 // Only can set before connecting
4017 result = EINVAL;
4018 break;
4019 }
4020 lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
4021 pcb->ipsec_kpipe_pid = *(pid_t *)data;
4022 lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
4023 break;
4024 }
4025
4026 case IPSEC_OPT_CHANNEL_BIND_UUID: {
4027 if (len != sizeof(uuid_t)) {
4028 result = EMSGSIZE;
4029 break;
4030 }
4031 if (pcb->ipsec_ifp != NULL) {
4032 // Only can set before connecting
4033 result = EINVAL;
4034 break;
4035 }
4036 lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
4037 uuid_copy(pcb->ipsec_kpipe_proc_uuid, *((uuid_t *)data));
4038 lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
4039 break;
4040 }
4041
4042 case IPSEC_OPT_ENABLE_FLOWSWITCH: {
4043 if (len != sizeof(int)) {
4044 result = EMSGSIZE;
4045 break;
4046 }
4047 if (pcb->ipsec_ifp == NULL) {
4048 // Only can set after connecting
4049 result = EINVAL;
4050 break;
4051 }
4052 if (!if_is_fsw_transport_netagent_enabled()) {
4053 result = ENOTSUP;
4054 break;
4055 }
4056 if (uuid_is_null(pcb->ipsec_nx.fsw_agent)) {
4057 result = ENOENT;
4058 break;
4059 }
4060
4061 uint32_t flags = netagent_get_flags(pcb->ipsec_nx.fsw_agent);
4062
4063 if (*(int *)data) {
4064 flags |= (NETAGENT_FLAG_NEXUS_PROVIDER |
4065 NETAGENT_FLAG_NEXUS_LISTENER);
4066 result = netagent_set_flags(pcb->ipsec_nx.fsw_agent, flags);
4067 pcb->ipsec_needs_netagent = true;
4068 } else {
4069 pcb->ipsec_needs_netagent = false;
4070 flags &= ~(NETAGENT_FLAG_NEXUS_PROVIDER |
4071 NETAGENT_FLAG_NEXUS_LISTENER);
4072 result = netagent_set_flags(pcb->ipsec_nx.fsw_agent, flags);
4073 }
4074 break;
4075 }
4076
4077 case IPSEC_OPT_INPUT_FRAG_SIZE: {
4078 if (len != sizeof(u_int32_t)) {
4079 result = EMSGSIZE;
4080 break;
4081 }
4082 u_int32_t input_frag_size = *(u_int32_t *)data;
4083 if (input_frag_size <= sizeof(struct ip6_hdr)) {
4084 pcb->ipsec_frag_size_set = FALSE;
4085 pcb->ipsec_input_frag_size = 0;
4086 } else {
4087 pcb->ipsec_frag_size_set = TRUE;
4088 pcb->ipsec_input_frag_size = input_frag_size;
4089 }
4090 break;
4091 }
4092 case IPSEC_OPT_ENABLE_NETIF: {
4093 if (len != sizeof(int)) {
4094 result = EMSGSIZE;
4095 break;
4096 }
4097 if (pcb->ipsec_ifp != NULL) {
4098 // Only can set before connecting
4099 result = EINVAL;
4100 break;
4101 }
4102 lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
4103 pcb->ipsec_use_netif = !!(*(int *)data);
4104 lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
4105 break;
4106 }
4107 case IPSEC_OPT_SLOT_SIZE: {
4108 if (len != sizeof(u_int32_t)) {
4109 result = EMSGSIZE;
4110 break;
4111 }
4112 if (pcb->ipsec_ifp != NULL) {
4113 // Only can set before connecting
4114 result = EINVAL;
4115 break;
4116 }
4117 u_int32_t slot_size = *(u_int32_t *)data;
4118 if (slot_size < IPSEC_IF_MIN_SLOT_SIZE ||
4119 slot_size > IPSEC_IF_MAX_SLOT_SIZE) {
4120 return EINVAL;
4121 }
4122 pcb->ipsec_slot_size = slot_size;
4123 if (if_ipsec_debug != 0) {
4124 printf("%s: IPSEC_OPT_SLOT_SIZE %u\n", __func__, slot_size);
4125 }
4126 break;
4127 }
4128 case IPSEC_OPT_NETIF_RING_SIZE: {
4129 if (len != sizeof(u_int32_t)) {
4130 result = EMSGSIZE;
4131 break;
4132 }
4133 if (pcb->ipsec_ifp != NULL) {
4134 // Only can set before connecting
4135 result = EINVAL;
4136 break;
4137 }
4138 u_int32_t ring_size = *(u_int32_t *)data;
4139 if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
4140 ring_size > IPSEC_IF_MAX_RING_SIZE) {
4141 return EINVAL;
4142 }
4143 pcb->ipsec_netif_ring_size = ring_size;
4144 if (if_ipsec_debug != 0) {
4145 printf("%s: IPSEC_OPT_NETIF_RING_SIZE %u\n", __func__, ring_size);
4146 }
4147 break;
4148 }
4149 case IPSEC_OPT_TX_FSW_RING_SIZE: {
4150 if (len != sizeof(u_int32_t)) {
4151 result = EMSGSIZE;
4152 break;
4153 }
4154 if (pcb->ipsec_ifp != NULL) {
4155 // Only can set before connecting
4156 result = EINVAL;
4157 break;
4158 }
4159 u_int32_t ring_size = *(u_int32_t *)data;
4160 if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
4161 ring_size > IPSEC_IF_MAX_RING_SIZE) {
4162 return EINVAL;
4163 }
4164 pcb->ipsec_tx_fsw_ring_size = ring_size;
4165 if (if_ipsec_debug != 0) {
4166 printf("%s: IPSEC_OPT_TX_FSW_RING_SIZE %u\n", __func__, ring_size);
4167 }
4168 break;
4169 }
4170 case IPSEC_OPT_RX_FSW_RING_SIZE: {
4171 if (len != sizeof(u_int32_t)) {
4172 result = EMSGSIZE;
4173 break;
4174 }
4175 if (pcb->ipsec_ifp != NULL) {
4176 // Only can set before connecting
4177 result = EINVAL;
4178 break;
4179 }
4180 u_int32_t ring_size = *(u_int32_t *)data;
4181 if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
4182 ring_size > IPSEC_IF_MAX_RING_SIZE) {
4183 return EINVAL;
4184 }
4185 pcb->ipsec_rx_fsw_ring_size = ring_size;
4186 if (if_ipsec_debug != 0) {
4187 printf("%s: IPSEC_OPT_TX_FSW_RING_SIZE %u\n", __func__, ring_size);
4188 }
4189 break;
4190 }
4191 case IPSEC_OPT_KPIPE_TX_RING_SIZE: {
4192 if (len != sizeof(u_int32_t)) {
4193 result = EMSGSIZE;
4194 break;
4195 }
4196 if (pcb->ipsec_ifp != NULL) {
4197 // Only can set before connecting
4198 result = EINVAL;
4199 break;
4200 }
4201 u_int32_t ring_size = *(u_int32_t *)data;
4202 if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
4203 ring_size > IPSEC_IF_MAX_RING_SIZE) {
4204 return EINVAL;
4205 }
4206 pcb->ipsec_kpipe_tx_ring_size = ring_size;
4207 if (if_ipsec_debug != 0) {
4208 printf("%s: IPSEC_OPT_KPIPE_TX_RING_SIZE %u\n", __func__, ring_size);
4209 }
4210 break;
4211 }
4212 case IPSEC_OPT_KPIPE_RX_RING_SIZE: {
4213 if (len != sizeof(u_int32_t)) {
4214 result = EMSGSIZE;
4215 break;
4216 }
4217 if (pcb->ipsec_ifp != NULL) {
4218 // Only can set before connecting
4219 result = EINVAL;
4220 break;
4221 }
4222 u_int32_t ring_size = *(u_int32_t *)data;
4223 if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
4224 ring_size > IPSEC_IF_MAX_RING_SIZE) {
4225 return EINVAL;
4226 }
4227 pcb->ipsec_kpipe_rx_ring_size = ring_size;
4228 if (if_ipsec_debug != 0) {
4229 printf("%s: IPSEC_OPT_KPIPE_RX_RING_SIZE %u\n", __func__, ring_size);
4230 }
4231 break;
4232 }
4233 case IPSEC_OPT_OUTPUT_DSCP_MAPPING: {
4234 if (len != sizeof(int)) {
4235 result = EMSGSIZE;
4236 break;
4237 }
4238 if (pcb->ipsec_ifp == NULL) {
4239 // Only can set after connecting
4240 result = EINVAL;
4241 break;
4242 }
4243
4244 ipsec_dscp_mapping_t output_dscp_mapping = (ipsec_dscp_mapping_t)(*(int *)data);
4245 if (output_dscp_mapping > IPSEC_DSCP_MAPPING_LEGACY) {
4246 return EINVAL;
4247 }
4248
4249 pcb->ipsec_output_dscp_mapping = output_dscp_mapping;
4250
4251 os_log(OS_LOG_DEFAULT, "%s IPSEC_OPT_OUTPUT_DSCP_MAPPING %s DSCP %d\n",
4252 __func__, pcb->ipsec_ifp->if_xname,
4253 pcb->ipsec_output_dscp_mapping);
4254 break;
4255 }
4256
4257 #endif // IPSEC_NEXUS
4258
4259 default: {
4260 result = ENOPROTOOPT;
4261 break;
4262 }
4263 }
4264
4265 return result;
4266 }
4267
/*
 * Kernel-control getsockopt handler for an ipsec interface PCB.
 *
 * `unitinfo` is the PCB; `opt` selects the IPSEC_OPT_* option; the
 * value is copied into `data` and `*len` may be updated (IFNAME).
 * Returns 0 on success, EMSGSIZE on bad buffer length, EINVAL on
 * wrong connection state, ENXIO/ENOENT on missing state, and
 * ENOPROTOOPT for unknown options.
 */
static errno_t
ipsec_ctl_getopt(__unused kern_ctl_ref kctlref,
    __unused u_int32_t unit,
    void *unitinfo,
    int opt,
    void *data,
    size_t *len)
{
	errno_t result = 0;
	struct ipsec_pcb *pcb = unitinfo;
	if (pcb == NULL) {
		return EINVAL;
	}

	switch (opt) {
	case IPSEC_OPT_FLAGS: {
		if (*len != sizeof(u_int32_t)) {
			result = EMSGSIZE;
		} else {
			*(u_int32_t *)data = pcb->ipsec_external_flags;
		}
		break;
	}

	case IPSEC_OPT_EXT_IFDATA_STATS: {
		if (*len != sizeof(int)) {
			result = EMSGSIZE;
		} else {
			*(int *)data = (pcb->ipsec_ext_ifdata_stats) ? 1 : 0;
		}
		break;
	}

	case IPSEC_OPT_IFNAME: {
		/* Caller's buffer must fit the name plus NUL terminator. */
		if (*len < MIN(strlen(pcb->ipsec_if_xname) + 1, sizeof(pcb->ipsec_if_xname))) {
			result = EMSGSIZE;
		} else {
			if (pcb->ipsec_ifp == NULL) {
				// Only can get after connecting
				result = EINVAL;
				break;
			}
			/* Report back the copied length including the NUL. */
			*len = scnprintf(data, *len, "%s", pcb->ipsec_if_xname) + 1;
		}
		break;
	}

	case IPSEC_OPT_OUTPUT_TRAFFIC_CLASS: {
		if (*len != sizeof(int)) {
			result = EMSGSIZE;
		} else {
			/* Convert the stored mbuf service class back to a socket TC. */
			*(int *)data = so_svc2tc(pcb->ipsec_output_service_class);
		}
		break;
	}

#if IPSEC_NEXUS

	case IPSEC_OPT_ENABLE_CHANNEL: {
		if (*len != sizeof(int)) {
			result = EMSGSIZE;
		} else {
			lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
			*(int *)data = pcb->ipsec_kpipe_count;
			lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		}
		break;
	}

	case IPSEC_OPT_CHANNEL_BIND_PID: {
		if (*len != sizeof(pid_t)) {
			result = EMSGSIZE;
		} else {
			lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
			*(pid_t *)data = pcb->ipsec_kpipe_pid;
			lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		}
		break;
	}

	case IPSEC_OPT_CHANNEL_BIND_UUID: {
		if (*len != sizeof(uuid_t)) {
			result = EMSGSIZE;
		} else {
			lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
			uuid_copy(*((uuid_t *)data), pcb->ipsec_kpipe_proc_uuid);
			lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		}
		break;
	}

	case IPSEC_OPT_ENABLE_FLOWSWITCH: {
		if (*len != sizeof(int)) {
			result = EMSGSIZE;
		} else {
			/* NOTE(review): no NULL check on ipsec_ifp here, unlike
			 * other post-connect options — presumably if_check_netagent
			 * tolerates a NULL ifp; verify. */
			*(int *)data = if_check_netagent(pcb->ipsec_ifp, pcb->ipsec_nx.fsw_agent);
		}
		break;
	}

	case IPSEC_OPT_ENABLE_NETIF: {
		if (*len != sizeof(int)) {
			result = EMSGSIZE;
		} else {
			lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
			*(int *)data = !!pcb->ipsec_use_netif;
			lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		}
		break;
	}

	case IPSEC_OPT_GET_CHANNEL_UUID: {
		/* Copy out one UUID per allocated kpipe; the expected buffer
		 * length scales with ipsec_kpipe_count. */
		lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
		if (!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
			result = ENXIO;
		} else if (*len != sizeof(uuid_t) * pcb->ipsec_kpipe_count) {
			result = EMSGSIZE;
		} else {
			for (unsigned int i = 0; i < pcb->ipsec_kpipe_count; i++) {
				uuid_copy(((uuid_t *)data)[i], pcb->ipsec_kpipe_uuid[i]);
			}
		}
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		break;
	}

	case IPSEC_OPT_INPUT_FRAG_SIZE: {
		if (*len != sizeof(u_int32_t)) {
			result = EMSGSIZE;
		} else {
			*(u_int32_t *)data = pcb->ipsec_input_frag_size;
		}
		break;
	}
	case IPSEC_OPT_SLOT_SIZE: {
		if (*len != sizeof(u_int32_t)) {
			result = EMSGSIZE;
		} else {
			*(u_int32_t *)data = pcb->ipsec_slot_size;
		}
		break;
	}
	case IPSEC_OPT_NETIF_RING_SIZE: {
		if (*len != sizeof(u_int32_t)) {
			result = EMSGSIZE;
		} else {
			*(u_int32_t *)data = pcb->ipsec_netif_ring_size;
		}
		break;
	}
	case IPSEC_OPT_TX_FSW_RING_SIZE: {
		if (*len != sizeof(u_int32_t)) {
			result = EMSGSIZE;
		} else {
			*(u_int32_t *)data = pcb->ipsec_tx_fsw_ring_size;
		}
		break;
	}
	case IPSEC_OPT_RX_FSW_RING_SIZE: {
		if (*len != sizeof(u_int32_t)) {
			result = EMSGSIZE;
		} else {
			*(u_int32_t *)data = pcb->ipsec_rx_fsw_ring_size;
		}
		break;
	}
	case IPSEC_OPT_KPIPE_TX_RING_SIZE: {
		if (*len != sizeof(u_int32_t)) {
			result = EMSGSIZE;
		} else {
			*(u_int32_t *)data = pcb->ipsec_kpipe_tx_ring_size;
		}
		break;
	}
	case IPSEC_OPT_KPIPE_RX_RING_SIZE: {
		if (*len != sizeof(u_int32_t)) {
			result = EMSGSIZE;
		} else {
			*(u_int32_t *)data = pcb->ipsec_kpipe_rx_ring_size;
		}
		break;
	}

#endif // IPSEC_NEXUS

	default: {
		result = ENOPROTOOPT;
		break;
	}
	}

	return result;
}
4461
4462 /* Network Interface functions */
/*
 * Interface output routine for an ipsec interface.
 *
 * Takes ownership of `data`: on every path the mbuf is either passed
 * on (ip_output/ip6_output/ipsec*_interface_output consume it) or
 * freed at ipsec_output_err. Returns 0 on success, ENOBUFS when flow
 * control kicks in, or an errno on validation/encryption failure.
 */
static errno_t
ipsec_output(ifnet_t interface,
    mbuf_t data)
{
	struct ipsec_pcb *pcb = ifnet_softc(interface);
	struct ipsec_output_state ipsec_state;
	struct route ro;
	struct route_in6 ro6;
	size_t length;
	struct ip *ip = NULL;
	struct ip6_hdr *ip6 = NULL;
	struct ip_out_args ipoa;
	struct ip6_out_args ip6oa;
	int error = 0;
	u_int ip_version = 0;
	int flags = 0;
	struct flowadv *adv = NULL;

	// Make sure this packet isn't looping through the interface
	if (necp_get_last_interface_index_from_packet(data) == interface->if_index) {
		error = EINVAL;
		goto ipsec_output_err;
	}

	// Mark the interface so NECP can evaluate tunnel policy
	necp_mark_packet_from_interface(data, interface);

	/* The IP header must be contiguous in the first mbuf. */
	if (data->m_len < sizeof(*ip)) {
		os_log_error(OS_LOG_DEFAULT, "ipsec_output: first mbuf length shorter than IP header length: %d.\n", data->m_len);
		IPSEC_STAT_INCREMENT(ipsecstat.out_inval);
		error = EINVAL;
		goto ipsec_output_err;
	}

	ip = mtod(data, struct ip *);
	ip_version = ip->ip_v;

	switch (ip_version) {
	case 4: {
		u_int8_t ip_hlen = 0;
#ifdef _IP_VHL
		ip_hlen = _IP_VHL_HL(ip->ip_vhl) << 2;
#else
		ip_hlen = (uint8_t)(ip->ip_hl << 2);
#endif
		if (ip_hlen < sizeof(*ip)) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_output: Bad ip header length %d.\n", ip_hlen);
			IPSEC_STAT_INCREMENT(ipsecstat.out_inval);
			error = EINVAL;
			goto ipsec_output_err;
		}
#if IPSEC_NEXUS
		/* In netif mode, BPF tapping happens elsewhere in the nexus path. */
		if (!pcb->ipsec_use_netif)
#endif // IPSEC_NEXUS
		{
			int af = AF_INET;
			bpf_tap_out(pcb->ipsec_ifp, DLT_NULL, data, &af, sizeof(af));
		}

		/* Apply encryption */
		memset(&ipsec_state, 0, sizeof(ipsec_state));
		ipsec_state.m = data;
		ipsec_state.dst = (struct sockaddr *)&ip->ip_dst;
		memset(&ipsec_state.ro, 0, sizeof(ipsec_state.ro));
		ipsec_state.dscp_mapping = pcb->ipsec_output_dscp_mapping;

		error = ipsec4_interface_output(&ipsec_state, interface);
		/* Tunneled in IPv6 - packet is gone */
		if (error == 0 && ipsec_state.tunneled == 6) {
			goto done;
		}

		/* The encryption step may have replaced the mbuf chain. */
		data = ipsec_state.m;
		if (error || data == NULL) {
			if (error) {
				os_log_error(OS_LOG_DEFAULT, "ipsec_output: ipsec4_output error %d.\n", error);
			}
			goto ipsec_output_err;
		}

		/* Set traffic class, set flow */
		m_set_service_class(data, pcb->ipsec_output_service_class);
		data->m_pkthdr.pkt_flowsrc = FLOWSRC_IFNET;
#if SKYWALK
		data->m_pkthdr.pkt_mpriv_srcid = interface->if_flowhash;
#else /* !SKYWALK */
		data->m_pkthdr.pkt_flowid = interface->if_flowhash;
#endif /* !SKYWALK */
		data->m_pkthdr.pkt_proto = ip->ip_p;
		data->m_pkthdr.pkt_flags = (PKTF_FLOW_ID | PKTF_FLOW_ADV | PKTF_FLOW_LOCALSRC);

		/* Flip endian-ness for ip_output */
		ip = mtod(data, struct ip *);
		NTOHS(ip->ip_len);
		NTOHS(ip->ip_off);

		/* Increment statistics */
		length = mbuf_pkthdr_len(data);
		ifnet_stat_increment_out(interface, 1, (uint16_t)length, 0);

		/* Send to ip_output */
		memset(&ro, 0, sizeof(ro));

		flags = (IP_OUTARGS | /* Passing out args to specify interface */
		    IP_NOIPSEC);      /* To ensure the packet doesn't go through ipsec twice */

		memset(&ipoa, 0, sizeof(ipoa));
		ipoa.ipoa_flowadv.code = 0;
		ipoa.ipoa_flags = IPOAF_SELECT_SRCIF | IPOAF_BOUND_SRCADDR;
		if (ipsec_state.outgoing_if) {
			ipoa.ipoa_boundif = ipsec_state.outgoing_if;
			ipoa.ipoa_flags |= IPOAF_BOUND_IF;
		}
		ipsec_set_ipoa_for_interface(pcb->ipsec_ifp, &ipoa);

		adv = &ipoa.ipoa_flowadv;

		/* ip_output consumes the mbuf regardless of outcome. */
		(void)ip_output(data, NULL, &ro, flags, NULL, &ipoa);
		data = NULL;

		/* Flow advisory: throttle the interface queue when downstream
		 * signals congestion or suspension. */
		if (adv->code == FADV_FLOW_CONTROLLED || adv->code == FADV_SUSPENDED) {
			error = ENOBUFS;
			ifnet_disable_output(interface);
		}

		goto done;
	}
	case 6: {
		if (data->m_len < sizeof(*ip6)) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_output: first mbuf length shorter than IPv6 header length: %d.\n", data->m_len);
			IPSEC_STAT_INCREMENT(ipsec6stat.out_inval);
			error = EINVAL;
			goto ipsec_output_err;
		}
#if IPSEC_NEXUS
		if (!pcb->ipsec_use_netif)
#endif // IPSEC_NEXUS
		{
			int af = AF_INET6;
			bpf_tap_out(pcb->ipsec_ifp, DLT_NULL, data, &af, sizeof(af));
		}

		/* Isolate the IPv6 header in its own mbuf; frees data on failure. */
		data = ipsec6_splithdr(data);
		if (data == NULL) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_output: ipsec6_splithdr returned NULL\n");
			goto ipsec_output_err;
		}

		ip6 = mtod(data, struct ip6_hdr *);

		memset(&ipsec_state, 0, sizeof(ipsec_state));
		ipsec_state.m = data;
		ipsec_state.dst = (struct sockaddr *)&ip6->ip6_dst;
		memset(&ipsec_state.ro, 0, sizeof(ipsec_state.ro));
		ipsec_state.dscp_mapping = pcb->ipsec_output_dscp_mapping;

		error = ipsec6_interface_output(&ipsec_state, interface, &ip6->ip6_nxt, ipsec_state.m);
		if (error == 0 && ipsec_state.tunneled == 4) { /* tunneled in IPv4 - packet is gone */
			goto done;
		}
		data = ipsec_state.m;
		if (error || data == NULL) {
			if (error) {
				os_log_error(OS_LOG_DEFAULT, "ipsec_output: ipsec6_output error %d\n", error);
			}
			goto ipsec_output_err;
		}

		/* Set traffic class, set flow */
		m_set_service_class(data, pcb->ipsec_output_service_class);
		data->m_pkthdr.pkt_flowsrc = FLOWSRC_IFNET;
#if SKYWALK
		data->m_pkthdr.pkt_mpriv_srcid = interface->if_flowhash;
#else /* !SKYWALK */
		data->m_pkthdr.pkt_flowid = interface->if_flowhash;
#endif /* !SKYWALK */
		data->m_pkthdr.pkt_proto = ip6->ip6_nxt;
		data->m_pkthdr.pkt_flags = (PKTF_FLOW_ID | PKTF_FLOW_ADV | PKTF_FLOW_LOCALSRC);

		/* Increment statistics */
		length = mbuf_pkthdr_len(data);
		ifnet_stat_increment_out(interface, 1, (uint16_t)length, 0);

		/* Send to ip6_output */
		memset(&ro6, 0, sizeof(ro6));

		flags = IPV6_OUTARGS;

		memset(&ip6oa, 0, sizeof(ip6oa));
		ip6oa.ip6oa_flowadv.code = 0;
		ip6oa.ip6oa_flags = IP6OAF_SELECT_SRCIF | IP6OAF_BOUND_SRCADDR;
		if (ipsec_state.outgoing_if) {
			ip6oa.ip6oa_boundif = ipsec_state.outgoing_if;
			ip6oa.ip6oa_flags |= IP6OAF_BOUND_IF;
			ip6_output_setsrcifscope(data, ipsec_state.outgoing_if, NULL);
			ip6_output_setdstifscope(data, ipsec_state.outgoing_if, NULL);
		} else {
			ip6_output_setsrcifscope(data, IFSCOPE_UNKNOWN, NULL);
			ip6_output_setdstifscope(data, IFSCOPE_UNKNOWN, NULL);
		}
		ipsec_set_ip6oa_for_interface(pcb->ipsec_ifp, &ip6oa);

		adv = &ip6oa.ip6oa_flowadv;

		(void) ip6_output(data, NULL, &ro6, flags, NULL, NULL, &ip6oa);
		data = NULL;

		if (adv->code == FADV_FLOW_CONTROLLED || adv->code == FADV_SUSPENDED) {
			error = ENOBUFS;
			ifnet_disable_output(interface);
		}

		goto done;
	}
	default: {
		os_log_error(OS_LOG_DEFAULT, "ipsec_output: Received unknown packet version %d.\n", ip_version);
		error = EINVAL;
		goto ipsec_output_err;
	}
	}

done:
	return error;

ipsec_output_err:
	/* Error path: we still own the mbuf, so release it. */
	if (data) {
		mbuf_freem(data);
	}
	goto done;
}
4693
4694 static void
ipsec_start(ifnet_t interface)4695 ipsec_start(ifnet_t interface)
4696 {
4697 mbuf_t data;
4698 struct ipsec_pcb *pcb = ifnet_softc(interface);
4699
4700 VERIFY(pcb != NULL);
4701 for (;;) {
4702 if (ifnet_dequeue(interface, &data) != 0) {
4703 break;
4704 }
4705 if (ipsec_output(interface, data) != 0) {
4706 break;
4707 }
4708 }
4709 }
4710
4711 /* Network Interface functions */
4712 static errno_t
ipsec_demux(__unused ifnet_t interface,mbuf_t data,__unused char * frame_header,protocol_family_t * protocol)4713 ipsec_demux(__unused ifnet_t interface,
4714 mbuf_t data,
4715 __unused char *frame_header,
4716 protocol_family_t *protocol)
4717 {
4718 struct ip *ip;
4719 u_int ip_version;
4720
4721 while (data != NULL && mbuf_len(data) < 1) {
4722 data = mbuf_next(data);
4723 }
4724
4725 if (data == NULL) {
4726 return ENOENT;
4727 }
4728
4729 ip = mtod(data, struct ip *);
4730 ip_version = ip->ip_v;
4731
4732 switch (ip_version) {
4733 case 4:
4734 *protocol = PF_INET;
4735 return 0;
4736 case 6:
4737 *protocol = PF_INET6;
4738 return 0;
4739 default:
4740 *protocol = PF_UNSPEC;
4741 break;
4742 }
4743
4744 return 0;
4745 }
4746
4747 static errno_t
ipsec_add_proto(__unused ifnet_t interface,protocol_family_t protocol,__unused const struct ifnet_demux_desc * demux_array,__unused u_int32_t demux_count)4748 ipsec_add_proto(__unused ifnet_t interface,
4749 protocol_family_t protocol,
4750 __unused const struct ifnet_demux_desc *demux_array,
4751 __unused u_int32_t demux_count)
4752 {
4753 switch (protocol) {
4754 case PF_INET:
4755 return 0;
4756 case PF_INET6:
4757 return 0;
4758 default:
4759 break;
4760 }
4761
4762 return ENOPROTOOPT;
4763 }
4764
4765 static errno_t
ipsec_del_proto(__unused ifnet_t interface,__unused protocol_family_t protocol)4766 ipsec_del_proto(__unused ifnet_t interface,
4767 __unused protocol_family_t protocol)
4768 {
4769 return 0;
4770 }
4771
/*
 * Interface ioctl handler for an ipsec interface.
 * Supports SIOCSIFMTU (bounded by the slot size in netif mode),
 * SIOCSIFFLAGS (handled upstream by ifioctl), and SIOCSIFSUBFAMILY.
 * Returns EOPNOTSUPP for anything else.
 */
static errno_t
ipsec_ioctl(ifnet_t interface,
    u_long command,
    void *data)
{
#if IPSEC_NEXUS
	struct ipsec_pcb *pcb = ifnet_softc(interface);
#endif
	errno_t result = 0;

	switch (command) {
	case SIOCSIFMTU: {
#if IPSEC_NEXUS
		if (pcb->ipsec_use_netif) {
			// Make sure we can fit packets in the channel buffers
			if (((uint64_t)((struct ifreq*)data)->ifr_mtu) > pcb->ipsec_slot_size) {
				result = EINVAL;
			} else {
				ifnet_set_mtu(interface, (uint32_t)((struct ifreq*)data)->ifr_mtu);
			}
		} else
#endif // IPSEC_NEXUS
		{
			/* Non-netif mode: no channel buffer limit applies. */
			ifnet_set_mtu(interface, ((struct ifreq*)data)->ifr_mtu);
		}
		break;
	}

	case SIOCSIFFLAGS:
		/* ifioctl() takes care of it */
		break;

	case SIOCSIFSUBFAMILY: {
		/* Map the requested ifreq subfamily onto the ifnet subfamily. */
		uint32_t subfamily;

		subfamily = ((struct ifreq*)data)->ifr_type.ift_subfamily;
		switch (subfamily) {
		case IFRTYPE_SUBFAMILY_BLUETOOTH:
			interface->if_subfamily = IFNET_SUBFAMILY_BLUETOOTH;
			break;
		case IFRTYPE_SUBFAMILY_WIFI:
			interface->if_subfamily = IFNET_SUBFAMILY_WIFI;
			break;
		case IFRTYPE_SUBFAMILY_QUICKRELAY:
			interface->if_subfamily = IFNET_SUBFAMILY_QUICKRELAY;
			break;
		case IFRTYPE_SUBFAMILY_DEFAULT:
			interface->if_subfamily = IFNET_SUBFAMILY_DEFAULT;
			break;
		default:
			result = EINVAL;
			break;
		}
		break;
	}

	default:
		result = EOPNOTSUPP;
	}

	return result;
}
4834
/*
 * Detach-complete callback, invoked once the interface's I/O reference
 * count reaches zero after ifnet_detach(). Drops the ifnet reference,
 * frees the PCB, and disposes of the ifnet, with the latter two done
 * under ipsec_lock.
 */
static void
ipsec_detached(ifnet_t interface)
{
	struct ipsec_pcb *pcb = ifnet_softc(interface);

	(void)ifnet_release(interface);
	lck_mtx_lock(&ipsec_lock);
	/* `true` — see ipsec_free_pcb; contrast with the `false` call in
	 * the bound-but-not-connected disconnect path. */
	ipsec_free_pcb(pcb, true);
	(void)ifnet_dispose(interface);
	lck_mtx_unlock(&ipsec_lock);
}
4846
4847 /* Protocol Handlers */
4848
/*
 * Protocol input handler: hands a received packet up the stack for the
 * given protocol family, tapping BPF/pktap first in non-netif mode.
 * Frees the mbuf itself only if proto_input() fails; on success the
 * stack owns it. Always returns 0.
 */
static errno_t
ipsec_proto_input(ifnet_t interface,
    protocol_family_t protocol,
    mbuf_t m,
    __unused char *frame_header)
{
	mbuf_pkthdr_setrcvif(m, interface);

#if IPSEC_NEXUS
	struct ipsec_pcb *pcb = ifnet_softc(interface);
	/* In netif mode tapping is handled by the nexus datapath. */
	if (!pcb->ipsec_use_netif)
#endif // IPSEC_NEXUS
	{
		uint32_t af = 0;
		struct ip *ip = mtod(m, struct ip *);
		if (ip->ip_v == 4) {
			af = AF_INET;
		} else if (ip->ip_v == 6) {
			af = AF_INET6;
		}
		bpf_tap_in(interface, DLT_NULL, m, &af, sizeof(af));
		pktap_input(interface, protocol, m, NULL);
	}

	/* Capture the length before proto_input(), which consumes m on success. */
	int32_t pktlen = m->m_pkthdr.len;
	if (proto_input(protocol, m) != 0) {
		ifnet_stat_increment_in(interface, 0, 0, 1);
		m_freem(m);
	} else {
		ifnet_stat_increment_in(interface, 1, pktlen, 0);
	}

	return 0;
}
4883
4884 static errno_t
ipsec_proto_pre_output(__unused ifnet_t interface,protocol_family_t protocol,__unused mbuf_t * packet,__unused const struct sockaddr * dest,__unused void * route,__unused char * frame_type,__unused char * link_layer_dest)4885 ipsec_proto_pre_output(__unused ifnet_t interface,
4886 protocol_family_t protocol,
4887 __unused mbuf_t *packet,
4888 __unused const struct sockaddr *dest,
4889 __unused void *route,
4890 __unused char *frame_type,
4891 __unused char *link_layer_dest)
4892 {
4893 *(protocol_family_t *)(void *)frame_type = protocol;
4894 return 0;
4895 }
4896
4897 static errno_t
ipsec_attach_proto(ifnet_t interface,protocol_family_t protocol)4898 ipsec_attach_proto(ifnet_t interface,
4899 protocol_family_t protocol)
4900 {
4901 struct ifnet_attach_proto_param proto;
4902 errno_t result;
4903
4904 bzero(&proto, sizeof(proto));
4905 proto.input = ipsec_proto_input;
4906 proto.pre_output = ipsec_proto_pre_output;
4907
4908 result = ifnet_attach_protocol(interface, protocol, &proto);
4909 if (result != 0 && result != EEXIST) {
4910 os_log_error(OS_LOG_DEFAULT, "ipsec_attach_inet - ifnet_attach_protocol %d failed: %d\n",
4911 protocol, result);
4912 }
4913
4914 return result;
4915 }
4916
/*
 * Inject a received packet (or chain of packets linked via m_nextpkt)
 * into the ipsec interface's inbound path.
 *
 * In netif mode the chain is appended to the PCB's input chain under
 * ipsec_input_chain_lock and the RX ring is notified; returns ENXIO if
 * the data path is stopped and ENOSPC when the pending-input limit is
 * exceeded (caller keeps ownership of the packet on error). Otherwise
 * the packet is demuxed and delivered synchronously via
 * ipsec_proto_input().
 */
errno_t
ipsec_inject_inbound_packet(ifnet_t interface,
    mbuf_t packet)
{
#if IPSEC_NEXUS
	struct ipsec_pcb *pcb = ifnet_softc(interface);

	if (pcb->ipsec_use_netif) {
		/* Take an I/O reference; fails if the datapath is quiescing. */
		if (!ipsec_data_move_begin(pcb)) {
			os_log_info(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__,
			    if_name(pcb->ipsec_ifp));
			return ENXIO;
		}

		lck_rw_lock_shared(&pcb->ipsec_pcb_lock);

		lck_mtx_lock(&pcb->ipsec_input_chain_lock);

		/* Back-pressure: refuse when too many packets are already queued. */
		if (pcb->ipsec_input_chain_count > (u_int32_t)if_ipsec_max_pending_input) {
			lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
			lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
			ipsec_data_move_end(pcb);
			return ENOSPC;
		}

		/* Append the chain and walk it to update the count and tail. */
		if (pcb->ipsec_input_chain != NULL) {
			pcb->ipsec_input_chain_last->m_nextpkt = packet;
		} else {
			pcb->ipsec_input_chain = packet;
		}
		pcb->ipsec_input_chain_count++;
		while (packet->m_nextpkt) {
			VERIFY(packet != packet->m_nextpkt);
			packet = packet->m_nextpkt;
			pcb->ipsec_input_chain_count++;
		}
		pcb->ipsec_input_chain_last = packet;
		lck_mtx_unlock(&pcb->ipsec_input_chain_lock);

		/* Snapshot the ring pointer under the PCB lock, notify after release. */
		kern_channel_ring_t rx_ring = pcb->ipsec_netif_rxring[0];
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);

		if (rx_ring != NULL) {
			kern_channel_notify(rx_ring, 0);
		}

		ipsec_data_move_end(pcb);
		return 0;
	} else
#endif // IPSEC_NEXUS
	{
		/* Legacy path: demux the family then deliver synchronously. */
		errno_t error;
		protocol_family_t protocol;
		if ((error = ipsec_demux(interface, packet, NULL, &protocol)) != 0) {
			return error;
		}

		return ipsec_proto_input(interface, protocol, packet, NULL);
	}
}
4977
4978 void
ipsec_set_pkthdr_for_interface(ifnet_t interface,mbuf_t packet,int family,uint32_t flowid)4979 ipsec_set_pkthdr_for_interface(ifnet_t interface, mbuf_t packet, int family,
4980 uint32_t flowid)
4981 {
4982 #pragma unused (flowid)
4983 if (packet != NULL && interface != NULL) {
4984 struct ipsec_pcb *pcb = ifnet_softc(interface);
4985 if (pcb != NULL) {
4986 /* Set traffic class, set flow */
4987 m_set_service_class(packet, pcb->ipsec_output_service_class);
4988 packet->m_pkthdr.pkt_flowsrc = FLOWSRC_IFNET;
4989 #if SKYWALK
4990 packet->m_pkthdr.pkt_mpriv_srcid = interface->if_flowhash;
4991 packet->m_pkthdr.pkt_flowid = flowid;
4992 #else /* !SKYWALK */
4993 packet->m_pkthdr.pkt_flowid = interface->if_flowhash;
4994 #endif /* !SKYWALK */
4995 if (family == AF_INET) {
4996 struct ip *ip = mtod(packet, struct ip *);
4997 packet->m_pkthdr.pkt_proto = ip->ip_p;
4998 } else if (family == AF_INET6) {
4999 struct ip6_hdr *ip6 = mtod(packet, struct ip6_hdr *);
5000 packet->m_pkthdr.pkt_proto = ip6->ip6_nxt;
5001 }
5002 packet->m_pkthdr.pkt_flags = (PKTF_FLOW_ID | PKTF_FLOW_ADV | PKTF_FLOW_LOCALSRC);
5003 }
5004 }
5005 }
5006
5007 void
ipsec_set_ipoa_for_interface(ifnet_t interface,struct ip_out_args * ipoa)5008 ipsec_set_ipoa_for_interface(ifnet_t interface, struct ip_out_args *ipoa)
5009 {
5010 struct ipsec_pcb *pcb;
5011
5012 if (interface == NULL || ipoa == NULL) {
5013 return;
5014 }
5015 pcb = ifnet_softc(interface);
5016
5017 if (net_qos_policy_restricted == 0) {
5018 ipoa->ipoa_flags |= IPOAF_QOSMARKING_ALLOWED;
5019 ipoa->ipoa_sotc = so_svc2tc(pcb->ipsec_output_service_class);
5020 } else if (pcb->ipsec_output_service_class != MBUF_SC_VO ||
5021 net_qos_policy_restrict_avapps != 0) {
5022 ipoa->ipoa_flags &= ~IPOAF_QOSMARKING_ALLOWED;
5023 } else {
5024 ipoa->ipoa_flags |= IP6OAF_QOSMARKING_ALLOWED;
5025 ipoa->ipoa_sotc = SO_TC_VO;
5026 }
5027 }
5028
5029 void
ipsec_set_ip6oa_for_interface(ifnet_t interface,struct ip6_out_args * ip6oa)5030 ipsec_set_ip6oa_for_interface(ifnet_t interface, struct ip6_out_args *ip6oa)
5031 {
5032 struct ipsec_pcb *pcb;
5033
5034 if (interface == NULL || ip6oa == NULL) {
5035 return;
5036 }
5037 pcb = ifnet_softc(interface);
5038
5039 if (net_qos_policy_restricted == 0) {
5040 ip6oa->ip6oa_flags |= IPOAF_QOSMARKING_ALLOWED;
5041 ip6oa->ip6oa_sotc = so_svc2tc(pcb->ipsec_output_service_class);
5042 } else if (pcb->ipsec_output_service_class != MBUF_SC_VO ||
5043 net_qos_policy_restrict_avapps != 0) {
5044 ip6oa->ip6oa_flags &= ~IPOAF_QOSMARKING_ALLOWED;
5045 } else {
5046 ip6oa->ip6oa_flags |= IP6OAF_QOSMARKING_ALLOWED;
5047 ip6oa->ip6oa_sotc = SO_TC_VO;
5048 }
5049 }
5050
5051 static boolean_t
ipsec_data_move_begin(struct ipsec_pcb * pcb)5052 ipsec_data_move_begin(struct ipsec_pcb *pcb)
5053 {
5054 boolean_t ret = 0;
5055
5056 lck_mtx_lock_spin(&pcb->ipsec_pcb_data_move_lock);
5057 if ((ret = IPSEC_IS_DATA_PATH_READY(pcb))) {
5058 pcb->ipsec_pcb_data_move++;
5059 }
5060 lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
5061
5062 return ret;
5063 }
5064
5065 static void
ipsec_data_move_end(struct ipsec_pcb * pcb)5066 ipsec_data_move_end(struct ipsec_pcb *pcb)
5067 {
5068 lck_mtx_lock_spin(&pcb->ipsec_pcb_data_move_lock);
5069 VERIFY(pcb->ipsec_pcb_data_move > 0);
5070 /*
5071 * if there's no more thread moving data, wakeup any
5072 * drainers that's blocked waiting for this.
5073 */
5074 if (--pcb->ipsec_pcb_data_move == 0 && pcb->ipsec_pcb_drainers > 0) {
5075 wakeup(&(pcb->ipsec_pcb_data_move));
5076 }
5077 lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
5078 }
5079
/*
 * Block until every thread currently in the data path has left
 * (ipsec_pcb_data_move drops to zero). The caller must already have
 * cleared the data-path-ready bit so no new movers can enter.
 */
static void
ipsec_data_move_drain(struct ipsec_pcb *pcb)
{
	lck_mtx_lock(&pcb->ipsec_pcb_data_move_lock);
	/* data path must already be marked as not ready */
	VERIFY(!IPSEC_IS_DATA_PATH_READY(pcb));
	/* Register as a drainer so ipsec_data_move_end() knows to wake us. */
	pcb->ipsec_pcb_drainers++;
	/* Sleep until the last in-flight mover drops the count to zero. */
	while (pcb->ipsec_pcb_data_move != 0) {
		(void)msleep(&(pcb->ipsec_pcb_data_move), &pcb->ipsec_pcb_data_move_lock,
		    (PZERO - 1), __func__, NULL);
	}
	/* Ready bit must still be clear — nobody may re-open during drain. */
	VERIFY(!IPSEC_IS_DATA_PATH_READY(pcb));
	VERIFY(pcb->ipsec_pcb_drainers > 0);
	pcb->ipsec_pcb_drainers--;
	lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
}
5096
5097 static void
ipsec_wait_data_move_drain(struct ipsec_pcb * pcb)5098 ipsec_wait_data_move_drain(struct ipsec_pcb *pcb)
5099 {
5100 /*
5101 * Mark the data path as not usable.
5102 */
5103 lck_mtx_lock(&pcb->ipsec_pcb_data_move_lock);
5104 IPSEC_CLR_DATA_PATH_READY(pcb);
5105 lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
5106
5107 /* Wait until all threads in the data paths are done. */
5108 ipsec_data_move_drain(pcb);
5109 }
5110