1 /*
2 * Copyright (c) 2012-2024 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <string.h>
30 #include <sys/types.h>
31 #include <sys/syslog.h>
32 #include <sys/queue.h>
33 #include <sys/malloc.h>
34 #include <sys/socket.h>
35 #include <sys/kpi_mbuf.h>
36 #include <sys/mbuf.h>
37 #include <sys/domain.h>
38 #include <sys/protosw.h>
39 #include <sys/socketvar.h>
40 #include <sys/kernel.h>
41 #include <sys/systm.h>
42 #include <sys/kern_control.h>
43 #include <sys/ubc.h>
44 #include <sys/codesign.h>
45 #include <sys/file_internal.h>
46 #include <sys/kauth.h>
47 #include <libkern/tree.h>
48 #include <kern/locks.h>
49 #include <kern/debug.h>
50 #include <kern/task.h>
51 #include <mach/task_info.h>
52 #include <net/if_var.h>
53 #include <net/route.h>
54 #include <net/flowhash.h>
55 #include <net/ntstat.h>
56 #include <net/content_filter.h>
57 #include <net/necp.h>
58 #include <netinet/in.h>
59 #include <netinet/in_var.h>
60 #include <netinet/tcp.h>
61 #include <netinet/tcp_var.h>
62 #include <netinet/tcp_fsm.h>
63 #include <netinet/flow_divert.h>
64 #include <netinet/flow_divert_proto.h>
65 #include <netinet6/in6_pcb.h>
66 #include <netinet6/ip6protosw.h>
67 #include <dev/random/randomdev.h>
68 #include <libkern/crypto/sha1.h>
69 #include <libkern/crypto/crypto_internal.h>
70 #include <os/log.h>
71 #include <corecrypto/cc.h>
72 #include <net/sockaddr_utils.h>
73 #if CONTENT_FILTER
74 #include <net/content_filter.h>
75 #endif /* CONTENT_FILTER */
76
/*
 * Flow control block state flags (fd_cb->flags).
 * (0x00000020 is currently unassigned.)
 */
#define FLOW_DIVERT_CONNECT_STARTED 0x00000001
#define FLOW_DIVERT_READ_CLOSED 0x00000002
#define FLOW_DIVERT_WRITE_CLOSED 0x00000004
#define FLOW_DIVERT_TUNNEL_RD_CLOSED 0x00000008
#define FLOW_DIVERT_TUNNEL_WR_CLOSED 0x00000010
#define FLOW_DIVERT_HAS_HMAC 0x00000040
#define FLOW_DIVERT_NOTIFY_ON_RECEIVED 0x00000080
#define FLOW_DIVERT_IMPLICIT_CONNECT 0x00000100
#define FLOW_DIVERT_DID_SET_LOCAL_ADDR 0x00000200
#define FLOW_DIVERT_HAS_TOKEN 0x00000400
#define FLOW_DIVERT_SHOULD_SET_LOCAL_ADDR 0x00000800
#define FLOW_DIVERT_FLOW_IS_TRANSPARENT 0x00001000

/* Logging helpers: every message is prefixed with the flow's connection hash */
#define FDLOG(level, pcb, format, ...) \
	os_log_with_type(OS_LOG_DEFAULT, flow_divert_syslog_type_to_oslog_type(level), "(%u): " format "\n", (pcb)->hash, __VA_ARGS__)

#define FDLOG0(level, pcb, msg) \
	os_log_with_type(OS_LOG_DEFAULT, flow_divert_syslog_type_to_oslog_type(level), "(%u): " msg "\n", (pcb)->hash)

/* PCB reference counting; the last FDRELEASE destroys the PCB. Both are NULL-safe. */
#define FDRETAIN(pcb) if ((pcb) != NULL) OSIncrementAtomic(&(pcb)->ref_count)
#define FDRELEASE(pcb) \
	do { \
		if ((pcb) != NULL && 1 == OSDecrementAtomic(&(pcb)->ref_count)) { \
			flow_divert_pcb_destroy(pcb); \
		} \
	} while (0)

/* Group reference counting; the last FDGRP_RELEASE destroys the group. NULL-safe. */
#define FDGRP_RETAIN(grp) if ((grp) != NULL) OSIncrementAtomic(&(grp)->ref_count)
#define FDGRP_RELEASE(grp) if ((grp) != NULL && 1 == OSDecrementAtomic(&(grp)->ref_count)) flow_divert_group_destroy(grp)

/* Per-PCB mutex */
#define FDLOCK(pcb) lck_mtx_lock(&(pcb)->mtx)
#define FDUNLOCK(pcb) lck_mtx_unlock(&(pcb)->mtx)

#define FD_CTL_SENDBUFF_SIZE (128 * 1024)

#define GROUP_BIT_CTL_ENQUEUE_BLOCKED 0

/* Size/count limits */
#define GROUP_COUNT_MAX 31
#define FLOW_DIVERT_MAX_NAME_SIZE 4096
#define FLOW_DIVERT_MAX_KEY_SIZE 1024
#define FLOW_DIVERT_MAX_TRIE_MEMORY (1024 * 1024)

/* Compressed prefix trie storage accessors (see flow_divert_trie_insert/search) */
#define CHILD_MAP_SIZE 256
#define NULL_TRIE_IDX 0xffff
#define TRIE_NODE(t, i) ((t)->nodes[(i)])
#define TRIE_CHILD(t, i, b) (((t)->child_maps + (CHILD_MAP_SIZE * TRIE_NODE(t, i).child_map))[(b)])
#define TRIE_BYTE(t, i) ((t)->bytes[(i)])

#define SO_IS_DIVERTED(s) (((s)->so_flags & SOF_FLOW_DIVERT) && (s)->so_fd_pcb != NULL)
126
/* Placeholder PCB used as the logging context when no real PCB is available */
static struct flow_divert_pcb nil_pcb;

static LCK_ATTR_DECLARE(flow_divert_mtx_attr, 0, 0);
static LCK_GRP_DECLARE(flow_divert_mtx_grp, FLOW_DIVERT_CONTROL_NAME);
/* Protects the group table and the in-process group list below */
static LCK_RW_DECLARE_ATTR(g_flow_divert_group_lck, &flow_divert_mtx_grp,
    &flow_divert_mtx_attr);

/* Groups owned by in-process agents; lookups match both ctl_unit and PID */
static TAILQ_HEAD(_flow_divert_group_list, flow_divert_group) g_flow_divert_in_process_group_list;

/* Table of non-in-process groups, indexed by kernel-control unit number */
static struct flow_divert_group **g_flow_divert_groups __indexable = NULL;
static uint32_t g_active_group_count = 0;

static errno_t g_init_result = 0;

static kern_ctl_ref g_flow_divert_kctl_ref = NULL;

/* protosw/usrreqs instances for diverted TCP and UDP over IPv4/IPv6 */
static struct protosw g_flow_divert_in_protosw;
static struct pr_usrreqs g_flow_divert_in_usrreqs;
static struct protosw g_flow_divert_in_udp_protosw;
static struct pr_usrreqs g_flow_divert_in_udp_usrreqs;
static struct ip6protosw g_flow_divert_in6_protosw;
static struct pr_usrreqs g_flow_divert_in6_usrreqs;
static struct ip6protosw g_flow_divert_in6_udp_protosw;
static struct pr_usrreqs g_flow_divert_in6_udp_usrreqs;

/* Presumably the stock TCP/UDP protosw entries — populated outside this chunk; confirm at init code */
static struct protosw *g_tcp_protosw = NULL;
static struct ip6protosw *g_tcp6_protosw = NULL;
static struct protosw *g_udp_protosw = NULL;
static struct ip6protosw *g_udp6_protosw = NULL;

static KALLOC_TYPE_DEFINE(flow_divert_group_zone, struct flow_divert_group,
    NET_KT_DEFAULT);
static KALLOC_TYPE_DEFINE(flow_divert_pcb_zone, struct flow_divert_pcb,
    NET_KT_DEFAULT);
161
/* Forward declarations — definitions appear later in this file */
static errno_t
flow_divert_dup_addr(sa_family_t family, struct sockaddr *addr, struct sockaddr **dup);

static boolean_t
flow_divert_is_sockaddr_valid(struct sockaddr *addr);

static int
flow_divert_append_target_endpoint_tlv(mbuf_ref_t connect_packet, struct sockaddr *toaddr);

struct sockaddr *
flow_divert_get_buffered_target_address(mbuf_ref_t buffer);

static void
flow_divert_disconnect_socket(struct socket *so, bool is_connected, bool delay_if_needed);

static void flow_divert_group_destroy(struct flow_divert_group *group);
178
179 static inline uint8_t
flow_divert_syslog_type_to_oslog_type(int syslog_type)180 flow_divert_syslog_type_to_oslog_type(int syslog_type)
181 {
182 switch (syslog_type) {
183 case LOG_ERR: return OS_LOG_TYPE_ERROR;
184 case LOG_INFO: return OS_LOG_TYPE_INFO;
185 case LOG_DEBUG: return OS_LOG_TYPE_DEBUG;
186 default: return OS_LOG_TYPE_DEFAULT;
187 }
188 }
189
/*
 * Ordering function for the PCB red-black tree.  Compares connection
 * hashes with memcmp, i.e. by in-memory byte order rather than numeric
 * value — any consistent total order is sufficient for the tree.
 */
static inline int
flow_divert_pcb_cmp(const struct flow_divert_pcb *pcb_a, const struct flow_divert_pcb *pcb_b)
{
	return memcmp(&pcb_a->hash, &pcb_b->hash, sizeof(pcb_a->hash));
}

RB_PROTOTYPE(fd_pcb_tree, flow_divert_pcb, rb_link, flow_divert_pcb_cmp);
RB_GENERATE(fd_pcb_tree, flow_divert_pcb, rb_link, flow_divert_pcb_cmp);
198
199 static const char *
flow_divert_packet_type2str(uint8_t packet_type)200 flow_divert_packet_type2str(uint8_t packet_type)
201 {
202 switch (packet_type) {
203 case FLOW_DIVERT_PKT_CONNECT:
204 return "connect";
205 case FLOW_DIVERT_PKT_CONNECT_RESULT:
206 return "connect result";
207 case FLOW_DIVERT_PKT_DATA:
208 return "data";
209 case FLOW_DIVERT_PKT_CLOSE:
210 return "close";
211 case FLOW_DIVERT_PKT_READ_NOTIFY:
212 return "read notification";
213 case FLOW_DIVERT_PKT_PROPERTIES_UPDATE:
214 return "properties update";
215 case FLOW_DIVERT_PKT_APP_MAP_CREATE:
216 return "app map create";
217 default:
218 return "unknown";
219 }
220 }
221
222 static struct flow_divert_pcb *
flow_divert_pcb_lookup(uint32_t hash,struct flow_divert_group * group)223 flow_divert_pcb_lookup(uint32_t hash, struct flow_divert_group *group)
224 {
225 struct flow_divert_pcb key_item;
226 struct flow_divert_pcb *fd_cb = NULL;
227
228 key_item.hash = hash;
229
230 lck_rw_lock_shared(&group->lck);
231 fd_cb = RB_FIND(fd_pcb_tree, &group->pcb_tree, &key_item);
232 FDRETAIN(fd_cb);
233 lck_rw_done(&group->lck);
234
235 return fd_cb;
236 }
237
238 static struct flow_divert_group *
flow_divert_group_lookup(uint32_t ctl_unit,struct flow_divert_pcb * fd_cb)239 flow_divert_group_lookup(uint32_t ctl_unit, struct flow_divert_pcb *fd_cb)
240 {
241 struct flow_divert_group *group = NULL;
242 lck_rw_lock_shared(&g_flow_divert_group_lck);
243 if (g_active_group_count == 0) {
244 if (fd_cb != NULL) {
245 FDLOG0(LOG_ERR, fd_cb, "No active groups, flow divert cannot be used for this socket");
246 }
247 } else if (ctl_unit == 0 || (ctl_unit >= GROUP_COUNT_MAX && ctl_unit < FLOW_DIVERT_IN_PROCESS_UNIT_MIN)) {
248 FDLOG(LOG_ERR, fd_cb, "Cannot lookup group with invalid control unit (%u)", ctl_unit);
249 } else if (ctl_unit < FLOW_DIVERT_IN_PROCESS_UNIT_MIN) {
250 if (g_flow_divert_groups == NULL) {
251 if (fd_cb != NULL) {
252 FDLOG0(LOG_ERR, fd_cb, "No active non-in-process groups, flow divert cannot be used for this socket");
253 }
254 } else {
255 group = g_flow_divert_groups[ctl_unit];
256 if (group == NULL) {
257 if (fd_cb != NULL) {
258 FDLOG(LOG_ERR, fd_cb, "Group for control unit %u is NULL, flow divert cannot be used for this socket", ctl_unit);
259 }
260 } else {
261 FDGRP_RETAIN(group);
262 }
263 }
264 } else {
265 if (TAILQ_EMPTY(&g_flow_divert_in_process_group_list)) {
266 if (fd_cb != NULL) {
267 FDLOG0(LOG_ERR, fd_cb, "No active in-process groups, flow divert cannot be used for this socket");
268 }
269 } else {
270 struct flow_divert_group *group_cursor = NULL;
271 TAILQ_FOREACH(group_cursor, &g_flow_divert_in_process_group_list, chain) {
272 if (group_cursor->ctl_unit == ctl_unit) {
273 group = group_cursor;
274 break;
275 }
276 }
277 if (group == NULL) {
278 if (fd_cb != NULL) {
279 FDLOG(LOG_ERR, fd_cb, "Group for control unit %u not found, flow divert cannot be used for this socket", ctl_unit);
280 }
281 } else if (fd_cb != NULL &&
282 (fd_cb->so == NULL ||
283 group_cursor->in_process_pid != fd_cb->so->last_pid)) {
284 FDLOG(LOG_ERR, fd_cb, "Cannot access group for control unit %u, mismatched PID (%u != %u)",
285 ctl_unit, group_cursor->in_process_pid, fd_cb->so ? fd_cb->so->last_pid : 0);
286 group = NULL;
287 } else {
288 FDGRP_RETAIN(group);
289 }
290 }
291 }
292 lck_rw_done(&g_flow_divert_group_lck);
293 return group;
294 }
295
296 static errno_t
flow_divert_pcb_insert(struct flow_divert_pcb * fd_cb,struct flow_divert_group * group)297 flow_divert_pcb_insert(struct flow_divert_pcb *fd_cb, struct flow_divert_group *group)
298 {
299 int error = 0;
300 lck_rw_lock_exclusive(&group->lck);
301 if (!(group->flags & FLOW_DIVERT_GROUP_FLAG_DEFUNCT)) {
302 if (NULL == RB_INSERT(fd_pcb_tree, &group->pcb_tree, fd_cb)) {
303 fd_cb->group = group;
304 fd_cb->control_group_unit = group->ctl_unit;
305 FDRETAIN(fd_cb); /* The group now has a reference */
306 } else {
307 FDLOG(LOG_ERR, fd_cb, "Group %u already contains a PCB with hash %u", group->ctl_unit, fd_cb->hash);
308 error = EEXIST;
309 }
310 } else {
311 FDLOG(LOG_ERR, fd_cb, "Group %u is defunct, cannot insert", group->ctl_unit);
312 error = ENOENT;
313 }
314 lck_rw_done(&group->lck);
315 return error;
316 }
317
/*
 * Assign fd_cb a connection hash that is unique across all groups and
 * insert it into the group identified by `ctl_unit`.  Retries up to 4
 * times (initial attempt + 3) with fresh random hashes if a collision
 * is detected, either by scanning the other groups or by the insert
 * itself.  Returns ENOENT if the group cannot be found, EEXIST if a
 * unique hash could not be generated.
 */
static errno_t
flow_divert_add_to_group(struct flow_divert_pcb *fd_cb, uint32_t ctl_unit)
{
	errno_t error = 0;
	struct flow_divert_group *group = NULL;
	static uint32_t g_nextkey = 1;     /* monotonically increasing half of the hash key */
	static uint32_t g_hash_seed = 0;   /* lazily initialized random seed, set once */
	int try_count = 0;

	group = flow_divert_group_lookup(ctl_unit, fd_cb);
	if (group == NULL) {
		return ENOENT;
	}

	do {
		uint32_t key[2];
		uint32_t idx;

		/* Key mixes a counter with fresh randomness so retries produce new hashes */
		key[0] = g_nextkey++;
		key[1] = RandomULong();

		if (g_hash_seed == 0) {
			g_hash_seed = RandomULong();
		}

		error = 0;
		fd_cb->hash = net_flowhash(key, sizeof(key), g_hash_seed);

		/* Check every other non-in-process group for a PCB with the same hash */
		for (idx = 1; idx < GROUP_COUNT_MAX && error == 0; idx++) {
			if (idx == ctl_unit) {
				continue;
			}
			struct flow_divert_group *curr_group = flow_divert_group_lookup(idx, NULL);
			if (curr_group != NULL) {
				lck_rw_lock_shared(&curr_group->lck);
				if (NULL != RB_FIND(fd_pcb_tree, &curr_group->pcb_tree, fd_cb)) {
					error = EEXIST;
				}
				lck_rw_done(&curr_group->lck);
				FDGRP_RELEASE(curr_group);
			}
		}

		if (error == 0) {
			/* The insert itself also detects collisions within the target group */
			error = flow_divert_pcb_insert(fd_cb, group);
		}
	} while (error == EEXIST && try_count++ < 3);

	if (error == EEXIST) {
		FDLOG0(LOG_ERR, fd_cb, "Failed to create a unique hash");
		fd_cb->hash = 0;
	}

	FDGRP_RELEASE(group);
	return error;
}
374
375 static struct flow_divert_pcb *
flow_divert_pcb_create(socket_t so)376 flow_divert_pcb_create(socket_t so)
377 {
378 struct flow_divert_pcb *new_pcb = NULL;
379
380 new_pcb = zalloc_flags(flow_divert_pcb_zone, Z_WAITOK | Z_ZERO);
381 lck_mtx_init(&new_pcb->mtx, &flow_divert_mtx_grp, &flow_divert_mtx_attr);
382 new_pcb->so = so;
383 new_pcb->log_level = nil_pcb.log_level;
384
385 FDRETAIN(new_pcb); /* Represents the socket's reference */
386
387 return new_pcb;
388 }
389
/*
 * Final teardown of a PCB: free everything it owns and return it to
 * the zone.  Reached from FDRELEASE when the last reference is dropped.
 */
static void
flow_divert_pcb_destroy(struct flow_divert_pcb *fd_cb)
{
	FDLOG(LOG_INFO, fd_cb, "Destroying, app tx %llu, tunnel tx %llu, tunnel rx %llu",
	    fd_cb->bytes_written_by_app, fd_cb->bytes_sent, fd_cb->bytes_received);

	if (fd_cb->connect_token != NULL) {
		mbuf_freem(fd_cb->connect_token);
	}
	if (fd_cb->connect_packet != NULL) {
		mbuf_freem(fd_cb->connect_packet);
	}
	if (fd_cb->app_data != NULL) {
		kfree_data_sized_by(fd_cb->app_data, fd_cb->app_data_length);
	}
	if (fd_cb->original_remote_endpoint != NULL) {
		free_sockaddr(fd_cb->original_remote_endpoint);
	}
	zfree(flow_divert_pcb_zone, fd_cb);
}
410
/*
 * Unlink fd_cb from its owning group's PCB tree (if any) and drop the
 * reference the tree held.  NOTE(review): FDRELEASE is issued while
 * the group lock is still held; presumably callers hold their own
 * reference so the PCB cannot be destroyed inside the lock — confirm
 * at call sites.
 */
static void
flow_divert_pcb_remove(struct flow_divert_pcb *fd_cb)
{
	if (fd_cb->group != NULL) {
		struct flow_divert_group *group = fd_cb->group;
		lck_rw_lock_exclusive(&group->lck);
		FDLOG(LOG_INFO, fd_cb, "Removing from group %d, ref count = %d", group->ctl_unit, fd_cb->ref_count);
		RB_REMOVE(fd_pcb_tree, &group->pcb_tree, fd_cb);
		fd_cb->group = NULL;
		FDRELEASE(fd_cb); /* Release the group's reference */
		lck_rw_done(&group->lck);
	}
}
424
425 static int
flow_divert_packet_init(struct flow_divert_pcb * fd_cb,uint8_t packet_type,mbuf_ref_t * packet)426 flow_divert_packet_init(struct flow_divert_pcb *fd_cb, uint8_t packet_type, mbuf_ref_t *packet)
427 {
428 struct flow_divert_packet_header hdr;
429 int error = 0;
430
431 error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, packet);
432 if (error) {
433 FDLOG(LOG_ERR, fd_cb, "failed to allocate the header mbuf: %d", error);
434 return error;
435 }
436
437 hdr.packet_type = packet_type;
438 hdr.conn_id = htonl(fd_cb->hash);
439
440 /* Lay down the header */
441 error = mbuf_copyback(*packet, 0, sizeof(hdr), &hdr, MBUF_DONTWAIT);
442 if (error) {
443 FDLOG(LOG_ERR, fd_cb, "mbuf_copyback(hdr) failed: %d", error);
444 mbuf_freem(*packet);
445 *packet = NULL;
446 return error;
447 }
448
449 return 0;
450 }
451
452 static int
flow_divert_packet_append_tlv(mbuf_ref_t packet,uint8_t type,uint32_t length,const void __sized_by (length)* value)453 flow_divert_packet_append_tlv(mbuf_ref_t packet, uint8_t type, uint32_t length, const void __sized_by(length) *value)
454 {
455 uint32_t net_length = htonl(length);
456 int error = 0;
457
458 error = mbuf_copyback(packet, mbuf_pkthdr_len(packet), sizeof(type), &type, MBUF_DONTWAIT);
459 if (error) {
460 FDLOG(LOG_ERR, &nil_pcb, "failed to append the type (%d)", type);
461 return error;
462 }
463
464 error = mbuf_copyback(packet, mbuf_pkthdr_len(packet), sizeof(net_length), &net_length, MBUF_DONTWAIT);
465 if (error) {
466 FDLOG(LOG_ERR, &nil_pcb, "failed to append the length (%u)", length);
467 return error;
468 }
469
470 error = mbuf_copyback(packet, mbuf_pkthdr_len(packet), length, value, MBUF_DONTWAIT);
471 if (error) {
472 FDLOG0(LOG_ERR, &nil_pcb, "failed to append the value");
473 return error;
474 }
475
476 return error;
477 }
478
/*
 * Walk the TLV sequence in `packet` starting at `offset`, looking for
 * a TLV of the given `type`.  When `next` is non-zero, the TLV at
 * `offset` itself is skipped (its type is not read), so the search
 * yields the following occurrence.  Returns the byte offset of the
 * matching TLV's type field, or -1 with *err set on failure.
 */
static int
flow_divert_packet_find_tlv(mbuf_ref_t packet, int offset, uint8_t type, int *err, int next)
{
	size_t cursor = offset;
	int error = 0;
	uint32_t curr_length = 0;
	uint8_t curr_type = 0;

	*err = 0;

	do {
		if (!next) {
			error = mbuf_copydata(packet, cursor, sizeof(curr_type), &curr_type);
			if (error) {
				/* Ran off the end of the packet without finding `type` */
				*err = ENOENT;
				return -1;
			}
		} else {
			/* Skip the TLV at `offset`: pretend it has a non-matching type */
			next = 0;
			curr_type = FLOW_DIVERT_TLV_NIL;
		}

		if (curr_type != type) {
			/* Advance past this TLV: type byte, length field, then the value */
			cursor += sizeof(curr_type);
			error = mbuf_copydata(packet, cursor, sizeof(curr_length), &curr_length);
			if (error) {
				*err = error;
				return -1;
			}

			cursor += (sizeof(curr_length) + ntohl(curr_length));
		}
	} while (curr_type != type);

	return (int)cursor;
}
515
/*
 * Find the TLV of the given `type` starting at `offset` and copy out
 * its value.  If val_size is non-NULL it receives the full value
 * length (even when larger than buff_len).  If buff is non-NULL, buff
 * is zero-filled and then up to buff_len bytes of the value are copied
 * in (truncated, not NUL-terminated).  Returns 0 on success or an
 * errno.
 */
static int
flow_divert_packet_get_tlv(mbuf_ref_t packet, int offset, uint8_t type, size_t buff_len, void *buff __sized_by(buff_len), uint32_t *val_size)
{
	int error = 0;
	uint32_t length = 0;
	int tlv_offset = 0;

	tlv_offset = flow_divert_packet_find_tlv(packet, offset, type, &error, 0);
	if (tlv_offset < 0) {
		return error;
	}

	error = mbuf_copydata(packet, tlv_offset + sizeof(type), sizeof(length), &length);
	if (error) {
		return error;
	}

	length = ntohl(length);

	uint32_t data_offset = tlv_offset + sizeof(type) + sizeof(length);

	/* Reject a length field claiming more bytes than the packet holds */
	if (length > (mbuf_pkthdr_len(packet) - data_offset)) {
		FDLOG(LOG_ERR, &nil_pcb, "Length of %u TLV (%u) is larger than remaining packet data (%lu)", type, length, (mbuf_pkthdr_len(packet) - data_offset));
		return EINVAL;
	}

	if (val_size != NULL) {
		*val_size = length;
	}

	if (buff != NULL && buff_len > 0) {
		memset(buff, 0, buff_len);
		size_t to_copy = (length < buff_len) ? length : buff_len;
		error = mbuf_copydata(packet, data_offset, to_copy, buff);
		if (error) {
			return error;
		}
	}

	return 0;
}
557
/*
 * Compute the SHA-1 HMAC of the entire mbuf chain `packet` using the
 * group's token key.  `hmac` must point to at least SHA_DIGEST_LENGTH
 * bytes.  Returns ENOPROTOOPT when the crypto function table or the
 * group's key is unavailable.
 */
static int
flow_divert_packet_compute_hmac(mbuf_ref_t packet, struct flow_divert_group *group, uint8_t *hmac)
{
	mbuf_ref_t curr_mbuf = packet;

	if (g_crypto_funcs == NULL || group->token_key == NULL) {
		return ENOPROTOOPT;
	}

	cchmac_di_decl(g_crypto_funcs->ccsha1_di, hmac_ctx);
	g_crypto_funcs->cchmac_init_fn(g_crypto_funcs->ccsha1_di, hmac_ctx, group->token_key_size, group->token_key);

	/* Feed each mbuf in the chain through the HMAC */
	while (curr_mbuf != NULL) {
		g_crypto_funcs->cchmac_update_fn(g_crypto_funcs->ccsha1_di, hmac_ctx, mbuf_len(curr_mbuf), mbuf_data(curr_mbuf));
		curr_mbuf = mbuf_next(curr_mbuf);
	}

	g_crypto_funcs->cchmac_final_fn(g_crypto_funcs->ccsha1_di, hmac_ctx, hmac);

	return 0;
}
579
580 static int
flow_divert_packet_verify_hmac(mbuf_ref_t packet,uint32_t ctl_unit)581 flow_divert_packet_verify_hmac(mbuf_ref_t packet, uint32_t ctl_unit)
582 {
583 int error = 0;
584 struct flow_divert_group *group = NULL;
585 int hmac_offset;
586 uint8_t packet_hmac[SHA_DIGEST_LENGTH];
587 uint8_t computed_hmac[SHA_DIGEST_LENGTH];
588 mbuf_ref_t tail;
589
590 group = flow_divert_group_lookup(ctl_unit, NULL);
591 if (group == NULL) {
592 FDLOG(LOG_ERR, &nil_pcb, "Failed to lookup group for control unit %u", ctl_unit);
593 return ENOPROTOOPT;
594 }
595
596 lck_rw_lock_shared(&group->lck);
597
598 if (group->token_key == NULL) {
599 error = ENOPROTOOPT;
600 goto done;
601 }
602
603 hmac_offset = flow_divert_packet_find_tlv(packet, 0, FLOW_DIVERT_TLV_HMAC, &error, 0);
604 if (hmac_offset < 0) {
605 goto done;
606 }
607
608 error = flow_divert_packet_get_tlv(packet, hmac_offset, FLOW_DIVERT_TLV_HMAC, sizeof(packet_hmac), packet_hmac, NULL);
609 if (error) {
610 goto done;
611 }
612
613 /* Chop off the HMAC TLV */
614 error = mbuf_split(packet, hmac_offset, MBUF_WAITOK, &tail);
615 if (error) {
616 goto done;
617 }
618
619 mbuf_free(tail);
620
621 error = flow_divert_packet_compute_hmac(packet, group, computed_hmac);
622 if (error) {
623 goto done;
624 }
625
626 if (cc_cmp_safe(sizeof(packet_hmac), packet_hmac, computed_hmac)) {
627 FDLOG0(LOG_WARNING, &nil_pcb, "HMAC in token does not match computed HMAC");
628 error = EINVAL;
629 goto done;
630 }
631
632 done:
633 if (group != NULL) {
634 lck_rw_done(&group->lck);
635 FDGRP_RELEASE(group);
636 }
637 return error;
638 }
639
640 static void
flow_divert_add_data_statistics(struct flow_divert_pcb * fd_cb,size_t data_len,Boolean send)641 flow_divert_add_data_statistics(struct flow_divert_pcb *fd_cb, size_t data_len, Boolean send)
642 {
643 struct inpcb *inp = NULL;
644 struct ifnet *ifp = NULL;
645 stats_functional_type ifnet_count_type = stats_functional_type_none;
646
647 inp = sotoinpcb(fd_cb->so);
648 if (inp == NULL) {
649 return;
650 }
651
652 if (inp->inp_vflag & INP_IPV4) {
653 ifp = inp->inp_last_outifp;
654 } else if (inp->inp_vflag & INP_IPV6) {
655 ifp = inp->in6p_last_outifp;
656 }
657 if (ifp != NULL) {
658 ifnet_count_type = IFNET_COUNT_TYPE(ifp);
659 }
660
661 if (send) {
662 INP_ADD_STAT(inp, ifnet_count_type, txpackets, 1);
663 INP_ADD_STAT(inp, ifnet_count_type, txbytes, data_len);
664 } else {
665 INP_ADD_STAT(inp, ifnet_count_type, rxpackets, 1);
666 INP_ADD_STAT(inp, ifnet_count_type, rxbytes, data_len);
667 }
668 inp_set_activity_bitmap(inp);
669 }
670
671 static errno_t
flow_divert_check_no_cellular(struct flow_divert_pcb * fd_cb)672 flow_divert_check_no_cellular(struct flow_divert_pcb *fd_cb)
673 {
674 struct inpcb *inp = sotoinpcb(fd_cb->so);
675 if (INP_NO_CELLULAR(inp)) {
676 struct ifnet *ifp = NULL;
677 if (inp->inp_vflag & INP_IPV4) {
678 ifp = inp->inp_last_outifp;
679 } else if (inp->inp_vflag & INP_IPV6) {
680 ifp = inp->in6p_last_outifp;
681 }
682 if (ifp != NULL && IFNET_IS_CELLULAR(ifp)) {
683 FDLOG0(LOG_ERR, fd_cb, "Cellular is denied");
684 return EHOSTUNREACH;
685 }
686 }
687 return 0;
688 }
689
690 static errno_t
flow_divert_check_no_expensive(struct flow_divert_pcb * fd_cb)691 flow_divert_check_no_expensive(struct flow_divert_pcb *fd_cb)
692 {
693 struct inpcb *inp = sotoinpcb(fd_cb->so);
694 if (INP_NO_EXPENSIVE(inp)) {
695 struct ifnet *ifp = NULL;
696 if (inp->inp_vflag & INP_IPV4) {
697 ifp = inp->inp_last_outifp;
698 } else if (inp->inp_vflag & INP_IPV6) {
699 ifp = inp->in6p_last_outifp;
700 }
701 if (ifp != NULL && IFNET_IS_EXPENSIVE(ifp)) {
702 FDLOG0(LOG_ERR, fd_cb, "Expensive is denied");
703 return EHOSTUNREACH;
704 }
705 }
706 return 0;
707 }
708
709 static errno_t
flow_divert_check_no_constrained(struct flow_divert_pcb * fd_cb)710 flow_divert_check_no_constrained(struct flow_divert_pcb *fd_cb)
711 {
712 struct inpcb *inp = sotoinpcb(fd_cb->so);
713 if (INP_NO_CONSTRAINED(inp)) {
714 struct ifnet *ifp = NULL;
715 if (inp->inp_vflag & INP_IPV4) {
716 ifp = inp->inp_last_outifp;
717 } else if (inp->inp_vflag & INP_IPV6) {
718 ifp = inp->in6p_last_outifp;
719 }
720 if (ifp != NULL && IFNET_IS_CONSTRAINED(ifp)) {
721 FDLOG0(LOG_ERR, fd_cb, "Constrained is denied");
722 return EHOSTUNREACH;
723 }
724 }
725 return 0;
726 }
727
728 static void
flow_divert_update_closed_state(struct flow_divert_pcb * fd_cb,int how,bool tunnel,bool flush_snd)729 flow_divert_update_closed_state(struct flow_divert_pcb *fd_cb, int how, bool tunnel, bool flush_snd)
730 {
731 if (how != SHUT_RD) {
732 fd_cb->flags |= FLOW_DIVERT_WRITE_CLOSED;
733 if (tunnel || !(fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED)) {
734 fd_cb->flags |= FLOW_DIVERT_TUNNEL_WR_CLOSED;
735 if (flush_snd) {
736 /* If the tunnel is not accepting writes any more, then flush the send buffer */
737 sbflush(&fd_cb->so->so_snd);
738 }
739 }
740 }
741 if (how != SHUT_WR) {
742 fd_cb->flags |= FLOW_DIVERT_READ_CLOSED;
743 if (tunnel || !(fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED)) {
744 fd_cb->flags |= FLOW_DIVERT_TUNNEL_RD_CLOSED;
745 }
746 }
747 }
748
749 static uint16_t
trie_node_alloc(struct flow_divert_trie * trie)750 trie_node_alloc(struct flow_divert_trie *trie)
751 {
752 if (trie->nodes_free_next < trie->nodes_count) {
753 uint16_t node_idx = trie->nodes_free_next++;
754 TRIE_NODE(trie, node_idx).child_map = NULL_TRIE_IDX;
755 return node_idx;
756 } else {
757 return NULL_TRIE_IDX;
758 }
759 }
760
761 static uint16_t
trie_child_map_alloc(struct flow_divert_trie * trie)762 trie_child_map_alloc(struct flow_divert_trie *trie)
763 {
764 if (trie->child_maps_free_next < trie->child_maps_count) {
765 return trie->child_maps_free_next++;
766 } else {
767 return NULL_TRIE_IDX;
768 }
769 }
770
771 static uint16_t
trie_bytes_move(struct flow_divert_trie * trie,uint16_t bytes_idx,size_t bytes_size)772 trie_bytes_move(struct flow_divert_trie *trie, uint16_t bytes_idx, size_t bytes_size)
773 {
774 uint16_t start = trie->bytes_free_next;
775 if (start + bytes_size <= trie->bytes_count) {
776 if (start != bytes_idx) {
777 memmove(&TRIE_BYTE(trie, start), &TRIE_BYTE(trie, bytes_idx), bytes_size);
778 }
779 trie->bytes_free_next += bytes_size;
780 return start;
781 } else {
782 return NULL_TRIE_IDX;
783 }
784 }
785
/*
 * Insert the string stored at [string_start, string_start + string_len)
 * of the trie's byte pool into the compressed (path-compressed) prefix
 * trie.  Existing nodes are split when the new string diverges partway
 * through a node's label.  Returns the index of the node that now
 * terminates the string, or NULL_TRIE_IDX when the trie runs out of
 * nodes, child maps, or byte-pool space.
 */
static uint16_t
flow_divert_trie_insert(struct flow_divert_trie *trie, uint16_t string_start, size_t string_len)
{
	uint16_t current = trie->root;
	uint16_t child = trie->root;
	uint16_t string_end = string_start + (uint16_t)string_len;
	uint16_t string_idx = string_start;
	uint16_t string_remainder = (uint16_t)string_len;

	while (child != NULL_TRIE_IDX) {
		uint16_t parent = current;
		uint16_t node_idx;
		uint16_t current_end;

		current = child;
		child = NULL_TRIE_IDX;

		current_end = TRIE_NODE(trie, current).start + TRIE_NODE(trie, current).length;

		/* Advance while the node's label bytes match the input string */
		for (node_idx = TRIE_NODE(trie, current).start;
		    node_idx < current_end &&
		    string_idx < string_end &&
		    TRIE_BYTE(trie, node_idx) == TRIE_BYTE(trie, string_idx);
		    node_idx++, string_idx++) {
			;
		}

		string_remainder = string_end - string_idx;

		if (node_idx < (TRIE_NODE(trie, current).start + TRIE_NODE(trie, current).length)) {
			/*
			 * We did not reach the end of the current node's string.
			 * We need to split the current node into two:
			 * 1. A new node that contains the prefix of the node that matches
			 *    the prefix of the string being inserted.
			 * 2. The current node modified to point to the remainder
			 *    of the current node's string.
			 */
			uint16_t prefix = trie_node_alloc(trie);
			if (prefix == NULL_TRIE_IDX) {
				FDLOG0(LOG_ERR, &nil_pcb, "Ran out of trie nodes while splitting an existing node");
				return NULL_TRIE_IDX;
			}

			/*
			 * Prefix points to the portion of the current nodes's string that has matched
			 * the input string thus far.
			 */
			TRIE_NODE(trie, prefix).start = TRIE_NODE(trie, current).start;
			TRIE_NODE(trie, prefix).length = (node_idx - TRIE_NODE(trie, current).start);

			/*
			 * Prefix has the current node as the child corresponding to the first byte
			 * after the split.
			 */
			TRIE_NODE(trie, prefix).child_map = trie_child_map_alloc(trie);
			if (TRIE_NODE(trie, prefix).child_map == NULL_TRIE_IDX) {
				FDLOG0(LOG_ERR, &nil_pcb, "Ran out of child maps while splitting an existing node");
				return NULL_TRIE_IDX;
			}
			TRIE_CHILD(trie, prefix, TRIE_BYTE(trie, node_idx)) = current;

			/* Parent has the prefix as the child correspoding to the first byte in the prefix */
			TRIE_CHILD(trie, parent, TRIE_BYTE(trie, TRIE_NODE(trie, prefix).start)) = prefix;

			/* Current node is adjusted to point to the remainder */
			TRIE_NODE(trie, current).start = node_idx;
			TRIE_NODE(trie, current).length -= TRIE_NODE(trie, prefix).length;

			/* We want to insert the new leaf (if any) as a child of the prefix */
			current = prefix;
		}

		if (string_remainder > 0) {
			/*
			 * We still have bytes in the string that have not been matched yet.
			 * If the current node has children, iterate to the child corresponding
			 * to the next byte in the string.
			 */
			if (TRIE_NODE(trie, current).child_map != NULL_TRIE_IDX) {
				child = TRIE_CHILD(trie, current, TRIE_BYTE(trie, string_idx));
			}
		}
	} /* while (child != NULL_TRIE_IDX) */

	if (string_remainder > 0) {
		/* Add a new leaf containing the remainder of the string */
		uint16_t leaf = trie_node_alloc(trie);
		if (leaf == NULL_TRIE_IDX) {
			FDLOG0(LOG_ERR, &nil_pcb, "Ran out of trie nodes while inserting a new leaf");
			return NULL_TRIE_IDX;
		}

		TRIE_NODE(trie, leaf).start = trie_bytes_move(trie, string_idx, string_remainder);
		if (TRIE_NODE(trie, leaf).start == NULL_TRIE_IDX) {
			FDLOG0(LOG_ERR, &nil_pcb, "Ran out of bytes while inserting a new leaf");
			return NULL_TRIE_IDX;
		}
		TRIE_NODE(trie, leaf).length = string_remainder;

		/* Set the new leaf as the child of the current node */
		if (TRIE_NODE(trie, current).child_map == NULL_TRIE_IDX) {
			TRIE_NODE(trie, current).child_map = trie_child_map_alloc(trie);
			if (TRIE_NODE(trie, current).child_map == NULL_TRIE_IDX) {
				FDLOG0(LOG_ERR, &nil_pcb, "Ran out of child maps while inserting a new leaf");
				return NULL_TRIE_IDX;
			}
		}
		TRIE_CHILD(trie, current, TRIE_BYTE(trie, TRIE_NODE(trie, leaf).start)) = leaf;
		current = leaf;
	} /* else duplicate or this string is a prefix of one of the existing strings */

	return current;
}
900
#define APPLE_WEBCLIP_ID_PREFIX "com.apple.webapp"
/*
 * Look up a NUL-terminated string in the compressed prefix trie.
 * Returns the terminating node's index on an exact match, or on a
 * match of the Apple webclip ID prefix ("com.apple.webapp"), and
 * NULL_TRIE_IDX otherwise.  string_bytes_count is unused; the walk is
 * bounded by the string's NUL terminator.
 */
static uint16_t
flow_divert_trie_search(struct flow_divert_trie *trie, const uint8_t *string_bytes __sized_by(string_bytes_count), __unused size_t string_bytes_count)
{
	uint16_t current = trie->root;
	uint16_t string_idx = 0;

	while (current != NULL_TRIE_IDX) {
		uint16_t next = NULL_TRIE_IDX;
		uint16_t node_end = TRIE_NODE(trie, current).start + TRIE_NODE(trie, current).length;
		uint16_t node_idx;

		/* Match as many bytes of this node's label as possible */
		for (node_idx = TRIE_NODE(trie, current).start;
		    node_idx < node_end && string_bytes[string_idx] != '\0' && string_bytes[string_idx] == TRIE_BYTE(trie, node_idx);
		    node_idx++, string_idx++) {
			;
		}

		if (node_idx == node_end) {
			if (string_bytes[string_idx] == '\0') {
				return current; /* Got an exact match */
			} else if (string_idx == strlen(APPLE_WEBCLIP_ID_PREFIX) &&
			    0 == strlcmp((const char *)string_bytes, APPLE_WEBCLIP_ID_PREFIX, string_idx)) {
				return current; /* Got an apple webclip id prefix match */
			} else if (TRIE_NODE(trie, current).child_map != NULL_TRIE_IDX) {
				/* Descend to the child keyed by the next input byte */
				next = TRIE_CHILD(trie, current, string_bytes[string_idx]);
			}
		}
		current = next;
	}

	return NULL_TRIE_IDX;
}
934
/*
 * Shared context for the proc_iterate() filter/callout pair used by
 * flow_divert_find_proc_by_uuid() to locate a process by executable UUID.
 */
struct uuid_search_info {
	uuid_t target_uuid;                   /* UUID being searched for */
	char *found_signing_id __sized_by(found_signing_id_size); /* signing ID of the first match; freed by the callout */
	boolean_t found_multiple_signing_ids; /* TRUE once matches with differing signing IDs are seen */
	proc_t found_proc;                    /* the claimed process, or PROC_NULL */
	size_t found_signing_id_size;         /* allocation size of found_signing_id */
};
942
/*
 * proc_iterate() callout, run for each process that passed the UUID filter.
 * Claims the first process when all matches so far shared one signing
 * identifier; otherwise logs the ambiguity. In either case it releases the
 * signing ID saved by the filter.
 */
static int
flow_divert_find_proc_by_uuid_callout(proc_t p, void *arg)
{
	struct uuid_search_info *info = (struct uuid_search_info *)arg;
	int result = PROC_RETURNED_DONE; /* By default, we didn't find the process */

	if (info->found_signing_id != NULL) {
		if (!info->found_multiple_signing_ids) {
			/* All processes that were found had the same signing identifier, so just claim this first one and be done. */
			info->found_proc = p;
			result = PROC_CLAIMED_DONE;
		} else {
			uuid_string_t uuid_str;
			uuid_unparse(info->target_uuid, uuid_str);
			FDLOG(LOG_WARNING, &nil_pcb, "Found multiple processes with UUID %s with different signing identifiers", uuid_str);
		}
		/* The saved signing ID has served its purpose; release it */
		kfree_data_sized_by(info->found_signing_id, info->found_signing_id_size);
	}

	if (result == PROC_RETURNED_DONE) {
		uuid_string_t uuid_str;
		uuid_unparse(info->target_uuid, uuid_str);
		FDLOG(LOG_WARNING, &nil_pcb, "Failed to find a process with UUID %s", uuid_str);
	}

	return result;
}
970
971 static int
flow_divert_find_proc_by_uuid_filter(proc_t p,void * arg)972 flow_divert_find_proc_by_uuid_filter(proc_t p, void *arg)
973 {
974 struct uuid_search_info *info = (struct uuid_search_info *)arg;
975 int include = 0;
976
977 if (info->found_multiple_signing_ids) {
978 return include;
979 }
980
981 const unsigned char * p_uuid = proc_executableuuid_addr(p);
982 include = (uuid_compare(p_uuid, info->target_uuid) == 0);
983 if (include) {
984 const char *signing_id __null_terminated = cs_identity_get(p);
985 if (signing_id != NULL) {
986 FDLOG(LOG_INFO, &nil_pcb, "Found process %d with signing identifier %s", proc_getpid(p), signing_id);
987 size_t signing_id_size = strlen(signing_id) + 1;
988 if (info->found_signing_id == NULL) {
989 info->found_signing_id = kalloc_data(signing_id_size, Z_WAITOK);
990 info->found_signing_id_size = signing_id_size;
991 strlcpy(info->found_signing_id, signing_id, signing_id_size);
992 } else if (strlcmp(info->found_signing_id, signing_id, info->found_signing_id_size)) {
993 info->found_multiple_signing_ids = TRUE;
994 }
995 } else {
996 info->found_multiple_signing_ids = TRUE;
997 }
998 include = !info->found_multiple_signing_ids;
999 }
1000
1001 return include;
1002 }
1003
/*
 * Find the process whose executable UUID matches 'uuid' by iterating the
 * process list. Succeeds only when every matching process shares the same
 * code signing identifier (see the filter/callout above). Returns a claimed
 * proc — the caller is responsible for proc_rele() — or PROC_NULL.
 */
static proc_t
flow_divert_find_proc_by_uuid(uuid_t uuid)
{
	struct uuid_search_info info;

	if (LOG_INFO <= nil_pcb.log_level) {
		uuid_string_t uuid_str;
		uuid_unparse(uuid, uuid_str);
		FDLOG(LOG_INFO, &nil_pcb, "Looking for process with UUID %s", uuid_str);
	}

	memset(&info, 0, sizeof(info));
	info.found_proc = PROC_NULL;
	uuid_copy(info.target_uuid, uuid);

	proc_iterate(PROC_ALLPROCLIST, flow_divert_find_proc_by_uuid_callout, &info, flow_divert_find_proc_by_uuid_filter, &info);

	return info.found_proc;
}
1023
/*
 * Append code-signing information for 'proc' — signing ID, cdhash, and audit
 * token — to the CONNECT packet as TLVs. When is_effective is true and the
 * group uses an app map, the proc's signing ID must be present in the group's
 * signing ID trie, otherwise EPERM is returned. A non-NULL 'signing_id' came
 * from the connect token and is not re-appended here. Returns 0 or an errno.
 */
static int
flow_divert_add_proc_info(struct flow_divert_pcb *fd_cb, proc_t proc, const char *signing_id __null_terminated, mbuf_ref_t connect_packet, bool is_effective)
{
	int error = 0;
	uint8_t *cdhash = NULL;
	audit_token_t audit_token = {};
	const char *proc_cs_id __null_terminated = signing_id;

	proc_lock(proc);

	if (proc_cs_id == NULL) {
		/* Only trust the identity of a validly signed (or debugged) process */
		if (proc_getcsflags(proc) & (CS_VALID | CS_DEBUGGED)) {
			proc_cs_id = cs_identity_get(proc);
		} else {
			FDLOG0(LOG_ERR, fd_cb, "Signature of proc is invalid");
		}
	}

	if (is_effective) {
		lck_rw_lock_shared(&fd_cb->group->lck);
		if (!(fd_cb->group->flags & FLOW_DIVERT_GROUP_FLAG_NO_APP_MAP)) {
			if (proc_cs_id != NULL) {
				/* The effective process must appear in the group's signing ID trie */
				size_t proc_cs_id_size = strlen(proc_cs_id) + 1;
				uint16_t result = flow_divert_trie_search(&fd_cb->group->signing_id_trie, (const uint8_t *)__unsafe_null_terminated_to_indexable(proc_cs_id), proc_cs_id_size);
				if (result == NULL_TRIE_IDX) {
					FDLOG(LOG_WARNING, fd_cb, "%s did not match", proc_cs_id);
					error = EPERM;
				} else {
					FDLOG(LOG_INFO, fd_cb, "%s matched", proc_cs_id);
				}
			} else {
				error = EPERM;
			}
		}
		lck_rw_done(&fd_cb->group->lck);
	}

	if (error != 0) {
		goto done;
	}

	/*
	 * If signing_id is not NULL then it came from the flow divert token and will be added
	 * as part of the token, so there is no need to add it here.
	 */
	if (signing_id == NULL && proc_cs_id != NULL) {
		error = flow_divert_packet_append_tlv(connect_packet,
		    (is_effective ? FLOW_DIVERT_TLV_SIGNING_ID : FLOW_DIVERT_TLV_APP_REAL_SIGNING_ID),
		    (uint32_t)strlen(proc_cs_id),
		    __terminated_by_to_indexable(proc_cs_id));
		if (error != 0) {
			FDLOG(LOG_ERR, fd_cb, "failed to append the signing ID: %d", error);
			goto done;
		}
	}

	cdhash = cs_get_cdhash(proc);
	if (cdhash != NULL) {
		error = flow_divert_packet_append_tlv(connect_packet,
		    (is_effective ? FLOW_DIVERT_TLV_CDHASH : FLOW_DIVERT_TLV_APP_REAL_CDHASH),
		    SHA1_RESULTLEN,
		    cdhash);
		if (error) {
			FDLOG(LOG_ERR, fd_cb, "failed to append the cdhash: %d", error);
			goto done;
		}
	} else {
		/* A missing cdhash is logged but is not fatal */
		FDLOG0(LOG_ERR, fd_cb, "failed to get the cdhash");
	}

	task_t task __single = proc_task(proc);
	if (task != TASK_NULL) {
		mach_msg_type_number_t count = TASK_AUDIT_TOKEN_COUNT;
		kern_return_t rc = task_info(task, TASK_AUDIT_TOKEN, (task_info_t)&audit_token, &count);
		if (rc == KERN_SUCCESS) {
			/* A failure to append the audit token is logged but not fatal */
			int append_error = flow_divert_packet_append_tlv(connect_packet,
			    (is_effective ? FLOW_DIVERT_TLV_APP_AUDIT_TOKEN : FLOW_DIVERT_TLV_APP_REAL_AUDIT_TOKEN),
			    sizeof(audit_token_t),
			    &audit_token);
			if (append_error) {
				FDLOG(LOG_ERR, fd_cb, "failed to append app audit token: %d", append_error);
			}
		}
	}

done:
	proc_unlock(proc);

	return error;
}
1114
/*
 * Append signing info for every process associated with this socket: first
 * the "effective" source process (responsible proc, then delegated proc,
 * then real proc, in order of preference) and, when different, the real
 * source process. Any proc_find()/lookup references taken here are released
 * before returning. Returns 0 or the first errno encountered.
 */
static int
flow_divert_add_all_proc_info(struct flow_divert_pcb *fd_cb, struct socket *so, proc_t proc, const char *signing_id __null_terminated, mbuf_ref_t connect_packet)
{
	int error = 0;
	proc_t effective_proc = PROC_NULL;
	proc_t responsible_proc = PROC_NULL;
	proc_t real_proc = proc_find(so->last_pid);
	bool release_real_proc = true;

	proc_t src_proc = PROC_NULL;
	proc_t real_src_proc = PROC_NULL;

	if (real_proc == PROC_NULL) {
		/* Fall back to the caller-supplied proc or the current proc; no reference was taken, so don't release */
		FDLOG(LOG_ERR, fd_cb, "failed to find the real proc record for %d", so->last_pid);
		release_real_proc = false;
		real_proc = proc;
		if (real_proc == PROC_NULL) {
			real_proc = current_proc();
		}
	}

	if (so->so_flags & SOF_DELEGATED) {
		/* The socket was opened on behalf of another process; find it by pid or by UUID */
		if (proc_getpid(real_proc) != so->e_pid) {
			effective_proc = proc_find(so->e_pid);
		} else {
			const unsigned char * real_proc_uuid = proc_executableuuid_addr(real_proc);
			if (uuid_compare(real_proc_uuid, so->e_uuid)) {
				effective_proc = flow_divert_find_proc_by_uuid(so->e_uuid);
			}
		}
	}

#if defined(XNU_TARGET_OS_OSX)
	lck_rw_lock_shared(&fd_cb->group->lck);
	if (!(fd_cb->group->flags & FLOW_DIVERT_GROUP_FLAG_NO_APP_MAP)) {
		if (so->so_rpid > 0) {
			responsible_proc = proc_find(so->so_rpid);
		}
	}
	lck_rw_done(&fd_cb->group->lck);
#endif

	real_src_proc = real_proc;

	/* Pick the most specific process as the effective source */
	if (responsible_proc != PROC_NULL) {
		src_proc = responsible_proc;
		if (effective_proc != NULL) {
			real_src_proc = effective_proc;
		}
	} else if (effective_proc != PROC_NULL) {
		src_proc = effective_proc;
	} else {
		src_proc = real_proc;
	}

	error = flow_divert_add_proc_info(fd_cb, src_proc, signing_id, connect_packet, true);
	if (error != 0) {
		goto done;
	}

	/* Also record the real source process when it differs from the effective one */
	if (real_src_proc != NULL && real_src_proc != src_proc) {
		error = flow_divert_add_proc_info(fd_cb, real_src_proc, NULL, connect_packet, false);
		if (error != 0) {
			goto done;
		}
	}

done:
	if (responsible_proc != PROC_NULL) {
		proc_rele(responsible_proc);
	}

	if (effective_proc != PROC_NULL) {
		proc_rele(effective_proc);
	}

	if (real_proc != PROC_NULL && release_real_proc) {
		proc_rele(real_proc);
	}

	return error;
}
1197
/*
 * Deliver 'packet' to the flow's provider via the kernel control socket.
 * If no group is attached, the flow is marked closed, the socket is
 * disconnected, and an error is returned. If the control socket is backed
 * up (ENOBUFS), the packet is queued on the group's send queue instead and
 * 0 is returned; it will be flushed later. On any other failure the caller
 * retains ownership of 'packet'.
 */
static int
flow_divert_send_packet(struct flow_divert_pcb *fd_cb, mbuf_ref_t packet)
{
	int error;

	if (fd_cb->group == NULL) {
		/* No provider to send to; tear the flow down */
		FDLOG0(LOG_INFO, fd_cb, "no provider, cannot send packet");
		flow_divert_update_closed_state(fd_cb, SHUT_RDWR, true, false);
		flow_divert_disconnect_socket(fd_cb->so, !(fd_cb->flags & FLOW_DIVERT_IMPLICIT_CONNECT), false);
		if (SOCK_TYPE(fd_cb->so) == SOCK_STREAM) {
			error = ECONNABORTED;
		} else {
			error = EHOSTUNREACH;
		}
		fd_cb->so->so_error = (uint16_t)error;
		return error;
	}

	lck_rw_lock_shared(&fd_cb->group->lck);

	/* Only enqueue directly if nothing is already waiting in the send queue (preserves ordering) */
	if (MBUFQ_EMPTY(&fd_cb->group->send_queue)) {
		error = ctl_enqueuembuf(g_flow_divert_kctl_ref, fd_cb->group->ctl_unit, packet, CTL_DATA_EOR);
		if (error) {
			FDLOG(LOG_NOTICE, &nil_pcb, "flow_divert_send_packet: ctl_enqueuembuf returned an error: %d", error);
		}
	} else {
		error = ENOBUFS;
	}

	if (error == ENOBUFS) {
		/* Upgrade to an exclusive lock before modifying the send queue */
		if (!lck_rw_lock_shared_to_exclusive(&fd_cb->group->lck)) {
			lck_rw_lock_exclusive(&fd_cb->group->lck);
		}
		MBUFQ_ENQUEUE(&fd_cb->group->send_queue, packet);
		error = 0;
		OSTestAndSet(GROUP_BIT_CTL_ENQUEUE_BLOCKED, &fd_cb->group->atomic_bits);
	}

	lck_rw_done(&fd_cb->group->lck);

	return error;
}
1240
1241 static void
flow_divert_append_domain_name(char * domain_name __null_terminated,void * ctx)1242 flow_divert_append_domain_name(char *domain_name __null_terminated, void *ctx)
1243 {
1244 mbuf_ref_t packet = (mbuf_ref_t)ctx;
1245 size_t domain_name_length = 0;
1246
1247 if (packet == NULL || domain_name == NULL) {
1248 return;
1249 }
1250
1251 domain_name_length = strlen(domain_name);
1252 if (domain_name_length > 0 && domain_name_length < FLOW_DIVERT_MAX_NAME_SIZE) {
1253 int error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_TARGET_HOSTNAME, (uint32_t)domain_name_length, __terminated_by_to_indexable(domain_name));
1254 if (error) {
1255 FDLOG(LOG_ERR, &nil_pcb, "Failed to append %s: %d", domain_name, error);
1256 }
1257 }
1258 }
1259
/*
 * Build the CONNECT packet for a new flow: proc signing info, traffic class,
 * flow type, target endpoint (either the connect token or an explicit TLV
 * plus hostname), local address, outbound interface, flags, and the content
 * filter ID. On success the packet is returned through out_connect_packet;
 * on failure any partially built packet is freed. Consumes
 * fd_cb->connect_token when one is present.
 */
static int
flow_divert_create_connect_packet(struct flow_divert_pcb *fd_cb, struct sockaddr *to, struct socket *so, proc_t p, mbuf_ref_t *out_connect_packet)
{
	int error = 0;
	int flow_type = 0;
	char * signing_id __indexable = NULL;
	uint32_t sid_size = 0;
	mbuf_ref_t connect_packet = NULL;
	cfil_sock_id_t cfil_sock_id = CFIL_SOCK_ID_NONE;
	const void *cfil_id = NULL;
	size_t cfil_id_size = 0;
	struct inpcb *inp = sotoinpcb(so);
	struct ifnet *ifp = NULL;
	uint32_t flags = 0;

	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_CONNECT, &connect_packet);
	if (error) {
		goto done;
	}

	/* Extract the signing ID from an HMAC-protected connect token, if any */
	if (fd_cb->connect_token != NULL && (fd_cb->flags & FLOW_DIVERT_HAS_HMAC)) {
		int find_error = flow_divert_packet_get_tlv(fd_cb->connect_token, 0, FLOW_DIVERT_TLV_SIGNING_ID, 0, NULL, &sid_size);
		if (find_error == 0 && sid_size > 0) {
			signing_id = kalloc_data(sid_size + 1, Z_WAITOK | Z_ZERO);
			if (signing_id != NULL) {
				flow_divert_packet_get_tlv(fd_cb->connect_token, 0, FLOW_DIVERT_TLV_SIGNING_ID, sid_size, signing_id, NULL);
				FDLOG(LOG_INFO, fd_cb, "Got %s from token", signing_id);
			}
		}
	}

	// TODO: remove ternary operator after rdar://121487109 is fixed
	error = flow_divert_add_all_proc_info(fd_cb, so, p, NULL == signing_id ? NULL : __unsafe_null_terminated_from_indexable(signing_id), connect_packet);

	if (signing_id != NULL) {
		kfree_data(signing_id, sid_size + 1);
	}

	if (error) {
		FDLOG(LOG_ERR, fd_cb, "Failed to add source proc info: %d", error);
		goto done;
	}

	error = flow_divert_packet_append_tlv(connect_packet,
	    FLOW_DIVERT_TLV_TRAFFIC_CLASS,
	    sizeof(fd_cb->so->so_traffic_class),
	    &fd_cb->so->so_traffic_class);
	if (error) {
		goto done;
	}

	/* Only stream (TCP) and datagram (UDP) sockets are supported */
	if (SOCK_TYPE(fd_cb->so) == SOCK_STREAM) {
		flow_type = FLOW_DIVERT_FLOW_TYPE_TCP;
	} else if (SOCK_TYPE(fd_cb->so) == SOCK_DGRAM) {
		flow_type = FLOW_DIVERT_FLOW_TYPE_UDP;
	} else {
		error = EINVAL;
		goto done;
	}
	error = flow_divert_packet_append_tlv(connect_packet,
	    FLOW_DIVERT_TLV_FLOW_TYPE,
	    sizeof(flow_type),
	    &flow_type);

	if (error) {
		goto done;
	}

	if (fd_cb->connect_token != NULL) {
		/* The token carries the target endpoint; append it wholesale and consume it */
		unsigned int token_len = m_length(fd_cb->connect_token);
		mbuf_concatenate(connect_packet, fd_cb->connect_token);
		mbuf_pkthdr_adjustlen(connect_packet, token_len);
		fd_cb->connect_token = NULL;
	} else {
		error = flow_divert_append_target_endpoint_tlv(connect_packet, to);
		if (error) {
			goto done;
		}

		necp_with_inp_domain_name(so, connect_packet, flow_divert_append_domain_name);
	}

	if (fd_cb->local_endpoint.sa.sa_family == AF_INET || fd_cb->local_endpoint.sa.sa_family == AF_INET6) {
		error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_LOCAL_ADDR, fd_cb->local_endpoint.sa.sa_len, SA_BYTES(&(fd_cb->local_endpoint.sa)));
		if (error) {
			goto done;
		}
	}

	/* Determine the outbound interface and whether the socket is bound */
	if (inp->inp_vflag & INP_IPV4) {
		ifp = inp->inp_last_outifp;
	} else if (inp->inp_vflag & INP_IPV6) {
		ifp = inp->in6p_last_outifp;
	}
	if ((inp->inp_flags & INP_BOUND_IF) ||
	    ((inp->inp_vflag & INP_IPV6) && !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) ||
	    ((inp->inp_vflag & INP_IPV4) && inp->inp_laddr.s_addr != INADDR_ANY)) {
		flags |= FLOW_DIVERT_TOKEN_FLAG_BOUND;
		if (ifp == NULL) {
			ifp = inp->inp_boundifp;
		}
	}
	if (ifp != NULL) {
		uint32_t flow_if_index = ifp->if_index;
		error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_OUT_IF_INDEX,
		    sizeof(flow_if_index), &flow_if_index);
		if (error) {
			goto done;
		}
	}

	if (so->so_flags1 & SOF1_DATA_IDEMPOTENT) {
		flags |= FLOW_DIVERT_TOKEN_FLAG_TFO;
	}

	if (flags != 0) {
		error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_FLAGS, sizeof(flags), &flags);
		if (error) {
			goto done;
		}
	}

	/* Identify the flow to the content filter: by cfil sock ID, or by NECP client UUID when filtering is skipped */
	if (SOCK_TYPE(so) == SOCK_DGRAM) {
		cfil_sock_id = cfil_sock_id_from_datagram_socket(so, NULL, to);
	} else {
		cfil_sock_id = cfil_sock_id_from_socket(so);
	}

	if (cfil_sock_id != CFIL_SOCK_ID_NONE) {
		cfil_id = &cfil_sock_id;
		cfil_id_size = sizeof(cfil_sock_id);
	} else if (so->so_flags1 & SOF1_CONTENT_FILTER_SKIP) {
		cfil_id = &inp->necp_client_uuid;
		cfil_id_size = sizeof(inp->necp_client_uuid);
	}

	if (cfil_id != NULL && cfil_id_size > 0 && cfil_id_size <= sizeof(uuid_t)) {
		error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_CFIL_ID, (uint32_t)cfil_id_size, cfil_id);
		if (error) {
			goto done;
		}
	}

done:
	if (!error) {
		*out_connect_packet = connect_packet;
	} else if (connect_packet != NULL) {
		mbuf_freem(connect_packet);
	}

	return error;
}
1412
/*
 * Send the flow's saved CONNECT packet to the provider. A copy is made
 * first and stored back into fd_cb->connect_packet on success, so the
 * connect can be sent again later; flow_divert_send_packet() consumes the
 * original. Returns ENOENT when there is no connect packet to send.
 */
static int
flow_divert_send_connect_packet(struct flow_divert_pcb *fd_cb)
{
	int error = 0;
	mbuf_ref_t connect_packet = fd_cb->connect_packet;
	mbuf_ref_t saved_connect_packet = NULL;

	if (connect_packet != NULL) {
		/* Copy before sending, since sending consumes the packet */
		error = mbuf_copym(connect_packet, 0, mbuf_pkthdr_len(connect_packet), MBUF_DONTWAIT, &saved_connect_packet);
		if (error) {
			FDLOG0(LOG_ERR, fd_cb, "Failed to copy the connect packet");
			goto done;
		}

		error = flow_divert_send_packet(fd_cb, connect_packet);
		if (error) {
			goto done;
		}

		/* Keep the copy for any future re-send */
		fd_cb->connect_packet = saved_connect_packet;
		saved_connect_packet = NULL;
	} else {
		error = ENOENT;
	}
done:
	if (saved_connect_packet != NULL) {
		mbuf_freem(saved_connect_packet);
	}

	return error;
}
1444
1445 static int
flow_divert_send_connect_result(struct flow_divert_pcb * fd_cb)1446 flow_divert_send_connect_result(struct flow_divert_pcb *fd_cb)
1447 {
1448 int error = 0;
1449 mbuf_ref_t packet = NULL;
1450 int rbuff_space = 0;
1451
1452 error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_CONNECT_RESULT, &packet);
1453 if (error) {
1454 FDLOG(LOG_ERR, fd_cb, "failed to create a connect result packet: %d", error);
1455 goto done;
1456 }
1457
1458 rbuff_space = fd_cb->so->so_rcv.sb_hiwat;
1459 if (rbuff_space < 0) {
1460 rbuff_space = 0;
1461 }
1462 rbuff_space = htonl(rbuff_space);
1463 error = flow_divert_packet_append_tlv(packet,
1464 FLOW_DIVERT_TLV_SPACE_AVAILABLE,
1465 sizeof(rbuff_space),
1466 &rbuff_space);
1467 if (error) {
1468 goto done;
1469 }
1470
1471 if (fd_cb->local_endpoint.sa.sa_family == AF_INET || fd_cb->local_endpoint.sa.sa_family == AF_INET6) {
1472 error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_LOCAL_ADDR, fd_cb->local_endpoint.sa.sa_len, SA_BYTES(&(fd_cb->local_endpoint.sa)));
1473 if (error) {
1474 goto done;
1475 }
1476 }
1477
1478 error = flow_divert_send_packet(fd_cb, packet);
1479 if (error) {
1480 goto done;
1481 }
1482
1483 done:
1484 if (error && packet != NULL) {
1485 mbuf_freem(packet);
1486 }
1487
1488 return error;
1489 }
1490
1491 static int
flow_divert_send_close(struct flow_divert_pcb * fd_cb,int how)1492 flow_divert_send_close(struct flow_divert_pcb *fd_cb, int how)
1493 {
1494 int error = 0;
1495 mbuf_ref_t packet = NULL;
1496 uint32_t zero = 0;
1497
1498 error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_CLOSE, &packet);
1499 if (error) {
1500 FDLOG(LOG_ERR, fd_cb, "failed to create a close packet: %d", error);
1501 goto done;
1502 }
1503
1504 error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_ERROR_CODE, sizeof(zero), &zero);
1505 if (error) {
1506 FDLOG(LOG_ERR, fd_cb, "failed to add the error code TLV: %d", error);
1507 goto done;
1508 }
1509
1510 how = htonl(how);
1511 error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_HOW, sizeof(how), &how);
1512 if (error) {
1513 FDLOG(LOG_ERR, fd_cb, "failed to add the how flag: %d", error);
1514 goto done;
1515 }
1516
1517 error = flow_divert_send_packet(fd_cb, packet);
1518 if (error) {
1519 goto done;
1520 }
1521
1522 done:
1523 if (error && packet != NULL) {
1524 mbuf_free(packet);
1525 }
1526
1527 return error;
1528 }
1529
1530 static int
flow_divert_tunnel_how_closed(struct flow_divert_pcb * fd_cb)1531 flow_divert_tunnel_how_closed(struct flow_divert_pcb *fd_cb)
1532 {
1533 if ((fd_cb->flags & (FLOW_DIVERT_TUNNEL_RD_CLOSED | FLOW_DIVERT_TUNNEL_WR_CLOSED)) ==
1534 (FLOW_DIVERT_TUNNEL_RD_CLOSED | FLOW_DIVERT_TUNNEL_WR_CLOSED)) {
1535 return SHUT_RDWR;
1536 } else if (fd_cb->flags & FLOW_DIVERT_TUNNEL_RD_CLOSED) {
1537 return SHUT_RD;
1538 } else if (fd_cb->flags & FLOW_DIVERT_TUNNEL_WR_CLOSED) {
1539 return SHUT_WR;
1540 }
1541
1542 return -1;
1543 }
1544
1545 /*
1546 * Determine what close messages if any need to be sent to the tunnel. Returns TRUE if the tunnel is closed for both reads and
1547 * writes. Returns FALSE otherwise.
1548 */
static void
flow_divert_send_close_if_needed(struct flow_divert_pcb *fd_cb)
{
	int how = -1;

	/* Do not send any close messages if there is still data in the send buffer */
	if (fd_cb->so->so_snd.sb_cc == 0) {
		if ((fd_cb->flags & (FLOW_DIVERT_READ_CLOSED | FLOW_DIVERT_TUNNEL_RD_CLOSED)) == FLOW_DIVERT_READ_CLOSED) {
			/* Socket closed reads, but tunnel did not. Tell tunnel to close reads */
			how = SHUT_RD;
		}
		if ((fd_cb->flags & (FLOW_DIVERT_WRITE_CLOSED | FLOW_DIVERT_TUNNEL_WR_CLOSED)) == FLOW_DIVERT_WRITE_CLOSED) {
			/* Socket closed writes, but tunnel did not. Tell tunnel to close writes */
			if (how == SHUT_RD) {
				how = SHUT_RDWR;
			} else {
				how = SHUT_WR;
			}
		}
	}

	if (how != -1) {
		FDLOG(LOG_INFO, fd_cb, "sending close, how = %d", how);
		/* ENOBUFS means the close was not sent; leave the flags clear so it is retried later */
		if (flow_divert_send_close(fd_cb, how) != ENOBUFS) {
			/* Successfully sent the close packet. Record the ways in which the tunnel has been closed */
			if (how != SHUT_RD) {
				fd_cb->flags |= FLOW_DIVERT_TUNNEL_WR_CLOSED;
			}
			if (how != SHUT_WR) {
				fd_cb->flags |= FLOW_DIVERT_TUNNEL_RD_CLOSED;
			}
		}
	}

	/* Once the tunnel is closed in both directions, disconnect the socket */
	if (flow_divert_tunnel_how_closed(fd_cb) == SHUT_RDWR) {
		flow_divert_disconnect_socket(fd_cb->so, !(fd_cb->flags & FLOW_DIVERT_IMPLICIT_CONNECT), false);
	}
}
1587
/*
 * Send a DATA packet carrying 'data' (data_len bytes) for a stream flow.
 * The data chain is linked onto the packet header; on success it is consumed
 * along with the packet, and on failure it is detached first so the caller
 * retains ownership of 'data'. Byte counters and statistics are updated on
 * successful sends that carried data.
 */
static errno_t
flow_divert_send_data_packet(struct flow_divert_pcb *fd_cb, mbuf_ref_t data, size_t data_len)
{
	mbuf_ref_t packet = NULL;
	mbuf_ref_t last = NULL;
	int error = 0;

	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_DATA, &packet);
	if (error || packet == NULL) {
		FDLOG(LOG_ERR, fd_cb, "flow_divert_packet_init failed: %d", error);
		goto done;
	}

	/* Attach the payload to the end of the header chain */
	if (data_len > 0 && data_len <= INT_MAX && data != NULL) {
		last = m_last(packet);
		mbuf_setnext(last, data);
		mbuf_pkthdr_adjustlen(packet, (int)data_len);
	} else {
		data_len = 0;
	}
	error = flow_divert_send_packet(fd_cb, packet);
	if (error == 0 && data_len > 0) {
		fd_cb->bytes_sent += data_len;
		flow_divert_add_data_statistics(fd_cb, data_len, TRUE);
	}

done:
	if (error) {
		/* Detach the payload so the caller keeps ownership, then free only the header */
		if (last != NULL) {
			mbuf_setnext(last, NULL);
		}
		if (packet != NULL) {
			mbuf_freem(packet);
		}
	}

	return error;
}
1626
/*
 * Send a DATA packet for a datagram flow. Optionally carries the target
 * address (toaddr), a fragment marker, and the total datagram size TLV
 * (non-zero only on the first fragment of a fragmented datagram). Ownership
 * of 'data' follows the same rule as flow_divert_send_data_packet(): it is
 * consumed on success and detached (caller-owned) on failure.
 */
static errno_t
flow_divert_send_datagram_packet(struct flow_divert_pcb *fd_cb, mbuf_ref_t data, size_t data_len, struct sockaddr *toaddr, Boolean is_fragment, size_t datagram_size)
{
	mbuf_ref_t packet = NULL;
	mbuf_ref_t last = NULL;
	int error = 0;

	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_DATA, &packet);
	if (error || packet == NULL) {
		FDLOG(LOG_ERR, fd_cb, "flow_divert_packet_init failed: %d", error);
		goto done;
	}

	if (toaddr != NULL) {
		error = flow_divert_append_target_endpoint_tlv(packet, toaddr);
		if (error) {
			FDLOG(LOG_ERR, fd_cb, "flow_divert_append_target_endpoint_tlv() failed: %d", error);
			goto done;
		}
	}
	if (is_fragment) {
		error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_IS_FRAGMENT, sizeof(is_fragment), &is_fragment);
		if (error) {
			FDLOG(LOG_ERR, fd_cb, "flow_divert_packet_append_tlv(FLOW_DIVERT_TLV_IS_FRAGMENT) failed: %d", error);
			goto done;
		}
	}

	error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_DATAGRAM_SIZE, sizeof(datagram_size), &datagram_size);
	if (error) {
		FDLOG(LOG_ERR, fd_cb, "flow_divert_packet_append_tlv(FLOW_DIVERT_TLV_DATAGRAM_SIZE) failed: %d", error);
		goto done;
	}

	/* Attach the payload to the end of the header chain */
	if (data_len > 0 && data_len <= INT_MAX && data != NULL) {
		last = m_last(packet);
		mbuf_setnext(last, data);
		mbuf_pkthdr_adjustlen(packet, (int)data_len);
	} else {
		data_len = 0;
	}
	error = flow_divert_send_packet(fd_cb, packet);
	if (error == 0 && data_len > 0) {
		fd_cb->bytes_sent += data_len;
		flow_divert_add_data_statistics(fd_cb, data_len, TRUE);
	}

done:
	if (error) {
		/* Detach the payload so the caller keeps ownership, then free only the header */
		if (last != NULL) {
			mbuf_setnext(last, NULL);
		}
		if (packet != NULL) {
			mbuf_freem(packet);
		}
	}

	return error;
}
1686
/*
 * Send a datagram larger than FLOW_DIVERT_CHUNK_SIZE as a series of fragment
 * packets. Only the first fragment carries the target address and the total
 * datagram size. Consumes 'datagram' on success; on failure the unsent
 * remainder is freed here.
 */
static errno_t
flow_divert_send_fragmented_datagram(struct flow_divert_pcb *fd_cb, mbuf_ref_t datagram, size_t datagram_len, struct sockaddr *toaddr)
{
	mbuf_ref_t next_data = datagram;
	size_t remaining_len = datagram_len;
	mbuf_ref_t remaining_data = NULL;
	int error = 0;
	bool first = true;

	while (remaining_len > 0 && next_data != NULL) {
		size_t to_send = remaining_len;
		remaining_data = NULL;

		/* Split off at most one chunk's worth; the tail stays in remaining_data */
		if (to_send > FLOW_DIVERT_CHUNK_SIZE) {
			to_send = FLOW_DIVERT_CHUNK_SIZE;
			error = mbuf_split(next_data, to_send, MBUF_DONTWAIT, &remaining_data);
			if (error) {
				break;
			}
		}

		error = flow_divert_send_datagram_packet(fd_cb, next_data, to_send, (first ? toaddr : NULL), TRUE, (first ? datagram_len : 0));
		if (error) {
			break;
		}

		first = false;
		remaining_len -= to_send;
		next_data = remaining_data;
	}

	if (error) {
		/* On failure the current fragment was detached by the callee; free it and any tail */
		if (next_data != NULL) {
			mbuf_freem(next_data);
		}
		if (remaining_data != NULL) {
			mbuf_freem(remaining_data);
		}
	}
	return error;
}
1728
/*
 * Flush data buffered in the socket's send buffer out to the provider.
 * Stream flows send the buffered bytes in FLOW_DIVERT_CHUNK_SIZE pieces;
 * datagram flows send one buffered record per packet (fragmenting oversized
 * datagrams). Unless 'force' is set, no more than the current send window
 * is sent; the window is decremented by the amount actually sent.
 */
static void
flow_divert_send_buffered_data(struct flow_divert_pcb *fd_cb, Boolean force)
{
	size_t to_send;
	size_t sent = 0;
	int error = 0;
	mbuf_ref_t buffer;

	to_send = fd_cb->so->so_snd.sb_cc;
	buffer = fd_cb->so->so_snd.sb_mb;

	if (buffer == NULL && to_send > 0) {
		FDLOG(LOG_ERR, fd_cb, "Send buffer is NULL, but size is supposed to be %lu", to_send);
		return;
	}

	/* Ignore the send window if force is enabled */
	if (!force && (to_send > fd_cb->send_window)) {
		to_send = fd_cb->send_window;
	}

	if (SOCK_TYPE(fd_cb->so) == SOCK_STREAM) {
		/* Copy and send chunk-sized pieces; the originals stay buffered until sbdrop below */
		while (sent < to_send) {
			mbuf_ref_t data;
			size_t data_len;

			data_len = to_send - sent;
			if (data_len > FLOW_DIVERT_CHUNK_SIZE) {
				data_len = FLOW_DIVERT_CHUNK_SIZE;
			}

			error = mbuf_copym(buffer, sent, data_len, MBUF_DONTWAIT, &data);
			if (error) {
				FDLOG(LOG_ERR, fd_cb, "mbuf_copym failed: %d", error);
				break;
			}

			error = flow_divert_send_data_packet(fd_cb, data, data_len);
			if (error) {
				if (data != NULL) {
					mbuf_freem(data);
				}
				break;
			}

			sent += data_len;
		}
		/* Drop what was sent from the send buffer and wake any blocked writers */
		sbdrop(&fd_cb->so->so_snd, (int)sent);
		sowwakeup(fd_cb->so);
	} else if (SOCK_TYPE(fd_cb->so) == SOCK_DGRAM) {
		mbuf_ref_t data;
		mbuf_ref_t m;
		size_t data_len;

		/* Each buffered record is one datagram, possibly with a prepended address mbuf */
		while (buffer) {
			struct sockaddr *toaddr = flow_divert_get_buffered_target_address(buffer);

			m = buffer;
			if (toaddr != NULL) {
				/* look for data in the chain */
				do {
					m = m->m_next;
					if (m != NULL && m->m_type == MT_DATA) {
						break;
					}
				} while (m);
				if (m == NULL) {
					/* unexpected */
					FDLOG0(LOG_ERR, fd_cb, "failed to find type MT_DATA in the mbuf chain.");
					goto move_on;
				}
			}
			data_len = mbuf_pkthdr_len(m);
			if (data_len > 0) {
				FDLOG(LOG_DEBUG, fd_cb, "mbuf_copym() data_len = %lu", data_len);
				error = mbuf_copym(m, 0, data_len, MBUF_DONTWAIT, &data);
				if (error) {
					FDLOG(LOG_ERR, fd_cb, "mbuf_copym failed: %d", error);
					break;
				}
			} else {
				data = NULL;
			}
			if (data_len <= FLOW_DIVERT_CHUNK_SIZE) {
				error = flow_divert_send_datagram_packet(fd_cb, data, data_len, toaddr, FALSE, 0);
			} else {
				/* The fragment path takes ownership of 'data' */
				error = flow_divert_send_fragmented_datagram(fd_cb, data, data_len, toaddr);
				data = NULL;
			}
			if (error) {
				if (data != NULL) {
					mbuf_freem(data);
				}
				break;
			}
			sent += data_len;
move_on:
			/* Advance before dropping: sbdroprecord frees the current record */
			buffer = buffer->m_nextpkt;
			(void) sbdroprecord(&(fd_cb->so->so_snd));
		}
	}

	if (sent > 0) {
		FDLOG(LOG_DEBUG, fd_cb, "sent %lu bytes of buffered data", sent);
		/* Shrink the send window by what was consumed, clamping at zero */
		if (fd_cb->send_window >= sent) {
			fd_cb->send_window -= sent;
		} else {
			fd_cb->send_window = 0;
		}
	}
}
1840
/*
 * Send application data for this flow, respecting the send window. Whatever
 * cannot be sent immediately (window exhausted, send buffer already backed
 * up, or a send failure) is appended to the socket's send buffer to be
 * flushed later by flow_divert_send_buffered_data(). Takes ownership of
 * 'data' in all paths. Returns 0, or ENOBUFS when data had to be dropped
 * because the send buffer was full.
 */
static int
flow_divert_send_app_data(struct flow_divert_pcb *fd_cb, mbuf_ref_t data, size_t data_size, struct sockaddr *toaddr)
{
	size_t to_send = data_size;
	int error = 0;

	if (to_send > fd_cb->send_window) {
		to_send = fd_cb->send_window;
	}

	if (fd_cb->so->so_snd.sb_cc > 0) {
		to_send = 0; /* If the send buffer is non-empty, then we can't send anything */
	}

	if (SOCK_TYPE(fd_cb->so) == SOCK_STREAM) {
		size_t sent = 0;
		mbuf_ref_t remaining_data = data;
		size_t remaining_size = data_size;
		mbuf_ref_t pkt_data = NULL;
		/* Peel off and send chunk-sized pieces while window remains */
		while (sent < to_send && remaining_data != NULL && remaining_size > 0) {
			size_t pkt_data_len;

			pkt_data = remaining_data;

			if ((to_send - sent) > FLOW_DIVERT_CHUNK_SIZE) {
				pkt_data_len = FLOW_DIVERT_CHUNK_SIZE;
			} else {
				pkt_data_len = to_send - sent;
			}

			if (pkt_data_len < remaining_size) {
				/* Split the chunk off the front; the tail stays in remaining_data */
				error = mbuf_split(pkt_data, pkt_data_len, MBUF_DONTWAIT, &remaining_data);
				if (error) {
					FDLOG(LOG_ERR, fd_cb, "mbuf_split failed: %d", error);
					pkt_data = NULL;
					break;
				}
				remaining_size -= pkt_data_len;
			} else {
				remaining_data = NULL;
				remaining_size = 0;
			}

			error = flow_divert_send_data_packet(fd_cb, pkt_data, pkt_data_len);
			if (error) {
				break;
			}

			pkt_data = NULL;
			sent += pkt_data_len;
		}

		/* Charge the sent bytes against the send window, clamping at zero */
		if (fd_cb->send_window >= sent) {
			fd_cb->send_window -= sent;
		} else {
			fd_cb->send_window = 0;
		}

		/* Send failures are not fatal here; the leftover data is buffered below */
		error = 0;

		if (pkt_data != NULL) {
			/* A chunk that failed to send: buffer it or drop it with ENOBUFS */
			if (sbspace(&fd_cb->so->so_snd) > 0) {
				if (!sbappendstream(&fd_cb->so->so_snd, pkt_data)) {
					FDLOG(LOG_ERR, fd_cb, "sbappendstream failed with pkt_data, send buffer size = %u, send_window = %u\n",
					    fd_cb->so->so_snd.sb_cc, fd_cb->send_window);
				}
			} else {
				mbuf_freem(pkt_data);
				error = ENOBUFS;
			}
		}

		if (remaining_data != NULL) {
			/* Data beyond the window: buffer it or drop it with ENOBUFS */
			if (sbspace(&fd_cb->so->so_snd) > 0) {
				if (!sbappendstream(&fd_cb->so->so_snd, remaining_data)) {
					FDLOG(LOG_ERR, fd_cb, "sbappendstream failed with remaining_data, send buffer size = %u, send_window = %u\n",
					    fd_cb->so->so_snd.sb_cc, fd_cb->send_window);
				}
			} else {
				mbuf_freem(remaining_data);
				error = ENOBUFS;
			}
		}
	} else if (SOCK_TYPE(fd_cb->so) == SOCK_DGRAM) {
		int send_dgram_error = 0;
		/* Datagrams are all-or-nothing: send now if the window allows (or the datagram is empty) */
		if (to_send || data_size == 0) {
			if (data_size <= FLOW_DIVERT_CHUNK_SIZE) {
				send_dgram_error = flow_divert_send_datagram_packet(fd_cb, data, data_size, toaddr, FALSE, 0);
			} else {
				send_dgram_error = flow_divert_send_fragmented_datagram(fd_cb, data, data_size, toaddr);
				data = NULL;
			}
			if (send_dgram_error) {
				FDLOG(LOG_NOTICE, fd_cb, "flow_divert_send_datagram_packet failed with error %d, send data size = %lu", send_dgram_error, data_size);
			} else {
				if (data_size >= fd_cb->send_window) {
					fd_cb->send_window = 0;
				} else {
					fd_cb->send_window -= data_size;
				}
				data = NULL;
			}
		}

		if (data != NULL) {
			/* buffer it */
			if (sbspace(&fd_cb->so->so_snd) > 0) {
				if (toaddr != NULL) {
					int append_error = 0;
					if (!sbappendaddr(&fd_cb->so->so_snd, toaddr, data, NULL, &append_error)) {
						FDLOG(LOG_ERR, fd_cb,
						    "sbappendaddr failed. send buffer size = %u, send_window = %u, error = %d",
						    fd_cb->so->so_snd.sb_cc, fd_cb->send_window, append_error);
					}
				} else {
					if (!sbappendrecord(&fd_cb->so->so_snd, data)) {
						FDLOG(LOG_ERR, fd_cb,
						    "sbappendrecord failed. send buffer size = %u, send_window = %u",
						    fd_cb->so->so_snd.sb_cc, fd_cb->send_window);
					}
				}
			} else {
				FDLOG(LOG_ERR, fd_cb, "flow_divert_send_datagram_packet failed with error %d, send data size = %lu, dropping the datagram", error, data_size);
				mbuf_freem(data);
			}
		}
	}

	return error;
}
1971
1972 static int
flow_divert_send_read_notification(struct flow_divert_pcb * fd_cb)1973 flow_divert_send_read_notification(struct flow_divert_pcb *fd_cb)
1974 {
1975 int error = 0;
1976 mbuf_ref_t packet = NULL;
1977
1978 error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_READ_NOTIFY, &packet);
1979 if (error) {
1980 FDLOG(LOG_ERR, fd_cb, "failed to create a read notification packet: %d", error);
1981 goto done;
1982 }
1983
1984 error = flow_divert_send_packet(fd_cb, packet);
1985 if (error) {
1986 goto done;
1987 }
1988
1989 done:
1990 if (error && packet != NULL) {
1991 mbuf_free(packet);
1992 }
1993
1994 return error;
1995 }
1996
1997 static int
flow_divert_send_traffic_class_update(struct flow_divert_pcb * fd_cb,int traffic_class)1998 flow_divert_send_traffic_class_update(struct flow_divert_pcb *fd_cb, int traffic_class)
1999 {
2000 int error = 0;
2001 mbuf_ref_t packet = NULL;
2002
2003 error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_PROPERTIES_UPDATE, &packet);
2004 if (error) {
2005 FDLOG(LOG_ERR, fd_cb, "failed to create a properties update packet: %d", error);
2006 goto done;
2007 }
2008
2009 error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_TRAFFIC_CLASS, sizeof(traffic_class), &traffic_class);
2010 if (error) {
2011 FDLOG(LOG_ERR, fd_cb, "failed to add the traffic class: %d", error);
2012 goto done;
2013 }
2014
2015 error = flow_divert_send_packet(fd_cb, packet);
2016 if (error) {
2017 goto done;
2018 }
2019
2020 done:
2021 if (error && packet != NULL) {
2022 mbuf_free(packet);
2023 }
2024
2025 return error;
2026 }
2027
2028 static void
flow_divert_set_local_endpoint(struct flow_divert_pcb * fd_cb,struct sockaddr * local_endpoint)2029 flow_divert_set_local_endpoint(struct flow_divert_pcb *fd_cb, struct sockaddr *local_endpoint)
2030 {
2031 struct inpcb *inp = sotoinpcb(fd_cb->so);
2032
2033 if (local_endpoint->sa_family == AF_INET6) {
2034 if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) && (fd_cb->flags & FLOW_DIVERT_SHOULD_SET_LOCAL_ADDR)) {
2035 fd_cb->flags |= FLOW_DIVERT_DID_SET_LOCAL_ADDR;
2036 inp->in6p_laddr = (satosin6(local_endpoint))->sin6_addr;
2037 inp->inp_lifscope = (satosin6(local_endpoint))->sin6_scope_id;
2038 in6_verify_ifscope(&inp->in6p_laddr, inp->inp_lifscope);
2039 }
2040 if (inp->inp_lport == 0) {
2041 inp->inp_lport = (satosin6(local_endpoint))->sin6_port;
2042 }
2043 } else if (local_endpoint->sa_family == AF_INET) {
2044 if (inp->inp_laddr.s_addr == INADDR_ANY && (fd_cb->flags & FLOW_DIVERT_SHOULD_SET_LOCAL_ADDR)) {
2045 fd_cb->flags |= FLOW_DIVERT_DID_SET_LOCAL_ADDR;
2046 inp->inp_laddr = (satosin(local_endpoint))->sin_addr;
2047 }
2048 if (inp->inp_lport == 0) {
2049 inp->inp_lport = (satosin(local_endpoint))->sin_port;
2050 }
2051 }
2052 }
2053
2054 static void
flow_divert_set_remote_endpoint(struct flow_divert_pcb * fd_cb,struct sockaddr * remote_endpoint)2055 flow_divert_set_remote_endpoint(struct flow_divert_pcb *fd_cb, struct sockaddr *remote_endpoint)
2056 {
2057 struct inpcb *inp = sotoinpcb(fd_cb->so);
2058
2059 if (remote_endpoint->sa_family == AF_INET6) {
2060 if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
2061 inp->in6p_faddr = (satosin6(remote_endpoint))->sin6_addr;
2062 inp->inp_fifscope = (satosin6(remote_endpoint))->sin6_scope_id;
2063 in6_verify_ifscope(&inp->in6p_faddr, inp->inp_fifscope);
2064 }
2065 if (inp->inp_fport == 0) {
2066 inp->inp_fport = (satosin6(remote_endpoint))->sin6_port;
2067 }
2068 } else if (remote_endpoint->sa_family == AF_INET) {
2069 if (inp->inp_faddr.s_addr == INADDR_ANY) {
2070 inp->inp_faddr = (satosin(remote_endpoint))->sin_addr;
2071 }
2072 if (inp->inp_fport == 0) {
2073 inp->inp_fport = (satosin(remote_endpoint))->sin_port;
2074 }
2075 }
2076 }
2077
/*
 * Derive the next kernel control unit to use for a flow.
 *
 * On entry, *ctl_unit is the preferred unit and *aggregate_unit is a bitmask
 * of fallback units (bit N set means unit N+1 is a candidate). Both are
 * updated in place so that repeated calls walk through the candidates:
 * a consumed candidate is cleared from the mask, and *ctl_unit is zeroed
 * once the preferred unit has been handled.
 *
 * Returns the chosen control unit (0 when none is available) and sets
 * *is_aggregate when the returned unit still has fallbacks behind it.
 */
static uint32_t
flow_divert_derive_kernel_control_unit(pid_t pid, uint32_t *ctl_unit, uint32_t *aggregate_unit, bool *is_aggregate)
{
	uint32_t result = *ctl_unit;

	// There are two models supported for deriving control units:
	// 1. A series of flow divert units that allow "transparently" failing
	// over to the next unit. For this model, the aggregate_unit contains list
	// of all control units (between 1 and 30) masked over each other.
	// 2. An indication that in-process flow divert should be preferred, with
	// an out of process flow divert to fail over to. For this model, the
	// ctl_unit is FLOW_DIVERT_IN_PROCESS_UNIT. In this case, that unit
	// is returned first, with the unpacked aggregate unit returned as a
	// fallback.
	*is_aggregate = false;
	if (*ctl_unit == FLOW_DIVERT_IN_PROCESS_UNIT) {
		bool found_unit = false;
		if (pid != 0) {
			// Look for an in-process group that is already open, and use that unit
			struct flow_divert_group *group = NULL;
			TAILQ_FOREACH(group, &g_flow_divert_in_process_group_list, chain) {
				if (group->in_process_pid == pid) {
					// Found an in-process group for our same PID, use it
					found_unit = true;
					result = group->ctl_unit;
					break;
				}
			}

			// If an in-process group isn't open yet, send a signal up through NECP to request one
			if (!found_unit) {
				necp_client_request_in_process_flow_divert(pid);
			}
		}

		// If a unit was found, return it
		if (found_unit) {
			if (aggregate_unit != NULL && *aggregate_unit != 0) {
				*is_aggregate = true;
			}
			// The next time around, the aggregate unit values will be picked up
			*ctl_unit = 0;
			return result;
		}

		// If no unit was found, fall through and clear out the ctl_unit
		result = 0;
		*ctl_unit = 0;
	}

	if (aggregate_unit != NULL && *aggregate_unit != 0) {
		uint32_t counter;
		struct flow_divert_group *lower_order_group = NULL;

		// Scan every unit bit in the mask and pick the open group with the
		// lowest "order" value among the candidates.
		for (counter = 0; counter < (GROUP_COUNT_MAX - 1); counter++) {
			if ((*aggregate_unit) & (1 << counter)) {
				struct flow_divert_group *group = NULL;
				group = flow_divert_group_lookup(counter + 1, NULL);

				if (group != NULL) {
					if (lower_order_group == NULL) {
						lower_order_group = group;
					} else if ((group->order < lower_order_group->order)) {
						lower_order_group = group;
					}
				}
			}
		}

		if (lower_order_group != NULL) {
			// Consume the chosen unit so the next call moves on to the
			// remaining fallbacks.
			*aggregate_unit &= ~(1 << (lower_order_group->ctl_unit - 1));
			*is_aggregate = true;
			return lower_order_group->ctl_unit;
		} else {
			// No candidate group is open; report the (possibly zero) result.
			*ctl_unit = 0;
			return result;
		}
	} else {
		*ctl_unit = 0;
		return result;
	}
}
2160
/*
 * Move the flow to the next available flow divert group and re-send the
 * connect packet. Called when the current group failed to handle the flow.
 *
 * Walks the candidate units derived from the policy control unit and the
 * flow's aggregate unit mask until an insertable group is found. Returns
 * 0 on success; EALREADY if the next unit is the one already in use;
 * ENOENT when no usable group remains or the connect packet could not be
 * sent (flow divert should then be disabled by the caller).
 */
static int
flow_divert_try_next_group(struct flow_divert_pcb *fd_cb)
{
	int error = 0;
	uint32_t policy_control_unit = fd_cb->policy_control_unit;

	/* Detach from the current group before probing for the next one. */
	flow_divert_pcb_remove(fd_cb);

	do {
		struct flow_divert_group *next_group = NULL;
		bool is_aggregate = false;
		uint32_t next_ctl_unit = flow_divert_derive_kernel_control_unit(0, &policy_control_unit, &(fd_cb->aggregate_unit), &is_aggregate);

		if (fd_cb->control_group_unit == next_ctl_unit) {
			FDLOG0(LOG_NOTICE, fd_cb, "Next control unit is the same as the current control unit, disabling flow divert");
			error = EALREADY;
			break;
		}

		if (next_ctl_unit == 0 || next_ctl_unit >= GROUP_COUNT_MAX) {
			FDLOG0(LOG_NOTICE, fd_cb, "No more valid control units, disabling flow divert");
			error = ENOENT;
			break;
		}

		next_group = flow_divert_group_lookup(next_ctl_unit, fd_cb);
		if (next_group == NULL) {
			FDLOG(LOG_NOTICE, fd_cb, "Group for control unit %u does not exist", next_ctl_unit);
			continue;
		}

		FDLOG(LOG_NOTICE, fd_cb, "Moving from %u to %u", fd_cb->control_group_unit, next_ctl_unit);

		error = flow_divert_pcb_insert(fd_cb, next_group);
		if (error == 0) {
			/* Track whether this flow still has transparent fallbacks behind it. */
			if (is_aggregate) {
				fd_cb->flags |= FLOW_DIVERT_FLOW_IS_TRANSPARENT;
			} else {
				fd_cb->flags &= ~FLOW_DIVERT_FLOW_IS_TRANSPARENT;
			}
		}
		FDGRP_RELEASE(next_group);
	} while (fd_cb->group == NULL);

	if (fd_cb->group == NULL) {
		return error ? error : ENOENT;
	}

	error = flow_divert_send_connect_packet(fd_cb);
	if (error) {
		FDLOG(LOG_NOTICE, fd_cb, "Failed to send the connect packet to %u, disabling flow divert", fd_cb->control_group_unit);
		flow_divert_pcb_remove(fd_cb);
		error = ENOENT;
	}

	return error;
}
2218
/*
 * Permanently disable flow divert for this flow and fall back to the
 * socket's original protocol.
 *
 * Restores the inpcb's pre-divert IP state, detaches the pcb from the
 * socket, reverts the protocol switch, re-issues the original connect
 * (unless the connect was implicit), and replays any data buffered in
 * the send buffer through the original protocol's pru_send. On failure,
 * the error is stored in so_error and the socket is disconnected.
 */
static void
flow_divert_disable(struct flow_divert_pcb *fd_cb)
{
	struct socket *so = NULL;
	mbuf_ref_t buffer;
	int error = 0;
	proc_t last_proc = NULL;
	struct sockaddr *remote_endpoint = fd_cb->original_remote_endpoint;
	bool do_connect = !(fd_cb->flags & FLOW_DIVERT_IMPLICIT_CONNECT);
	struct inpcb *inp = NULL;

	so = fd_cb->so;
	if (so == NULL) {
		goto done;
	}

	FDLOG0(LOG_NOTICE, fd_cb, "Skipped all flow divert services, disabling flow divert");

	/* Restore the IP state */
	inp = sotoinpcb(so);
	inp->inp_vflag = fd_cb->original_vflag;
	inp->inp_faddr.s_addr = INADDR_ANY;
	inp->inp_fport = 0;
	memset(&(inp->in6p_faddr), 0, sizeof(inp->in6p_faddr));
	inp->inp_fifscope = IFSCOPE_NONE;
	inp->in6p_fport = 0;
	/* If flow divert set the local address, clear it out */
	if (fd_cb->flags & FLOW_DIVERT_DID_SET_LOCAL_ADDR) {
		inp->inp_laddr.s_addr = INADDR_ANY;
		memset(&(inp->in6p_laddr), 0, sizeof(inp->in6p_laddr));
		inp->inp_lifscope = IFSCOPE_NONE;
	}
	inp->inp_last_outifp = fd_cb->original_last_outifp;
	inp->in6p_last_outifp = fd_cb->original_last_outifp6;

	/* Dis-associate the socket */
	so->so_flags &= ~SOF_FLOW_DIVERT;
	so->so_flags1 |= SOF1_FLOW_DIVERT_SKIP;
	so->so_fd_pcb = NULL;
	fd_cb->so = NULL;

	FDRELEASE(fd_cb); /* Release the socket's reference */

	/* Revert back to the original protocol */
	so->so_proto = pffindproto(SOCK_DOM(so), SOCK_PROTO(so), SOCK_TYPE(so));

	/* Reset the socket state to avoid confusing NECP */
	so->so_state &= ~(SS_ISCONNECTING | SS_ISCONNECTED);

	last_proc = proc_find(so->last_pid);

	if (do_connect) {
		/* Connect using the original protocol */
		error = (*so->so_proto->pr_usrreqs->pru_connect)(so, remote_endpoint, (last_proc != NULL ? last_proc : current_proc()));
		if (error) {
			FDLOG(LOG_ERR, fd_cb, "Failed to connect using the socket's original protocol: %d", error);
			goto done;
		}
	}

	buffer = so->so_snd.sb_mb;
	if (buffer == NULL) {
		/* No buffered data, done */
		goto done;
	}

	/* Send any buffered data using the original protocol */
	if (SOCK_TYPE(so) == SOCK_STREAM) {
		mbuf_ref_t data_to_send = NULL;
		size_t data_len = so->so_snd.sb_cc;

		/* Copy the whole chain out before flushing the send buffer. */
		error = mbuf_copym(buffer, 0, data_len, MBUF_DONTWAIT, &data_to_send);
		if (error) {
			FDLOG0(LOG_ERR, fd_cb, "Failed to copy the mbuf chain in the socket's send buffer");
			goto done;
		}

		sbflush(&so->so_snd);

		if (data_to_send->m_flags & M_PKTHDR) {
			mbuf_pkthdr_setlen(data_to_send, data_len);
		}

		error = (*so->so_proto->pr_usrreqs->pru_send)(so,
		    0,
		    data_to_send,
		    NULL,
		    NULL,
		    (last_proc != NULL ? last_proc : current_proc()));

		/* EWOULDBLOCK just means the protocol buffered it; not a failure. */
		if (error && error != EWOULDBLOCK) {
			FDLOG(LOG_ERR, fd_cb, "Failed to send queued TCP data using the socket's original protocol: %d", error);
		} else {
			error = 0;
		}
	} else if (SOCK_TYPE(so) == SOCK_DGRAM) {
		struct sockbuf *sb = &so->so_snd;
		MBUFQ_HEAD(send_queue_head) send_queue;
		MBUFQ_INIT(&send_queue);

		/* Flush the send buffer, moving all records to a temporary queue */
		while (sb->sb_mb != NULL) {
			mbuf_ref_t record = sb->sb_mb;
			mbuf_ref_t m = record;
			sb->sb_mb = sb->sb_mb->m_nextpkt;
			while (m != NULL) {
				/* Adjust the sockbuf accounting for each mbuf removed. */
				sbfree(sb, m);
				m = m->m_next;
			}
			record->m_nextpkt = NULL;
			MBUFQ_ENQUEUE(&send_queue, record);
		}
		SB_EMPTY_FIXUP(sb);

		/* Replay each buffered datagram record through the original protocol. */
		while (!MBUFQ_EMPTY(&send_queue)) {
			mbuf_ref_t next_record = MBUFQ_FIRST(&send_queue);
			mbuf_ref_t addr = NULL;
			mbuf_ref_t control = NULL;
			mbuf_ref_t last_control = NULL;
			mbuf_ref_t data = NULL;
			mbuf_ref_t m = next_record;
			struct sockaddr *to_endpoint = NULL;

			MBUFQ_DEQUEUE(&send_queue, next_record);

			/* Locate the address, control, and data portions of the record. */
			while (m != NULL) {
				if (m->m_type == MT_SONAME) {
					addr = m;
				} else if (m->m_type == MT_CONTROL) {
					if (control == NULL) {
						control = m;
					}
					last_control = m;
				} else if (m->m_type == MT_DATA) {
					data = m;
					break;
				}
				m = m->m_next;
			}

			/* Only use the buffered destination when we did not connect above. */
			if (addr != NULL && !do_connect) {
				to_endpoint = flow_divert_get_buffered_target_address(addr);
				if (to_endpoint == NULL) {
					FDLOG0(LOG_NOTICE, fd_cb, "Failed to get the remote address from the buffer");
				}
			}

			if (data == NULL) {
				FDLOG0(LOG_ERR, fd_cb, "Buffered record does not contain any data");
				mbuf_freem(next_record);
				continue;
			}

			if (!(data->m_flags & M_PKTHDR)) {
				FDLOG0(LOG_ERR, fd_cb, "Buffered data does not have a packet header");
				mbuf_freem(next_record);
				continue;
			}

			/* Detach the data chain from the address/control mbufs. */
			if (addr != NULL) {
				addr->m_next = NULL;
			}

			if (last_control != NULL) {
				last_control->m_next = NULL;
			}

			error = (*so->so_proto->pr_usrreqs->pru_send)(so,
			    0,
			    data,
			    to_endpoint,
			    control,
			    (last_proc != NULL ? last_proc : current_proc()));

			if (addr != NULL) {
				mbuf_freem(addr);
			}

			if (error) {
				FDLOG(LOG_ERR, fd_cb, "Failed to send queued UDP data using the socket's original protocol: %d", error);
			}
		}
	}
done:
	if (last_proc != NULL) {
		proc_rele(last_proc);
	}

	if (error && so != NULL) {
		so->so_error = (uint16_t)error;
		flow_divert_disconnect_socket(so, do_connect, false);
	}
}
2412
/*
 * Scope the flow's socket to the interface with index out_if_index.
 *
 * If the socket already has a last-out interface and it differs from the
 * requested one, the socket is re-bound via inp_bindif; when
 * derive_new_address is set, a new local address appropriate for the new
 * interface is derived from the original remote endpoint. Finally the
 * inpcb's last-out interface (and, under Skywalk, its netns token) is
 * updated. No-op when the socket is gone or out_if_index is not positive.
 */
static void
flow_divert_scope(struct flow_divert_pcb *fd_cb, int out_if_index, bool derive_new_address)
{
	struct socket *so = NULL;
	struct inpcb *inp = NULL;
	struct ifnet *current_ifp = NULL;
	struct ifnet * __single new_ifp = NULL;
	int error = 0;

	so = fd_cb->so;
	if (so == NULL) {
		return;
	}

	inp = sotoinpcb(so);

	if (out_if_index <= 0) {
		return;
	}

	if (inp->inp_vflag & INP_IPV6) {
		current_ifp = inp->in6p_last_outifp;
	} else {
		current_ifp = inp->inp_last_outifp;
	}

	if (current_ifp != NULL) {
		if (current_ifp->if_index == out_if_index) {
			/* No change */
			return;
		}

		/* Scope the socket to the given interface */
		error = inp_bindif(inp, out_if_index, &new_ifp);
		if (error != 0) {
			FDLOG(LOG_ERR, fd_cb, "failed to scope to %d because inp_bindif returned %d", out_if_index, error);
			return;
		}

		if (derive_new_address && fd_cb->original_remote_endpoint != NULL) {
			/* Get the appropriate address for the given interface */
			if (inp->inp_vflag & INP_IPV6) {
				inp->in6p_laddr = sa6_any.sin6_addr;
				error = in6_pcbladdr(inp, fd_cb->original_remote_endpoint, &(fd_cb->local_endpoint.sin6.sin6_addr), NULL);
			} else {
				inp->inp_laddr.s_addr = INADDR_ANY;
				error = in_pcbladdr(inp, fd_cb->original_remote_endpoint, &(fd_cb->local_endpoint.sin.sin_addr), IFSCOPE_NONE, NULL, 0);
			}

			/* Address derivation failure is non-fatal; keep the new scope. */
			if (error != 0) {
				FDLOG(LOG_WARNING, fd_cb, "failed to derive a new local address from %d because in_pcbladdr returned %d", out_if_index, error);
			}
		}
	} else {
		/* No current interface: just resolve the index to an ifnet. */
		ifnet_head_lock_shared();
		if (IF_INDEX_IN_RANGE(out_if_index)) {
			new_ifp = ifindex2ifnet[out_if_index];
		}
		ifnet_head_done();
	}

	/* Update the "last interface" of the socket */
	if (new_ifp != NULL) {
		if (inp->inp_vflag & INP_IPV6) {
			inp->in6p_last_outifp = new_ifp;
		} else {
			inp->inp_last_outifp = new_ifp;
		}

#if SKYWALK
		if (NETNS_TOKEN_VALID(&inp->inp_netns_token)) {
			netns_set_ifnet(&inp->inp_netns_token, new_ifp);
		}
#endif /* SKYWALK */
	}
}
2489
/*
 * Handle a CONNECT_RESULT message from the flow divert provider.
 *
 * Parses the result TLVs (error code, send window, optional control unit,
 * local/remote endpoints, output interface, app data), applies them to the
 * socket, and either completes the connection (soisconnected) or fails
 * over to the next group / disables flow divert on error. Runs under
 * FDLOCK and the socket lock.
 */
static void
flow_divert_handle_connect_result(struct flow_divert_pcb *fd_cb, mbuf_ref_t packet, int offset)
{
	uint32_t connect_error = 0;
	uint32_t ctl_unit = 0;
	int error = 0;
	union sockaddr_in_4_6 local_endpoint = {};
	union sockaddr_in_4_6 remote_endpoint = {};
	int out_if_index = 0;
	uint32_t send_window = 0;
	uint32_t app_data_length = 0;

	memset(&local_endpoint, 0, sizeof(local_endpoint));
	memset(&remote_endpoint, 0, sizeof(remote_endpoint));

	/* The error code and send window TLVs are mandatory; all others are optional. */
	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_ERROR_CODE, sizeof(connect_error), &connect_error, NULL);
	if (error) {
		FDLOG(LOG_ERR, fd_cb, "failed to get the connect result: %d", error);
		return;
	}

	connect_error = ntohl(connect_error);
	FDLOG(LOG_INFO, fd_cb, "received connect result %u", connect_error);

	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_SPACE_AVAILABLE, sizeof(send_window), &send_window, NULL);
	if (error) {
		FDLOG(LOG_ERR, fd_cb, "failed to get the send window: %d", error);
		return;
	}

	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_CTL_UNIT, sizeof(ctl_unit), &ctl_unit, NULL);
	if (error) {
		FDLOG0(LOG_INFO, fd_cb, "No control unit provided in the connect result");
	}

	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_LOCAL_ADDR, sizeof(local_endpoint), &(local_endpoint.sin6), NULL);
	if (error) {
		FDLOG0(LOG_INFO, fd_cb, "No local address provided");
	}

	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_REMOTE_ADDR, sizeof(remote_endpoint), &(remote_endpoint.sin6), NULL);
	if (error) {
		FDLOG0(LOG_INFO, fd_cb, "No remote address provided");
	}

	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_OUT_IF_INDEX, sizeof(out_if_index), &out_if_index, NULL);
	if (error) {
		FDLOG0(LOG_INFO, fd_cb, "No output if index provided");
	}

	/* Query for the app data length only; the payload is copied later. */
	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_APP_DATA, 0, NULL, &app_data_length);
	if (error) {
		FDLOG0(LOG_INFO, fd_cb, "No application data provided in connect result");
	}

	/* Optional-TLV lookup failures are not errors from here on. */
	error = 0;

	FDLOCK(fd_cb);
	if (fd_cb->so != NULL) {
		struct inpcb *inp = NULL;
		struct socket *so = fd_cb->so;
		bool local_address_is_valid = false;

		socket_lock(so, 1);

		if (!(so->so_flags & SOF_FLOW_DIVERT)) {
			FDLOG0(LOG_NOTICE, fd_cb, "socket is not attached any more, ignoring connect result");
			goto done;
		}

		if (SOCK_TYPE(so) == SOCK_STREAM && !(so->so_state & SS_ISCONNECTING)) {
			FDLOG0(LOG_ERR, fd_cb, "TCP socket is not in the connecting state, ignoring connect result");
			goto done;
		}

		inp = sotoinpcb(so);

		if (connect_error || error) {
			goto set_socket_state;
		}

		/* Apply the provider's local endpoint; note sa_len is filled in here
		 * because the TLV payload may not carry a valid length. */
		if (flow_divert_is_sockaddr_valid(SA(&local_endpoint))) {
			if (local_endpoint.sa.sa_family == AF_INET) {
				local_endpoint.sa.sa_len = sizeof(struct sockaddr_in);
				if ((inp->inp_vflag & INP_IPV4) && local_endpoint.sin.sin_addr.s_addr != INADDR_ANY) {
					local_address_is_valid = true;
					fd_cb->local_endpoint = local_endpoint;
					inp->inp_laddr.s_addr = INADDR_ANY;
				} else {
					fd_cb->local_endpoint.sin.sin_port = local_endpoint.sin.sin_port;
				}
			} else if (local_endpoint.sa.sa_family == AF_INET6) {
				local_endpoint.sa.sa_len = sizeof(struct sockaddr_in6);
				if ((inp->inp_vflag & INP_IPV6) && !IN6_IS_ADDR_UNSPECIFIED(&local_endpoint.sin6.sin6_addr)) {
					local_address_is_valid = true;
					fd_cb->local_endpoint = local_endpoint;
					inp->in6p_laddr = sa6_any.sin6_addr;
				} else {
					fd_cb->local_endpoint.sin6.sin6_port = local_endpoint.sin6.sin6_port;
				}
			}
		}

		/* Re-scope; derive a fresh local address only if none was provided. */
		flow_divert_scope(fd_cb, out_if_index, !local_address_is_valid);
		flow_divert_set_local_endpoint(fd_cb, SA(&fd_cb->local_endpoint));

		if (flow_divert_is_sockaddr_valid(SA(&remote_endpoint)) && SOCK_TYPE(so) == SOCK_STREAM) {
			if (remote_endpoint.sa.sa_family == AF_INET) {
				remote_endpoint.sa.sa_len = sizeof(struct sockaddr_in);
			} else if (remote_endpoint.sa.sa_family == AF_INET6) {
				remote_endpoint.sa.sa_len = sizeof(struct sockaddr_in6);
			}
			flow_divert_set_remote_endpoint(fd_cb, SA(&remote_endpoint));
		}

		/* Copy out any app data TLV, replacing previously stored app data. */
		if (app_data_length > 0) {
			uint8_t * app_data = NULL;
			app_data = kalloc_data(app_data_length, Z_WAITOK);
			if (app_data != NULL) {
				error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_APP_DATA, app_data_length, app_data, NULL);
				if (error == 0) {
					FDLOG(LOG_INFO, fd_cb, "Got %u bytes of app data from the connect result", app_data_length);
					if (fd_cb->app_data != NULL) {
						kfree_data_sized_by(fd_cb->app_data, fd_cb->app_data_length);
					}
					fd_cb->app_data = app_data;
					fd_cb->app_data_length = app_data_length;
				} else {
					FDLOG(LOG_ERR, fd_cb, "Failed to copy %u bytes of application data from the connect result packet", app_data_length);
					kfree_data(app_data, app_data_length);
				}
			} else {
				FDLOG(LOG_ERR, fd_cb, "Failed to allocate a buffer of size %u to hold the application data from the connect result", app_data_length);
			}
		}

		if (error) {
			goto set_socket_state;
		}

		if (fd_cb->group == NULL) {
			error = EINVAL;
			goto set_socket_state;
		}

		/* The provider may redirect the flow to a different control unit. */
		ctl_unit = ntohl(ctl_unit);
		if (ctl_unit > 0) {
			int insert_error = 0;
			struct flow_divert_group *grp = NULL;

			if (ctl_unit >= GROUP_COUNT_MAX) {
				FDLOG(LOG_ERR, fd_cb, "Connect result contains an invalid control unit: %u", ctl_unit);
				error = EINVAL;
				goto set_socket_state;
			}

			grp = flow_divert_group_lookup(ctl_unit, fd_cb);
			if (grp == NULL) {
				error = ECONNRESET;
				goto set_socket_state;
			}

			flow_divert_pcb_remove(fd_cb);
			insert_error = flow_divert_pcb_insert(fd_cb, grp);
			FDGRP_RELEASE(grp);

			if (insert_error != 0) {
				error = ECONNRESET;
				goto set_socket_state;
			}
		}

		fd_cb->send_window = ntohl(send_window);

set_socket_state:
		if (!connect_error && !error) {
			FDLOG0(LOG_INFO, fd_cb, "sending connect result");
			error = flow_divert_send_connect_result(fd_cb);
		}

		if (connect_error || error) {
			/* On a provider-side connect error, try failing over to the
			 * next group before giving up on flow divert entirely. */
			if (connect_error && fd_cb->control_group_unit != fd_cb->policy_control_unit) {
				error = flow_divert_try_next_group(fd_cb);
				if (error && fd_cb->policy_control_unit == 0) {
					flow_divert_disable(fd_cb);
					goto done;
				} else if (error == 0) {
					goto done;
				}
			}

			if (!connect_error) {
				flow_divert_update_closed_state(fd_cb, SHUT_RDWR, false, true);
				so->so_error = (uint16_t)error;
				flow_divert_send_close_if_needed(fd_cb);
			} else {
				flow_divert_update_closed_state(fd_cb, SHUT_RDWR, true, true);
				so->so_error = (uint16_t)connect_error;
			}
			flow_divert_disconnect_socket(so, !(fd_cb->flags & FLOW_DIVERT_IMPLICIT_CONNECT), false);
		} else {
#if NECP
			/* Update NECP client with connected five-tuple */
			if (!uuid_is_null(inp->necp_client_uuid)) {
				socket_unlock(so, 0);
				necp_client_assign_from_socket(so->last_pid, inp->necp_client_uuid, inp);
				socket_lock(so, 0);
				if (!(so->so_flags & SOF_FLOW_DIVERT)) {
					/* The socket was closed while it was unlocked */
					goto done;
				}
			}
#endif /* NECP */

			flow_divert_send_buffered_data(fd_cb, FALSE);
			soisconnected(so);
		}

		/* We don't need the connect packet any more */
		if (fd_cb->connect_packet != NULL) {
			mbuf_freem(fd_cb->connect_packet);
			fd_cb->connect_packet = NULL;
		}

		/* We don't need the original remote endpoint any more */
		free_sockaddr(fd_cb->original_remote_endpoint);
done:
		socket_unlock(so, 1);
	}
	FDUNLOCK(fd_cb);
}
2721
2722 static void
flow_divert_handle_close(struct flow_divert_pcb * fd_cb,mbuf_ref_t packet,int offset)2723 flow_divert_handle_close(struct flow_divert_pcb *fd_cb, mbuf_ref_t packet, int offset)
2724 {
2725 uint32_t close_error = 0;
2726 int error = 0;
2727 int how = 0;
2728
2729 error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_ERROR_CODE, sizeof(close_error), &close_error, NULL);
2730 if (error) {
2731 FDLOG(LOG_ERR, fd_cb, "failed to get the close error: %d", error);
2732 return;
2733 }
2734
2735 error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_HOW, sizeof(how), &how, NULL);
2736 if (error) {
2737 FDLOG(LOG_ERR, fd_cb, "failed to get the close how flag: %d", error);
2738 return;
2739 }
2740
2741 how = ntohl(how);
2742
2743 FDLOG(LOG_INFO, fd_cb, "close received, how = %d", how);
2744
2745 FDLOCK(fd_cb);
2746 if (fd_cb->so != NULL) {
2747 bool is_connected = (SOCK_TYPE(fd_cb->so) == SOCK_STREAM || !(fd_cb->flags & FLOW_DIVERT_IMPLICIT_CONNECT));
2748 socket_lock(fd_cb->so, 0);
2749
2750 if (!(fd_cb->so->so_flags & SOF_FLOW_DIVERT)) {
2751 FDLOG0(LOG_NOTICE, fd_cb, "socket is not attached any more, ignoring close from provider");
2752 goto done;
2753 }
2754
2755 fd_cb->so->so_error = (uint16_t)ntohl(close_error);
2756
2757 flow_divert_update_closed_state(fd_cb, how, true, true);
2758
2759 /* Only do this for stream flows because "shutdown by peer" doesn't make sense for datagram flows */
2760 how = flow_divert_tunnel_how_closed(fd_cb);
2761 if (how == SHUT_RDWR) {
2762 flow_divert_disconnect_socket(fd_cb->so, is_connected, true);
2763 } else if (how == SHUT_RD && is_connected) {
2764 socantrcvmore(fd_cb->so);
2765 } else if (how == SHUT_WR && is_connected) {
2766 socantsendmore(fd_cb->so);
2767 }
2768 done:
2769 socket_unlock(fd_cb->so, 0);
2770 }
2771 FDUNLOCK(fd_cb);
2772 }
2773
2774 static mbuf_ref_t
flow_divert_create_control_mbuf(struct flow_divert_pcb * fd_cb)2775 flow_divert_create_control_mbuf(struct flow_divert_pcb *fd_cb)
2776 {
2777 struct inpcb *inp = sotoinpcb(fd_cb->so);
2778 bool need_recvdstaddr = false;
2779 /* Socket flow tracking needs to see the local address */
2780 need_recvdstaddr = SOFLOW_ENABLED(inp->inp_socket);
2781 if ((inp->inp_vflag & INP_IPV4) &&
2782 fd_cb->local_endpoint.sa.sa_family == AF_INET &&
2783 ((inp->inp_flags & INP_RECVDSTADDR) || need_recvdstaddr)) {
2784 return sbcreatecontrol((caddr_t)&(fd_cb->local_endpoint.sin.sin_addr), sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP);
2785 } else if ((inp->inp_vflag & INP_IPV6) &&
2786 fd_cb->local_endpoint.sa.sa_family == AF_INET6 &&
2787 ((inp->inp_flags & IN6P_PKTINFO) || need_recvdstaddr)) {
2788 struct in6_pktinfo pi6;
2789 memset(&pi6, 0, sizeof(pi6));
2790 pi6.ipi6_addr = fd_cb->local_endpoint.sin6.sin6_addr;
2791
2792 return sbcreatecontrol((caddr_t)&pi6, sizeof(pi6), IPV6_PKTINFO, IPPROTO_IPV6);
2793 }
2794 return NULL;
2795 }
2796
/*
 * Handle a DATA message from the flow divert provider: deliver the payload
 * (everything past `offset` in `packet`) to the socket's receive buffer.
 *
 * For datagram sockets an optional REMOTE_ADDR TLV preceding the payload is
 * parsed and attached to the record. Returns 0 on success, ENOBUFS when the
 * receive buffer is full (a read notification is then deferred until the
 * app reads), or an mbuf_split error.
 */
static int
flow_divert_handle_data(struct flow_divert_pcb *fd_cb, mbuf_ref_t packet, size_t offset)
{
	int error = 0;

	FDLOCK(fd_cb);
	if (fd_cb->so != NULL) {
		mbuf_ref_t data = NULL;
		size_t data_size;
		struct sockaddr_storage remote_address;
		boolean_t got_remote_sa = FALSE;
		boolean_t appended = FALSE;
		boolean_t append_success = FALSE;

		socket_lock(fd_cb->so, 0);

		if (!(fd_cb->so->so_flags & SOF_FLOW_DIVERT)) {
			FDLOG0(LOG_NOTICE, fd_cb, "socket is not attached any more, ignoring inbound data");
			goto done;
		}

		if (sbspace(&fd_cb->so->so_rcv) == 0) {
			/* Defer the read notification until the app frees up space. */
			error = ENOBUFS;
			fd_cb->flags |= FLOW_DIVERT_NOTIFY_ON_RECEIVED;
			FDLOG0(LOG_INFO, fd_cb, "Receive buffer is full, will send read notification when app reads some data");
			goto done;
		}

		if (SOCK_TYPE(fd_cb->so) == SOCK_DGRAM) {
			uint32_t val_size = 0;

			/* check if we got remote address with data */
			memset(&remote_address, 0, sizeof(remote_address));
			error = flow_divert_packet_get_tlv(packet, (int)offset, FLOW_DIVERT_TLV_REMOTE_ADDR, sizeof(remote_address), &remote_address, &val_size);
			if (error || val_size > sizeof(remote_address)) {
				FDLOG0(LOG_INFO, fd_cb, "No remote address provided");
				error = 0;
			} else {
				/* Clamp ss_len so it never claims more than the storage holds. */
				if (remote_address.ss_len > sizeof(remote_address)) {
					remote_address.ss_len = sizeof(remote_address);
				}
				/* validate the address */
				if (flow_divert_is_sockaddr_valid((struct sockaddr *)&remote_address)) {
					got_remote_sa = TRUE;
				} else {
					FDLOG0(LOG_INFO, fd_cb, "Remote address is invalid");
				}
				/* Skip past the TLV (type byte + length word + value). */
				offset += (sizeof(uint8_t) + sizeof(uint32_t) + val_size);
			}
		}

		data_size = (mbuf_pkthdr_len(packet) - offset);

		if (fd_cb->so->so_state & SS_CANTRCVMORE) {
			FDLOG(LOG_NOTICE, fd_cb, "app cannot receive any more data, dropping %lu bytes of data", data_size);
			goto done;
		}

		if (SOCK_TYPE(fd_cb->so) != SOCK_STREAM && SOCK_TYPE(fd_cb->so) != SOCK_DGRAM) {
			FDLOG(LOG_ERR, fd_cb, "socket has an unsupported type: %d", SOCK_TYPE(fd_cb->so));
			goto done;
		}

		FDLOG(LOG_DEBUG, fd_cb, "received %lu bytes of data", data_size);

		/* Separate the payload from the packet header/TLVs. */
		error = mbuf_split(packet, offset, MBUF_DONTWAIT, &data);
		if (error || data == NULL) {
			FDLOG(LOG_ERR, fd_cb, "mbuf_split failed: %d", error);
			goto done;
		}

		if (SOCK_TYPE(fd_cb->so) == SOCK_STREAM) {
			appended = (sbappendstream(&fd_cb->so->so_rcv, data) != 0);
			append_success = TRUE;
		} else {
			struct sockaddr * __single append_sa = NULL;
			mbuf_ref_t mctl;

			/* Use the TLV address if present, else the socket's peer address. */
			if (got_remote_sa == TRUE) {
				error = flow_divert_dup_addr(remote_address.ss_family, (struct sockaddr *)&remote_address, &append_sa);
			} else {
				if (SOCK_CHECK_DOM(fd_cb->so, AF_INET6)) {
					error = in6_mapped_peeraddr(fd_cb->so, &append_sa);
				} else {
					error = in_getpeeraddr(fd_cb->so, &append_sa);
				}
			}
			if (error) {
				FDLOG0(LOG_ERR, fd_cb, "failed to dup the socket address.");
			}

			mctl = flow_divert_create_control_mbuf(fd_cb);
			int append_error = 0;
			appended = sbappendaddr(&fd_cb->so->so_rcv, append_sa, data, mctl, &append_error);
			if (appended || append_error == 0) {
				append_success = TRUE;
			} else {
				FDLOG(LOG_ERR, fd_cb, "failed to append %lu bytes of data: %d", data_size, append_error);
			}

			free_sockaddr(append_sa);
		}

		/* Account for the delivered bytes even if no wakeup is needed. */
		if (append_success) {
			fd_cb->bytes_received += data_size;
			flow_divert_add_data_statistics(fd_cb, data_size, FALSE);
		}

		if (appended) {
			sorwakeup(fd_cb->so);
		}
done:
		socket_unlock(fd_cb->so, 0);
	}
	FDUNLOCK(fd_cb);

	return error;
}
2915
2916 static void
flow_divert_handle_read_notification(struct flow_divert_pcb * fd_cb,mbuf_ref_t packet,int offset)2917 flow_divert_handle_read_notification(struct flow_divert_pcb *fd_cb, mbuf_ref_t packet, int offset)
2918 {
2919 uint32_t read_count = 0;
2920 int error = 0;
2921
2922 error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_READ_COUNT, sizeof(read_count), &read_count, NULL);
2923 if (error) {
2924 FDLOG(LOG_ERR, fd_cb, "failed to get the read count: %d", error);
2925 return;
2926 }
2927
2928 FDLOG(LOG_DEBUG, fd_cb, "received a read notification for %u bytes", ntohl(read_count));
2929
2930 FDLOCK(fd_cb);
2931 if (fd_cb->so != NULL) {
2932 socket_lock(fd_cb->so, 0);
2933
2934 if (!(fd_cb->so->so_flags & SOF_FLOW_DIVERT)) {
2935 FDLOG0(LOG_NOTICE, fd_cb, "socket is not attached any more, ignoring read notification");
2936 goto done;
2937 }
2938
2939 fd_cb->send_window += ntohl(read_count);
2940 flow_divert_send_buffered_data(fd_cb, FALSE);
2941 done:
2942 socket_unlock(fd_cb->so, 0);
2943 }
2944 FDUNLOCK(fd_cb);
2945 }
2946
2947 static void
flow_divert_handle_group_init(struct flow_divert_group * group,mbuf_ref_t packet,int offset)2948 flow_divert_handle_group_init(struct flow_divert_group *group, mbuf_ref_t packet, int offset)
2949 {
2950 int error = 0;
2951 uint32_t key_size = 0;
2952 int log_level = 0;
2953 uint32_t flags = 0;
2954 int32_t order = FLOW_DIVERT_ORDER_LAST;
2955
2956 error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_TOKEN_KEY, 0, NULL, &key_size);
2957 if (error) {
2958 FDLOG(LOG_ERR, &nil_pcb, "failed to get the key size: %d", error);
2959 return;
2960 }
2961
2962 if (key_size == 0 || key_size > FLOW_DIVERT_MAX_KEY_SIZE) {
2963 FDLOG(LOG_ERR, &nil_pcb, "Invalid key size: %u", key_size);
2964 return;
2965 }
2966
2967 error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_LOG_LEVEL, sizeof(log_level), &log_level, NULL);
2968 if (!error) {
2969 nil_pcb.log_level = (uint8_t)log_level;
2970 }
2971
2972 lck_rw_lock_exclusive(&group->lck);
2973
2974 if (group->flags & FLOW_DIVERT_GROUP_FLAG_DEFUNCT) {
2975 FDLOG(LOG_ERR, &nil_pcb, "Skipping (re)initialization of defunct group %u", group->ctl_unit);
2976 lck_rw_done(&group->lck);
2977 return;
2978 }
2979
2980 if (group->token_key != NULL) {
2981 kfree_data_sized_by(group->token_key, group->token_key_size);
2982 }
2983
2984 group->token_key = kalloc_data(key_size, Z_WAITOK);
2985 group->token_key_size = key_size;
2986 error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_TOKEN_KEY, key_size, group->token_key, NULL);
2987 if (error) {
2988 FDLOG(LOG_ERR, &nil_pcb, "failed to get the token key: %d", error);
2989 kfree_data_sized_by(group->token_key, group->token_key_size);
2990 lck_rw_done(&group->lck);
2991 return;
2992 }
2993
2994 error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_FLAGS, sizeof(flags), &flags, NULL);
2995 if (!error) {
2996 group->flags = flags;
2997 }
2998
2999 error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_ORDER, sizeof(order), &order, NULL);
3000 if (!error) {
3001 FDLOG(LOG_INFO, &nil_pcb, "group %u order is %u", group->ctl_unit, order);
3002 group->order = order;
3003 }
3004
3005 lck_rw_done(&group->lck);
3006 }
3007
/*
 * Handle a FLOW_DIVERT_PKT_PROPERTIES_UPDATE message from the agent.
 * The packet may carry a new output interface index (used to re-scope
 * the flow) and/or a replacement application-data blob; both TLVs are
 * optional.
 */
static void
flow_divert_handle_properties_update(struct flow_divert_pcb *fd_cb, mbuf_ref_t packet, int offset)
{
	int error = 0;
	int out_if_index = 0;           /* stays 0 when no interface index TLV is present */
	uint32_t app_data_length = 0;   /* stays 0 when no app data TLV is present */

	FDLOG0(LOG_INFO, fd_cb, "received a properties update");

	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_OUT_IF_INDEX, sizeof(out_if_index), &out_if_index, NULL);
	if (error) {
		FDLOG0(LOG_INFO, fd_cb, "No output if index provided in properties update");
	}

	/* First pass only queries the length of the application data */
	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_APP_DATA, 0, NULL, &app_data_length);
	if (error) {
		FDLOG0(LOG_INFO, fd_cb, "No application data provided in properties update");
	}

	FDLOCK(fd_cb);
	if (fd_cb->so != NULL) {
		socket_lock(fd_cb->so, 0);

		if (!(fd_cb->so->so_flags & SOF_FLOW_DIVERT)) {
			FDLOG0(LOG_NOTICE, fd_cb, "socket is not attached any more, ignoring properties update");
			goto done;
		}

		if (out_if_index > 0) {
			/* Re-scope the flow to the new interface and refresh the local endpoint */
			flow_divert_scope(fd_cb, out_if_index, true);
			flow_divert_set_local_endpoint(fd_cb, SA(&fd_cb->local_endpoint));
		}

		if (app_data_length > 0) {
			uint8_t * app_data __indexable = NULL;
			app_data = kalloc_data(app_data_length, Z_WAITOK);
			if (app_data != NULL) {
				/* Second pass copies the blob into the freshly-allocated buffer */
				error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_APP_DATA, app_data_length, app_data, NULL);
				if (error == 0) {
					/* Success: swap out any previously-stored application data */
					if (fd_cb->app_data != NULL) {
						kfree_data_sized_by(fd_cb->app_data, fd_cb->app_data_length);
					}
					fd_cb->app_data = app_data;
					fd_cb->app_data_length = app_data_length;
				} else {
					FDLOG(LOG_ERR, fd_cb, "Failed to copy %u bytes of application data from the properties update packet", app_data_length);
					kfree_data(app_data, app_data_length);
				}
			} else {
				FDLOG(LOG_ERR, fd_cb, "Failed to allocate a buffer of size %u to hold the application data from the properties update", app_data_length);
			}
		}
done:
		socket_unlock(fd_cb->so, 0);
	}
	FDUNLOCK(fd_cb);
}
3065
/*
 * Handle a FLOW_DIVERT_PKT_APP_MAP_CREATE message: (re)build the group's
 * prefix trie of signing identifiers.  The packet carries a prefix count
 * followed by a sequence of FLOW_DIVERT_TLV_SIGNING_ID TLVs.  The trie
 * is constructed in a local structure and only installed in the group if
 * every signing ID inserts cleanly; on any failure the new memory is
 * freed and the group is left with the empty trie set up at the top.
 */
static void
flow_divert_handle_app_map_create(struct flow_divert_group *group, mbuf_ref_t packet, int offset)
{
	size_t bytes_mem_size = 0;
	size_t child_maps_mem_size = 0;
	size_t nodes_mem_size = 0;
	size_t trie_memory_size = 0;
	int cursor = 0;
	int error = 0;
	struct flow_divert_trie new_trie;
	int insert_error = 0;
	int prefix_count = -1;
	int signing_id_count = 0;
	size_t bytes_count = 0;
	size_t nodes_count = 0;
	size_t maps_count = 0;

	lck_rw_lock_exclusive(&group->lck);

	/* Re-set the current trie */
	if (group->signing_id_trie.memory != NULL) {
		kfree_data_sized_by(group->signing_id_trie.memory, group->signing_id_trie.memory_size);
	}
	memset(&group->signing_id_trie, 0, sizeof(group->signing_id_trie));
	group->signing_id_trie.root = NULL_TRIE_IDX;

	memset(&new_trie, 0, sizeof(new_trie));

	/* Get the number of shared prefixes in the new set of signing ID strings */
	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_PREFIX_COUNT, sizeof(prefix_count), &prefix_count, NULL);

	if (prefix_count < 0 || error) {
		FDLOG(LOG_ERR, &nil_pcb, "Invalid prefix count (%d) or an error occurred while reading the prefix count: %d", prefix_count, error);
		lck_rw_done(&group->lck);
		return;
	}

	/* Compute the number of signing IDs and the total amount of bytes needed to store them */
	for (cursor = flow_divert_packet_find_tlv(packet, offset, FLOW_DIVERT_TLV_SIGNING_ID, &error, 0);
	    cursor >= 0;
	    cursor = flow_divert_packet_find_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, &error, 1)) {
		uint32_t sid_size = 0;
		error = flow_divert_packet_get_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, 0, NULL, &sid_size);
		if (error || sid_size == 0) {
			FDLOG(LOG_ERR, &nil_pcb, "Failed to get the length of the signing identifier at offset %d: %d", cursor, error);
			signing_id_count = 0; /* zero count aborts the whole operation below */
			break;
		}
		if (os_add_overflow(bytes_count, sid_size, &bytes_count)) {
			FDLOG0(LOG_ERR, &nil_pcb, "Overflow while incrementing number of bytes");
			signing_id_count = 0;
			break;
		}
		signing_id_count++;
	}

	if (signing_id_count == 0) {
		lck_rw_done(&group->lck);
		FDLOG0(LOG_NOTICE, &nil_pcb, "No signing identifiers");
		return;
	}

	/* All the count arithmetic below is overflow-checked before use */
	if (os_add3_overflow(prefix_count, signing_id_count, 1, &nodes_count)) { /* + 1 for the root node */
		lck_rw_done(&group->lck);
		FDLOG0(LOG_ERR, &nil_pcb, "Overflow while computing the number of nodes");
		return;
	}

	if (os_add_overflow(prefix_count, 1, &maps_count)) { /* + 1 for the root node */
		lck_rw_done(&group->lck);
		FDLOG0(LOG_ERR, &nil_pcb, "Overflow while computing the number of maps");
		return;
	}

	/* Trie indices are 16-bit, so each count must fit in a uint16_t */
	if (bytes_count > UINT16_MAX || nodes_count > UINT16_MAX || maps_count > UINT16_MAX) {
		lck_rw_done(&group->lck);
		FDLOG(LOG_NOTICE, &nil_pcb, "Invalid bytes count (%lu), nodes count (%lu) or maps count (%lu)", bytes_count, nodes_count, maps_count);
		return;
	}

	FDLOG(LOG_INFO, &nil_pcb, "Nodes count = %lu, child maps count = %lu, bytes_count = %lu",
	    nodes_count, maps_count, bytes_count);

	if (os_mul_overflow(sizeof(*new_trie.nodes), (size_t)nodes_count, &nodes_mem_size) ||
	    os_mul3_overflow(sizeof(*new_trie.child_maps), CHILD_MAP_SIZE, (size_t)maps_count, &child_maps_mem_size) ||
	    os_mul_overflow(sizeof(*new_trie.bytes), (size_t)bytes_count, &bytes_mem_size) ||
	    os_add3_overflow(nodes_mem_size, child_maps_mem_size, bytes_mem_size, &trie_memory_size)) {
		FDLOG0(LOG_ERR, &nil_pcb, "Overflow while computing trie memory sizes");
		lck_rw_done(&group->lck);
		return;
	}

	if (trie_memory_size > FLOW_DIVERT_MAX_TRIE_MEMORY) {
		FDLOG(LOG_ERR, &nil_pcb, "Trie memory size (%lu) is too big (maximum is %u)", trie_memory_size, FLOW_DIVERT_MAX_TRIE_MEMORY);
		lck_rw_done(&group->lck);
		return;
	}

	/* One allocation backs the node array, the child maps, and the byte pool */
	new_trie.memory = kalloc_data(trie_memory_size, Z_WAITOK);
	new_trie.memory_size = trie_memory_size;
	if (new_trie.memory == NULL) {
		FDLOG(LOG_ERR, &nil_pcb, "Failed to allocate %lu bytes of memory for the signing ID trie",
		    nodes_mem_size + child_maps_mem_size + bytes_mem_size);
		lck_rw_done(&group->lck);
		return;
	}

	/* Initialize the free lists */
	new_trie.nodes = (struct flow_divert_trie_node *)new_trie.memory;
	new_trie.nodes_count = (uint16_t)nodes_count;

	new_trie.nodes_free_next = 0;
	memset(new_trie.nodes, 0, nodes_mem_size);

	new_trie.child_maps = (uint16_t *)(void *)((uint8_t *)new_trie.memory + nodes_mem_size);
	new_trie.child_maps_count = (uint16_t)maps_count;
	new_trie.child_maps_size = child_maps_mem_size;

	new_trie.child_maps_free_next = 0;
	/* 0xff fill makes every child-map slot start out as NULL_TRIE_IDX */
	memset(new_trie.child_maps, 0xff, child_maps_mem_size);

	new_trie.bytes = (uint8_t *)(void *)((uint8_t *)new_trie.memory + nodes_mem_size + child_maps_mem_size);
	new_trie.bytes_count = (uint16_t)bytes_count;

	new_trie.bytes_free_next = 0;
	memset(new_trie.bytes, 0, bytes_mem_size);

	/* The root is an empty node */
	new_trie.root = trie_node_alloc(&new_trie);

	/* Add each signing ID to the trie */
	for (cursor = flow_divert_packet_find_tlv(packet, offset, FLOW_DIVERT_TLV_SIGNING_ID, &error, 0);
	    cursor >= 0;
	    cursor = flow_divert_packet_find_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, &error, 1)) {
		uint32_t sid_size = 0;
		error = flow_divert_packet_get_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, 0, NULL, &sid_size);
		if (error || sid_size == 0) {
			FDLOG(LOG_ERR, &nil_pcb, "Failed to get the length of the signing identifier at offset %d while building: %d", cursor, error);
			insert_error = EINVAL;
			break;
		}
		/* Copy into the byte pool only if the ID still fits */
		if (sid_size <= UINT16_MAX && new_trie.bytes_free_next + (uint16_t)sid_size <= new_trie.bytes_count) {
			uint16_t new_node_idx;
			error = flow_divert_packet_get_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, sid_size, &TRIE_BYTE(&new_trie, new_trie.bytes_free_next), NULL);
			if (error) {
				FDLOG(LOG_ERR, &nil_pcb, "Failed to read the signing identifier at offset %d: %d", cursor, error);
				insert_error = EINVAL;
				break;
			}
			new_node_idx = flow_divert_trie_insert(&new_trie, new_trie.bytes_free_next, sid_size);
			if (new_node_idx == NULL_TRIE_IDX) {
				insert_error = EINVAL;
				break;
			}
		} else {
			FDLOG0(LOG_ERR, &nil_pcb, "No place to put signing ID for insertion");
			insert_error = ENOBUFS;
			break;
		}
	}

	/* Install the new trie atomically, or throw it away on any insert failure */
	if (!insert_error) {
		group->signing_id_trie = new_trie;
	} else {
		kfree_data_sized_by(new_trie.memory, new_trie.memory_size);
	}

	lck_rw_done(&group->lck);
}
3235
/*
 * Handle a FLOW_DIVERT_PKT_FLOW_STATES_REQUEST message: gather per-flow
 * accounting for every PCB in the group and enqueue a single
 * FLOW_DIVERT_PKT_FLOW_STATES reply (one FLOW_STATE TLV per flow) on the
 * control channel.
 */
static void
flow_divert_handle_flow_states_request(struct flow_divert_group *group)
{
	struct flow_divert_pcb *fd_cb;
	mbuf_ref_t packet = NULL;
	SLIST_HEAD(, flow_divert_pcb) tmp_list;
	int error = 0;
	uint32_t ctl_unit = 0;

	SLIST_INIT(&tmp_list);

	error = flow_divert_packet_init(&nil_pcb, FLOW_DIVERT_PKT_FLOW_STATES, &packet);
	if (error || packet == NULL) {
		FDLOG(LOG_ERR, &nil_pcb, "flow_divert_packet_init failed: %d, cannot send flow states", error);
		return;
	}

	lck_rw_lock_shared(&group->lck);

	if (!MBUFQ_EMPTY(&group->send_queue)) {
		FDLOG0(LOG_WARNING, &nil_pcb, "flow_divert_handle_flow_states_request: group send queue is not empty");
	}

	ctl_unit = group->ctl_unit;

	/*
	 * Snapshot the group's PCBs (each retained) under the group lock,
	 * then drop the lock so each PCB can be locked individually below.
	 */
	RB_FOREACH(fd_cb, fd_pcb_tree, &group->pcb_tree) {
		FDRETAIN(fd_cb);
		SLIST_INSERT_HEAD(&tmp_list, fd_cb, tmp_list_entry);
	}

	lck_rw_done(&group->lck);

	SLIST_FOREACH(fd_cb, &tmp_list, tmp_list_entry) {
		FDLOCK(fd_cb);
		if (fd_cb->so != NULL) {
			struct flow_divert_flow_state state = {};
			socket_lock(fd_cb->so, 0);

			state.conn_id = fd_cb->hash;
			state.bytes_written_by_app = fd_cb->bytes_written_by_app;
			state.bytes_sent = fd_cb->bytes_sent;
			state.bytes_received = fd_cb->bytes_received;
			state.send_window = fd_cb->send_window;
			state.send_buffer_bytes = fd_cb->so->so_snd.sb_cc;

			error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_FLOW_STATE, sizeof(state), &state);
			if (error) {
				FDLOG(LOG_ERR, fd_cb, "Failed to add a flow state: %d", error);
			}

			socket_unlock(fd_cb->so, 0);
		}
		FDUNLOCK(fd_cb);
		/*
		 * NOTE(review): FDRELEASE inside SLIST_FOREACH — the iterator
		 * reads fd_cb's next pointer after the release; presumably
		 * another reference keeps the PCB alive here.  Verify, or use
		 * the SLIST_REMOVE_HEAD pattern from flow_divert_close_all().
		 */
		FDRELEASE(fd_cb);
	}

	/* ctl_enqueuembuf consumes the packet on success; free it ourselves on failure */
	error = ctl_enqueuembuf(g_flow_divert_kctl_ref, ctl_unit, packet, CTL_DATA_EOR);
	if (error) {
		FDLOG(LOG_NOTICE, &nil_pcb, "flow_divert_handle_flow_states_request: ctl_enqueuembuf returned an error: %d", error);
		mbuf_freem(packet);
	}
}
3298
/*
 * Demultiplex one packet received from the flow-divert control channel.
 * Parses the fixed packet header, then dispatches group-level messages
 * (conn_id == 0) to the group handlers and per-flow messages to the PCB
 * identified by conn_id.  The packet is always freed before returning.
 *
 * Returns 0 on success or an errno-style error.
 */
static int
flow_divert_input(mbuf_ref_t packet, struct flow_divert_group *group)
{
	struct flow_divert_packet_header hdr;
	int error = 0;
	struct flow_divert_pcb *fd_cb;

	if (mbuf_pkthdr_len(packet) < sizeof(hdr)) {
		FDLOG(LOG_ERR, &nil_pcb, "got a bad packet, length (%lu) < sizeof hdr (%lu)", mbuf_pkthdr_len(packet), sizeof(hdr));
		error = EINVAL;
		goto done;
	}

	error = mbuf_copydata(packet, 0, sizeof(hdr), &hdr);
	if (error) {
		FDLOG(LOG_ERR, &nil_pcb, "mbuf_copydata failed for the header: %d", error);
		error = ENOBUFS;
		goto done;
	}

	/* conn_id is carried in network byte order */
	hdr.conn_id = ntohl(hdr.conn_id);

	if (hdr.conn_id == 0) {
		/* conn_id 0 means the message is addressed to the group itself */
		switch (hdr.packet_type) {
		case FLOW_DIVERT_PKT_GROUP_INIT:
			flow_divert_handle_group_init(group, packet, sizeof(hdr));
			break;
		case FLOW_DIVERT_PKT_APP_MAP_CREATE:
			flow_divert_handle_app_map_create(group, packet, sizeof(hdr));
			break;
		case FLOW_DIVERT_PKT_FLOW_STATES_REQUEST:
			flow_divert_handle_flow_states_request(group);
			break;
		default:
			FDLOG(LOG_WARNING, &nil_pcb, "got an unknown message type: %d", hdr.packet_type);
			break;
		}
		goto done;
	}

	fd_cb = flow_divert_pcb_lookup(hdr.conn_id, group);             /* This retains the PCB */
	if (fd_cb == NULL) {
		/* CLOSE/READ_NOTIFY races with flow teardown are expected; don't log those */
		if (hdr.packet_type != FLOW_DIVERT_PKT_CLOSE && hdr.packet_type != FLOW_DIVERT_PKT_READ_NOTIFY) {
			FDLOG(LOG_NOTICE, &nil_pcb, "got a %s message from group %d for an unknown pcb: %u", flow_divert_packet_type2str(hdr.packet_type), group->ctl_unit, hdr.conn_id);
		}
		goto done;
	}

	switch (hdr.packet_type) {
	case FLOW_DIVERT_PKT_CONNECT_RESULT:
		flow_divert_handle_connect_result(fd_cb, packet, sizeof(hdr));
		break;
	case FLOW_DIVERT_PKT_CLOSE:
		flow_divert_handle_close(fd_cb, packet, sizeof(hdr));
		break;
	case FLOW_DIVERT_PKT_DATA:
		error = flow_divert_handle_data(fd_cb, packet, sizeof(hdr));
		break;
	case FLOW_DIVERT_PKT_READ_NOTIFY:
		flow_divert_handle_read_notification(fd_cb, packet, sizeof(hdr));
		break;
	case FLOW_DIVERT_PKT_PROPERTIES_UPDATE:
		flow_divert_handle_properties_update(fd_cb, packet, sizeof(hdr));
		break;
	default:
		FDLOG(LOG_WARNING, fd_cb, "got an unknown message type: %d", hdr.packet_type);
		break;
	}

	FDRELEASE(fd_cb);       /* drop the reference taken by the lookup */

done:
	mbuf_freem(packet);
	return error;
}
3374
/*
 * Abort every flow in a group and mark the group defunct (e.g. when the
 * agent's control connection goes away).  Each PCB is removed from the
 * group and its socket is disconnected with ECONNABORTED.
 */
static void
flow_divert_close_all(struct flow_divert_group *group)
{
	struct flow_divert_pcb *fd_cb;
	SLIST_HEAD(, flow_divert_pcb) tmp_list;

	SLIST_INIT(&tmp_list);

	lck_rw_lock_exclusive(&group->lck);

	/* Nothing queued for the agent will ever be delivered now */
	MBUFQ_DRAIN(&group->send_queue);

	/*
	 * Snapshot the group's PCBs (each retained) under the group lock so
	 * they can be torn down after the lock is dropped.
	 */
	RB_FOREACH(fd_cb, fd_pcb_tree, &group->pcb_tree) {
		FDRETAIN(fd_cb);
		SLIST_INSERT_HEAD(&tmp_list, fd_cb, tmp_list_entry);
	}

	/* Prevent any future (re)initialization of this group */
	group->flags |= FLOW_DIVERT_GROUP_FLAG_DEFUNCT;

	lck_rw_done(&group->lck);

	while (!SLIST_EMPTY(&tmp_list)) {
		fd_cb = SLIST_FIRST(&tmp_list);
		FDLOCK(fd_cb);
		SLIST_REMOVE_HEAD(&tmp_list, tmp_list_entry);
		if (fd_cb->so != NULL) {
			socket_lock(fd_cb->so, 0);
			flow_divert_pcb_remove(fd_cb);
			flow_divert_update_closed_state(fd_cb, SHUT_RDWR, true, true);
			fd_cb->so->so_error = ECONNABORTED;
			flow_divert_disconnect_socket(fd_cb->so, !(fd_cb->flags & FLOW_DIVERT_IMPLICIT_CONNECT), false);
			socket_unlock(fd_cb->so, 0);
		}
		FDUNLOCK(fd_cb);
		FDRELEASE(fd_cb);       /* drop the reference taken above */
	}
}
3412
/*
 * Detach flow divert from a socket that is being torn down.  Flushes any
 * buffered data to the agent, tells the agent the flow is closing,
 * removes the PCB from its group, severs the socket <-> PCB linkage, and
 * drops the socket's reference on the PCB.
 *
 * Called with the socket locked.
 */
void
flow_divert_detach(struct socket *so)
{
	struct flow_divert_pcb *fd_cb = so->so_fd_pcb;

	if (!SO_IS_DIVERTED(so)) {
		return;
	}

	/* Sever the socket's side of the linkage first */
	so->so_flags &= ~SOF_FLOW_DIVERT;
	so->so_fd_pcb = NULL;

	FDLOG(LOG_INFO, fd_cb, "Detaching, ref count = %d", fd_cb->ref_count);

	if (fd_cb->group != NULL) {
		/* Last-ditch effort to send any buffered data */
		flow_divert_send_buffered_data(fd_cb, TRUE);

		flow_divert_update_closed_state(fd_cb, SHUT_RDWR, false, true);
		flow_divert_send_close_if_needed(fd_cb);
		/* Remove from the group */
		flow_divert_pcb_remove(fd_cb);
	}

	/*
	 * Drop the socket lock before taking the PCB lock so the PCB's
	 * socket back-pointer can be cleared without violating lock order.
	 */
	socket_unlock(so, 0);
	FDLOCK(fd_cb);
	fd_cb->so = NULL;
	FDUNLOCK(fd_cb);
	socket_lock(so, 0);

	FDRELEASE(fd_cb);       /* Release the socket's reference */
}
3445
/*
 * pru_close handler for a flow-diverted socket.  Flushes pending data to
 * the agent, marks both directions closed, notifies the agent that the
 * flow is closing, and removes the PCB from its group.
 *
 * Returns 0 on success, EINVAL if the socket is not diverted.
 */
static int
flow_divert_close(struct socket *so)
{
	struct flow_divert_pcb *fd_cb = so->so_fd_pcb;

	if (!SO_IS_DIVERTED(so)) {
		return EINVAL;
	}

	FDLOG0(LOG_INFO, fd_cb, "Closing");

	if (SOCK_TYPE(so) == SOCK_STREAM) {
		soisdisconnecting(so);
		sbflush(&so->so_rcv);   /* discard any data the app never read */
	}

	/* Last chance to push out anything still buffered for the agent */
	flow_divert_send_buffered_data(fd_cb, TRUE);
	flow_divert_update_closed_state(fd_cb, SHUT_RDWR, false, true);
	flow_divert_send_close_if_needed(fd_cb);

	/* Remove from the group */
	flow_divert_pcb_remove(fd_cb);

	return 0;
}
3471
3472 static int
flow_divert_disconnectx(struct socket * so,sae_associd_t aid,sae_connid_t cid __unused)3473 flow_divert_disconnectx(struct socket *so, sae_associd_t aid,
3474 sae_connid_t cid __unused)
3475 {
3476 if (aid != SAE_ASSOCID_ANY && aid != SAE_ASSOCID_ALL) {
3477 return EINVAL;
3478 }
3479
3480 return flow_divert_close(so);
3481 }
3482
/*
 * pru_shutdown handler: mark the write side of the socket closed and
 * tell the agent the flow is closing in the write direction if needed.
 *
 * Returns 0 on success, EINVAL if the socket is not diverted.
 */
static int
flow_divert_shutdown(struct socket *so)
{
	struct flow_divert_pcb *fd_cb = so->so_fd_pcb;

	if (!SO_IS_DIVERTED(so)) {
		return EINVAL;
	}

	FDLOG0(LOG_INFO, fd_cb, "Can't send more");

	socantsendmore(so);

	flow_divert_update_closed_state(fd_cb, SHUT_WR, false, true);
	flow_divert_send_close_if_needed(fd_cb);

	return 0;
}
3501
3502 static int
flow_divert_rcvd(struct socket * so,int flags __unused)3503 flow_divert_rcvd(struct socket *so, int flags __unused)
3504 {
3505 struct flow_divert_pcb *fd_cb = so->so_fd_pcb;
3506 int space = 0;
3507
3508 if (!SO_IS_DIVERTED(so)) {
3509 return EINVAL;
3510 }
3511
3512 space = sbspace(&so->so_rcv);
3513 FDLOG(LOG_DEBUG, fd_cb, "app read bytes, space = %d", space);
3514 if ((fd_cb->flags & FLOW_DIVERT_NOTIFY_ON_RECEIVED) &&
3515 (space > 0) &&
3516 flow_divert_send_read_notification(fd_cb) == 0) {
3517 FDLOG0(LOG_INFO, fd_cb, "Sent a read notification");
3518 fd_cb->flags &= ~FLOW_DIVERT_NOTIFY_ON_RECEIVED;
3519 }
3520
3521 return 0;
3522 }
3523
3524 static int
flow_divert_append_target_endpoint_tlv(mbuf_ref_t connect_packet,struct sockaddr * toaddr)3525 flow_divert_append_target_endpoint_tlv(mbuf_ref_t connect_packet, struct sockaddr *toaddr)
3526 {
3527 int error = 0;
3528 int port = 0;
3529
3530 if (!flow_divert_is_sockaddr_valid(toaddr)) {
3531 FDLOG(LOG_ERR, &nil_pcb, "Invalid target address, family = %u, length = %u", toaddr->sa_family, toaddr->sa_len);
3532 error = EINVAL;
3533 goto done;
3534 }
3535
3536 error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_TARGET_ADDRESS, toaddr->sa_len, SA_BYTES(toaddr));
3537 if (error) {
3538 goto done;
3539 }
3540
3541 if (toaddr->sa_family == AF_INET) {
3542 port = ntohs((satosin(toaddr))->sin_port);
3543 } else {
3544 port = ntohs((satosin6(toaddr))->sin6_port);
3545 }
3546
3547 error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_TARGET_PORT, sizeof(port), &port);
3548 if (error) {
3549 goto done;
3550 }
3551
3552 done:
3553 return error;
3554 }
3555
3556 struct sockaddr *
flow_divert_get_buffered_target_address(mbuf_ref_t buffer)3557 flow_divert_get_buffered_target_address(mbuf_ref_t buffer)
3558 {
3559 if (buffer != NULL && buffer->m_type == MT_SONAME) {
3560 struct sockaddr *toaddr = mtod(buffer, struct sockaddr *);
3561 if (toaddr != NULL && flow_divert_is_sockaddr_valid(toaddr)) {
3562 return toaddr;
3563 }
3564 }
3565 return NULL;
3566 }
3567
3568 static boolean_t
flow_divert_is_sockaddr_valid(struct sockaddr * addr)3569 flow_divert_is_sockaddr_valid(struct sockaddr *addr)
3570 {
3571 switch (addr->sa_family) {
3572 case AF_INET:
3573 if (addr->sa_len < sizeof(struct sockaddr_in)) {
3574 return FALSE;
3575 }
3576 break;
3577 case AF_INET6:
3578 if (addr->sa_len < sizeof(struct sockaddr_in6)) {
3579 return FALSE;
3580 }
3581 break;
3582 default:
3583 return FALSE;
3584 }
3585 return TRUE;
3586 }
3587
3588 static errno_t
flow_divert_dup_addr(sa_family_t family,struct sockaddr * addr,struct sockaddr ** dup)3589 flow_divert_dup_addr(sa_family_t family, struct sockaddr *addr,
3590 struct sockaddr **dup)
3591 {
3592 int error = 0;
3593 struct sockaddr *result;
3594 struct sockaddr_storage ss;
3595
3596 if (addr != NULL) {
3597 result = addr;
3598 } else {
3599 memset(&ss, 0, sizeof(ss));
3600 ss.ss_family = family;
3601 if (ss.ss_family == AF_INET) {
3602 ss.ss_len = sizeof(struct sockaddr_in);
3603 } else if (ss.ss_family == AF_INET6) {
3604 ss.ss_len = sizeof(struct sockaddr_in6);
3605 } else {
3606 error = EINVAL;
3607 }
3608 result = (struct sockaddr *)&ss;
3609 }
3610
3611 if (!error) {
3612 *dup = dup_sockaddr(result, 1);
3613 if (*dup == NULL) {
3614 error = ENOBUFS;
3615 }
3616 }
3617
3618 return error;
3619 }
3620
/*
 * Disconnect a flow-diverted socket.  Stream sockets (and connected
 * datagram sockets) are marked disconnected.  For datagram sockets the
 * underlying inpcb is also pushed toward teardown: cached routes are
 * released and the PCB is either handed to content filter (when the
 * teardown must be delayed) or marked dead and scheduled for garbage
 * collection.
 */
static void
flow_divert_disconnect_socket(struct socket *so, bool is_connected, bool delay_if_needed)
{
	if (SOCK_TYPE(so) == SOCK_STREAM || is_connected) {
		soisdisconnected(so);
	}
	if (SOCK_TYPE(so) == SOCK_DGRAM) {
		struct inpcb *inp = sotoinpcb(so);
		/* SOF_PCBCLEARING means this teardown already ran once */
		if (inp != NULL && !(so->so_flags & SOF_PCBCLEARING)) {
			/*
			 * Let NetworkStatistics know this PCB is going away
			 * before we detach it.
			 */
			if (nstat_collect && (SOCK_PROTO(so) == IPPROTO_TCP || SOCK_PROTO(so) == IPPROTO_UDP)) {
				nstat_pcb_detach(inp);
			}

			/* Release any cached route for the PCB's address family */
			if (SOCK_DOM(so) == PF_INET6) {
				ROUTE_RELEASE(&inp->in6p_route);
			} else {
				ROUTE_RELEASE(&inp->inp_route);
			}
			if (delay_if_needed) {
				/* Content filter decides when the PCB may actually die */
				(void) cfil_sock_is_dead(so);
			} else {
				inp->inp_state = INPCB_STATE_DEAD;
				inpcb_gc_sched(inp->inp_pcbinfo, INPCB_TIMER_FAST);
			}
			/* makes sure we're not called twice from so_close */
			so->so_flags |= SOF_PCBCLEARING;
		}
	}
}
3654
3655 static errno_t
flow_divert_ctloutput(struct socket * so,struct sockopt * sopt)3656 flow_divert_ctloutput(struct socket *so, struct sockopt *sopt)
3657 {
3658 struct flow_divert_pcb *fd_cb = so->so_fd_pcb;
3659
3660 if (!SO_IS_DIVERTED(so)) {
3661 return EINVAL;
3662 }
3663
3664 if (sopt->sopt_name == SO_TRAFFIC_CLASS) {
3665 if (sopt->sopt_dir == SOPT_SET && fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED) {
3666 flow_divert_send_traffic_class_update(fd_cb, so->so_traffic_class);
3667 }
3668 }
3669
3670 if (SOCK_DOM(so) == PF_INET) {
3671 return g_tcp_protosw->pr_ctloutput(so, sopt);
3672 } else if (SOCK_DOM(so) == PF_INET6) {
3673 return g_tcp6_protosw->pr_ctloutput(so, sopt);
3674 }
3675 return 0;
3676 }
3677
/*
 * Common implementation behind explicit (connect()) and implicit
 * (sendto()/datagram) connection establishment on a flow-diverted
 * socket.  On the first call it validates the destination, records the
 * original remote endpoint and routing state, selects a local endpoint,
 * builds the connect packet, and (unless preconnect data is pending)
 * sends it to the agent.  Subsequent calls resend a previously-saved
 * connect packet.
 *
 * Returns 0 on success or an errno-style error; EALREADY if a connect
 * is already in progress.
 */
static errno_t
flow_divert_connect_out_internal(struct socket *so, struct sockaddr *to, proc_t p, bool implicit)
{
	struct flow_divert_pcb *fd_cb = so->so_fd_pcb;
	int error = 0;
	struct inpcb *inp = sotoinpcb(so);
	struct sockaddr_in *sinp;
	mbuf_ref_t connect_packet = NULL;
	int do_send = 1;        /* cleared when the connect packet must be deferred */

	if (!SO_IS_DIVERTED(so)) {
		return EINVAL;
	}

	if (fd_cb->group == NULL) {
		/* No agent to talk to */
		error = ENETUNREACH;
		goto done;
	}

	if (inp == NULL) {
		error = EINVAL;
		goto done;
	} else if (inp->inp_state == INPCB_STATE_DEAD) {
		/* Surface any pending socket error, then clear it */
		if (so->so_error) {
			error = so->so_error;
			so->so_error = 0;
		} else {
			error = EINVAL;
		}
		goto done;
	}

	if (fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED) {
		error = EALREADY;
		goto done;
	}

	FDLOG0(LOG_INFO, fd_cb, "Connecting");

	/* First connect attempt: build (and possibly defer) the connect packet */
	if (fd_cb->connect_packet == NULL) {
		struct sockaddr_in sin = {};
		struct ifnet * __single ifp = NULL;

		if (to == NULL) {
			FDLOG0(LOG_ERR, fd_cb, "No destination address available when creating connect packet");
			error = EINVAL;
			goto done;
		}

		if (!flow_divert_is_sockaddr_valid(to)) {
			FDLOG0(LOG_ERR, fd_cb, "Destination address is not valid when creating connect packet");
			error = EINVAL;
			goto done;
		}

		/* Remember the caller's endpoint and routing state so they can be restored later */
		fd_cb->original_remote_endpoint = dup_sockaddr(to, 0);
		if (fd_cb->original_remote_endpoint == NULL) {
			FDLOG0(LOG_ERR, fd_cb, "Failed to dup the remote endpoint");
			error = ENOMEM;
			goto done;
		}
		fd_cb->original_vflag = inp->inp_vflag;
		fd_cb->original_last_outifp = inp->inp_last_outifp;
		fd_cb->original_last_outifp6 = inp->in6p_last_outifp;

		/* IPv4 multicast destinations are not supported */
		sinp = (struct sockaddr_in *)(void *)to;
		if (sinp->sin_family == AF_INET && IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
			error = EAFNOSUPPORT;
			goto done;
		}

		/* Convert a v4-mapped IPv6 destination to plain IPv4 when the socket allows it */
		if (to->sa_family == AF_INET6 && !(inp->inp_flags & IN6P_IPV6_V6ONLY)) {
			struct sockaddr_in6 sin6 = {};
			sin6.sin6_family = AF_INET6;
			sin6.sin6_len = sizeof(struct sockaddr_in6);
			sin6.sin6_port = satosin6(to)->sin6_port;
			sin6.sin6_addr = satosin6(to)->sin6_addr;
			if (IN6_IS_ADDR_V4MAPPED(&(sin6.sin6_addr))) {
				in6_sin6_2_sin(&sin, &sin6);
				to = (struct sockaddr *)&sin;
			}
		}

		if (to->sa_family == AF_INET6) {
			struct sockaddr_in6 *to6 = satosin6(to);

			inp->inp_vflag &= ~INP_IPV4;
			inp->inp_vflag |= INP_IPV6;
			fd_cb->local_endpoint.sin6.sin6_len = sizeof(struct sockaddr_in6);
			fd_cb->local_endpoint.sin6.sin6_family = AF_INET6;
			fd_cb->local_endpoint.sin6.sin6_port = inp->inp_lport;
			/* Pick a local IPv6 address (and output interface) for the destination */
			error = in6_pcbladdr(inp, to, &(fd_cb->local_endpoint.sin6.sin6_addr), &ifp);
			if (error) {
				FDLOG(LOG_WARNING, fd_cb, "failed to get a local IPv6 address: %d", error);
				/* Only transparent flows with a specific destination treat this as fatal */
				if (!(fd_cb->flags & FLOW_DIVERT_FLOW_IS_TRANSPARENT) || IN6_IS_ADDR_UNSPECIFIED(&(satosin6(to)->sin6_addr))) {
					error = 0;
				} else {
					goto done;
				}
			}
			if (ifp != NULL) {
				inp->in6p_last_outifp = ifp;
				ifnet_release(ifp);
			}

			/* Move any embedded scope ID out of the address into sin6_scope_id */
			if (IN6_IS_SCOPE_EMBED(&(fd_cb->local_endpoint.sin6.sin6_addr)) &&
			    in6_embedded_scope &&
			    fd_cb->local_endpoint.sin6.sin6_addr.s6_addr16[1] != 0) {
				fd_cb->local_endpoint.sin6.sin6_scope_id = ntohs(fd_cb->local_endpoint.sin6.sin6_addr.s6_addr16[1]);
				fd_cb->local_endpoint.sin6.sin6_addr.s6_addr16[1] = 0;
			}

			if (IN6_IS_SCOPE_EMBED(&(to6->sin6_addr)) &&
			    in6_embedded_scope &&
			    to6->sin6_addr.s6_addr16[1] != 0) {
				to6->sin6_scope_id = ntohs(to6->sin6_addr.s6_addr16[1]);
				to6->sin6_addr.s6_addr16[1] = 0;
			}
		} else if (to->sa_family == AF_INET) {
			inp->inp_vflag |= INP_IPV4;
			inp->inp_vflag &= ~INP_IPV6;
			fd_cb->local_endpoint.sin.sin_len = sizeof(struct sockaddr_in);
			fd_cb->local_endpoint.sin.sin_family = AF_INET;
			fd_cb->local_endpoint.sin.sin_port = inp->inp_lport;
			/* Pick a local IPv4 address (and output interface) for the destination */
			error = in_pcbladdr(inp, to, &(fd_cb->local_endpoint.sin.sin_addr), IFSCOPE_NONE, &ifp, 0);
			if (error) {
				FDLOG(LOG_WARNING, fd_cb, "failed to get a local IPv4 address: %d", error);
				/* Only transparent flows with a specific destination treat this as fatal */
				if (!(fd_cb->flags & FLOW_DIVERT_FLOW_IS_TRANSPARENT) || satosin(to)->sin_addr.s_addr == INADDR_ANY) {
					error = 0;
				} else {
					goto done;
				}
			}
			if (ifp != NULL) {
				inp->inp_last_outifp = ifp;
				ifnet_release(ifp);
			}
		} else {
			FDLOG(LOG_WARNING, fd_cb, "target address has an unsupported family: %d", to->sa_family);
		}

		/* Enforce cellular/expensive/constrained interface restrictions */
		error = flow_divert_check_no_cellular(fd_cb) ||
		    flow_divert_check_no_expensive(fd_cb) ||
		    flow_divert_check_no_constrained(fd_cb);
		if (error) {
			goto done;
		}

		if (SOCK_TYPE(so) == SOCK_STREAM || /* TCP or */
		    !implicit || /* connect() was called or */
		    ((inp->inp_vflag & INP_IPV6) && !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) || /* local address is not un-specified */
		    ((inp->inp_vflag & INP_IPV4) && inp->inp_laddr.s_addr != INADDR_ANY)) {
			fd_cb->flags |= FLOW_DIVERT_SHOULD_SET_LOCAL_ADDR;
		}

		error = flow_divert_create_connect_packet(fd_cb, to, so, p, &connect_packet);
		if (error) {
			goto done;
		}

		if (!implicit || SOCK_TYPE(so) == SOCK_STREAM) {
			flow_divert_set_remote_endpoint(fd_cb, to);
			flow_divert_set_local_endpoint(fd_cb, SA(&fd_cb->local_endpoint));
		}

		if (implicit) {
			fd_cb->flags |= FLOW_DIVERT_IMPLICIT_CONNECT;
		}

		if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
			/* Defer the connect until the app actually sends or receives */
			FDLOG0(LOG_INFO, fd_cb, "Delaying sending the connect packet until send or receive");
			do_send = 0;
		}

		/* Ownership of the packet moves to the PCB */
		fd_cb->connect_packet = connect_packet;
		connect_packet = NULL;
	} else {
		FDLOG0(LOG_INFO, fd_cb, "Sending saved connect packet");
	}

	if (do_send) {
		error = flow_divert_send_connect_packet(fd_cb);
		if (error) {
			goto done;
		}

		fd_cb->flags |= FLOW_DIVERT_CONNECT_STARTED;
	}

	/* Token-less datagram flows are considered connected immediately */
	if (SOCK_TYPE(so) == SOCK_DGRAM && !(fd_cb->flags & FLOW_DIVERT_HAS_TOKEN)) {
		soisconnected(so);
	} else {
		soisconnecting(so);
	}

done:
	return error;
}
3876
3877 errno_t
flow_divert_connect_out(struct socket * so,struct sockaddr * to,proc_t p)3878 flow_divert_connect_out(struct socket *so, struct sockaddr *to, proc_t p)
3879 {
3880 #if CONTENT_FILTER
3881 if (SOCK_TYPE(so) == SOCK_STREAM && !(so->so_flags & SOF_CONTENT_FILTER)) {
3882 int error = cfil_sock_attach(so, NULL, to, CFS_CONNECTION_DIR_OUT);
3883 if (error != 0) {
3884 struct flow_divert_pcb *fd_cb = so->so_fd_pcb;
3885 FDLOG(LOG_ERR, fd_cb, "Failed to attach cfil: %d", error);
3886 return error;
3887 }
3888 }
3889 #endif /* CONTENT_FILTER */
3890
3891 return flow_divert_connect_out_internal(so, to, p, false);
3892 }
3893
/*
 * Common connectx() implementation shared by the IPv4 and IPv6 entry
 * points. Optionally binds the socket to an interface scope, performs
 * the flow-diverted connect, and — when a uio is supplied — sends the
 * caller's initial data, reporting the amount queued via bytes_written.
 *
 * Returns 0 or an errno; EWOULDBLOCK from the data send is translated
 * to EINPROGRESS per the connectx() contract.
 */
static int
flow_divert_connectx_out_common(struct socket *so, struct sockaddr *dst,
    struct proc *p, uint32_t ifscope, sae_connid_t *pcid, struct uio *auio, user_ssize_t *bytes_written)
{
	struct inpcb *inp = sotoinpcb(so);
	int error;

	if (inp == NULL) {
		return EINVAL;
	}

	VERIFY(dst != NULL);

#if CONTENT_FILTER && NECP
	/*
	 * For token-based flows on unfiltered stream sockets, re-evaluate
	 * the NECP policy now that the destination address is known.
	 */
	struct flow_divert_pcb *fd_cb = so->so_fd_pcb;
	if (fd_cb != NULL && (fd_cb->flags & FLOW_DIVERT_HAS_TOKEN) &&
	    SOCK_TYPE(so) == SOCK_STREAM && !(so->so_flags & SOF_CONTENT_FILTER)) {
		inp_update_necp_policy(sotoinpcb(so), NULL, dst, 0);
	}
#endif /* CONTENT_FILTER && NECP */

	/* bind socket to the specified interface, if requested */
	if (ifscope != IFSCOPE_NONE &&
	    (error = inp_bindif(inp, ifscope, NULL)) != 0) {
		return error;
	}

	error = flow_divert_connect_out(so, dst, p);

	if (error != 0) {
		return error;
	}

	/* if there is data, send it */
	if (auio != NULL) {
		user_ssize_t datalen = 0;

		/* pru_sosend may sleep; drop the socket lock around it */
		socket_unlock(so, 0);

		VERIFY(bytes_written != NULL);

		datalen = uio_resid(auio);
		error = so->so_proto->pr_usrreqs->pru_sosend(so, NULL, (uio_t)auio, NULL, NULL, 0);
		socket_lock(so, 0);

		if (error == 0 || error == EWOULDBLOCK) {
			*bytes_written = datalen - uio_resid(auio);
		}

		/*
		 * sosend returns EWOULDBLOCK if it's a non-blocking
		 * socket or a timeout occurred (this allows to return
		 * the amount of queued data through sendit()).
		 *
		 * However, connectx() returns EINPROGRESS in case of a
		 * blocking socket. So we change the return value here.
		 */
		if (error == EWOULDBLOCK) {
			error = EINPROGRESS;
		}
	}

	if (error == 0 && pcid != NULL) {
		*pcid = 1; /* there is only 1 connection for a TCP */
	}

	return error;
}
3962
3963 static int
flow_divert_connectx_out(struct socket * so,struct sockaddr * src __unused,struct sockaddr * dst,struct proc * p,uint32_t ifscope,sae_associd_t aid __unused,sae_connid_t * pcid,uint32_t flags __unused,void * arg __unused,uint32_t arglen __unused,struct uio * uio,user_ssize_t * bytes_written)3964 flow_divert_connectx_out(struct socket *so, struct sockaddr *src __unused,
3965 struct sockaddr *dst, struct proc *p, uint32_t ifscope,
3966 sae_associd_t aid __unused, sae_connid_t *pcid, uint32_t flags __unused, void *arg __unused,
3967 uint32_t arglen __unused, struct uio *uio, user_ssize_t *bytes_written)
3968 {
3969 return flow_divert_connectx_out_common(so, dst, p, ifscope, pcid, uio, bytes_written);
3970 }
3971
3972 static int
flow_divert_connectx6_out(struct socket * so,struct sockaddr * src __unused,struct sockaddr * dst,struct proc * p,uint32_t ifscope,sae_associd_t aid __unused,sae_connid_t * pcid,uint32_t flags __unused,void * arg __unused,uint32_t arglen __unused,struct uio * uio,user_ssize_t * bytes_written)3973 flow_divert_connectx6_out(struct socket *so, struct sockaddr *src __unused,
3974 struct sockaddr *dst, struct proc *p, uint32_t ifscope,
3975 sae_associd_t aid __unused, sae_connid_t *pcid, uint32_t flags __unused, void *arg __unused,
3976 uint32_t arglen __unused, struct uio *uio, user_ssize_t *bytes_written)
3977 {
3978 return flow_divert_connectx_out_common(so, dst, p, ifscope, pcid, uio, bytes_written);
3979 }
3980
/*
 * pru_send handler for flow divert sockets (TCP and UDP).
 *
 * Performs an implicit connect if no connect has been started yet, then
 * forwards the application data to the flow divert provider. On all
 * paths this function consumes (or hands off) both 'data' and
 * 'control'; the caller must not free them.
 *
 * Returns 0 or an errno (ECONNRESET for a dead inpcb, EHOSTUNREACH when
 * the provider has closed the write side of a datagram flow).
 */
static errno_t
flow_divert_data_out(struct socket *so, int flags, mbuf_ref_t data, struct sockaddr *to, mbuf_ref_t control, struct proc *p)
{
	struct flow_divert_pcb *fd_cb = so->so_fd_pcb;
	int error = 0;
	struct inpcb *inp;
#if CONTENT_FILTER
	struct m_tag *cfil_tag = NULL; /* reference to CFIL saved state; released in done */
#endif

	if (!SO_IS_DIVERTED(so)) {
		return EINVAL;
	}

	inp = sotoinpcb(so);
	if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD) {
		error = ECONNRESET;
		goto done;
	}

	if ((fd_cb->flags & FLOW_DIVERT_TUNNEL_WR_CLOSED) && SOCK_TYPE(so) == SOCK_DGRAM) {
		/* The provider considers this datagram flow to be closed, so no data can be sent */
		FDLOG0(LOG_INFO, fd_cb, "provider is no longer accepting writes, cannot send data");
		error = EHOSTUNREACH;
		goto done;
	}

#if CONTENT_FILTER
	/*
	 * If the socket is subject to a UDP Content Filter and no remote address is passed in,
	 * retrieve the CFIL saved remote address from the mbuf and use it.
	 */
	if (to == NULL && CFIL_DGRAM_FILTERED(so)) {
		struct sockaddr * __single cfil_faddr = NULL;
		cfil_tag = cfil_dgram_get_socket_state(data, NULL, NULL, &cfil_faddr, NULL);
		if (cfil_tag) {
			/* 'to' now aliases memory owned by cfil_tag; the tag must outlive all uses of 'to' */
			to = (struct sockaddr *)(void *)cfil_faddr;
		}
		FDLOG(LOG_INFO, fd_cb, "Using remote address from CFIL saved state: %p", to);
	}
#endif

	/* Implicit connect */
	if (!(fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED)) {
		FDLOG0(LOG_INFO, fd_cb, "implicit connect");

		error = flow_divert_connect_out_internal(so, to, p, true);
		if (error) {
			goto done;
		}
	} else {
		/* Already connected: just re-check interface restrictions */
		error = flow_divert_check_no_cellular(fd_cb) ||
		    flow_divert_check_no_expensive(fd_cb) ||
		    flow_divert_check_no_constrained(fd_cb);
		if (error) {
			goto done;
		}
	}

	if (data != NULL) {
		size_t data_size = 0;
		/* Compute the payload length: pkthdr if present, else walk the chain */
		if (mbuf_flags(data) & M_PKTHDR) {
			data_size = mbuf_pkthdr_len(data);
		} else {
			for (mbuf_t blob = data; blob != NULL; blob = mbuf_next(blob)) {
				data_size += mbuf_len(blob);
			}
		}

		FDLOG(LOG_DEBUG, fd_cb, "app wrote %lu bytes", data_size);
		fd_cb->bytes_written_by_app += data_size;

		error = flow_divert_send_app_data(fd_cb, data, data_size, to);

		/* Ownership of 'data' transferred to flow_divert_send_app_data */
		data = NULL;

		if (error) {
			goto done;
		}
	}

	if (flags & PRUS_EOF) {
		flow_divert_shutdown(so);
	}

done:
	if (data) {
		mbuf_freem(data);
	}
	if (control) {
		/* NOTE(review): mbuf_free releases a single mbuf; presumably control is never a chain here — confirm */
		mbuf_free(control);
	}
#if CONTENT_FILTER
	if (cfil_tag) {
		m_tag_free(cfil_tag);
	}
#endif

	return error;
}
4081
4082 static int
flow_divert_preconnect(struct socket * so)4083 flow_divert_preconnect(struct socket *so)
4084 {
4085 int error = 0;
4086 struct flow_divert_pcb *fd_cb = so->so_fd_pcb;
4087
4088 if (!SO_IS_DIVERTED(so)) {
4089 return EINVAL;
4090 }
4091
4092 if (!(fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED)) {
4093 FDLOG0(LOG_INFO, fd_cb, "Pre-connect read: sending saved connect packet");
4094 error = flow_divert_send_connect_packet(so->so_fd_pcb);
4095 if (error) {
4096 return error;
4097 }
4098
4099 fd_cb->flags |= FLOW_DIVERT_CONNECT_STARTED;
4100 }
4101
4102 soclearfastopen(so);
4103
4104 return error;
4105 }
4106
4107 static void
flow_divert_set_protosw(struct socket * so)4108 flow_divert_set_protosw(struct socket *so)
4109 {
4110 if (SOCK_DOM(so) == PF_INET) {
4111 so->so_proto = &g_flow_divert_in_protosw;
4112 } else {
4113 so->so_proto = (struct protosw *)&g_flow_divert_in6_protosw;
4114 }
4115 }
4116
4117 static void
flow_divert_set_udp_protosw(struct socket * so)4118 flow_divert_set_udp_protosw(struct socket *so)
4119 {
4120 if (SOCK_DOM(so) == PF_INET) {
4121 so->so_proto = &g_flow_divert_in_udp_protosw;
4122 } else {
4123 so->so_proto = (struct protosw *)&g_flow_divert_in6_udp_protosw;
4124 }
4125 }
4126
4127 errno_t
flow_divert_implicit_data_out(struct socket * so,int flags,mbuf_ref_t data,struct sockaddr * to,mbuf_ref_t control,struct proc * p)4128 flow_divert_implicit_data_out(struct socket *so, int flags, mbuf_ref_t data, struct sockaddr *to, mbuf_ref_t control, struct proc *p)
4129 {
4130 struct flow_divert_pcb *fd_cb = so->so_fd_pcb;
4131 struct inpcb *inp;
4132 int error = 0;
4133
4134 inp = sotoinpcb(so);
4135 if (inp == NULL) {
4136 return EINVAL;
4137 }
4138
4139 if (fd_cb == NULL) {
4140 error = flow_divert_pcb_init(so);
4141 fd_cb = so->so_fd_pcb;
4142 if (error != 0 || fd_cb == NULL) {
4143 goto done;
4144 }
4145 }
4146 return flow_divert_data_out(so, flags, data, to, control, p);
4147
4148 done:
4149 if (data) {
4150 mbuf_freem(data);
4151 }
4152 if (control) {
4153 mbuf_free(control);
4154 }
4155
4156 return error;
4157 }
4158
/*
 * Creates a flow divert pcb for the socket and inserts it into a kernel
 * control group derived from the control/aggregate units.
 *
 * The derive/add loop retries while flow_divert_add_to_group() returns
 * ENOENT (the derived group does not exist), letting
 * flow_divert_derive_kernel_control_unit() advance to the next
 * candidate unit via the in/out cursor arguments.
 *
 * Returns 0 on success; EALREADY if the socket is already diverted;
 * ENOMEM if the pcb cannot be created; EINVAL if no valid group unit is
 * available; or the insertion error.
 */
static errno_t
flow_divert_pcb_init_internal(struct socket *so, uint32_t ctl_unit, uint32_t aggregate_unit)
{
	errno_t error = 0;
	struct flow_divert_pcb *fd_cb = NULL;
	uint32_t agg_unit = aggregate_unit;           /* cursor: advanced by derive on each retry */
	uint32_t policy_control_unit = ctl_unit;      /* cursor: advanced by derive on each retry */
	bool is_aggregate = false;

	if (so->so_flags & SOF_FLOW_DIVERT) {
		return EALREADY;
	}

	fd_cb = flow_divert_pcb_create(so);
	if (fd_cb == NULL) {
		return ENOMEM;
	}

	do {
		uint32_t group_unit = flow_divert_derive_kernel_control_unit(so->last_pid, &policy_control_unit, &agg_unit, &is_aggregate);
		/* Valid units are (0, GROUP_COUNT_MAX) or the in-process range */
		if (group_unit == 0 || (group_unit >= GROUP_COUNT_MAX && group_unit < FLOW_DIVERT_IN_PROCESS_UNIT_MIN)) {
			FDLOG0(LOG_ERR, fd_cb, "No valid group is available, cannot init flow divert");
			error = EINVAL;
			break;
		}

		error = flow_divert_add_to_group(fd_cb, group_unit);
		if (error == 0) {
			/* Success: attach the pcb to the socket and record group state */
			so->so_fd_pcb = fd_cb;
			so->so_flags |= SOF_FLOW_DIVERT;
			fd_cb->control_group_unit = group_unit;
			fd_cb->policy_control_unit = ctl_unit;
			fd_cb->aggregate_unit = agg_unit;
			if (is_aggregate) {
				fd_cb->flags |= FLOW_DIVERT_FLOW_IS_TRANSPARENT;
			} else {
				fd_cb->flags &= ~FLOW_DIVERT_FLOW_IS_TRANSPARENT;
			}

			/* Route subsequent socket operations through flow divert */
			if (SOCK_TYPE(so) == SOCK_STREAM) {
				flow_divert_set_protosw(so);
			} else if (SOCK_TYPE(so) == SOCK_DGRAM) {
				flow_divert_set_udp_protosw(so);
			}

			FDLOG0(LOG_INFO, fd_cb, "Created");
		} else if (error != ENOENT) {
			FDLOG(LOG_ERR, fd_cb, "pcb insert failed: %d", error);
		}
	} while (error == ENOENT); /* group missing: try the next candidate unit */

	if (error != 0) {
		FDRELEASE(fd_cb);
	}

	return error;
}
4216
4217 errno_t
flow_divert_pcb_init(struct socket * so)4218 flow_divert_pcb_init(struct socket *so)
4219 {
4220 struct inpcb *inp = sotoinpcb(so);
4221 uint32_t aggregate_units = 0;
4222 uint32_t ctl_unit = necp_socket_get_flow_divert_control_unit(inp, &aggregate_units);
4223 return flow_divert_pcb_init_internal(so, ctl_unit, aggregate_units);
4224 }
4225
4226 errno_t
flow_divert_token_set(struct socket * so,struct sockopt * sopt)4227 flow_divert_token_set(struct socket *so, struct sockopt *sopt)
4228 {
4229 uint32_t ctl_unit = 0;
4230 uint32_t key_unit = 0;
4231 uint32_t aggregate_unit = 0;
4232 int error = 0;
4233 int hmac_error = 0;
4234 mbuf_ref_t token = NULL;
4235
4236 if (so->so_flags & SOF_FLOW_DIVERT) {
4237 error = EALREADY;
4238 goto done;
4239 }
4240
4241 if (g_init_result) {
4242 FDLOG(LOG_ERR, &nil_pcb, "flow_divert_init failed (%d), cannot use flow divert", g_init_result);
4243 error = ENOPROTOOPT;
4244 goto done;
4245 }
4246
4247 if ((SOCK_TYPE(so) != SOCK_STREAM && SOCK_TYPE(so) != SOCK_DGRAM) ||
4248 (SOCK_PROTO(so) != IPPROTO_TCP && SOCK_PROTO(so) != IPPROTO_UDP) ||
4249 (SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6)) {
4250 error = EINVAL;
4251 goto done;
4252 } else {
4253 if (SOCK_TYPE(so) == SOCK_STREAM && SOCK_PROTO(so) == IPPROTO_TCP) {
4254 struct tcpcb *tp = sototcpcb(so);
4255 if (tp == NULL || tp->t_state != TCPS_CLOSED) {
4256 error = EINVAL;
4257 goto done;
4258 }
4259 }
4260 }
4261
4262 error = soopt_getm(sopt, &token);
4263 if (error) {
4264 token = NULL;
4265 goto done;
4266 }
4267
4268 error = soopt_mcopyin(sopt, token);
4269 if (error) {
4270 token = NULL;
4271 goto done;
4272 }
4273
4274 error = flow_divert_packet_get_tlv(token, 0, FLOW_DIVERT_TLV_KEY_UNIT, sizeof(key_unit), (void *)&key_unit, NULL);
4275 if (!error) {
4276 key_unit = ntohl(key_unit);
4277 if (key_unit >= GROUP_COUNT_MAX) {
4278 key_unit = 0;
4279 }
4280 } else if (error != ENOENT) {
4281 FDLOG(LOG_ERR, &nil_pcb, "Failed to get the key unit from the token: %d", error);
4282 goto done;
4283 } else {
4284 key_unit = 0;
4285 }
4286
4287 error = flow_divert_packet_get_tlv(token, 0, FLOW_DIVERT_TLV_CTL_UNIT, sizeof(ctl_unit), (void *)&ctl_unit, NULL);
4288 if (error) {
4289 FDLOG(LOG_ERR, &nil_pcb, "Failed to get the control socket unit from the token: %d", error);
4290 goto done;
4291 }
4292
4293 error = flow_divert_packet_get_tlv(token, 0, FLOW_DIVERT_TLV_AGGREGATE_UNIT, sizeof(aggregate_unit), (void *)&aggregate_unit, NULL);
4294 if (error && error != ENOENT) {
4295 FDLOG(LOG_ERR, &nil_pcb, "Failed to get the aggregate unit from the token: %d", error);
4296 goto done;
4297 }
4298
4299 /* A valid kernel control unit is required */
4300 ctl_unit = ntohl(ctl_unit);
4301 aggregate_unit = ntohl(aggregate_unit);
4302
4303 if (ctl_unit > 0 && ctl_unit < GROUP_COUNT_MAX) {
4304 hmac_error = flow_divert_packet_verify_hmac(token, (key_unit != 0 ? key_unit : ctl_unit));
4305 if (hmac_error && hmac_error != ENOENT) {
4306 FDLOG(LOG_ERR, &nil_pcb, "HMAC verfication failed: %d", hmac_error);
4307 error = hmac_error;
4308 goto done;
4309 }
4310 }
4311
4312 error = flow_divert_pcb_init_internal(so, ctl_unit, aggregate_unit);
4313 if (error == 0) {
4314 struct flow_divert_pcb *fd_cb = so->so_fd_pcb;
4315 int log_level = LOG_NOTICE;
4316
4317 error = flow_divert_packet_get_tlv(token, 0, FLOW_DIVERT_TLV_LOG_LEVEL, sizeof(log_level), &log_level, NULL);
4318 if (error == 0) {
4319 fd_cb->log_level = (uint8_t)log_level;
4320 }
4321 error = 0;
4322
4323 fd_cb->connect_token = token;
4324 token = NULL;
4325
4326 fd_cb->flags |= FLOW_DIVERT_HAS_TOKEN;
4327 }
4328
4329 if (hmac_error == 0) {
4330 struct flow_divert_pcb *fd_cb = so->so_fd_pcb;
4331 if (fd_cb != NULL) {
4332 fd_cb->flags |= FLOW_DIVERT_HAS_HMAC;
4333 }
4334 }
4335
4336 done:
4337 if (token != NULL) {
4338 mbuf_freem(token);
4339 }
4340
4341 return error;
4342 }
4343
/*
 * SO_FLOW_DIVERT_TOKEN getsockopt handler.
 *
 * Builds a token describing this diverted flow (control unit, flow ID,
 * optional app data, key unit, and an HMAC computed with the control
 * group's key) and copies it out to the caller. Passing a NULL buffer
 * returns only the required token size via sopt_valsize.
 */
errno_t
flow_divert_token_get(struct socket *so, struct sockopt *sopt)
{
	uint32_t ctl_unit;
	int error = 0;
	uint8_t hmac[SHA_DIGEST_LENGTH];
	struct flow_divert_pcb *fd_cb = so->so_fd_pcb;
	mbuf_ref_t token = NULL;
	struct flow_divert_group *control_group = NULL;

	if (!SO_IS_DIVERTED(so)) {
		error = EINVAL;
		goto done;
	}

	if (fd_cb->group == NULL) {
		error = EINVAL;
		goto done;
	}

	error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, &token);
	if (error) {
		FDLOG(LOG_ERR, fd_cb, "failed to allocate the header mbuf: %d", error);
		goto done;
	}

	/* TLVs carry multi-byte values in network byte order */
	ctl_unit = htonl(fd_cb->group->ctl_unit);

	error = flow_divert_packet_append_tlv(token, FLOW_DIVERT_TLV_CTL_UNIT, sizeof(ctl_unit), &ctl_unit);
	if (error) {
		goto done;
	}

	error = flow_divert_packet_append_tlv(token, FLOW_DIVERT_TLV_FLOW_ID, sizeof(fd_cb->hash), &fd_cb->hash);
	if (error) {
		goto done;
	}

	if (fd_cb->app_data != NULL) {
		error = flow_divert_packet_append_tlv(token, FLOW_DIVERT_TLV_APP_DATA, (uint32_t)fd_cb->app_data_length, fd_cb->app_data);
		if (error) {
			goto done;
		}
	}

	/* Sign the token with the control group's key (shared lock held) */
	control_group = flow_divert_group_lookup(fd_cb->control_group_unit, fd_cb);
	if (control_group != NULL) {
		lck_rw_lock_shared(&control_group->lck);
		ctl_unit = htonl(control_group->ctl_unit);
		error = flow_divert_packet_append_tlv(token, FLOW_DIVERT_TLV_KEY_UNIT, sizeof(ctl_unit), &ctl_unit);
		if (!error) {
			error = flow_divert_packet_compute_hmac(token, control_group, hmac);
		}
		lck_rw_done(&control_group->lck);
		FDGRP_RELEASE(control_group);
	} else {
		error = ENOPROTOOPT;
	}

	if (error) {
		goto done;
	}

	error = flow_divert_packet_append_tlv(token, FLOW_DIVERT_TLV_HMAC, sizeof(hmac), hmac);
	if (error) {
		goto done;
	}

	if (sopt->sopt_val == USER_ADDR_NULL) {
		/* If the caller passed NULL to getsockopt, just set the size of the token and return */
		sopt->sopt_valsize = mbuf_pkthdr_len(token);
		goto done;
	}

	error = soopt_mcopyout(sopt, token);
	if (error) {
		token = NULL; /* For some reason, soopt_mcopyout() frees the mbuf if it fails */
		goto done;
	}

done:
	if (token != NULL) {
		mbuf_freem(token);
	}

	return error;
}
4431
/*
 * Final teardown of a flow divert group once its last reference is
 * dropped: zeroizes and frees the token key, frees the signing ID trie,
 * and frees the group structure itself.
 */
void
flow_divert_group_destroy(struct flow_divert_group *group)
{
	lck_rw_lock_exclusive(&group->lck);

	FDLOG(LOG_NOTICE, &nil_pcb, "Destroying group %u", group->ctl_unit);

	if (group->token_key != NULL) {
		/* Zeroize the key material before returning it to the allocator */
		memset(group->token_key, 0, group->token_key_size);
		kfree_data_sized_by(group->token_key, group->token_key_size);
	}

	/* Re-set the current trie */
	if (group->signing_id_trie.memory != NULL) {
		kfree_data_sized_by(group->signing_id_trie.memory, group->signing_id_trie.memory_size);
	}
	memset(&group->signing_id_trie, 0, sizeof(group->signing_id_trie));
	group->signing_id_trie.root = NULL_TRIE_IDX;

	lck_rw_done(&group->lck);

	zfree(flow_divert_group_zone, group);
}
4455
4456 static struct flow_divert_group *
flow_divert_allocate_group(u_int32_t unit,pid_t pid)4457 flow_divert_allocate_group(u_int32_t unit, pid_t pid)
4458 {
4459 struct flow_divert_group *new_group = NULL;
4460 new_group = zalloc_flags(flow_divert_group_zone, Z_WAITOK | Z_ZERO);
4461 lck_rw_init(&new_group->lck, &flow_divert_mtx_grp, &flow_divert_mtx_attr);
4462 RB_INIT(&new_group->pcb_tree);
4463 new_group->ctl_unit = unit;
4464 new_group->in_process_pid = pid;
4465 MBUFQ_INIT(&new_group->send_queue);
4466 new_group->signing_id_trie.root = NULL_TRIE_IDX;
4467 new_group->ref_count = 1;
4468 new_group->order = FLOW_DIVERT_ORDER_LAST;
4469 return new_group;
4470 }
4471
4472 static errno_t
flow_divert_kctl_setup(u_int32_t * unit,void ** unitinfo)4473 flow_divert_kctl_setup(u_int32_t *unit, void **unitinfo)
4474 {
4475 if (unit == NULL || unitinfo == NULL) {
4476 return EINVAL;
4477 }
4478
4479 struct flow_divert_group *new_group = NULL;
4480 errno_t error = 0;
4481 lck_rw_lock_shared(&g_flow_divert_group_lck);
4482 if (*unit == FLOW_DIVERT_IN_PROCESS_UNIT) {
4483 // Return next unused in-process unit
4484 u_int32_t unit_cursor = FLOW_DIVERT_IN_PROCESS_UNIT_MIN;
4485 struct flow_divert_group *group_next = NULL;
4486 TAILQ_FOREACH(group_next, &g_flow_divert_in_process_group_list, chain) {
4487 if (group_next->ctl_unit > unit_cursor) {
4488 // Found a gap, lets fill it in
4489 break;
4490 }
4491 unit_cursor = group_next->ctl_unit + 1;
4492 if (unit_cursor == FLOW_DIVERT_IN_PROCESS_UNIT_MAX) {
4493 break;
4494 }
4495 }
4496 if (unit_cursor == FLOW_DIVERT_IN_PROCESS_UNIT_MAX) {
4497 error = EBUSY;
4498 } else {
4499 *unit = unit_cursor;
4500 new_group = flow_divert_allocate_group(*unit, proc_pid(current_proc()));
4501 if (group_next != NULL) {
4502 TAILQ_INSERT_BEFORE(group_next, new_group, chain);
4503 } else {
4504 TAILQ_INSERT_TAIL(&g_flow_divert_in_process_group_list, new_group, chain);
4505 }
4506 g_active_group_count++;
4507 }
4508 } else {
4509 if (kauth_cred_issuser(kauth_cred_get()) == 0) {
4510 error = EPERM;
4511 } else {
4512 if (g_flow_divert_groups == NULL) {
4513 g_flow_divert_groups = kalloc_type(struct flow_divert_group *,
4514 GROUP_COUNT_MAX, Z_WAITOK | Z_ZERO | Z_NOFAIL);
4515 }
4516
4517 // Return next unused group unit
4518 bool found_unused_unit = false;
4519 u_int32_t unit_cursor;
4520 for (unit_cursor = 1; unit_cursor < GROUP_COUNT_MAX; unit_cursor++) {
4521 struct flow_divert_group *group = g_flow_divert_groups[unit_cursor];
4522 if (group == NULL) {
4523 // Open slot, assign this one
4524 *unit = unit_cursor;
4525 new_group = flow_divert_allocate_group(*unit, 0);
4526 g_flow_divert_groups[*unit] = new_group;
4527 found_unused_unit = true;
4528 g_active_group_count++;
4529 break;
4530 }
4531 }
4532 if (!found_unused_unit) {
4533 error = EBUSY;
4534 }
4535 }
4536 }
4537 lck_rw_done(&g_flow_divert_group_lck);
4538
4539 *unitinfo = new_group;
4540
4541 return error;
4542 }
4543
4544 static errno_t
flow_divert_kctl_connect(kern_ctl_ref kctlref __unused,struct sockaddr_ctl * sac,void ** unitinfo)4545 flow_divert_kctl_connect(kern_ctl_ref kctlref __unused, struct sockaddr_ctl *sac, void **unitinfo)
4546 {
4547 if (unitinfo == NULL) {
4548 return EINVAL;
4549 }
4550
4551 // Just validate. The group will already have been allocated.
4552 struct flow_divert_group *group = (struct flow_divert_group *)*unitinfo;
4553 if (group == NULL || sac->sc_unit != group->ctl_unit) {
4554 FDLOG(LOG_ERR, &nil_pcb, "Flow divert connect fail, unit mismatch %u != %u",
4555 sac->sc_unit, group ? group->ctl_unit : 0);
4556 return EINVAL;
4557 }
4558
4559 return 0;
4560 }
4561
4562 static errno_t
flow_divert_kctl_disconnect(kern_ctl_ref kctlref __unused,uint32_t unit,void * unitinfo)4563 flow_divert_kctl_disconnect(kern_ctl_ref kctlref __unused, uint32_t unit, void *unitinfo)
4564 {
4565 struct flow_divert_group *group = NULL;
4566 errno_t error = 0;
4567
4568 if (unitinfo == NULL) {
4569 return 0;
4570 }
4571
4572 FDLOG(LOG_INFO, &nil_pcb, "disconnecting group %d", unit);
4573
4574 lck_rw_lock_exclusive(&g_flow_divert_group_lck);
4575
4576 if (g_active_group_count == 0) {
4577 panic("flow divert group %u is disconnecting, but no groups are active (active count = %u)",
4578 unit, g_active_group_count);
4579 }
4580
4581 if (unit < FLOW_DIVERT_IN_PROCESS_UNIT_MIN) {
4582 if (unit >= GROUP_COUNT_MAX) {
4583 return EINVAL;
4584 }
4585
4586 if (g_flow_divert_groups == NULL) {
4587 panic("flow divert group %u is disconnecting, but groups array is NULL",
4588 unit);
4589 }
4590 group = g_flow_divert_groups[unit];
4591
4592 if (group != (struct flow_divert_group *)unitinfo) {
4593 panic("group with unit %d (%p) != unit info (%p)", unit, group, unitinfo);
4594 }
4595
4596 g_flow_divert_groups[unit] = NULL;
4597 } else {
4598 group = (struct flow_divert_group *)unitinfo;
4599 if (TAILQ_EMPTY(&g_flow_divert_in_process_group_list)) {
4600 panic("flow divert group %u is disconnecting, but in-process group list is empty",
4601 unit);
4602 }
4603
4604 TAILQ_REMOVE(&g_flow_divert_in_process_group_list, group, chain);
4605 }
4606
4607 g_active_group_count--;
4608
4609 if (g_active_group_count == 0) {
4610 kfree_type(struct flow_divert_group *,
4611 GROUP_COUNT_MAX, g_flow_divert_groups);
4612 g_flow_divert_groups = NULL;
4613 }
4614
4615 lck_rw_done(&g_flow_divert_group_lck);
4616
4617 if (group != NULL) {
4618 flow_divert_close_all(group);
4619 FDGRP_RELEASE(group);
4620 } else {
4621 error = EINVAL;
4622 }
4623
4624 return error;
4625 }
4626
4627 static errno_t
flow_divert_kctl_send(__unused kern_ctl_ref kctlref,uint32_t unit,__unused void * unitinfo,mbuf_ref_t m,__unused int flags)4628 flow_divert_kctl_send(__unused kern_ctl_ref kctlref, uint32_t unit, __unused void *unitinfo, mbuf_ref_t m, __unused int flags)
4629 {
4630 errno_t error = 0;
4631 struct flow_divert_group *group = flow_divert_group_lookup(unit, NULL);
4632 if (group != NULL) {
4633 error = flow_divert_input(m, group);
4634 FDGRP_RELEASE(group);
4635 } else {
4636 error = ENOENT;
4637 }
4638 return error;
4639 }
4640
4641 static void
flow_divert_kctl_rcvd(__unused kern_ctl_ref kctlref,uint32_t unit,__unused void * unitinfo,__unused int flags)4642 flow_divert_kctl_rcvd(__unused kern_ctl_ref kctlref, uint32_t unit, __unused void *unitinfo, __unused int flags)
4643 {
4644 struct flow_divert_group *group = flow_divert_group_lookup(unit, NULL);
4645 if (group == NULL) {
4646 return;
4647 }
4648
4649 if (!OSTestAndClear(GROUP_BIT_CTL_ENQUEUE_BLOCKED, &group->atomic_bits)) {
4650 struct flow_divert_pcb *fd_cb;
4651 SLIST_HEAD(, flow_divert_pcb) tmp_list;
4652
4653 lck_rw_lock_exclusive(&group->lck);
4654
4655 while (!MBUFQ_EMPTY(&group->send_queue)) {
4656 mbuf_ref_t next_packet;
4657 FDLOG0(LOG_DEBUG, &nil_pcb, "trying ctl_enqueuembuf again");
4658 next_packet = MBUFQ_FIRST(&group->send_queue);
4659 int error = ctl_enqueuembuf(g_flow_divert_kctl_ref, group->ctl_unit, next_packet, CTL_DATA_EOR);
4660 if (error) {
4661 FDLOG(LOG_NOTICE, &nil_pcb, "flow_divert_kctl_rcvd: ctl_enqueuembuf returned an error: %d", error);
4662 OSTestAndSet(GROUP_BIT_CTL_ENQUEUE_BLOCKED, &group->atomic_bits);
4663 lck_rw_done(&group->lck);
4664 return;
4665 }
4666 MBUFQ_DEQUEUE(&group->send_queue, next_packet);
4667 }
4668
4669 SLIST_INIT(&tmp_list);
4670
4671 RB_FOREACH(fd_cb, fd_pcb_tree, &group->pcb_tree) {
4672 FDRETAIN(fd_cb);
4673 SLIST_INSERT_HEAD(&tmp_list, fd_cb, tmp_list_entry);
4674 }
4675
4676 lck_rw_done(&group->lck);
4677
4678 SLIST_FOREACH(fd_cb, &tmp_list, tmp_list_entry) {
4679 FDLOCK(fd_cb);
4680 if (fd_cb->so != NULL) {
4681 socket_lock(fd_cb->so, 0);
4682 if (fd_cb->group != NULL) {
4683 flow_divert_send_buffered_data(fd_cb, FALSE);
4684 }
4685 socket_unlock(fd_cb->so, 0);
4686 }
4687 FDUNLOCK(fd_cb);
4688 FDRELEASE(fd_cb);
4689 }
4690 }
4691
4692 FDGRP_RELEASE(group);
4693 }
4694
4695 static int
flow_divert_kctl_init(void)4696 flow_divert_kctl_init(void)
4697 {
4698 struct kern_ctl_reg ctl_reg;
4699 int result;
4700
4701 memset(&ctl_reg, 0, sizeof(ctl_reg));
4702
4703 strlcpy(ctl_reg.ctl_name, FLOW_DIVERT_CONTROL_NAME, sizeof(ctl_reg.ctl_name));
4704 ctl_reg.ctl_name[sizeof(ctl_reg.ctl_name) - 1] = '\0';
4705
4706 // Do not restrict to privileged processes. flow_divert_kctl_setup checks
4707 // permissions separately.
4708 ctl_reg.ctl_flags = CTL_FLAG_REG_EXTENDED | CTL_FLAG_REG_SETUP;
4709 ctl_reg.ctl_sendsize = FD_CTL_SENDBUFF_SIZE;
4710
4711 ctl_reg.ctl_connect = flow_divert_kctl_connect;
4712 ctl_reg.ctl_disconnect = flow_divert_kctl_disconnect;
4713 ctl_reg.ctl_send = flow_divert_kctl_send;
4714 ctl_reg.ctl_rcvd = flow_divert_kctl_rcvd;
4715 ctl_reg.ctl_setup = flow_divert_kctl_setup;
4716
4717 result = ctl_register(&ctl_reg, &g_flow_divert_kctl_ref);
4718
4719 if (result) {
4720 FDLOG(LOG_ERR, &nil_pcb, "flow_divert_kctl_init - ctl_register failed: %d\n", result);
4721 return result;
4722 }
4723
4724 return 0;
4725 }
4726
4727 void
flow_divert_init(void)4728 flow_divert_init(void)
4729 {
4730 memset(&nil_pcb, 0, sizeof(nil_pcb));
4731 nil_pcb.log_level = LOG_NOTICE;
4732
4733 g_tcp_protosw = pffindproto(AF_INET, IPPROTO_TCP, SOCK_STREAM);
4734
4735 VERIFY(g_tcp_protosw != NULL);
4736
4737 memcpy(&g_flow_divert_in_protosw, g_tcp_protosw, sizeof(g_flow_divert_in_protosw));
4738 memcpy(&g_flow_divert_in_usrreqs, g_tcp_protosw->pr_usrreqs, sizeof(g_flow_divert_in_usrreqs));
4739
4740 g_flow_divert_in_usrreqs.pru_connect = flow_divert_connect_out;
4741 g_flow_divert_in_usrreqs.pru_connectx = flow_divert_connectx_out;
4742 g_flow_divert_in_usrreqs.pru_disconnect = flow_divert_close;
4743 g_flow_divert_in_usrreqs.pru_disconnectx = flow_divert_disconnectx;
4744 g_flow_divert_in_usrreqs.pru_rcvd = flow_divert_rcvd;
4745 g_flow_divert_in_usrreqs.pru_send = flow_divert_data_out;
4746 g_flow_divert_in_usrreqs.pru_shutdown = flow_divert_shutdown;
4747 g_flow_divert_in_usrreqs.pru_preconnect = flow_divert_preconnect;
4748
4749 g_flow_divert_in_protosw.pr_usrreqs = &g_flow_divert_in_usrreqs;
4750 g_flow_divert_in_protosw.pr_ctloutput = flow_divert_ctloutput;
4751
4752 /*
4753 * Socket filters shouldn't attach/detach to/from this protosw
4754 * since pr_protosw is to be used instead, which points to the
4755 * real protocol; if they do, it is a bug and we should panic.
4756 */
4757 g_flow_divert_in_protosw.pr_filter_head.tqh_first =
4758 __unsafe_forge_single(struct socket_filter *, (uintptr_t)0xdeadbeefdeadbeef);
4759 g_flow_divert_in_protosw.pr_filter_head.tqh_last =
4760 __unsafe_forge_single(struct socket_filter **, (uintptr_t)0xdeadbeefdeadbeef);
4761
4762 /* UDP */
4763 g_udp_protosw = pffindproto(AF_INET, IPPROTO_UDP, SOCK_DGRAM);
4764 VERIFY(g_udp_protosw != NULL);
4765
4766 memcpy(&g_flow_divert_in_udp_protosw, g_udp_protosw, sizeof(g_flow_divert_in_udp_protosw));
4767 memcpy(&g_flow_divert_in_udp_usrreqs, g_udp_protosw->pr_usrreqs, sizeof(g_flow_divert_in_udp_usrreqs));
4768
4769 g_flow_divert_in_udp_usrreqs.pru_connect = flow_divert_connect_out;
4770 g_flow_divert_in_udp_usrreqs.pru_connectx = flow_divert_connectx_out;
4771 g_flow_divert_in_udp_usrreqs.pru_disconnect = flow_divert_close;
4772 g_flow_divert_in_udp_usrreqs.pru_disconnectx = flow_divert_disconnectx;
4773 g_flow_divert_in_udp_usrreqs.pru_rcvd = flow_divert_rcvd;
4774 g_flow_divert_in_udp_usrreqs.pru_send = flow_divert_data_out;
4775 g_flow_divert_in_udp_usrreqs.pru_shutdown = flow_divert_shutdown;
4776 g_flow_divert_in_udp_usrreqs.pru_sosend_list = pru_sosend_list_notsupp;
4777 g_flow_divert_in_udp_usrreqs.pru_preconnect = flow_divert_preconnect;
4778
4779 g_flow_divert_in_udp_protosw.pr_usrreqs = &g_flow_divert_in_usrreqs;
4780 g_flow_divert_in_udp_protosw.pr_ctloutput = flow_divert_ctloutput;
4781
4782 /*
4783 * Socket filters shouldn't attach/detach to/from this protosw
4784 * since pr_protosw is to be used instead, which points to the
4785 * real protocol; if they do, it is a bug and we should panic.
4786 */
4787 g_flow_divert_in_udp_protosw.pr_filter_head.tqh_first =
4788 __unsafe_forge_single(struct socket_filter *, (uintptr_t)0xdeadbeefdeadbeef);
4789 g_flow_divert_in_udp_protosw.pr_filter_head.tqh_last =
4790 __unsafe_forge_single(struct socket_filter **, (uintptr_t)0xdeadbeefdeadbeef);
4791
4792 g_tcp6_protosw = (struct ip6protosw *)pffindproto(AF_INET6, IPPROTO_TCP, SOCK_STREAM);
4793
4794 VERIFY(g_tcp6_protosw != NULL);
4795
4796 memcpy(&g_flow_divert_in6_protosw, g_tcp6_protosw, sizeof(g_flow_divert_in6_protosw));
4797 memcpy(&g_flow_divert_in6_usrreqs, g_tcp6_protosw->pr_usrreqs, sizeof(g_flow_divert_in6_usrreqs));
4798
4799 g_flow_divert_in6_usrreqs.pru_connect = flow_divert_connect_out;
4800 g_flow_divert_in6_usrreqs.pru_connectx = flow_divert_connectx6_out;
4801 g_flow_divert_in6_usrreqs.pru_disconnect = flow_divert_close;
4802 g_flow_divert_in6_usrreqs.pru_disconnectx = flow_divert_disconnectx;
4803 g_flow_divert_in6_usrreqs.pru_rcvd = flow_divert_rcvd;
4804 g_flow_divert_in6_usrreqs.pru_send = flow_divert_data_out;
4805 g_flow_divert_in6_usrreqs.pru_shutdown = flow_divert_shutdown;
4806 g_flow_divert_in6_usrreqs.pru_preconnect = flow_divert_preconnect;
4807
4808 g_flow_divert_in6_protosw.pr_usrreqs = &g_flow_divert_in6_usrreqs;
4809 g_flow_divert_in6_protosw.pr_ctloutput = flow_divert_ctloutput;
4810 /*
4811 * Socket filters shouldn't attach/detach to/from this protosw
4812 * since pr_protosw is to be used instead, which points to the
4813 * real protocol; if they do, it is a bug and we should panic.
4814 */
4815 g_flow_divert_in6_protosw.pr_filter_head.tqh_first =
4816 __unsafe_forge_single(struct socket_filter *, (uintptr_t)0xdeadbeefdeadbeef);
4817 g_flow_divert_in6_protosw.pr_filter_head.tqh_last =
4818 __unsafe_forge_single(struct socket_filter **, (uintptr_t)0xdeadbeefdeadbeef);
4819
4820 /* UDP6 */
4821 g_udp6_protosw = (struct ip6protosw *)pffindproto(AF_INET6, IPPROTO_UDP, SOCK_DGRAM);
4822
4823 VERIFY(g_udp6_protosw != NULL);
4824
4825 memcpy(&g_flow_divert_in6_udp_protosw, g_udp6_protosw, sizeof(g_flow_divert_in6_udp_protosw));
4826 memcpy(&g_flow_divert_in6_udp_usrreqs, g_udp6_protosw->pr_usrreqs, sizeof(g_flow_divert_in6_udp_usrreqs));
4827
4828 g_flow_divert_in6_udp_usrreqs.pru_connect = flow_divert_connect_out;
4829 g_flow_divert_in6_udp_usrreqs.pru_connectx = flow_divert_connectx6_out;
4830 g_flow_divert_in6_udp_usrreqs.pru_disconnect = flow_divert_close;
4831 g_flow_divert_in6_udp_usrreqs.pru_disconnectx = flow_divert_disconnectx;
4832 g_flow_divert_in6_udp_usrreqs.pru_rcvd = flow_divert_rcvd;
4833 g_flow_divert_in6_udp_usrreqs.pru_send = flow_divert_data_out;
4834 g_flow_divert_in6_udp_usrreqs.pru_shutdown = flow_divert_shutdown;
4835 g_flow_divert_in6_udp_usrreqs.pru_sosend_list = pru_sosend_list_notsupp;
4836 g_flow_divert_in6_udp_usrreqs.pru_preconnect = flow_divert_preconnect;
4837
4838 g_flow_divert_in6_udp_protosw.pr_usrreqs = &g_flow_divert_in6_udp_usrreqs;
4839 g_flow_divert_in6_udp_protosw.pr_ctloutput = flow_divert_ctloutput;
4840 /*
4841 * Socket filters shouldn't attach/detach to/from this protosw
4842 * since pr_protosw is to be used instead, which points to the
4843 * real protocol; if they do, it is a bug and we should panic.
4844 */
4845 g_flow_divert_in6_udp_protosw.pr_filter_head.tqh_first =
4846 __unsafe_forge_single(struct socket_filter *, (uintptr_t)0xdeadbeefdeadbeef);
4847 g_flow_divert_in6_udp_protosw.pr_filter_head.tqh_last =
4848 __unsafe_forge_single(struct socket_filter **, (uintptr_t)0xdeadbeefdeadbeef);
4849
4850 TAILQ_INIT(&g_flow_divert_in_process_group_list);
4851
4852 g_init_result = flow_divert_kctl_init();
4853 if (g_init_result) {
4854 goto done;
4855 }
4856
4857 done:
4858 if (g_init_result != 0) {
4859 if (g_flow_divert_kctl_ref != NULL) {
4860 ctl_deregister(g_flow_divert_kctl_ref);
4861 g_flow_divert_kctl_ref = NULL;
4862 }
4863 }
4864 }
4865