1 /*
2 * Copyright (c) 2013-2022 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 /*
25 * THEORY OF OPERATION
26 *
27 * The socket content filter subsystem provides a way for user space agents to
28 * make filtering decisions based on the content of the data being sent and
29 * received by INET/INET6 sockets.
30 *
 * A content filter user space agent gets a copy of the data, and the data is
 * also kept in a kernel buffer until the user space agent makes a pass or drop
 * decision. This unidirectional flow of content avoids unnecessary data copies
 * back to the kernel.
35 *
36 * A user space filter agent opens a kernel control socket with the name
37 * CONTENT_FILTER_CONTROL_NAME to attach to the socket content filter subsystem.
38 * When connected, a "struct content_filter" is created and set as the
39 * "unitinfo" of the corresponding kernel control socket instance.
40 *
41 * The socket content filter subsystem exchanges messages with the user space
42 * filter agent until an ultimate pass or drop decision is made by the
43 * user space filter agent.
44 *
45 * It should be noted that messages about many INET/INET6 sockets can be multiplexed
46 * over a single kernel control socket.
47 *
48 * Notes:
49 * - The current implementation supports all INET/INET6 sockets (i.e. TCP,
50 * UDP, ICMP, etc).
51 * - The current implementation supports up to two simultaneous content filters
52 * for iOS devices and eight simultaneous content filters for OSX.
53 *
54 *
55 * NECP FILTER CONTROL UNIT
56 *
57 * A user space filter agent uses the Network Extension Control Policy (NECP)
58 * database to specify which INET/INET6 sockets need to be filtered. The NECP
59 * criteria may be based on a variety of properties like user ID or proc UUID.
60 *
61 * The NECP "filter control unit" is used by the socket content filter subsystem
62 * to deliver the relevant INET/INET6 content information to the appropriate
63 * user space filter agent via its kernel control socket instance.
64 * This works as follows:
65 *
 * 1) The user space filter agent specifies an NECP filter control unit when
 *    it adds its filtering rules to the NECP database.
68 *
69 * 2) The user space filter agent also sets its NECP filter control unit on the
70 * content filter kernel control socket via the socket option
71 * CFIL_OPT_NECP_CONTROL_UNIT.
72 *
73 * 3) The NECP database is consulted to find out if a given INET/INET6 socket
74 * needs to be subjected to content filtering and returns the corresponding
75 * NECP filter control unit -- the NECP filter control unit is actually
76 * stored in the INET/INET6 socket structure so the NECP lookup is really simple.
77 *
78 * 4) The NECP filter control unit is then used to find the corresponding
79 * kernel control socket instance.
80 *
81 * Note: NECP currently supports a single filter control unit per INET/INET6 socket
82 * but this restriction may be soon lifted.
83 *
84 *
85 * THE MESSAGING PROTOCOL
86 *
87 * The socket content filter subsystem and a user space filter agent
88 * communicate over the kernel control socket via an asynchronous
89 * messaging protocol (this is not a request-response protocol).
90 * The socket content filter subsystem sends event messages to the user
91 * space filter agent about the INET/INET6 sockets it is interested to filter.
92 * The user space filter agent sends action messages to either allow
93 * data to pass or to disallow the data flow (and drop the connection).
94 *
95 * All messages over a content filter kernel control socket share the same
96 * common header of type "struct cfil_msg_hdr". The message type tells if
97 * it's a event message "CFM_TYPE_EVENT" or a action message "CFM_TYPE_ACTION".
98 * The message header field "cfm_sock_id" identifies a given INET/INET6 flow.
 * For TCP, flows are per-socket. For UDP and other datagram protocols, there
100 * could be multiple flows per socket.
101 *
102 * Note the message header length field may be padded for alignment and can
103 * be larger than the actual content of the message.
104 * The field "cfm_op" describe the kind of event or action.
105 *
106 * Here are the kinds of content filter events:
107 * - CFM_OP_SOCKET_ATTACHED: a new INET/INET6 socket is being filtered
108 * - CFM_OP_SOCKET_CLOSED: A INET/INET6 socket is closed
109 * - CFM_OP_DATA_OUT: A span of data is being sent on a INET/INET6 socket
 * - CFM_OP_DATA_IN: A span of data is being received on an INET/INET6 socket
111 *
112 *
113 * EVENT MESSAGES
114 *
115 * The CFM_OP_DATA_OUT and CFM_OP_DATA_IN event messages contains a span of
116 * data that is being sent or received. The position of this span of data
117 * in the data flow is described by a set of start and end offsets. These
118 * are absolute 64 bits offsets. The first byte sent (or received) starts
119 * at offset 0 and ends at offset 1. The length of the content data
120 * is given by the difference between the end offset and the start offset.
121 *
 * After a CFM_OP_SOCKET_ATTACHED is delivered, CFM_OP_DATA_OUT and
 * CFM_OP_DATA_IN events are not delivered until a CFM_OP_DATA_UPDATE
 * action message is sent by the user space filter agent.
125 *
126 * Note: absolute 64 bits offsets should be large enough for the foreseeable
127 * future. A 64-bits counter will wrap after 468 years at 10 Gbit/sec:
128 * 2E64 / ((10E9 / 8) * 60 * 60 * 24 * 365.25) = 467.63
129 *
 * There are two kinds of primary content filter actions:
131 * - CFM_OP_DATA_UPDATE: to update pass or peek offsets for each direction.
132 * - CFM_OP_DROP: to shutdown socket and disallow further data flow
133 *
134 * There is also an action to mark a given client flow as already filtered
135 * at a higher level, CFM_OP_BLESS_CLIENT.
136 *
137 *
138 * ACTION MESSAGES
139 *
140 * The CFM_OP_DATA_UPDATE action messages let the user space filter
141 * agent allow data to flow up to the specified pass offset -- there
142 * is a pass offset for outgoing data and a pass offset for incoming data.
143 * When a new INET/INET6 socket is attached to the content filter and a flow is
144 * created, each pass offset is initially set to 0 so no data is allowed to pass by
145 * default. When the pass offset is set to CFM_MAX_OFFSET via a CFM_OP_DATA_UPDATE
146 * then the data flow becomes unrestricted.
147 *
148 * Note that pass offsets can only be incremented. A CFM_OP_DATA_UPDATE message
149 * with a pass offset smaller than the pass offset of a previous
150 * CFM_OP_DATA_UPDATE message is silently ignored.
151 *
152 * A user space filter agent also uses CFM_OP_DATA_UPDATE action messages
153 * to tell the kernel how much data it wants to see by using the peek offsets.
154 * Just like pass offsets, there is a peek offset for each direction.
155 * When a new INET/INET6 flow is created, each peek offset is initially set to 0
156 * so no CFM_OP_DATA_OUT and CFM_OP_DATA_IN event messages are dispatched by default
157 * until a CFM_OP_DATA_UPDATE action message with a greater than 0 peek offset is sent
158 * by the user space filter agent. When the peek offset is set to CFM_MAX_OFFSET via
159 * a CFM_OP_DATA_UPDATE then the flow of update data events becomes unrestricted.
160 *
161 * Note that peek offsets cannot be smaller than the corresponding pass offset.
 * Also a peek offset cannot be smaller than the corresponding end offset
163 * of the last CFM_OP_DATA_OUT/CFM_OP_DATA_IN message dispatched. Trying
164 * to set a too small peek value is silently ignored.
165 *
166 *
167 * PER FLOW "struct cfil_info"
168 *
169 * As soon as a INET/INET6 socket gets attached to a content filter, a
170 * "struct cfil_info" is created to hold the content filtering state for this
171 * socket. For UDP and other datagram protocols, as soon as traffic is seen for
172 * each new flow identified by its 4-tuple of source address/port and destination
173 * address/port, a "struct cfil_info" is created. Each datagram socket may
174 * have multiple flows maintained in a hash table of "struct cfil_info" entries.
175 *
176 * The content filtering state is made of the following information
177 * for each direction:
178 * - The current pass offset;
179 * - The first and last offsets of the data pending, waiting for a filtering
180 * decision;
181 * - The inject queue for data that passed the filters and that needs
182 * to be re-injected;
183 * - A content filter specific state in a set of "struct cfil_entry"
184 *
185 *
186 * CONTENT FILTER STATE "struct cfil_entry"
187 *
188 * The "struct cfil_entry" maintains the information most relevant to the
189 * message handling over a kernel control socket with a user space filter agent.
190 *
191 * The "struct cfil_entry" holds the NECP filter control unit that corresponds
192 * to the kernel control socket unit it corresponds to and also has a pointer
193 * to the corresponding "struct content_filter".
194 *
195 * For each direction, "struct cfil_entry" maintains the following information:
196 * - The pass offset
197 * - The peek offset
198 * - The offset of the last data peeked at by the filter
199 * - A queue of data that's waiting to be delivered to the user space filter
200 * agent on the kernel control socket
201 * - A queue of data for which event messages have been sent on the kernel
202 * control socket and are pending for a filtering decision.
203 *
204 *
205 * CONTENT FILTER QUEUES
206 *
207 * Data that is being filtered is steered away from the INET/INET6 socket buffer
208 * and instead will sit in one of three content filter queues until the data
209 * can be re-injected into the INET/INET6 socket buffer.
210 *
211 * A content filter queue is represented by "struct cfil_queue" that contains
212 * a list of mbufs and the start and end offset of the data span of
213 * the list of mbufs.
214 *
215 * The data moves into the three content filter queues according to this
216 * sequence:
217 * a) The "cfe_ctl_q" of "struct cfil_entry"
218 * b) The "cfe_pending_q" of "struct cfil_entry"
219 * c) The "cfi_inject_q" of "struct cfil_info"
220 *
221 * Note: The sequence (a),(b) may be repeated several times if there is more
222 * than one content filter attached to the INET/INET6 socket.
223 *
 * The "cfe_ctl_q" queue holds data that cannot be delivered to the
 * kernel control socket for two reasons:
 * - The peek offset is less than the end offset of the mbuf data
227 * - The kernel control socket is flow controlled
228 *
229 * The "cfe_pending_q" queue holds data for which CFM_OP_DATA_OUT or
230 * CFM_OP_DATA_IN have been successfully dispatched to the kernel control
 * socket and are waiting for a pass action message from the user space
232 * filter agent. An mbuf length must be fully allowed to pass to be removed
233 * from the cfe_pending_q.
234 *
235 * The "cfi_inject_q" queue holds data that has been fully allowed to pass
236 * by the user space filter agent and that needs to be re-injected into the
237 * INET/INET6 socket.
238 *
239 *
240 * IMPACT ON FLOW CONTROL
241 *
 * An essential aspect of the content filter subsystem is to minimize the
243 * impact on flow control of the INET/INET6 sockets being filtered.
244 *
245 * The processing overhead of the content filtering may have an effect on
246 * flow control by adding noticeable delays and cannot be eliminated --
247 * care must be taken by the user space filter agent to minimize the
248 * processing delays.
249 *
250 * The amount of data being filtered is kept in buffers while waiting for
251 * a decision by the user space filter agent. This amount of data pending
252 * needs to be subtracted from the amount of data available in the
253 * corresponding INET/INET6 socket buffer. This is done by modifying
254 * sbspace() and tcp_sbspace() to account for amount of data pending
255 * in the content filter.
256 *
257 *
258 * LOCKING STRATEGY
259 *
260 * The global state of content filter subsystem is protected by a single
261 * read-write lock "cfil_lck_rw". The data flow can be done with the
262 * cfil read-write lock held as shared so it can be re-entered from multiple
263 * threads.
264 *
 * The per INET/INET6 socket content filter state -- "struct cfil_info" -- is
266 * protected by the socket lock.
267 *
268 * A INET/INET6 socket lock cannot be taken while the cfil read-write lock
269 * is held. That's why we have some sequences where we drop the cfil read-write
270 * lock before taking the INET/INET6 lock.
271 *
272 * It is also important to lock the INET/INET6 socket buffer while the content
273 * filter is modifying the amount of pending data. Otherwise the calculations
274 * in sbspace() and tcp_sbspace() could be wrong.
275 *
276 * The "cfil_lck_rw" protects "struct content_filter" and also the fields
277 * "cfe_link" and "cfe_filter" of "struct cfil_entry".
278 *
 * Actually "cfe_link" and "cfe_filter" are protected both by
280 * "cfil_lck_rw" and the socket lock: they may be modified only when
281 * "cfil_lck_rw" is exclusive and the socket is locked.
282 *
283 * To read the other fields of "struct content_filter" we have to take
284 * "cfil_lck_rw" in shared mode.
285 *
286 * DATAGRAM SPECIFICS:
287 *
288 * The socket content filter supports all INET/INET6 protocols. However
289 * the treatments for TCP sockets and for datagram (UDP, ICMP, etc) sockets
290 * are slightly different.
291 *
292 * Each datagram socket may have multiple flows. Each flow is identified
293 * by the flow's source address/port and destination address/port tuple
294 * and is represented as a "struct cfil_info" entry. For each socket,
295 * a hash table is used to maintain the collection of flows under that socket.
296 *
 * Each datagram flow is uniquely identified by its "struct cfil_info" cfi_sock_id.
 * The highest 32-bits of the cfi_sock_id contains the socket's so_gencnt. This portion
 * of the cfi_sock_id is used to locate the socket during socket lookup. The lowest 32-bits
300 * of the cfi_sock_id contains a hash of the flow's 4-tuple. This portion of the cfi_sock_id
301 * is used as the hash value for the flow hash table lookup within the parent socket.
302 *
303 * Since datagram sockets may not be connected, flow states may not be maintained in the
304 * socket structures and thus have to be saved for each packet. These saved states will be
305 * used for both outgoing and incoming reinjections. For outgoing packets, destination
306 * address/port as well as the current socket states will be saved. During reinjection,
307 * these saved states will be used instead. For incoming packets, control and address
308 * mbufs will be chained to the data. During reinjection, the whole chain will be queued
309 * onto the incoming socket buffer.
310 *
311 * LIMITATIONS
312 *
313 * - Support all INET/INET6 sockets, such as TCP, UDP, ICMP, etc
314 *
315 * - Does not support TCP unordered messages
316 */
317
318 /*
319 * TO DO LIST
320 *
321 * Deal with OOB
322 *
323 */
324
325 #include <sys/types.h>
326 #include <sys/kern_control.h>
327 #include <sys/queue.h>
328 #include <sys/domain.h>
329 #include <sys/protosw.h>
330 #include <sys/syslog.h>
331 #include <sys/systm.h>
332 #include <sys/param.h>
333 #include <sys/mbuf.h>
334
335 #include <kern/locks.h>
336 #include <kern/zalloc.h>
337 #include <kern/debug.h>
338
339 #include <net/ntstat.h>
340 #include <net/content_filter.h>
341 #include <net/content_filter_crypto.h>
342
343 #define _IP_VHL
344 #include <netinet/ip.h>
345 #include <netinet/in_pcb.h>
346 #include <netinet/tcp.h>
347 #include <netinet/tcp_var.h>
348 #include <netinet/udp.h>
349 #include <netinet/udp_var.h>
350 #include <kern/socket_flows.h>
351
352 #include <string.h>
353 #include <libkern/libkern.h>
354 #include <kern/sched_prim.h>
355 #include <kern/task.h>
356 #include <mach/task_info.h>
357
358 #include <net/sockaddr_utils.h>
359
360 #define MAX_CONTENT_FILTER 8
361
362 extern int tcp_msl;
363 extern struct inpcbinfo ripcbinfo;
364 struct cfil_entry;
365
366 /*
367 * The structure content_filter represents a user space content filter
368 * It's created and associated with a kernel control socket instance
369 */
struct content_filter {
	kern_ctl_ref cf_kcref;                  /* kernel control reference used to send messages to the agent */
	u_int32_t cf_kcunit;                    /* kernel control unit of the agent's socket */
	u_int32_t cf_flags;                     /* CFF_* flags below */

	uint32_t cf_necp_control_unit;          /* NECP filter control unit, set via CFIL_OPT_NECP_CONTROL_UNIT */

	uint32_t cf_sock_count;                 /* number of entries on cf_sock_entries */
	TAILQ_HEAD(, cfil_entry) cf_sock_entries; /* cfil_entry list for sockets attached to this filter */

	cfil_crypto_state_t cf_crypto_state;    /* crypto state (see cfil_action_set_crypto_key) */
};
382
383 #define CFF_ACTIVE 0x01
384 #define CFF_DETACHING 0x02
385 #define CFF_FLOW_CONTROLLED 0x04
386 #define CFF_PRESERVE_CONNECTIONS 0x08
387
388 struct content_filter *content_filters[MAX_CONTENT_FILTER];
389 uint32_t cfil_active_count = 0; /* Number of active content filters */
390 uint32_t cfil_sock_attached_count = 0; /* Number of sockets attachements */
391 uint32_t cfil_sock_attached_stats_count = 0; /* Number of sockets requested periodic stats report */
392 uint32_t cfil_close_wait_timeout = 1000; /* in milliseconds */
393
394 static kern_ctl_ref cfil_kctlref = NULL;
395
396 static LCK_GRP_DECLARE(cfil_lck_grp, "content filter");
397 static LCK_RW_DECLARE(cfil_lck_rw, &cfil_lck_grp);
398
399 #define CFIL_RW_LCK_MAX 8
400
401 int cfil_rw_nxt_lck = 0;
402 void* cfil_rw_lock_history[CFIL_RW_LCK_MAX];
403
404 int cfil_rw_nxt_unlck = 0;
405 void* cfil_rw_unlock_history[CFIL_RW_LCK_MAX];
406
407 static KALLOC_TYPE_DEFINE(content_filter_zone, struct content_filter, NET_KT_DEFAULT);
408
409 MBUFQ_HEAD(cfil_mqhead);
410
/*
 * A content filter queue: a list of mbufs plus the absolute stream
 * offsets [q_start, q_end) that the queued data spans.
 */
struct cfil_queue {
	uint64_t q_start; /* offset of first byte in queue */
	uint64_t q_end; /* offset of last byte in queue */
	struct cfil_mqhead q_mq; /* mbuf list holding the queued data */
};
416
417 /*
418 * struct cfil_entry
419 *
 * There is one entry per content filter
421 */
struct cfil_entry {
	TAILQ_ENTRY(cfil_entry) cfe_link;       /* link on cf_sock_entries of the attached filter; protected by cfil_lck_rw + socket lock */
	SLIST_ENTRY(cfil_entry) cfe_order_link; /* link on cfi_ordered_entries of the owning cfil_info */
	struct content_filter *cfe_filter;      /* attached filter; protected by cfil_lck_rw + socket lock */

	struct cfil_info *cfe_cfil_info;        /* back pointer to the per-flow state */
	uint32_t cfe_flags;                     /* CFEF_* flags below */
	uint32_t cfe_necp_control_unit;         /* NECP filter control unit of the filter */
	struct timeval cfe_last_event; /* To user space */
	struct timeval cfe_last_action; /* From user space */
	uint64_t cfe_byte_inbound_count_reported; /* stats already been reported */
	uint64_t cfe_byte_outbound_count_reported; /* stats already been reported */
	struct timeval cfe_stats_report_ts; /* Timestamp for last stats report */
	uint32_t cfe_stats_report_frequency; /* Interval for stats report in msecs */
	boolean_t cfe_laddr_sent;               /* local address already reported to the agent -- TODO confirm at call sites */

	/* Per-direction buffering state (one instance for send, one for receive) */
	struct cfe_buf {
		/*
		 * cfe_pending_q holds data that has been delivered to
		 * the filter and for which we are waiting for an action
		 */
		struct cfil_queue cfe_pending_q;
		/*
		 * This queue is for data that has not been delivered to
		 * the content filter (new data, pass peek or flow control)
		 */
		struct cfil_queue cfe_ctl_q;

		uint64_t cfe_pass_offset;       /* absolute offset up to which data may pass */
		uint64_t cfe_peek_offset;       /* absolute offset up to which the filter wants to see data */
		uint64_t cfe_peeked;            /* absolute offset of data already peeked at by the filter */
	} cfe_snd, cfe_rcv;
};
455
456 #define CFEF_CFIL_ATTACHED 0x0001 /* was attached to filter */
457 #define CFEF_SENT_SOCK_ATTACHED 0x0002 /* sock attach event was sent */
458 #define CFEF_DATA_START 0x0004 /* can send data event */
459 #define CFEF_FLOW_CONTROLLED 0x0008 /* wait for flow control lift */
460 #define CFEF_SENT_DISCONNECT_IN 0x0010 /* event was sent */
461 #define CFEF_SENT_DISCONNECT_OUT 0x0020 /* event was sent */
462 #define CFEF_SENT_SOCK_CLOSED 0x0040 /* closed event was sent */
463 #define CFEF_CFIL_DETACHED 0x0080 /* filter was detached */
464
465
/*
 * Record the elapsed time (in milliseconds) between t0 and t1 for
 * operation "op" in the cfil_info op/time log; entries beyond
 * CFI_MAX_TIME_LOG_ENTRY are silently dropped.
 *
 * Wrapped in do { } while (0) so the macro expands to a single statement
 * and is safe in unbraced if/else contexts; locals are underscore-prefixed
 * to avoid shadowing names at the expansion site.
 */
#define CFI_ADD_TIME_LOG(cfil, t1, t0, op) \
	do { \
	        struct timeval64 _tdiff; \
	        size_t _offset = (cfil)->cfi_op_list_ctr; \
	        if (_offset < CFI_MAX_TIME_LOG_ENTRY) { \
	                timersub(t1, t0, &_tdiff); \
	                (cfil)->cfi_op_time[_offset] = (uint32_t)(_tdiff.tv_sec * 1000 + _tdiff.tv_usec / 1000); \
	                (cfil)->cfi_op_list[_offset] = (unsigned char)op; \
	                (cfil)->cfi_op_list_ctr++; \
	        } \
	} while (0)
475
476 /*
477 * struct cfil_info
478 *
479 * There is a struct cfil_info per socket
480 */
struct cfil_info {
	TAILQ_ENTRY(cfil_info) cfi_link;        /* link on the global cfil_sock_head list */
	TAILQ_ENTRY(cfil_info) cfi_link_stats;  /* link on cfil_sock_head_stats (periodic stats reporting) */
	struct socket *cfi_so;                  /* owning socket */
	uint64_t cfi_flags;                     /* CFIF_* flags below */
	uint64_t cfi_sock_id;                   /* flow id: so_gencnt in upper 32 bits, flow hash in lower 32 (see CFI_MASK_*) */
	struct timeval64 cfi_first_event;       /* timestamp of the first logged event (base for cfi_op_time) */
	uint32_t cfi_op_list_ctr;               /* number of entries used in cfi_op_time/cfi_op_list */
	uint32_t cfi_op_time[CFI_MAX_TIME_LOG_ENTRY]; /* time interval in microseconds since first event */
	unsigned char cfi_op_list[CFI_MAX_TIME_LOG_ENTRY]; /* operation codes matching cfi_op_time entries */
	union sockaddr_in_4_6 cfi_so_attach_faddr; /* faddr at the time of attach */
	union sockaddr_in_4_6 cfi_so_attach_laddr; /* laddr at the time of attach */

	int cfi_dir;                            /* flow direction recorded at attach -- TODO confirm semantics at call sites */
	uint64_t cfi_byte_inbound_count;        /* total inbound bytes seen on this flow */
	uint64_t cfi_byte_outbound_count;       /* total outbound bytes seen on this flow */

	boolean_t cfi_isSignatureLatest; /* Indicates if signature covers latest flow attributes */
	u_int32_t cfi_filter_control_unit;      /* NECP filter control unit for this flow */
	u_int32_t cfi_filter_policy_gencount;   /* NECP policy generation count -- presumably captured at attach; verify */
	u_int32_t cfi_debug;                    /* non-zero enables extra debug logging for this flow */
	/* Per-direction buffering totals (one instance for send, one for receive) */
	struct cfi_buf {
		/*
		 * cfi_pending_first and cfi_pending_last describe the total
		 * amount of data outstanding for all the filters on
		 * this socket and data in the flow queue
		 * cfi_pending_mbcnt counts in sballoc() "chars of mbufs used"
		 */
		uint64_t cfi_pending_first;
		uint64_t cfi_pending_last;
		uint32_t cfi_pending_mbcnt;
		uint32_t cfi_pending_mbnum;     /* number of mbufs pending */
		uint32_t cfi_tail_drop_cnt;     /* count of tail drops on this flow */
		/*
		 * cfi_pass_offset is the minimum of all the filters
		 */
		uint64_t cfi_pass_offset;
		/*
		 * cfi_inject_q holds data that needs to be re-injected
		 * into the socket after filtering and that can
		 * be queued because of flow control
		 */
		struct cfil_queue cfi_inject_q;
	} cfi_snd, cfi_rcv;

	struct cfil_entry cfi_entries[MAX_CONTENT_FILTER]; /* per-filter state, indexed by kcunit - 1 (see CFI_ENTRY_KCUNIT) */
	struct soflow_hash_entry *cfi_hash_entry; /* datagram flow hash entry, if any */
	SLIST_HEAD(, cfil_entry) cfi_ordered_entries; /* cfil entries in processing order */
	os_refcnt_t cfi_ref_count;              /* reference count; released via CFIL_INFO_FREE */
} __attribute__((aligned(8)));
531
532 #define CFIF_DROP 0x0001 /* drop action applied */
533 #define CFIF_CLOSE_WAIT 0x0002 /* waiting for filter to close */
534 #define CFIF_SOCK_CLOSED 0x0004 /* socket is closed */
535 #define CFIF_RETRY_INJECT_IN 0x0010 /* inject in failed */
536 #define CFIF_RETRY_INJECT_OUT 0x0020 /* inject out failed */
537 #define CFIF_SHUT_WR 0x0040 /* shutdown write */
538 #define CFIF_SHUT_RD 0x0080 /* shutdown read */
539 #define CFIF_SOCKET_CONNECTED 0x0100 /* socket is connected */
540 #define CFIF_INITIAL_VERDICT 0x0200 /* received initial verdict */
541 #define CFIF_NO_CLOSE_WAIT 0x0400 /* do not wait to close */
542 #define CFIF_SO_DELAYED_DEAD 0x0800 /* Delayed socket DEAD marking */
543 #define CFIF_SO_DELAYED_TCP_TIME_WAIT 0x1000 /* Delayed TCP FIN TIME WAIT */
544
545 #define CFI_MASK_GENCNT 0xFFFFFFFF00000000 /* upper 32 bits */
546 #define CFI_SHIFT_GENCNT 32
547 #define CFI_MASK_FLOWHASH 0x00000000FFFFFFFF /* lower 32 bits */
548 #define CFI_SHIFT_FLOWHASH 0
549
550 #define CFI_ENTRY_KCUNIT(i, e) ((uint32_t)(((e) - &((i)->cfi_entries[0])) + 1))
551
552 static KALLOC_TYPE_DEFINE(cfil_info_zone, struct cfil_info, NET_KT_DEFAULT);
553
554 TAILQ_HEAD(cfil_sock_head, cfil_info) cfil_sock_head;
555 TAILQ_HEAD(cfil_sock_head_stats, cfil_info) cfil_sock_head_stats;
556
557 #define CFIL_QUEUE_VERIFY(x) if (cfil_debug) cfil_queue_verify(x)
558 #define CFIL_INFO_VERIFY(x) if (cfil_debug) cfil_info_verify(x)
559
560 /*
561 * UDP Socket Support
562 */
/* ICMP/ICMPv6 traffic over RAW or DGRAM sockets */
#define IS_ICMP(so) (so && (SOCK_CHECK_TYPE(so, SOCK_RAW) || SOCK_CHECK_TYPE(so, SOCK_DGRAM)) && \
    (SOCK_CHECK_PROTO(so, IPPROTO_ICMP) || SOCK_CHECK_PROTO(so, IPPROTO_ICMPV6)))
/* Raw IP sockets (SOCK_RAW + IPPROTO_RAW) */
#define IS_RAW(so) (so && SOCK_CHECK_TYPE(so, SOCK_RAW) && SOCK_CHECK_PROTO(so, IPPROTO_RAW))

/* Non-TCP/UDP traffic may carry its own IP header */
#define OPTIONAL_IP_HEADER(so) (!IS_TCP(so) && !IS_UDP(so))
#define GET_SO_PROTOCOL(so) (so ? SOCK_PROTO(so) : IPPROTO_IP)
#define GET_SO_INP_PROTOCOL(so) ((so && sotoinpcb(so)) ? sotoinpcb(so)->inp_ip_p : IPPROTO_IP)
/* Effective protocol: socket-level protocol, falling back to the inpcb's */
#define GET_SO_PROTO(so) ((GET_SO_PROTOCOL(so) != IPPROTO_IP) ? GET_SO_PROTOCOL(so) : GET_SO_INP_PROTOCOL(so))
#define IS_INP_V6(inp) (inp && (inp->inp_vflag & INP_IPV6))

/* Foreign address still unspecified (socket not connected) */
#define UNCONNECTED(inp) (inp && (((inp->inp_vflag & INP_IPV4) && (inp->inp_faddr.s_addr == INADDR_ANY)) || \
    ((inp->inp_vflag & INP_IPV6) && IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr))))
/* A filter entry exists for the given (1-based) kernel control unit */
#define IS_ENTRY_ATTACHED(cfil_info, kcunit) (cfil_info != NULL && (kcunit <= MAX_CONTENT_FILTER) && \
    cfil_info->cfi_entries[kcunit - 1].cfe_filter != NULL)
/* DNS or mDNS traffic (ports 53 / 5353 on either end) */
#define IS_DNS(local, remote) (check_port(local, 53) || check_port(remote, 53) || check_port(local, 5353) || check_port(remote, 5353))
/* TFO data sent while the connect is still in progress */
#define IS_INITIAL_TFO_DATA(so) (so && (so->so_flags1 & SOF1_PRECONNECT_DATA) && (so->so_state & SS_ISCONNECTING))
/* Address is empty or the unspecified IPv4/IPv6 address */
#define NULLADDRESS(addr) ((addr.sa.sa_len == 0) || \
    (addr.sa.sa_family == AF_INET && addr.sin.sin_addr.s_addr == 0) || \
    (addr.sa.sa_family == AF_INET6 && IN6_IS_ADDR_UNSPECIFIED(&addr.sin6.sin6_addr)))

/* TCP sockets that must never be content filtered */
#define SKIP_FILTER_FOR_TCP_SOCKET(so) \
    (so == NULL || \
     (!SOCK_CHECK_DOM(so, PF_INET) && !SOCK_CHECK_DOM(so, PF_INET6)) || \
     !SOCK_CHECK_TYPE(so, SOCK_STREAM) || \
     !SOCK_CHECK_PROTO(so, IPPROTO_TCP) || \
     (so->so_flags & SOF_MP_SUBFLOW) != 0 || \
     (so->so_flags1 & SOF1_CONTENT_FILTER_SKIP) != 0)
590
591 /*
592 * Special handling for 0.0.0.0-faddr TCP flows. This flows will be changed to loopback addr by TCP and
593 * may result in an immediate TCP RESET and socket close. This leads to CFIL blocking the owner thread for
594 * 1 sec waiting for ack from user-space provider (ack recevied by CFIL but socket already removed from
595 * global socket list). To avoid this, identify these flows and do not perform the close-wait blocking.
596 * These flows are identified as destined to Loopback address and were disconnected shortly after connect
597 * (before initial-verdict received).
598 */
599 #define IS_LOOPBACK_FADDR(inp) \
600 (inp && ((IS_INP_V6(inp) && IN6_IS_ADDR_LOOPBACK(&inp->in6p_faddr)) || (ntohl(inp->inp_faddr.s_addr) == INADDR_LOOPBACK)))
601
602 #define SET_NO_CLOSE_WAIT(inp, cfil_info) \
603 if (inp && cfil_info && !(cfil_info->cfi_flags & CFIF_INITIAL_VERDICT) && IS_LOOPBACK_FADDR(inp)) { \
604 cfil_info->cfi_flags |= CFIF_NO_CLOSE_WAIT; \
605 }
606
607 #define IS_NO_CLOSE_WAIT(cfil_info) (cfil_info && (cfil_info->cfi_flags & CFIF_NO_CLOSE_WAIT))
608
609 os_refgrp_decl(static, cfil_refgrp, "CFILRefGroup", NULL);
610
611 #define CFIL_INFO_FREE(cfil_info) \
612 if (cfil_info && (os_ref_release(&cfil_info->cfi_ref_count) == 0)) { \
613 cfil_info_free(cfil_info); \
614 }
615
/* Effective owning pid: the delegated pid when the socket is delegated */
#define SOCKET_PID(so) ((so->so_flags & SOF_DELEGATED) ? so->e_pid : so->last_pid)
/* Troubleshooting helpers: match a flow against the cfil_log_* sysctls */
#define MATCH_PID(so) (so && (cfil_log_pid == SOCKET_PID(so)))
#define MATCH_PORT(inp, local, remote) \
    ((inp && ntohs(inp->inp_lport) == cfil_log_port) || (inp && ntohs(inp->inp_fport) == cfil_log_port) || \
    check_port(local, cfil_log_port) || check_port(remote, cfil_log_port))
#define MATCH_PROTO(so) (GET_SO_PROTO(so) == cfil_log_proto)

/* True when any enabled cfil_log_* criterion matches this flow */
#define DEBUG_FLOW(inp, so, local, remote) \
    ((cfil_log_port && MATCH_PORT(inp, local, remote)) || (cfil_log_pid && MATCH_PID(so)) || (cfil_log_proto && MATCH_PROTO(so)))
625
/*
 * Set or clear the delayed-DEAD marking on whichever state the socket
 * carries (per-socket so_cfil, or the datagram flow database).
 * The SET macros are wrapped in do { } while (0): the original bare
 * if/else-if expansion is a dangling-else hazard when the macro is used
 * inside an unbraced if statement.
 */
#define SO_DELAYED_DEAD_SET(so, set) \
	do { \
	        if ((so)->so_cfil) { \
	                if (set) { \
	                        (so)->so_cfil->cfi_flags |= CFIF_SO_DELAYED_DEAD; \
	                } else { \
	                        (so)->so_cfil->cfi_flags &= ~CFIF_SO_DELAYED_DEAD; \
	                } \
	        } else if ((so)->so_flow_db) { \
	                if (set) { \
	                        (so)->so_flow_db->soflow_db_flags |= SOFLOWF_SO_DELAYED_DEAD; \
	                } else { \
	                        (so)->so_flow_db->soflow_db_flags &= ~SOFLOWF_SO_DELAYED_DEAD; \
	                } \
	        } \
	} while (0)

/* Non-zero when a delayed-DEAD marking is pending for this socket */
#define SO_DELAYED_DEAD_GET(so) \
	((so)->so_cfil ? ((so)->so_cfil->cfi_flags & CFIF_SO_DELAYED_DEAD) : \
	((so)->so_flow_db ? ((so)->so_flow_db->soflow_db_flags & SOFLOWF_SO_DELAYED_DEAD) : false))

/* Set or clear the delayed TCP FIN TIME_WAIT marking (TCP/so_cfil only) */
#define SO_DELAYED_TCP_TIME_WAIT_SET(so, set) \
	do { \
	        if ((so)->so_cfil) { \
	                if (set) { \
	                        (so)->so_cfil->cfi_flags |= CFIF_SO_DELAYED_TCP_TIME_WAIT; \
	                } else { \
	                        (so)->so_cfil->cfi_flags &= ~CFIF_SO_DELAYED_TCP_TIME_WAIT; \
	                } \
	        } \
	} while (0)

/* Non-zero when a delayed TCP TIME_WAIT marking is pending */
#define SO_DELAYED_TCP_TIME_WAIT_GET(so) \
	((so)->so_cfil ? ((so)->so_cfil->cfi_flags & CFIF_SO_DELAYED_TCP_TIME_WAIT) : false)
656
657 /*
658 * Periodic Statistics Report:
659 */
660 static struct thread *cfil_stats_report_thread;
661 #define CFIL_STATS_REPORT_INTERVAL_MIN_MSEC 500 // Highest report frequency
662 #define CFIL_STATS_REPORT_RUN_INTERVAL_NSEC (CFIL_STATS_REPORT_INTERVAL_MIN_MSEC * NSEC_PER_MSEC)
663 #define CFIL_STATS_REPORT_MAX_COUNT 50 // Max stats to be reported per run
664
665 /* This buffer must have same layout as struct cfil_msg_stats_report */
struct cfil_stats_report_buffer {
	struct cfil_msg_hdr msghdr;     /* common content filter message header */
	uint32_t count;                 /* number of valid entries in stats[] */
	struct cfil_msg_sock_stats stats[CFIL_STATS_REPORT_MAX_COUNT]; /* per-flow stats, at most CFIL_STATS_REPORT_MAX_COUNT per run */
};
671 static struct cfil_stats_report_buffer *global_cfil_stats_report_buffers[MAX_CONTENT_FILTER];
672 static uint32_t global_cfil_stats_counts[MAX_CONTENT_FILTER];
673
674 /*
675 * UDP Garbage Collection:
676 */
677 #define UDP_FLOW_GC_ACTION_TO 10 // Flow Action Timeout (no action from user space) in seconds
678 #define UDP_FLOW_GC_MAX_COUNT 100 // Max UDP flows to be handled per run
679
680 /*
681 * UDP flow queue thresholds
682 */
683 #define UDP_FLOW_GC_MBUF_CNT_MAX (2 << MBSHIFT) // Max mbuf byte count in flow queue (2MB)
684 #define UDP_FLOW_GC_MBUF_NUM_MAX (UDP_FLOW_GC_MBUF_CNT_MAX >> MCLSHIFT) // Max mbuf count in flow queue (1K)
685 #define UDP_FLOW_GC_MBUF_SHIFT 5 // Shift to get 1/32 of platform limits
686 /*
687 * UDP flow queue threshold globals:
688 */
689 static unsigned int cfil_udp_gc_mbuf_num_max = UDP_FLOW_GC_MBUF_NUM_MAX;
690 static unsigned int cfil_udp_gc_mbuf_cnt_max = UDP_FLOW_GC_MBUF_CNT_MAX;
691
692 /*
693 * CFIL specific mbuf tag:
694 * Save state of socket at the point of data entry into cfil.
695 * Use saved state for reinjection at protocol layer.
696 */
struct cfil_tag {
	union sockaddr_in_4_6 cfil_faddr;       /* destination address saved at data entry, used on reinjection */
	uint32_t cfil_so_state_change_cnt;      /* socket state-change count at data entry */
	uint32_t cfil_so_options;               /* so_options saved at data entry */
	int cfil_inp_flags;                     /* inpcb flags saved at data entry */
};
703
704 /*
705 * Global behavior flags:
706 */
707 #define CFIL_BEHAVIOR_FLAG_PRESERVE_CONNECTIONS 0x00000001
708 static uint32_t cfil_behavior_flags = 0;
709
710 #define DO_PRESERVE_CONNECTIONS (cfil_behavior_flags & CFIL_BEHAVIOR_FLAG_PRESERVE_CONNECTIONS)
711
712 /*
713 * Statistics
714 */
715
716 struct cfil_stats cfil_stats;
717
718 /*
719 * For troubleshooting
720 */
721 int cfil_log_level = LOG_ERR;
722 int cfil_log_port = 0;
723 int cfil_log_pid = 0;
724 int cfil_log_proto = 0;
725 int cfil_log_data = 0;
726 int cfil_log_stats = 0;
727 int cfil_debug = 1;
728
/*
 * Sysctls for logs and statistics
 */
static int sysctl_cfil_filter_list(struct sysctl_oid *, void *, int,
    struct sysctl_req *);
static int sysctl_cfil_sock_list(struct sysctl_oid *, void *, int,
    struct sysctl_req *);

/* Parent node: net.cfil */
SYSCTL_NODE(_net, OID_AUTO, cfil, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "cfil");

/* Read-write logging/debug knobs backed by the globals above */
SYSCTL_INT(_net_cfil, OID_AUTO, log, CTLFLAG_RW | CTLFLAG_LOCKED,
    &cfil_log_level, 0, "");

SYSCTL_INT(_net_cfil, OID_AUTO, log_port, CTLFLAG_RW | CTLFLAG_LOCKED,
    &cfil_log_port, 0, "");

SYSCTL_INT(_net_cfil, OID_AUTO, log_pid, CTLFLAG_RW | CTLFLAG_LOCKED,
    &cfil_log_pid, 0, "");

SYSCTL_INT(_net_cfil, OID_AUTO, log_proto, CTLFLAG_RW | CTLFLAG_LOCKED,
    &cfil_log_proto, 0, "");

SYSCTL_INT(_net_cfil, OID_AUTO, log_data, CTLFLAG_RW | CTLFLAG_LOCKED,
    &cfil_log_data, 0, "");

SYSCTL_INT(_net_cfil, OID_AUTO, log_stats, CTLFLAG_RW | CTLFLAG_LOCKED,
    &cfil_log_stats, 0, "");

SYSCTL_INT(_net_cfil, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
    &cfil_debug, 0, "");

/* Read-only counters */
SYSCTL_UINT(_net_cfil, OID_AUTO, sock_attached_count, CTLFLAG_RD | CTLFLAG_LOCKED,
    &cfil_sock_attached_count, 0, "");

SYSCTL_UINT(_net_cfil, OID_AUTO, active_count, CTLFLAG_RD | CTLFLAG_LOCKED,
    &cfil_active_count, 0, "");

SYSCTL_UINT(_net_cfil, OID_AUTO, close_wait_timeout, CTLFLAG_RW | CTLFLAG_LOCKED,
    &cfil_close_wait_timeout, 0, "");

SYSCTL_UINT(_net_cfil, OID_AUTO, behavior_flags, CTLFLAG_RW | CTLFLAG_LOCKED,
    &cfil_behavior_flags, 0, "");

static int cfil_sbtrim = 1;
SYSCTL_UINT(_net_cfil, OID_AUTO, sbtrim, CTLFLAG_RW | CTLFLAG_LOCKED,
    &cfil_sbtrim, 0, "");

/* Structured dumps of the attached filters and filtered sockets */
SYSCTL_PROC(_net_cfil, OID_AUTO, filter_list, CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, sysctl_cfil_filter_list, "S,cfil_filter_stat", "");

SYSCTL_PROC(_net_cfil, OID_AUTO, sock_list, CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, sysctl_cfil_sock_list, "S,cfil_sock_stat", "");

SYSCTL_STRUCT(_net_cfil, OID_AUTO, stats, CTLFLAG_RD | CTLFLAG_LOCKED,
    &cfil_stats, cfil_stats, "");
784
/*
 * Forward declaration to appease the compiler
 */
/* Filter verdict actions and event dispatch */
static int cfil_action_data_pass(struct socket *, struct cfil_info *, uint32_t, int,
    uint64_t, uint64_t);
static int cfil_action_drop(struct socket *, struct cfil_info *, uint32_t);
static int cfil_action_bless_client(uint32_t, struct cfil_msg_hdr *);
static int cfil_action_set_crypto_key(uint32_t, struct cfil_msg_hdr *);
static int cfil_dispatch_closed_event(struct socket *, struct cfil_info *, int);

/* Data path: queueing and per-filter processing */
static int cfil_data_common(struct socket *, struct cfil_info *, int, struct sockaddr *,
    struct mbuf *, struct mbuf *, uint32_t);
static int cfil_data_filter(struct socket *, struct cfil_info *, uint32_t, int,
    struct mbuf *, uint32_t);
static void fill_ip_sockaddr_4_6(union sockaddr_in_4_6 *,
    struct in_addr, u_int16_t);
static void fill_ip6_sockaddr_4_6(union sockaddr_in_4_6 *,
    struct in6_addr *, u_int16_t, uint32_t);

/* cfil_info lifecycle and socket lookup */
static int cfil_dispatch_attach_event(struct socket *, struct cfil_info *, uint32_t, int);
static void cfil_info_free(struct cfil_info *);
static struct cfil_info * cfil_info_alloc(struct socket *, struct soflow_hash_entry *);
static int cfil_info_attach_unit(struct socket *, uint32_t, struct cfil_info *);
static struct socket * cfil_socket_from_sock_id(cfil_sock_id_t, bool);
static struct socket * cfil_socket_from_client_uuid(uuid_t, bool *);
static int cfil_service_pending_queue(struct socket *, struct cfil_info *, uint32_t, int);
static int cfil_data_service_ctl_q(struct socket *, struct cfil_info *, uint32_t, int);
static void cfil_info_verify(struct cfil_info *);
static int cfil_update_data_offsets(struct socket *, struct cfil_info *, uint32_t, int,
    uint64_t, uint64_t);
static int cfil_acquire_sockbuf(struct socket *, struct cfil_info *, int);
static void cfil_release_sockbuf(struct socket *, int);
static int cfil_filters_attached(struct socket *);

/* Global read-write lock wrappers (with debug history) */
static void cfil_rw_lock_exclusive(lck_rw_t *);
static void cfil_rw_unlock_exclusive(lck_rw_t *);
static void cfil_rw_lock_shared(lck_rw_t *);
static void cfil_rw_unlock_shared(lck_rw_t *);
static boolean_t cfil_rw_lock_shared_to_exclusive(lck_rw_t *);
static void cfil_rw_lock_exclusive_to_shared(lck_rw_t *);

/* Datagram (UDP/raw-IP) flow handling */
static unsigned int cfil_data_length(struct mbuf *, int *, int *);
static struct cfil_info *cfil_sock_udp_get_info(struct socket *, uint32_t, bool, struct soflow_hash_entry *, struct sockaddr *, struct sockaddr *);
static errno_t cfil_sock_udp_handle_data(bool, struct socket *, struct sockaddr *, struct sockaddr *,
    struct mbuf *, struct mbuf *, uint32_t, struct soflow_hash_entry *);
static int32_t cfil_sock_udp_data_pending(struct sockbuf *, bool);
static void cfil_sock_udp_is_closed(struct socket *);
static int cfil_sock_udp_notify_shutdown(struct socket *, int, int, int);
static int cfil_sock_udp_shutdown(struct socket *, int *);
static void cfil_sock_udp_close_wait(struct socket *);
static void cfil_sock_udp_buf_update(struct sockbuf *);
static int cfil_filters_udp_attached(struct socket *, bool);
static void cfil_get_flow_address_v6(struct soflow_hash_entry *, struct inpcb *,
    struct in6_addr **, struct in6_addr **,
    u_int16_t *, u_int16_t *);
static void cfil_get_flow_address(struct soflow_hash_entry *, struct inpcb *,
    struct in_addr *, struct in_addr *,
    u_int16_t *, u_int16_t *);

/* Debug/diagnostics helpers */
static void cfil_info_log(int, struct cfil_info *, const char *);
void cfil_filter_show(u_int32_t);
void cfil_info_show(void);
bool cfil_info_action_timed_out(struct cfil_info *, int);
bool cfil_info_buffer_threshold_exceeded(struct cfil_info *);
struct m_tag *cfil_dgram_save_socket_state(struct cfil_info *, struct mbuf *);
boolean_t cfil_dgram_peek_socket_state(struct mbuf *m, int *inp_flags);
static void cfil_sock_received_verdict(struct socket *so);
static void cfil_fill_event_msg_addresses(struct soflow_hash_entry *, struct inpcb *,
    union sockaddr_in_4_6 *, union sockaddr_in_4_6 *,
    boolean_t, boolean_t);
static void cfil_stats_report_thread_func(void *, wait_result_t);
static void cfil_stats_report(void *v, wait_result_t w);

/* Datagram GC callbacks registered with the soflow subsystem */
static bool cfil_dgram_gc_needed(struct socket *, struct soflow_hash_entry *, u_int64_t);
static bool cfil_dgram_gc_perform(struct socket *, struct soflow_hash_entry *);
static bool cfil_dgram_detach_entry(struct socket *, struct soflow_hash_entry *);
static bool cfil_dgram_detach_db(struct socket *, struct soflow_db *);
bool check_port(struct sockaddr *, u_short);
860
861 /*
862 * Content filter global read write lock
863 */
864
865 static void
cfil_rw_lock_exclusive(lck_rw_t * lck)866 cfil_rw_lock_exclusive(lck_rw_t *lck)
867 {
868 void * __single lr_saved;
869
870 lr_saved = __unsafe_forge_single(void *, __builtin_return_address(0));
871
872 lck_rw_lock_exclusive(lck);
873
874 cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
875 cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
876 }
877
878 static void
cfil_rw_unlock_exclusive(lck_rw_t * lck)879 cfil_rw_unlock_exclusive(lck_rw_t *lck)
880 {
881 void * __single lr_saved;
882
883 lr_saved = __unsafe_forge_single(void *, __builtin_return_address(0));
884
885 lck_rw_unlock_exclusive(lck);
886
887 cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
888 cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
889 }
890
891 static void
cfil_rw_lock_shared(lck_rw_t * lck)892 cfil_rw_lock_shared(lck_rw_t *lck)
893 {
894 void * __single lr_saved;
895
896 lr_saved = __unsafe_forge_single(void *, __builtin_return_address(0));
897
898 lck_rw_lock_shared(lck);
899
900 cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
901 cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
902 }
903
904 static void
cfil_rw_unlock_shared(lck_rw_t * lck)905 cfil_rw_unlock_shared(lck_rw_t *lck)
906 {
907 void * __single lr_saved;
908
909 lr_saved = __unsafe_forge_single(void *, __builtin_return_address(0));
910
911 lck_rw_unlock_shared(lck);
912
913 cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
914 cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
915 }
916
917 static boolean_t
cfil_rw_lock_shared_to_exclusive(lck_rw_t * lck)918 cfil_rw_lock_shared_to_exclusive(lck_rw_t *lck)
919 {
920 boolean_t upgraded;
921 void * __single lr_saved;
922
923 lr_saved = __unsafe_forge_single(void *, __builtin_return_address(0));
924
925 upgraded = lck_rw_lock_shared_to_exclusive(lck);
926 if (upgraded) {
927 cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
928 cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
929 }
930 return upgraded;
931 }
932
933 static void
cfil_rw_lock_exclusive_to_shared(lck_rw_t * lck)934 cfil_rw_lock_exclusive_to_shared(lck_rw_t *lck)
935 {
936 void * __single lr_saved;
937
938 lr_saved = __unsafe_forge_single(void *, __builtin_return_address(0));
939
940 lck_rw_lock_exclusive_to_shared(lck);
941
942 cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
943 cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
944 }
945
/*
 * Assert that the global cfil rw lock is held by the current thread:
 * exclusive != 0 requires exclusive ownership, otherwise any hold
 * (shared or exclusive) satisfies the assertion.
 */
static void
cfil_rw_lock_assert_held(lck_rw_t *lck, int exclusive)
{
#if !MACH_ASSERT
#pragma unused(lck, exclusive)
#endif
	LCK_RW_ASSERT(lck,
	    exclusive ? LCK_RW_ASSERT_EXCLUSIVE : LCK_RW_ASSERT_HELD);
}
955
956 /*
957 * Return the number of bytes in the mbuf chain using the same
958 * method as m_length() or sballoc()
959 *
960 * Returns data len - starting from PKT start
961 * - retmbcnt - optional param to get total mbuf bytes in chain
962 * - retmbnum - optional param to get number of mbufs in chain
963 */
964 static unsigned int
cfil_data_length(struct mbuf * m,int * retmbcnt,int * retmbnum)965 cfil_data_length(struct mbuf *m, int *retmbcnt, int *retmbnum)
966 {
967 struct mbuf *m0;
968 unsigned int pktlen = 0;
969 int mbcnt;
970 int mbnum;
971
972 // Locate M_PKTHDR and mark as start of data if present
973 for (m0 = m; m0 != NULL; m0 = m0->m_next) {
974 if (m0->m_flags & M_PKTHDR) {
975 m = m0;
976 break;
977 }
978 }
979
980 if (retmbcnt == NULL && retmbnum == NULL) {
981 return m_length(m);
982 }
983
984 pktlen = 0;
985 mbcnt = 0;
986 mbnum = 0;
987 for (m0 = m; m0 != NULL; m0 = m0->m_next) {
988 pktlen += m0->m_len;
989 mbnum++;
990 mbcnt += _MSIZE;
991 if (m0->m_flags & M_EXT) {
992 mbcnt += m0->m_ext.ext_size;
993 }
994 }
995 if (retmbcnt) {
996 *retmbcnt = mbcnt;
997 }
998 if (retmbnum) {
999 *retmbnum = mbnum;
1000 }
1001 return pktlen;
1002 }
1003
1004 static struct mbuf *
cfil_data_start(struct mbuf * m)1005 cfil_data_start(struct mbuf *m)
1006 {
1007 struct mbuf *m0;
1008
1009 // Locate M_PKTHDR and use it as start of data if present
1010 for (m0 = m; m0 != NULL; m0 = m0->m_next) {
1011 if (m0->m_flags & M_PKTHDR) {
1012 return m0;
1013 }
1014 }
1015 return m;
1016 }
1017
1018 /*
1019 * Common mbuf queue utilities
1020 */
1021
1022 static inline void
cfil_queue_init(struct cfil_queue * cfq)1023 cfil_queue_init(struct cfil_queue *cfq)
1024 {
1025 cfq->q_start = 0;
1026 cfq->q_end = 0;
1027 MBUFQ_INIT(&cfq->q_mq);
1028 }
1029
1030 static inline uint64_t
cfil_queue_drain(struct cfil_queue * cfq)1031 cfil_queue_drain(struct cfil_queue *cfq)
1032 {
1033 uint64_t drained = cfq->q_start - cfq->q_end;
1034 cfq->q_start = 0;
1035 cfq->q_end = 0;
1036 MBUFQ_DRAIN(&cfq->q_mq);
1037
1038 return drained;
1039 }
1040
1041 /* Return 1 when empty, 0 otherwise */
1042 static inline int
cfil_queue_empty(struct cfil_queue * cfq)1043 cfil_queue_empty(struct cfil_queue *cfq)
1044 {
1045 return MBUFQ_EMPTY(&cfq->q_mq);
1046 }
1047
1048 static inline uint64_t
cfil_queue_offset_first(struct cfil_queue * cfq)1049 cfil_queue_offset_first(struct cfil_queue *cfq)
1050 {
1051 return cfq->q_start;
1052 }
1053
1054 static inline uint64_t
cfil_queue_offset_last(struct cfil_queue * cfq)1055 cfil_queue_offset_last(struct cfil_queue *cfq)
1056 {
1057 return cfq->q_end;
1058 }
1059
1060 static inline uint64_t
cfil_queue_len(struct cfil_queue * cfq)1061 cfil_queue_len(struct cfil_queue *cfq)
1062 {
1063 return cfq->q_end - cfq->q_start;
1064 }
1065
1066 /*
1067 * Routines to verify some fundamental assumptions
1068 */
1069
/*
 * Sanity-check a cfil_queue: the offsets must be ordered and agree
 * with emptiness, every queued chain must be live (not freed), of a
 * supported mbuf type, and of a length consistent with
 * cfil_data_length(); the sum of all chain lengths must equal the
 * offset span. Panics on any violation.
 */
static void
cfil_queue_verify(struct cfil_queue *cfq)
{
	mbuf_t chain;
	mbuf_t m;
	mbuf_t n;
	uint64_t queuesize = 0;

	/* Verify offset are ordered */
	VERIFY(cfq->q_start <= cfq->q_end);

	/*
	 * When queue is empty, the offsets are equal otherwise the offsets
	 * are different
	 */
	VERIFY((MBUFQ_EMPTY(&cfq->q_mq) && cfq->q_start == cfq->q_end) ||
	    (!MBUFQ_EMPTY(&cfq->q_mq) &&
	    cfq->q_start != cfq->q_end));

	MBUFQ_FOREACH(chain, &cfq->q_mq) {
		size_t chainsize = 0;
		m = chain;
		unsigned int mlen = cfil_data_length(m, NULL, NULL);
		// skip the addr and control stuff if present
		m = cfil_data_start(m);

		/* Catch use-after-free: freed memory carries M_TAG_FREE_PATTERN */
		if (m == NULL ||
		    m == (void *)M_TAG_FREE_PATTERN ||
		    m->m_next == (void *)M_TAG_FREE_PATTERN ||
		    m->m_nextpkt == (void *)M_TAG_FREE_PATTERN) {
			panic("%s - mq %p is free at %p", __func__,
			    &cfq->q_mq, m);
		}
		for (n = m; n != NULL; n = n->m_next) {
			if (!m_has_mtype(n, MTF_DATA | MTF_HEADER | MTF_OOBDATA)) {
				panic("%s - %p unsupported type %u", __func__,
				    n, n->m_type);
			}
			chainsize += n->m_len;
		}
		/* The walked chain length must agree with cfil_data_length() */
		if (mlen != chainsize) {
			panic("%s - %p m_length() %u != chainsize %lu",
			    __func__, m, mlen, chainsize);
		}
		queuesize += chainsize;
	}
	/* Total queued bytes must equal the offset span */
	OS_ANALYZER_SUPPRESS("81031590") if (queuesize != cfq->q_end - cfq->q_start) {
		panic("%s - %p queuesize %llu != offsetdiffs %llu", __func__,
		    m, queuesize, cfq->q_end - cfq->q_start);
	}
}
1121
/*
 * Append mbuf chain m (len bytes) to the queue and advance the end
 * offset by that many bytes. Queue consistency is verified before
 * and after when CFIL_QUEUE_VERIFY is enabled.
 */
static void
cfil_queue_enqueue(struct cfil_queue *cfq, mbuf_t m, size_t len)
{
	CFIL_QUEUE_VERIFY(cfq);

	MBUFQ_ENQUEUE(&cfq->q_mq, m);
	cfq->q_end += len;

	CFIL_QUEUE_VERIFY(cfq);
}
1132
/*
 * Unlink mbuf chain m from the queue and advance the start offset by
 * len bytes. len must match the chain's actual data length (verified).
 * The removed chain's queue linkage is cleared before returning it to
 * the caller.
 */
static void
cfil_queue_remove(struct cfil_queue *cfq, mbuf_t m, size_t len)
{
	CFIL_QUEUE_VERIFY(cfq);

	VERIFY(cfil_data_length(m, NULL, NULL) == len);

	MBUFQ_REMOVE(&cfq->q_mq, m);
	MBUFQ_NEXT(m) = NULL;
	cfq->q_start += len;

	CFIL_QUEUE_VERIFY(cfq);
}
1146
1147 static mbuf_t
cfil_queue_first(struct cfil_queue * cfq)1148 cfil_queue_first(struct cfil_queue *cfq)
1149 {
1150 return MBUFQ_FIRST(&cfq->q_mq);
1151 }
1152
1153 static mbuf_t
cfil_queue_next(struct cfil_queue * cfq,mbuf_t m)1154 cfil_queue_next(struct cfil_queue *cfq, mbuf_t m)
1155 {
1156 #pragma unused(cfq)
1157 return MBUFQ_NEXT(m);
1158 }
1159
1160 static void
cfil_entry_buf_verify(struct cfe_buf * cfe_buf)1161 cfil_entry_buf_verify(struct cfe_buf *cfe_buf)
1162 {
1163 CFIL_QUEUE_VERIFY(&cfe_buf->cfe_ctl_q);
1164 CFIL_QUEUE_VERIFY(&cfe_buf->cfe_pending_q);
1165
1166 /* Verify the queues are ordered so that pending is before ctl */
1167 VERIFY(cfe_buf->cfe_ctl_q.q_start >= cfe_buf->cfe_pending_q.q_end);
1168
1169 /* The peek offset cannot be less than the pass offset */
1170 VERIFY(cfe_buf->cfe_peek_offset >= cfe_buf->cfe_pass_offset);
1171
1172 /* Make sure we've updated the offset we peeked at */
1173 VERIFY(cfe_buf->cfe_ctl_q.q_start <= cfe_buf->cfe_peeked);
1174 }
1175
1176 static void
cfil_entry_verify(struct cfil_entry * entry)1177 cfil_entry_verify(struct cfil_entry *entry)
1178 {
1179 cfil_entry_buf_verify(&entry->cfe_snd);
1180 cfil_entry_buf_verify(&entry->cfe_rcv);
1181 }
1182
1183 static void
cfil_info_buf_verify(struct cfi_buf * cfi_buf)1184 cfil_info_buf_verify(struct cfi_buf *cfi_buf)
1185 {
1186 CFIL_QUEUE_VERIFY(&cfi_buf->cfi_inject_q);
1187
1188 VERIFY(cfi_buf->cfi_pending_first <= cfi_buf->cfi_pending_last);
1189 }
1190
1191 static void
cfil_info_verify(struct cfil_info * cfil_info)1192 cfil_info_verify(struct cfil_info *cfil_info)
1193 {
1194 int i;
1195
1196 if (cfil_info == NULL) {
1197 return;
1198 }
1199
1200 cfil_info_buf_verify(&cfil_info->cfi_snd);
1201 cfil_info_buf_verify(&cfil_info->cfi_rcv);
1202
1203 for (i = 0; i < MAX_CONTENT_FILTER; i++) {
1204 cfil_entry_verify(&cfil_info->cfi_entries[i]);
1205 }
1206 }
1207
1208 static void
verify_content_filter(struct content_filter * cfc)1209 verify_content_filter(struct content_filter *cfc)
1210 {
1211 struct cfil_entry *entry;
1212 uint32_t count = 0;
1213
1214 VERIFY(cfc->cf_sock_count >= 0);
1215
1216 TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) {
1217 count++;
1218 VERIFY(cfc == entry->cfe_filter);
1219 }
1220 VERIFY(count == cfc->cf_sock_count);
1221 }
1222
1223 /*
1224 * Kernel control socket callbacks
1225 */
/*
 * Kernel control "connect" callback: a user space filter agent has
 * attached. Allocates a struct content_filter, registers it in
 * content_filters[] at slot sc_unit - 1, and publishes it as the kctl
 * instance's unitinfo. Also lazily allocates the per-unit periodic
 * stats report buffer. Returns 0, EINVAL (bad unit) or EADDRINUSE
 * (unit already taken).
 */
static errno_t
cfil_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
    void **unitinfo)
{
	errno_t error = 0;
	struct content_filter * __single cfc = NULL;

	CFIL_LOG(LOG_NOTICE, "");

	/* Allocate before taking the global lock; Z_NOFAIL never returns NULL */
	cfc = zalloc_flags(content_filter_zone, Z_WAITOK | Z_ZERO | Z_NOFAIL);

	cfil_rw_lock_exclusive(&cfil_lck_rw);

	if (sac->sc_unit == 0 || sac->sc_unit > MAX_CONTENT_FILTER) {
		CFIL_LOG(LOG_ERR, "bad sc_unit %u", sac->sc_unit);
		error = EINVAL;
	} else if (content_filters[sac->sc_unit - 1] != NULL) {
		CFIL_LOG(LOG_ERR, "sc_unit %u in use", sac->sc_unit);
		error = EADDRINUSE;
	} else {
		/*
		 * kernel control socket kcunit numbers start at 1
		 */
		content_filters[sac->sc_unit - 1] = cfc;

		cfc->cf_kcref = kctlref;
		cfc->cf_kcunit = sac->sc_unit;
		TAILQ_INIT(&cfc->cf_sock_entries);

		*unitinfo = cfc;
		cfil_active_count++;

		/* First filter attaching: register the datagram GC callbacks */
		if (cfil_active_count == 1) {
			soflow_feat_set_functions(cfil_dgram_gc_needed, cfil_dgram_gc_perform,
			    cfil_dgram_detach_entry, cfil_dgram_detach_db);
		}

		// Allocate periodic stats buffer for this filter
		if (global_cfil_stats_report_buffers[cfc->cf_kcunit - 1] == NULL) {
			/* Drop the lock across the blocking allocation */
			cfil_rw_unlock_exclusive(&cfil_lck_rw);

			struct cfil_stats_report_buffer * __single buf;

			buf = kalloc_type(struct cfil_stats_report_buffer,
			    Z_WAITOK | Z_ZERO | Z_NOFAIL);

			cfil_rw_lock_exclusive(&cfil_lck_rw);

			/* Another thread may have won the race */
			if (global_cfil_stats_report_buffers[cfc->cf_kcunit - 1] != NULL) {
				kfree_type(struct cfil_stats_report_buffer, buf);
			} else {
				global_cfil_stats_report_buffers[cfc->cf_kcunit - 1] = buf;
			}
		}
	}
	cfil_rw_unlock_exclusive(&cfil_lck_rw);

	/* On failure the filter was never published, so it is safe to free */
	if (error != 0 && cfc != NULL) {
		zfree(content_filter_zone, cfc);
	}

	if (error == 0) {
		OSIncrementAtomic(&cfil_stats.cfs_ctl_connect_ok);
	} else {
		OSIncrementAtomic(&cfil_stats.cfs_ctl_connect_fail);
	}

	CFIL_LOG(LOG_INFO, "return %d cfil_active_count %u kcunit %u",
	    error, cfil_active_count, sac->sc_unit);

	return error;
}
1299
1300 static void
cfil_update_behavior_flags(void)1301 cfil_update_behavior_flags(void)
1302 {
1303 struct content_filter *cfc = NULL;
1304
1305 // Update global flag
1306 bool preserve_connections = false;
1307 for (int i = 0; i < MAX_CONTENT_FILTER; i++) {
1308 cfc = content_filters[i];
1309 if (cfc != NULL) {
1310 if (cfc->cf_flags & CFF_PRESERVE_CONNECTIONS) {
1311 preserve_connections = true;
1312 } else {
1313 preserve_connections = false;
1314 break;
1315 }
1316 }
1317 }
1318 if (preserve_connections == true) {
1319 cfil_behavior_flags |= CFIL_BEHAVIOR_FLAG_PRESERVE_CONNECTIONS;
1320 } else {
1321 cfil_behavior_flags &= ~CFIL_BEHAVIOR_FLAG_PRESERVE_CONNECTIONS;
1322 }
1323 CFIL_LOG(LOG_INFO, "CFIL Preserve Connections - %s",
1324 (cfil_behavior_flags & CFIL_BEHAVIOR_FLAG_PRESERVE_CONNECTIONS) ? "On" : "Off");
1325 }
1326
/*
 * Kernel control "disconnect" callback: a user space filter agent is
 * detaching. Walks the filter's socket-entry list, letting all pending
 * data flow (pass up to CFM_MAX_OFFSET in both directions) before
 * unlinking each entry, then deregisters the filter and frees it.
 *
 * Locking: holds cfil_lck_rw exclusively, but must drop it both to
 * respect the lock hierarchy when taking a socket lock and when
 * freeing a cfil_info (which retakes the global lock).
 */
static errno_t
cfil_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo)
{
#pragma unused(kctlref)
	errno_t error = 0;
	struct content_filter * __single cfc;
	struct cfil_entry *entry;
	uint64_t sock_flow_id = 0;

	CFIL_LOG(LOG_NOTICE, "");

	if (kcunit > MAX_CONTENT_FILTER) {
		CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
		    kcunit, MAX_CONTENT_FILTER);
		error = EINVAL;
		goto done;
	}

	cfc = (struct content_filter *)unitinfo;
	if (cfc == NULL) {
		goto done;
	}

	cfil_rw_lock_exclusive(&cfil_lck_rw);
	/* unitinfo must match the registered filter for this unit */
	if (content_filters[kcunit - 1] != cfc || cfc->cf_kcunit != kcunit) {
		CFIL_LOG(LOG_ERR, "bad unit info %u)",
		    kcunit);
		cfil_rw_unlock_exclusive(&cfil_lck_rw);
		goto done;
	}
	cfc->cf_flags |= CFF_DETACHING;
	/*
	 * Remove all sockets from the filter
	 */
	while ((entry = TAILQ_FIRST(&cfc->cf_sock_entries)) != NULL) {
		cfil_rw_lock_assert_held(&cfil_lck_rw, 1);

		verify_content_filter(cfc);
		/*
		 * Accept all outstanding data by pushing to next filter
		 * or back to socket
		 *
		 * TBD: Actually we should make sure all data has been pushed
		 * back to socket
		 */
		if (entry->cfe_cfil_info && entry->cfe_cfil_info->cfi_so) {
			struct cfil_info *cfil_info = entry->cfe_cfil_info;
			struct socket *so = cfil_info->cfi_so;
			sock_flow_id = cfil_info->cfi_sock_id;

			/* Need to let data flow immediately */
			entry->cfe_flags |= CFEF_SENT_SOCK_ATTACHED |
			    CFEF_DATA_START;

			// Before we release global lock, retain the cfil_info -
			// We attempt to retain a valid cfil_info to prevent any deallocation until
			// we are done. Abort retain if cfil_info has already entered the free code path.
			if (cfil_info == NULL || os_ref_retain_try(&cfil_info->cfi_ref_count) == false) {
				// Failing to retain cfil_info means detach is in progress already,
				// remove entry from filter list and move on.
				entry->cfe_filter = NULL;
				entry->cfe_necp_control_unit = 0;
				TAILQ_REMOVE(&cfc->cf_sock_entries, entry, cfe_link);
				cfc->cf_sock_count--;
				continue;
			}

			/*
			 * Respect locking hierarchy
			 */
			cfil_rw_unlock_exclusive(&cfil_lck_rw);

			// Search for socket from cfil_info sock_flow_id and lock so
			so = cfil_socket_from_sock_id(sock_flow_id, false);
			if (so == NULL || so != cfil_info->cfi_so) {
				cfil_rw_lock_exclusive(&cfil_lck_rw);

				// Socket has already been disconnected and removed from socket list.
				// Remove entry from filter list and move on.
				// (Re-check entry is still at the head: the list may have changed
				// while the global lock was dropped.)
				if (entry == TAILQ_FIRST(&cfc->cf_sock_entries)) {
					entry->cfe_filter = NULL;
					entry->cfe_necp_control_unit = 0;
					TAILQ_REMOVE(&cfc->cf_sock_entries, entry, cfe_link);
					cfc->cf_sock_count--;
				}

				goto release_cfil_info;
			}

			/*
			 * When cfe_filter is NULL the filter is detached
			 * and the entry has been removed from cf_sock_entries
			 */
			if ((so->so_cfil == NULL && so->so_flow_db == NULL) || entry->cfe_filter == NULL) {
				cfil_rw_lock_exclusive(&cfil_lck_rw);
				goto release;
			}

			/* Pass everything outstanding in both directions (1 = out, 0 = in) */
			(void) cfil_action_data_pass(so, cfil_info, kcunit, 1,
			    CFM_MAX_OFFSET,
			    CFM_MAX_OFFSET);

			(void) cfil_action_data_pass(so, cfil_info, kcunit, 0,
			    CFM_MAX_OFFSET,
			    CFM_MAX_OFFSET);

			cfil_rw_lock_exclusive(&cfil_lck_rw);

			/*
			 * Check again to make sure if the cfil_info is still valid
			 * as the socket may have been unlocked when calling
			 * cfil_acquire_sockbuf()
			 */
			if (entry->cfe_filter == NULL ||
			    (so->so_cfil == NULL && soflow_db_get_feature_context(so->so_flow_db, sock_flow_id) == NULL)) {
				goto release;
			}

			/* The filter is now detached */
			entry->cfe_flags |= CFEF_CFIL_DETACHED;

			if (cfil_info->cfi_debug) {
				cfil_info_log(LOG_ERR, cfil_info, "CFIL: FILTER DISCONNECTED");
			}

			CFIL_LOG(LOG_NOTICE, "so %llx detached %u",
			    (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
			/* Wake up anyone blocked in cfil_sock_close_wait() */
			if ((cfil_info->cfi_flags & CFIF_CLOSE_WAIT) &&
			    cfil_filters_attached(so) == 0) {
				CFIL_LOG(LOG_NOTICE, "so %llx waking",
				    (uint64_t)VM_KERNEL_ADDRPERM(so));
				wakeup((caddr_t)cfil_info);
			}

			/*
			 * Remove the filter entry from the content filter
			 * but leave the rest of the state intact as the queues
			 * may not be empty yet
			 */
			entry->cfe_filter = NULL;
			entry->cfe_necp_control_unit = 0;

			TAILQ_REMOVE(&cfc->cf_sock_entries, entry, cfe_link);
			cfc->cf_sock_count--;

			// This is the last filter disconnecting, clear the cfil_info
			// saved policy state so we will be able to drop this flow if
			// a new filter get installed.
			if (cfil_active_count == 1) {
				cfil_info->cfi_filter_control_unit = 0;
				cfil_info->cfi_filter_policy_gencount = 0;
			}
release:
			socket_unlock(so, 1);

release_cfil_info:
			/*
			 * Release reference on cfil_info. To avoid double locking,
			 * temporarily unlock in case it has been detached and we
			 * end up freeing it which will take the global lock again.
			 */
			cfil_rw_unlock_exclusive(&cfil_lck_rw);
			CFIL_INFO_FREE(cfil_info);
			cfil_rw_lock_exclusive(&cfil_lck_rw);
		}
	}
	verify_content_filter(cfc);

	/* Free the stats buffer for this filter */
	if (global_cfil_stats_report_buffers[cfc->cf_kcunit - 1] != NULL) {
		kfree_type(struct cfil_stats_report_buffer,
		    global_cfil_stats_report_buffers[cfc->cf_kcunit - 1]);
		global_cfil_stats_report_buffers[cfc->cf_kcunit - 1] = NULL;
	}
	VERIFY(cfc->cf_sock_count == 0);

	/*
	 * Make filter inactive
	 */
	content_filters[kcunit - 1] = NULL;
	cfil_active_count--;
	cfil_update_behavior_flags();
	cfil_rw_unlock_exclusive(&cfil_lck_rw);

	if (cfc->cf_crypto_state != NULL) {
		cfil_crypto_cleanup_state(cfc->cf_crypto_state);
		cfc->cf_crypto_state = NULL;
	}

	zfree(content_filter_zone, cfc);
done:
	if (error == 0) {
		OSIncrementAtomic(&cfil_stats.cfs_ctl_disconnect_ok);
	} else {
		OSIncrementAtomic(&cfil_stats.cfs_ctl_disconnect_fail);
	}

	CFIL_LOG(LOG_INFO, "return %d cfil_active_count %u kcunit %u",
	    error, cfil_active_count, kcunit);

	return error;
}
1529
/*
 * cfil_acquire_sockbuf()
 *
 * Prevent any other thread from acquiring the sockbuf
 * We use sb_cfil_thread as a semaphore to prevent other threads from
 * messing with the sockbuf -- see sblock()
 * Note: We do not set SB_LOCK here because the thread may check or modify
 * SB_LOCK several times until it calls cfil_release_sockbuf() -- currently
 * sblock(), sbunlock() or sodefunct()
 *
 * outgoing selects so_snd (non-zero) or so_rcv (zero). Re-entrant on
 * the same thread via sb_cfil_refs. Returns 0 normally, EPIPE when the
 * flow has been marked for drop (CFIF_DROP).
 */
static int
cfil_acquire_sockbuf(struct socket *so, struct cfil_info *cfil_info, int outgoing)
{
	thread_t __single tp = current_thread();
	struct sockbuf *sb = outgoing ? &so->so_snd : &so->so_rcv;
	lck_mtx_t *mutex_held;
	int error = 0;

	/*
	 * Wait until no thread is holding the sockbuf and other content
	 * filter threads have released the sockbuf
	 */
	while ((sb->sb_flags & SB_LOCK) ||
	    (sb->sb_cfil_thread != NULL && sb->sb_cfil_thread != tp)) {
		if (so->so_proto->pr_getlock != NULL) {
			mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
		} else {
			mutex_held = so->so_proto->pr_domain->dom_mtx;
		}

		/* The socket lock doubles as the msleep mutex */
		LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);

		sb->sb_wantlock++;
		VERIFY(sb->sb_wantlock != 0);

		msleep(&sb->sb_flags, mutex_held, PSOCK, "cfil_acquire_sockbuf",
		    NULL);

		VERIFY(sb->sb_wantlock != 0);
		sb->sb_wantlock--;
	}
	/*
	 * Use reference count for repetitive calls on same thread
	 */
	if (sb->sb_cfil_refs == 0) {
		VERIFY(sb->sb_cfil_thread == NULL);
		VERIFY((sb->sb_flags & SB_LOCK) == 0);

		sb->sb_cfil_thread = tp;
		sb->sb_flags |= SB_LOCK;
	}
	sb->sb_cfil_refs++;

	/* We acquire the socket buffer when we need to cleanup */
	if (cfil_info == NULL) {
		CFIL_LOG(LOG_ERR, "so %llx cfil detached",
		    (uint64_t)VM_KERNEL_ADDRPERM(so));
		error = 0;
	} else if (cfil_info->cfi_flags & CFIF_DROP) {
		CFIL_LOG(LOG_ERR, "so %llx drop set",
		    (uint64_t)VM_KERNEL_ADDRPERM(so));
		error = EPIPE;
	}

	return error;
}
1596
/*
 * Release a hold taken by cfil_acquire_sockbuf() on the send
 * (outgoing != 0) or receive sockbuf. The hold is reference counted;
 * SB_LOCK and the owner thread are cleared only when the last
 * reference on this thread unwinds, at which point any waiters are
 * woken. Panics if called from a thread that does not own the hold.
 */
static void
cfil_release_sockbuf(struct socket *so, int outgoing)
{
	struct sockbuf *sb = outgoing ? &so->so_snd : &so->so_rcv;
	thread_t __single tp = current_thread();

	socket_lock_assert_owned(so);

	if (sb->sb_cfil_thread != NULL && sb->sb_cfil_thread != tp) {
		panic("%s sb_cfil_thread %p not current %p", __func__,
		    sb->sb_cfil_thread, tp);
	}
	/*
	 * Don't panic if we are defunct because SB_LOCK has
	 * been cleared by sodefunct()
	 */
	if (!(so->so_flags & SOF_DEFUNCT) && !(sb->sb_flags & SB_LOCK)) {
		panic("%s SB_LOCK not set on %p", __func__,
		    sb);
	}
	/*
	 * We can unlock when the thread unwinds to the last reference
	 */
	sb->sb_cfil_refs--;
	if (sb->sb_cfil_refs == 0) {
		sb->sb_cfil_thread = NULL;
		sb->sb_flags &= ~SB_LOCK;

		/* Wake threads blocked in cfil_acquire_sockbuf() */
		if (sb->sb_wantlock > 0) {
			wakeup(&sb->sb_flags);
		}
	}
}
1630
1631 cfil_sock_id_t
cfil_sock_id_from_socket(struct socket * so)1632 cfil_sock_id_from_socket(struct socket *so)
1633 {
1634 if ((so->so_flags & SOF_CONTENT_FILTER) && so->so_cfil) {
1635 return so->so_cfil->cfi_sock_id;
1636 } else {
1637 return CFIL_SOCK_ID_NONE;
1638 }
1639 }
1640
/*
 * cfil_socket_safe_lock -
 * This routine attempts to lock the socket safely.
 *
 * The passed in pcbinfo is assumed to be locked and must be unlocked once the
 * inp state is safeguarded and before we attempt to lock/unlock the socket.
 * This is to prevent getting blocked by socket_lock() while holding the pcbinfo
 * lock, avoiding potential deadlock with other processes contending for the same
 * resources. This is also to avoid double locking the pcbinfo for rip sockets
 * since rip_unlock() will lock ripcbinfo if it needs to dispose inpcb when
 * so_usecount is 0.
 *
 * Returns true with the socket locked; returns false with neither the
 * pcbinfo lock nor the socket lock held.
 */
static bool
cfil_socket_safe_lock(struct inpcb *inp, struct inpcbinfo *pcbinfo)
{
	struct socket *so = NULL;

	VERIFY(pcbinfo != NULL);

	if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
		// Safeguarded the inp state, unlock pcbinfo before locking socket.
		lck_rw_done(&pcbinfo->ipi_lock);

		so = inp->inp_socket;
		socket_lock(so, 1);
		/* Re-check under the socket lock: the pcb may be dying */
		if (in_pcb_checkstate(inp, WNT_RELEASE, 1) != WNT_STOPUSING) {
			return true;
		}
	} else {
		// Failed to safeguarded the inp state, unlock pcbinfo and abort.
		lck_rw_done(&pcbinfo->ipi_lock);
	}

	/* Undo the socket lock if the pcb went away after we took it */
	if (so) {
		socket_unlock(so, 1);
	}
	return false;
}
1679
/*
 * Find and lock the socket matching a cfil sock id.
 *
 * The id packs the low 32 bits of the socket gencnt in its upper word
 * and the inp flowhash in its lower word. Searches the TCP pcb list
 * first (skipped when udp_only), then the UDP and raw-IP lists, which
 * match on so_flow_db instead of so_cfil. Returns the socket locked
 * via cfil_socket_safe_lock(), or NULL if no match is found.
 */
static struct socket *
cfil_socket_from_sock_id(cfil_sock_id_t cfil_sock_id, bool udp_only)
{
	struct socket *so = NULL;
	u_int64_t gencnt = cfil_sock_id >> 32;
	u_int32_t flowhash = (u_int32_t)(cfil_sock_id & 0x0ffffffff);
	struct inpcb *inp = NULL;
	struct inpcbinfo *pcbinfo = NULL;

	if (udp_only) {
		goto find_udp;
	}

	/* TCP: match on flowhash + gencnt, and require an attached so_cfil */
	pcbinfo = &tcbinfo;
	lck_rw_lock_shared(&pcbinfo->ipi_lock);
	LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
		if (inp->inp_state != INPCB_STATE_DEAD &&
		    inp->inp_socket != NULL &&
		    inp->inp_flowhash == flowhash &&
		    (inp->inp_socket->so_gencnt & 0x0ffffffff) == gencnt &&
		    inp->inp_socket->so_cfil != NULL) {
			if (cfil_socket_safe_lock(inp, pcbinfo)) {
				so = inp->inp_socket;
			}
			/* pcbinfo is already unlocked, we are done. */
			goto done;
		}
	}
	lck_rw_done(&pcbinfo->ipi_lock);
	if (so != NULL) {
		goto done;
	}

find_udp:

	/* UDP: flows live in so_flow_db; match on gencnt only */
	pcbinfo = &udbinfo;
	lck_rw_lock_shared(&pcbinfo->ipi_lock);
	LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
		if (inp->inp_state != INPCB_STATE_DEAD &&
		    inp->inp_socket != NULL &&
		    inp->inp_socket->so_flow_db != NULL &&
		    (inp->inp_socket->so_gencnt & 0x0ffffffff) == gencnt) {
			if (cfil_socket_safe_lock(inp, pcbinfo)) {
				so = inp->inp_socket;
			}
			/* pcbinfo is already unlocked, we are done. */
			goto done;
		}
	}
	lck_rw_done(&pcbinfo->ipi_lock);
	if (so != NULL) {
		goto done;
	}

	/* Raw IP: same matching rule as UDP */
	pcbinfo = &ripcbinfo;
	lck_rw_lock_shared(&pcbinfo->ipi_lock);
	LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
		if (inp->inp_state != INPCB_STATE_DEAD &&
		    inp->inp_socket != NULL &&
		    inp->inp_socket->so_flow_db != NULL &&
		    (inp->inp_socket->so_gencnt & 0x0ffffffff) == gencnt) {
			if (cfil_socket_safe_lock(inp, pcbinfo)) {
				so = inp->inp_socket;
			}
			/* pcbinfo is already unlocked, we are done. */
			goto done;
		}
	}
	lck_rw_done(&pcbinfo->ipi_lock);

done:
	if (so == NULL) {
		OSIncrementAtomic(&cfil_stats.cfs_sock_id_not_found);
		CFIL_LOG(LOG_DEBUG,
		    "no socket for sock_id %llx gencnt %llx flowhash %x",
		    cfil_sock_id, gencnt, flowhash);
	}

	return so;
}
1760
/*
 * cfil_socket_from_client_uuid - find the socket whose inpcb carries the
 * given NECP client UUID, scanning the TCP then the UDP pcb lists.
 *
 * On a UUID match, *cfil_attached reports whether content filtering is
 * attached (so_cfil for TCP, so_flow_db for UDP). Note that *cfil_attached
 * is set even if the subsequent cfil_socket_safe_lock() fails and NULL is
 * returned. On success the socket is returned locked.
 */
static struct socket *
cfil_socket_from_client_uuid(uuid_t necp_client_uuid, bool *cfil_attached)
{
	struct socket *so = NULL;
	struct inpcb *inp = NULL;
	struct inpcbinfo *pcbinfo = &tcbinfo;

	lck_rw_lock_shared(&pcbinfo->ipi_lock);
	LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
		if (inp->inp_state != INPCB_STATE_DEAD &&
		    inp->inp_socket != NULL &&
		    uuid_compare(inp->necp_client_uuid, necp_client_uuid) == 0) {
			*cfil_attached = (inp->inp_socket->so_cfil != NULL);
			if (cfil_socket_safe_lock(inp, pcbinfo)) {
				so = inp->inp_socket;
			}
			/* pcbinfo is already unlocked, we are done. */
			goto done;
		}
	}
	lck_rw_done(&pcbinfo->ipi_lock);
	if (so != NULL) {
		goto done;
	}

	pcbinfo = &udbinfo;
	lck_rw_lock_shared(&pcbinfo->ipi_lock);
	LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
		if (inp->inp_state != INPCB_STATE_DEAD &&
		    inp->inp_socket != NULL &&
		    uuid_compare(inp->necp_client_uuid, necp_client_uuid) == 0) {
			*cfil_attached = (inp->inp_socket->so_flow_db != NULL);
			if (cfil_socket_safe_lock(inp, pcbinfo)) {
				so = inp->inp_socket;
			}
			/* pcbinfo is already unlocked, we are done. */
			goto done;
		}
	}
	lck_rw_done(&pcbinfo->ipi_lock);

done:
	return so;
}
1805
/*
 * cfil_info_stats_toggle - arm or disarm periodic stats reporting for a
 * flow according to the frequency requested in a filter verdict.
 *
 * Non-zero report_frequency: record it on 'entry' (clamped to at least
 * CFIL_STATS_REPORT_INTERVAL_MIN_MSEC), timestamp the entry, and add
 * cfil_info to cfil_sock_head_stats unless already present, waking the
 * stats thread when this is the first tracked flow.
 *
 * Zero report_frequency: clear the entry's frequency and, once no other
 * filter entry on this cfil_info still requests stats, remove cfil_info
 * from the list and decrement the tracked-flow count.
 *
 * Caller holds cfil_lck_rw exclusively (see cfil_ctl_send()).
 */
static void
cfil_info_stats_toggle(struct cfil_info *cfil_info, struct cfil_entry *entry, uint32_t report_frequency)
{
	struct cfil_info *cfil = NULL;
	Boolean found = FALSE;
	int kcunit;

	if (cfil_info == NULL) {
		return;
	}

	if (report_frequency) {
		if (entry == NULL) {
			return;
		}

		// Update stats reporting frequency.
		if (entry->cfe_stats_report_frequency != report_frequency) {
			entry->cfe_stats_report_frequency = report_frequency;
			if (entry->cfe_stats_report_frequency < CFIL_STATS_REPORT_INTERVAL_MIN_MSEC) {
				entry->cfe_stats_report_frequency = CFIL_STATS_REPORT_INTERVAL_MIN_MSEC;
			}
			microuptime(&entry->cfe_stats_report_ts);

			// Insert cfil_info into list only if it is not in yet.
			TAILQ_FOREACH(cfil, &cfil_sock_head_stats, cfi_link_stats) {
				if (cfil == cfil_info) {
					return;
				}
			}

			TAILQ_INSERT_TAIL(&cfil_sock_head_stats, cfil_info, cfi_link_stats);

			// Wake up stats thread if this is first flow added
			if (cfil_sock_attached_stats_count == 0) {
				thread_wakeup((caddr_t)&cfil_sock_attached_stats_count);
			}
			cfil_sock_attached_stats_count++;

			if (cfil_info->cfi_debug && cfil_log_stats) {
				CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED - STATS FLOW INSERTED: <so %llx sockID %llu <%llx>> stats frequency %d msecs",
				    cfil_info->cfi_so ? (uint64_t)VM_KERNEL_ADDRPERM(cfil_info->cfi_so) : 0,
				    cfil_info->cfi_sock_id, cfil_info->cfi_sock_id,
				    entry->cfe_stats_report_frequency);
			}
		}
	} else {
		// Turn off stats reporting for this filter.
		if (entry != NULL) {
			// Already off, no change.
			if (entry->cfe_stats_report_frequency == 0) {
				return;
			}

			entry->cfe_stats_report_frequency = 0;
			// If cfil_info still has filter(s) asking for stats, no need to remove from list.
			for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
				if (cfil_info->cfi_entries[kcunit - 1].cfe_stats_report_frequency > 0) {
					return;
				}
			}
		}

		// No more filter asking for stats for this cfil_info, remove from list.
		if (!TAILQ_EMPTY(&cfil_sock_head_stats)) {
			found = FALSE;
			TAILQ_FOREACH(cfil, &cfil_sock_head_stats, cfi_link_stats) {
				if (cfil == cfil_info) {
					found = TRUE;
					break;
				}
			}
			if (found) {
				cfil_sock_attached_stats_count--;
				TAILQ_REMOVE(&cfil_sock_head_stats, cfil_info, cfi_link_stats);
				if (cfil_info->cfi_debug && cfil_log_stats) {
					CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED - STATS FLOW DELETED: <so %llx sockID %llu <%llx>> stats frequency reset",
					    cfil_info->cfi_so ? (uint64_t)VM_KERNEL_ADDRPERM(cfil_info->cfi_so) : 0,
					    cfil_info->cfi_sock_id, cfil_info->cfi_sock_id);
				}
			}
		}
	}
}
1890
/*
 * cfil_ctl_send - handle a message sent by the user space filter agent on
 * its kernel control socket.
 *
 * Validates the message header (version, type, length), dispatches
 * CFM_OP_BLESS_CLIENT and CFM_OP_SET_CRYPTO_KEY without a socket lookup,
 * then for action messages finds and locks the target socket by
 * cfm_sock_id and applies the verdict: CFM_OP_DATA_UPDATE (pass/peek
 * offsets in both directions plus stats-frequency toggling) or
 * CFM_OP_DROP. The mbuf chain is always freed before returning.
 */
static errno_t
cfil_ctl_send(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo, mbuf_t m,
    int flags)
{
#pragma unused(kctlref, flags)
	errno_t error = 0;
	struct cfil_msg_hdr *msghdr;
	struct content_filter *cfc = (struct content_filter *)unitinfo;
	struct socket *so;
	struct cfil_msg_action * __single action_msg;
	struct cfil_entry *entry;
	struct cfil_info * __single cfil_info = NULL;
	unsigned int data_len = 0;

	CFIL_LOG(LOG_INFO, "");

	if (cfc == NULL) {
		CFIL_LOG(LOG_ERR, "no unitinfo");
		error = EINVAL;
		goto done;
	}

	if (kcunit > MAX_CONTENT_FILTER) {
		CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
		    kcunit, MAX_CONTENT_FILTER);
		error = EINVAL;
		goto done;
	}
	if (m == NULL) {
		CFIL_LOG(LOG_ERR, "null mbuf");
		error = EINVAL;
		goto done;
	}
	data_len = m_length(m);

	if (data_len < sizeof(struct cfil_msg_hdr)) {
		CFIL_LOG(LOG_ERR, "too short %u", data_len);
		error = EINVAL;
		goto done;
	}
	msghdr = mtod(m, struct cfil_msg_hdr *);
	if (msghdr->cfm_version != CFM_VERSION_CURRENT) {
		CFIL_LOG(LOG_ERR, "bad version %u", msghdr->cfm_version);
		error = EINVAL;
		goto done;
	}
	if (msghdr->cfm_type != CFM_TYPE_ACTION) {
		CFIL_LOG(LOG_ERR, "bad type %u", msghdr->cfm_type);
		error = EINVAL;
		goto done;
	}
	if (msghdr->cfm_len > data_len) {
		CFIL_LOG(LOG_ERR, "bad length %u", msghdr->cfm_len);
		error = EINVAL;
		goto done;
	}

	/* Validate action operation */
	switch (msghdr->cfm_op) {
	case CFM_OP_DATA_UPDATE:
		OSIncrementAtomic(
			&cfil_stats.cfs_ctl_action_data_update);
		break;
	case CFM_OP_DROP:
		OSIncrementAtomic(&cfil_stats.cfs_ctl_action_drop);
		break;
	case CFM_OP_BLESS_CLIENT:
		// Handled here directly; no target socket lookup needed.
		if (msghdr->cfm_len != sizeof(struct cfil_msg_bless_client)) {
			OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_len);
			error = EINVAL;
			CFIL_LOG(LOG_ERR, "bad len: %u for op %u",
			    msghdr->cfm_len,
			    msghdr->cfm_op);
			goto done;
		}
		error = cfil_action_bless_client(kcunit, msghdr);
		goto done;
	case CFM_OP_SET_CRYPTO_KEY:
		// Handled here directly; no target socket lookup needed.
		if (msghdr->cfm_len != sizeof(struct cfil_msg_set_crypto_key)) {
			OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_len);
			error = EINVAL;
			CFIL_LOG(LOG_ERR, "bad len: %u for op %u",
			    msghdr->cfm_len,
			    msghdr->cfm_op);
			goto done;
		}
		error = cfil_action_set_crypto_key(kcunit, msghdr);
		goto done;
	default:
		OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_op);
		CFIL_LOG(LOG_ERR, "bad op %u", msghdr->cfm_op);
		error = EINVAL;
		goto done;
	}
	// From here on only CFM_OP_DATA_UPDATE / CFM_OP_DROP remain, both
	// carried in a struct cfil_msg_action.
	if (msghdr->cfm_len != sizeof(struct cfil_msg_action)) {
		OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_len);
		error = EINVAL;
		CFIL_LOG(LOG_ERR, "bad len: %u for op %u",
		    msghdr->cfm_len,
		    msghdr->cfm_op);
		goto done;
	}
	// Confirm unitinfo still corresponds to this kcunit's registered filter.
	cfil_rw_lock_shared(&cfil_lck_rw);
	if (cfc != (void *)content_filters[kcunit - 1]) {
		CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
		    kcunit);
		error = EINVAL;
		cfil_rw_unlock_shared(&cfil_lck_rw);
		goto done;
	}
	cfil_rw_unlock_shared(&cfil_lck_rw);

	// Search for socket (TCP+UDP and lock so)
	so = cfil_socket_from_sock_id(msghdr->cfm_sock_id, false);
	if (so == NULL) {
		CFIL_LOG(LOG_NOTICE, "bad sock_id %llx",
		    msghdr->cfm_sock_id);
		error = EINVAL;
		goto done;
	}

	// UDP flows live in the per-socket flow database; TCP uses so_cfil.
	cfil_info = so->so_flow_db != NULL ?
	    soflow_db_get_feature_context(so->so_flow_db, msghdr->cfm_sock_id) : so->so_cfil;

	// We should not obtain global lock here in order to avoid deadlock down the path.
	// But we attempt to retain a valid cfil_info to prevent any deallocation until
	// we are done. Abort retain if cfil_info has already entered the free code path.
	if (cfil_info && os_ref_retain_try(&cfil_info->cfi_ref_count) == false) {
		// NOTE(review): error is left at 0 on this path, so the send is
		// counted as ok below — presumably intentional best-effort.
		socket_unlock(so, 1);
		goto done;
	}

	if (cfil_info == NULL) {
		CFIL_LOG(LOG_NOTICE, "so %llx <id %llu> not attached",
		    (uint64_t)VM_KERNEL_ADDRPERM(so), msghdr->cfm_sock_id);
		error = EINVAL;
		goto unlock;
	} else if (cfil_info->cfi_flags & CFIF_DROP) {
		CFIL_LOG(LOG_NOTICE, "so %llx drop set",
		    (uint64_t)VM_KERNEL_ADDRPERM(so));
		error = EINVAL;
		goto unlock;
	}

	if (cfil_info->cfi_debug) {
		cfil_info_log(LOG_ERR, cfil_info, "CFIL: RECEIVED MSG FROM FILTER");
	}

	entry = &cfil_info->cfi_entries[kcunit - 1];
	if (entry->cfe_filter == NULL) {
		CFIL_LOG(LOG_NOTICE, "so %llx no filter",
		    (uint64_t)VM_KERNEL_ADDRPERM(so));
		error = EINVAL;
		goto unlock;
	}

	// An action is only valid after the attach event was delivered.
	if (entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) {
		entry->cfe_flags |= CFEF_DATA_START;
	} else {
		CFIL_LOG(LOG_ERR,
		    "so %llx attached not sent for %u",
		    (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
		error = EINVAL;
		goto unlock;
	}

	// Refresh the last-action timestamp (also keeps GC away, see
	// cfil_info_alloc()).
	microuptime(&entry->cfe_last_action);
	CFI_ADD_TIME_LOG(cfil_info, &entry->cfe_last_action, &cfil_info->cfi_first_event, msghdr->cfm_op);

	action_msg = (struct cfil_msg_action *)msghdr;

	switch (msghdr->cfm_op) {
	case CFM_OP_DATA_UPDATE:

		if (cfil_info->cfi_debug) {
			cfil_info_log(LOG_ERR, cfil_info, "CFIL: RECEIVED CFM_OP_DATA_UPDATE");
			CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED: <so %llx sockID %llu <%llx>> <IN peek:%llu pass:%llu, OUT peek:%llu pass:%llu>",
			    (uint64_t)VM_KERNEL_ADDRPERM(so),
			    cfil_info->cfi_sock_id, cfil_info->cfi_sock_id,
			    action_msg->cfa_in_peek_offset, action_msg->cfa_in_pass_offset,
			    action_msg->cfa_out_peek_offset, action_msg->cfa_out_pass_offset);
		}

		/*
		 * Received verdict, at this point we know this
		 * socket connection is allowed. Unblock thread
		 * immediately before proceeding to process the verdict.
		 */
		cfil_sock_received_verdict(so);

		// Outgoing direction first; EJUSTRETURN means "nothing to do".
		if (action_msg->cfa_out_peek_offset != 0 ||
		    action_msg->cfa_out_pass_offset != 0) {
			error = cfil_action_data_pass(so, cfil_info, kcunit, 1,
			    action_msg->cfa_out_pass_offset,
			    action_msg->cfa_out_peek_offset);
		}
		if (error == EJUSTRETURN) {
			error = 0;
		}
		if (error != 0) {
			break;
		}
		// Then the incoming direction.
		if (action_msg->cfa_in_peek_offset != 0 ||
		    action_msg->cfa_in_pass_offset != 0) {
			error = cfil_action_data_pass(so, cfil_info, kcunit, 0,
			    action_msg->cfa_in_pass_offset,
			    action_msg->cfa_in_peek_offset);
		}
		if (error == EJUSTRETURN) {
			error = 0;
		}

		// Toggle stats reporting according to received verdict.
		cfil_rw_lock_exclusive(&cfil_lck_rw);
		cfil_info_stats_toggle(cfil_info, entry, action_msg->cfa_stats_frequency);
		cfil_rw_unlock_exclusive(&cfil_lck_rw);

		break;

	case CFM_OP_DROP:
		if (cfil_info->cfi_debug) {
			cfil_info_log(LOG_ERR, cfil_info, "CFIL: RECEIVED CFM_OP_DROP");
			CFIL_LOG(LOG_ERR, "CFIL: VERDICT DROP RECEIVED: <so %llx sockID %llu <%llx>> <IN peek:%llu pass:%llu, OUT peek:%llu pass:%llu>",
			    (uint64_t)VM_KERNEL_ADDRPERM(so),
			    cfil_info->cfi_sock_id, cfil_info->cfi_sock_id,
			    action_msg->cfa_in_peek_offset, action_msg->cfa_in_pass_offset,
			    action_msg->cfa_out_peek_offset, action_msg->cfa_out_pass_offset);
		}

		error = cfil_action_drop(so, cfil_info, kcunit);
		cfil_sock_received_verdict(so);
		break;

	default:
		error = EINVAL;
		break;
	}
unlock:
	// Drop the reference taken by os_ref_retain_try() above.
	CFIL_INFO_FREE(cfil_info)
	socket_unlock(so, 1);
done:
	mbuf_freem(m);

	if (error == 0) {
		OSIncrementAtomic(&cfil_stats.cfs_ctl_send_ok);
	} else {
		OSIncrementAtomic(&cfil_stats.cfs_ctl_send_bad);
	}

	return error;
}
2142
/*
 * cfil_ctl_getopt - handle getsockopt on the content filter kernel control
 * socket.
 *
 * CFIL_OPT_NECP_CONTROL_UNIT and CFIL_OPT_PRESERVE_CONNECTIONS are served
 * under cfil_lck_rw (shared). CFIL_OPT_GET_SOCKET_INFO must drop
 * cfil_lck_rw before taking the socket lock to preserve lock ordering
 * (soclose() takes socket_lock then cfil_lck_rw), hence the separate
 * return_already_unlocked exit path.
 */
static errno_t
cfil_ctl_getopt(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo,
    int opt, void *data, size_t *len)
{
#pragma unused(kctlref, opt)
	struct cfil_info * __single cfil_info = NULL;
	errno_t error = 0;
	struct content_filter *cfc = (struct content_filter *)unitinfo;

	CFIL_LOG(LOG_NOTICE, "");

	if (cfc == NULL) {
		CFIL_LOG(LOG_ERR, "no unitinfo");
		return EINVAL;
	}

	cfil_rw_lock_shared(&cfil_lck_rw);

	if (kcunit > MAX_CONTENT_FILTER) {
		CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
		    kcunit, MAX_CONTENT_FILTER);
		error = EINVAL;
		goto done;
	}
	if (cfc != (void *)content_filters[kcunit - 1]) {
		CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
		    kcunit);
		error = EINVAL;
		goto done;
	}
	switch (opt) {
	case CFIL_OPT_NECP_CONTROL_UNIT:
		if (*len < sizeof(uint32_t)) {
			CFIL_LOG(LOG_ERR, "len too small %lu", *len);
			error = EINVAL;
			goto done;
		}
		if (data != NULL) {
			*(uint32_t *)data = cfc->cf_necp_control_unit;
		}
		break;
	case CFIL_OPT_PRESERVE_CONNECTIONS:
		if (*len < sizeof(uint32_t)) {
			CFIL_LOG(LOG_ERR, "CFIL_OPT_PRESERVE_CONNECTIONS len too small %lu", *len);
			error = EINVAL;
			goto done;
		}
		if (data != NULL) {
			*(uint32_t *)data = (cfc->cf_flags & CFF_PRESERVE_CONNECTIONS) ? true : false;
		}
		break;
	case CFIL_OPT_GET_SOCKET_INFO:
		if (*len != sizeof(struct cfil_opt_sock_info)) {
			CFIL_LOG(LOG_ERR, "len does not match %lu", *len);
			error = EINVAL;
			goto done;
		}
		if (data == NULL) {
			CFIL_LOG(LOG_ERR, "data not passed");
			error = EINVAL;
			goto done;
		}

		struct cfil_opt_sock_info *sock_info =
		    (struct cfil_opt_sock_info *) data;

		// Unlock here so that we never hold both cfil_lck_rw and the
		// socket_lock at the same time. Otherwise, this can deadlock
		// because soclose() takes the socket_lock and then exclusive
		// cfil_lck_rw and we require the opposite order.

		// WARNING: Be sure to never use anything protected
		// by cfil_lck_rw beyond this point.
		// WARNING: Be sure to avoid fallthrough and
		// goto return_already_unlocked from this branch.
		cfil_rw_unlock_shared(&cfil_lck_rw);

		// Search (TCP+UDP) and lock socket
		struct socket *sock =
		    cfil_socket_from_sock_id(sock_info->cfs_sock_id, false);
		if (sock == NULL) {
			CFIL_LOG(LOG_ERR, "CFIL: GET_SOCKET_INFO failed: bad sock_id %llu",
			    sock_info->cfs_sock_id);
			error = ENOENT;
			goto return_already_unlocked;
		}

		// UDP flows live in the per-socket flow database; TCP uses so_cfil.
		cfil_info = (sock->so_flow_db != NULL) ?
		    soflow_db_get_feature_context(sock->so_flow_db, sock_info->cfs_sock_id) : sock->so_cfil;

		if (cfil_info == NULL) {
			CFIL_LOG(LOG_INFO, "CFIL: GET_SOCKET_INFO failed: so %llx not attached, cannot fetch info",
			    (uint64_t)VM_KERNEL_ADDRPERM(sock));
			error = EINVAL;
			socket_unlock(sock, 1);
			goto return_already_unlocked;
		}

		if (sock->so_proto == NULL || sock->so_proto->pr_domain == NULL) {
			CFIL_LOG(LOG_INFO, "CFIL: GET_SOCKET_INFO failed: so %llx NULL so_proto / pr_domain",
			    (uint64_t)VM_KERNEL_ADDRPERM(sock));
			error = EINVAL;
			socket_unlock(sock, 1);
			goto return_already_unlocked;
		}

		// Fill out family, type, and protocol
		sock_info->cfs_sock_family = SOCK_DOM(sock);
		sock_info->cfs_sock_type = SOCK_TYPE(sock);
		sock_info->cfs_sock_protocol = GET_SO_PROTO(sock);

		// Source and destination addresses
		struct inpcb *inp = sotoinpcb(sock);
		if (inp->inp_vflag & INP_IPV6) {
			struct in6_addr * __single laddr = NULL, * __single faddr = NULL;
			u_int16_t lport = 0, fport = 0;

			cfil_get_flow_address_v6(cfil_info->cfi_hash_entry, inp,
			    &laddr, &faddr, &lport, &fport);
			fill_ip6_sockaddr_4_6(&sock_info->cfs_local, laddr, lport, inp->inp_lifscope);
			fill_ip6_sockaddr_4_6(&sock_info->cfs_remote, faddr, fport, inp->inp_fifscope);
		} else if (inp->inp_vflag & INP_IPV4) {
			struct in_addr laddr = {.s_addr = 0}, faddr = {.s_addr = 0};
			u_int16_t lport = 0, fport = 0;

			cfil_get_flow_address(cfil_info->cfi_hash_entry, inp,
			    &laddr, &faddr, &lport, &fport);
			fill_ip_sockaddr_4_6(&sock_info->cfs_local, laddr, lport);
			fill_ip_sockaddr_4_6(&sock_info->cfs_remote, faddr, fport);
		}

		// Set the pid info
		sock_info->cfs_pid = sock->last_pid;
		memcpy(sock_info->cfs_uuid, sock->last_uuid, sizeof(uuid_t));

		// Report the delegated (effective) identity when present,
		// otherwise fall back to the last process on the socket.
		if (sock->so_flags & SOF_DELEGATED) {
			sock_info->cfs_e_pid = sock->e_pid;
			memcpy(sock_info->cfs_e_uuid, sock->e_uuid, sizeof(uuid_t));
		} else {
			sock_info->cfs_e_pid = sock->last_pid;
			memcpy(sock_info->cfs_e_uuid, sock->last_uuid, sizeof(uuid_t));
		}
#if defined(XNU_TARGET_OS_OSX)
		if (!uuid_is_null(sock->so_ruuid)) {
			sock_info->cfs_r_pid = sock->so_rpid;
			memcpy(sock_info->cfs_r_uuid, sock->so_ruuid, sizeof(uuid_t));
		}
#endif
		socket_unlock(sock, 1);

		goto return_already_unlocked;
	default:
		error = ENOPROTOOPT;
		break;
	}
done:
	// Normal exit: cfil_lck_rw is still held shared here.
	cfil_rw_unlock_shared(&cfil_lck_rw);

	return error;

return_already_unlocked:
	// GET_SOCKET_INFO exit: cfil_lck_rw was already released above.

	return error;
}
2307
2308 static errno_t
cfil_ctl_setopt(kern_ctl_ref kctlref,u_int32_t kcunit,void * unitinfo,int opt,void * data,size_t len)2309 cfil_ctl_setopt(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo,
2310 int opt, void *data, size_t len)
2311 {
2312 #pragma unused(kctlref, opt)
2313 errno_t error = 0;
2314 struct content_filter *cfc = (struct content_filter *)unitinfo;
2315
2316 CFIL_LOG(LOG_NOTICE, "");
2317
2318 if (cfc == NULL) {
2319 CFIL_LOG(LOG_ERR, "no unitinfo");
2320 return EINVAL;
2321 }
2322
2323 cfil_rw_lock_exclusive(&cfil_lck_rw);
2324
2325 if (kcunit > MAX_CONTENT_FILTER) {
2326 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
2327 kcunit, MAX_CONTENT_FILTER);
2328 error = EINVAL;
2329 goto done;
2330 }
2331 if (cfc != (void *)content_filters[kcunit - 1]) {
2332 CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
2333 kcunit);
2334 error = EINVAL;
2335 goto done;
2336 }
2337 switch (opt) {
2338 case CFIL_OPT_NECP_CONTROL_UNIT:
2339 if (len < sizeof(uint32_t)) {
2340 CFIL_LOG(LOG_ERR, "CFIL_OPT_NECP_CONTROL_UNIT "
2341 "len too small %lu", len);
2342 error = EINVAL;
2343 goto done;
2344 }
2345 if (cfc->cf_necp_control_unit != 0) {
2346 CFIL_LOG(LOG_ERR, "CFIL_OPT_NECP_CONTROL_UNIT "
2347 "already set %u",
2348 cfc->cf_necp_control_unit);
2349 error = EINVAL;
2350 goto done;
2351 }
2352 cfc->cf_necp_control_unit = *(uint32_t *)data;
2353 break;
2354 case CFIL_OPT_PRESERVE_CONNECTIONS:
2355 if (len < sizeof(uint32_t)) {
2356 CFIL_LOG(LOG_ERR, "CFIL_OPT_PRESERVE_CONNECTIONS "
2357 "len too small %lu", len);
2358 error = EINVAL;
2359 goto done;
2360 }
2361 uint32_t preserve_connections = *((uint32_t *)data);
2362 CFIL_LOG(LOG_INFO, "CFIL_OPT_PRESERVE_CONNECTIONS got %d (kcunit %d)", preserve_connections, kcunit);
2363 if (preserve_connections) {
2364 cfc->cf_flags |= CFF_PRESERVE_CONNECTIONS;
2365 } else {
2366 cfc->cf_flags &= ~CFF_PRESERVE_CONNECTIONS;
2367 }
2368
2369 cfil_update_behavior_flags();
2370 break;
2371 default:
2372 error = ENOPROTOOPT;
2373 break;
2374 }
2375 done:
2376 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2377
2378 return error;
2379 }
2380
2381
2382 static void
cfil_ctl_rcvd(kern_ctl_ref kctlref,u_int32_t kcunit,void * unitinfo,int flags)2383 cfil_ctl_rcvd(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo, int flags)
2384 {
2385 #pragma unused(kctlref, flags)
2386 struct content_filter *cfc = (struct content_filter *)unitinfo;
2387 struct socket *so = NULL;
2388 int error;
2389 struct cfil_entry *entry;
2390 struct cfil_info *cfil_info = NULL;
2391
2392 CFIL_LOG(LOG_INFO, "");
2393
2394 if (cfc == NULL) {
2395 CFIL_LOG(LOG_ERR, "no unitinfo");
2396 OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
2397 return;
2398 }
2399
2400 if (kcunit > MAX_CONTENT_FILTER) {
2401 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
2402 kcunit, MAX_CONTENT_FILTER);
2403 OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
2404 return;
2405 }
2406 cfil_rw_lock_shared(&cfil_lck_rw);
2407 if (cfc != (void *)content_filters[kcunit - 1]) {
2408 CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
2409 kcunit);
2410 OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
2411 goto done;
2412 }
2413 /* Let's assume the flow control is lifted */
2414 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
2415 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
2416 cfil_rw_lock_exclusive(&cfil_lck_rw);
2417 }
2418
2419 cfc->cf_flags &= ~CFF_FLOW_CONTROLLED;
2420
2421 cfil_rw_lock_exclusive_to_shared(&cfil_lck_rw);
2422 LCK_RW_ASSERT(&cfil_lck_rw, LCK_RW_ASSERT_SHARED);
2423 }
2424 /*
2425 * Flow control will be raised again as soon as an entry cannot enqueue
2426 * to the kernel control socket
2427 */
2428 while ((cfc->cf_flags & CFF_FLOW_CONTROLLED) == 0) {
2429 verify_content_filter(cfc);
2430
2431 cfil_rw_lock_assert_held(&cfil_lck_rw, 0);
2432
2433 /* Find an entry that is flow controlled */
2434 TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) {
2435 if (entry->cfe_cfil_info == NULL ||
2436 entry->cfe_cfil_info->cfi_so == NULL) {
2437 continue;
2438 }
2439 if ((entry->cfe_flags & CFEF_FLOW_CONTROLLED) == 0) {
2440 continue;
2441 }
2442 }
2443 if (entry == NULL) {
2444 break;
2445 }
2446
2447 OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_flow_lift);
2448
2449 cfil_info = entry->cfe_cfil_info;
2450 so = cfil_info->cfi_so;
2451
2452 if (cfil_info == NULL || os_ref_retain_try(&cfil_info->cfi_ref_count) == false) {
2453 break;
2454 }
2455
2456 cfil_rw_unlock_shared(&cfil_lck_rw);
2457 socket_lock(so, 1);
2458
2459 do {
2460 error = cfil_acquire_sockbuf(so, cfil_info, 1);
2461 if (error == 0) {
2462 error = cfil_data_service_ctl_q(so, cfil_info, kcunit, 1);
2463 }
2464 cfil_release_sockbuf(so, 1);
2465 if (error != 0) {
2466 break;
2467 }
2468
2469 error = cfil_acquire_sockbuf(so, cfil_info, 0);
2470 if (error == 0) {
2471 error = cfil_data_service_ctl_q(so, cfil_info, kcunit, 0);
2472 }
2473 cfil_release_sockbuf(so, 0);
2474 } while (0);
2475
2476 CFIL_INFO_FREE(cfil_info);
2477 socket_lock_assert_owned(so);
2478 socket_unlock(so, 1);
2479
2480 cfil_rw_lock_shared(&cfil_lck_rw);
2481 }
2482 done:
2483 cfil_rw_unlock_shared(&cfil_lck_rw);
2484 }
2485
/*
 * Container that co-allocates an m_tag header with its cfil_tag payload in
 * a single allocation. cfil_m_tag must remain the first member:
 * m_tag_kalloc_cfil_udp() hands out &cfil_m_tag (asserted equal to the
 * container address) and m_tag_kfree_cfil_udp() casts the m_tag pointer
 * back to the container to free it.
 */
struct cflil_tag_container {
	struct m_tag cfil_m_tag;
	struct cfil_tag cfil_tag;
};
2490
2491 static struct m_tag *
m_tag_kalloc_cfil_udp(u_int32_t id,u_int16_t type,uint16_t len,int wait)2492 m_tag_kalloc_cfil_udp(u_int32_t id, u_int16_t type, uint16_t len, int wait)
2493 {
2494 struct cflil_tag_container *tag_container;
2495 struct m_tag *tag = NULL;
2496
2497 assert3u(id, ==, KERNEL_MODULE_TAG_ID);
2498 assert3u(type, ==, KERNEL_TAG_TYPE_CFIL_UDP);
2499 assert3u(len, ==, sizeof(struct cfil_tag));
2500
2501 if (len != sizeof(struct cfil_tag)) {
2502 return NULL;
2503 }
2504
2505 tag_container = kalloc_type(struct cflil_tag_container, wait | M_ZERO);
2506 if (tag_container != NULL) {
2507 tag = &tag_container->cfil_m_tag;
2508
2509 assert3p(tag, ==, tag_container);
2510
2511 M_TAG_INIT(tag, id, type, len, &tag_container->cfil_tag, NULL);
2512 }
2513
2514 return tag;
2515 }
2516
2517 static void
m_tag_kfree_cfil_udp(struct m_tag * tag)2518 m_tag_kfree_cfil_udp(struct m_tag *tag)
2519 {
2520 struct cflil_tag_container * __single tag_container = (struct cflil_tag_container *)tag;
2521
2522 kfree_type(struct cflil_tag_container, tag_container);
2523 }
2524
2525 void
cfil_register_m_tag(void)2526 cfil_register_m_tag(void)
2527 {
2528 errno_t error = 0;
2529
2530 error = m_register_internal_tag_type(KERNEL_TAG_TYPE_CFIL_UDP, sizeof(struct cfil_tag),
2531 m_tag_kalloc_cfil_udp, m_tag_kfree_cfil_udp);
2532
2533 assert3u(error, ==, 0);
2534 }
2535
/*
 * cfil_init - one-time initialization of the content filter subsystem.
 *
 * Verifies layout/alignment invariants, initializes the attached-socket
 * lists, registers the CONTENT_FILTER_CONTROL_NAME kernel control with
 * its callbacks, spawns the statistics reporting thread, and derives the
 * UDP per-flow mbuf garbage-collection thresholds from the platform mbuf
 * cluster pool size.
 */
void
cfil_init(void)
{
	struct kern_ctl_reg kern_ctl;
	errno_t error = 0;
	unsigned int mbuf_limit = 0;

	CFIL_LOG(LOG_NOTICE, "");

	/*
	 * Compile time verifications
	 */
	_CASSERT(CFIL_MAX_FILTER_COUNT == MAX_CONTENT_FILTER);
	_CASSERT(sizeof(struct cfil_filter_stat) % sizeof(uint32_t) == 0);
	_CASSERT(sizeof(struct cfil_entry_stat) % sizeof(uint32_t) == 0);
	_CASSERT(sizeof(struct cfil_sock_stat) % sizeof(uint32_t) == 0);

	/*
	 * Runtime time verifications
	 */
	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_in_enqueued,
	    sizeof(uint32_t)));
	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_out_enqueued,
	    sizeof(uint32_t)));
	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_in_peeked,
	    sizeof(uint32_t)));
	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_out_peeked,
	    sizeof(uint32_t)));

	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_pending_q_in_enqueued,
	    sizeof(uint32_t)));
	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_pending_q_out_enqueued,
	    sizeof(uint32_t)));

	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_in_enqueued,
	    sizeof(uint32_t)));
	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_out_enqueued,
	    sizeof(uint32_t)));
	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_in_passed,
	    sizeof(uint32_t)));
	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_out_passed,
	    sizeof(uint32_t)));

	/*
	 * Allocate locks
	 */
	TAILQ_INIT(&cfil_sock_head);
	TAILQ_INIT(&cfil_sock_head_stats);

	/*
	 * Register kernel control
	 */
	bzero(&kern_ctl, sizeof(kern_ctl));
	strlcpy(kern_ctl.ctl_name, CONTENT_FILTER_CONTROL_NAME,
	    sizeof(kern_ctl.ctl_name));
	kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED | CTL_FLAG_REG_EXTENDED;
	kern_ctl.ctl_sendsize = 512 * 1024; /* enough? */
	kern_ctl.ctl_recvsize = 512 * 1024; /* enough? */
	kern_ctl.ctl_connect = cfil_ctl_connect;
	kern_ctl.ctl_disconnect = cfil_ctl_disconnect;
	kern_ctl.ctl_send = cfil_ctl_send;
	kern_ctl.ctl_getopt = cfil_ctl_getopt;
	kern_ctl.ctl_setopt = cfil_ctl_setopt;
	kern_ctl.ctl_rcvd = cfil_ctl_rcvd;
	error = ctl_register(&kern_ctl, &cfil_kctlref);
	if (error != 0) {
		// Without the kernel control the subsystem is unusable; bail
		// out before spawning the stats thread.
		CFIL_LOG(LOG_ERR, "ctl_register failed: %d", error);
		return;
	}

	// Spawn thread for statistics reporting
	if (kernel_thread_start(cfil_stats_report_thread_func, NULL,
	    &cfil_stats_report_thread) != KERN_SUCCESS) {
		panic_plain("%s: Can't create statistics report thread", __func__);
		/* NOTREACHED */
	}
	/* this must not fail */
	VERIFY(cfil_stats_report_thread != NULL);

	// Set UDP per-flow mbuf thresholds to 1/32 of platform max
	mbuf_limit = MAX(UDP_FLOW_GC_MBUF_CNT_MAX, (nmbclusters << MCLSHIFT) >> UDP_FLOW_GC_MBUF_SHIFT);
	cfil_udp_gc_mbuf_num_max = (mbuf_limit >> MCLSHIFT);
	cfil_udp_gc_mbuf_cnt_max = mbuf_limit;

	memset(&global_cfil_stats_report_buffers, 0, sizeof(global_cfil_stats_report_buffers));
}
2622
/*
 * cfil_info_alloc - allocate and initialize the per-flow content filter
 * state for a socket.
 *
 * hash_entry == NULL selects the TCP case (cfil_info tracked directly on
 * the socket via so_cfil); otherwise the UDP case (tracked per flow in
 * the socket's flow database). In both cases a cfi_sock_id is built from
 * the socket generation count (upper 32 bits) and the flow hash (lower
 * 32 bits) so the id is not a socket pointer. The new cfil_info is
 * appended to cfil_sock_head under cfil_lck_rw (exclusive).
 *
 * Caller must hold the socket lock.
 */
struct cfil_info *
cfil_info_alloc(struct socket *so, struct soflow_hash_entry *hash_entry)
{
	int kcunit;
	struct cfil_info *cfil_info = NULL;
	struct inpcb *inp = sotoinpcb(so);

	CFIL_LOG(LOG_INFO, "");

	socket_lock_assert_owned(so);

	// Z_NOFAIL: zalloc_flags blocks until memory is available and never
	// returns NULL here.
	cfil_info = zalloc_flags(cfil_info_zone, Z_WAITOK | Z_ZERO | Z_NOFAIL);
	os_ref_init(&cfil_info->cfi_ref_count, &cfil_refgrp);

	cfil_queue_init(&cfil_info->cfi_snd.cfi_inject_q);
	cfil_queue_init(&cfil_info->cfi_rcv.cfi_inject_q);

	for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
		struct cfil_entry *entry;

		entry = &cfil_info->cfi_entries[kcunit - 1];
		entry->cfe_cfil_info = cfil_info;

		/* Initialize the filter entry */
		entry->cfe_filter = NULL;
		entry->cfe_flags = 0;
		entry->cfe_necp_control_unit = 0;
		entry->cfe_snd.cfe_pass_offset = 0;
		entry->cfe_snd.cfe_peek_offset = 0;
		entry->cfe_snd.cfe_peeked = 0;
		entry->cfe_rcv.cfe_pass_offset = 0;
		entry->cfe_rcv.cfe_peek_offset = 0;
		entry->cfe_rcv.cfe_peeked = 0;
		/*
		 * Timestamp the last action to avoid pre-maturely
		 * triggering garbage collection
		 */
		microuptime(&entry->cfe_last_action);

		cfil_queue_init(&entry->cfe_snd.cfe_pending_q);
		cfil_queue_init(&entry->cfe_rcv.cfe_pending_q);
		cfil_queue_init(&entry->cfe_snd.cfe_ctl_q);
		cfil_queue_init(&entry->cfe_rcv.cfe_ctl_q);
	}

	cfil_rw_lock_exclusive(&cfil_lck_rw);

	/*
	 * Create a cfi_sock_id that's not the socket pointer!
	 */

	if (hash_entry == NULL) {
		// This is the TCP case, cfil_info is tracked per socket
		if (inp->inp_flowhash == 0) {
			inp_calc_flowhash(inp);
			ASSERT(inp->inp_flowhash != 0);
		}

		so->so_cfil = cfil_info;
		cfil_info->cfi_so = so;
		cfil_info->cfi_sock_id =
		    ((so->so_gencnt << 32) | inp->inp_flowhash);
	} else {
		// This is the UDP case, cfil_info is tracked in per-socket hash
		cfil_info->cfi_so = so;
		cfil_info->cfi_hash_entry = hash_entry;
		cfil_info->cfi_sock_id = ((so->so_gencnt << 32) | (hash_entry->soflow_flowhash & 0xffffffff));
	}

	TAILQ_INSERT_TAIL(&cfil_sock_head, cfil_info, cfi_link);
	SLIST_INIT(&cfil_info->cfi_ordered_entries);

	cfil_sock_attached_count++;

	cfil_rw_unlock_exclusive(&cfil_lck_rw);

	// NOTE(review): with Z_NOFAIL above, cfil_info is never NULL here,
	// so the else branch (cfs_cfi_alloc_fail) is unreachable.
	if (cfil_info != NULL) {
		OSIncrementAtomic(&cfil_stats.cfs_cfi_alloc_ok);
	} else {
		OSIncrementAtomic(&cfil_stats.cfs_cfi_alloc_fail);
	}

	return cfil_info;
}
2707
/*
 * cfil_info_attach_unit - attach every registered content filter whose
 * NECP control unit matches filter_control_unit to this flow.
 *
 * For each matching filter: bind the flow's cfil_entry to the filter, add
 * it to the filter's socket-entry list, and insert it into the flow's
 * cfi_ordered_entries list kept sorted by ascending necp control unit.
 *
 * Returns non-zero if at least one filter was attached. Caller must hold
 * the socket lock; cfil_lck_rw is taken exclusively internally.
 */
int
cfil_info_attach_unit(struct socket *so, uint32_t filter_control_unit, struct cfil_info *cfil_info)
{
	int kcunit;
	int attached = 0;

	CFIL_LOG(LOG_INFO, "");

	socket_lock_assert_owned(so);

	cfil_rw_lock_exclusive(&cfil_lck_rw);

	for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
		struct content_filter *cfc = content_filters[kcunit - 1];
		struct cfil_entry *entry;
		struct cfil_entry *iter_entry;
		struct cfil_entry *iter_prev;

		if (cfc == NULL) {
			continue;
		}
		// filter_control_unit is matched as a bitmask against the
		// filter's registered control unit.
		if (!(cfc->cf_necp_control_unit & filter_control_unit)) {
			continue;
		}

		entry = &cfil_info->cfi_entries[kcunit - 1];

		entry->cfe_filter = cfc;
		entry->cfe_necp_control_unit = cfc->cf_necp_control_unit;
		TAILQ_INSERT_TAIL(&cfc->cf_sock_entries, entry, cfe_link);
		cfc->cf_sock_count++;

		/* Insert the entry into the list ordered by control unit */
		iter_prev = NULL;
		SLIST_FOREACH(iter_entry, &cfil_info->cfi_ordered_entries, cfe_order_link) {
			if (entry->cfe_necp_control_unit < iter_entry->cfe_necp_control_unit) {
				break;
			}
			iter_prev = iter_entry;
		}

		if (iter_prev == NULL) {
			SLIST_INSERT_HEAD(&cfil_info->cfi_ordered_entries, entry, cfe_order_link);
		} else {
			SLIST_INSERT_AFTER(iter_prev, entry, cfe_order_link);
		}

		verify_content_filter(cfc);
		attached = 1;
		entry->cfe_flags |= CFEF_CFIL_ATTACHED;
	}

	cfil_rw_unlock_exclusive(&cfil_lck_rw);

	return attached;
}
2764
/*
 * Detach a cfil_info from all content filters it is attached to,
 * drain any data still queued for those filters, and free it back
 * to its zone.
 *
 * A NULL cfil_info is a no-op.  Takes cfil_lck_rw exclusively to
 * update the global filter bookkeeping.
 */
static void
cfil_info_free(struct cfil_info *cfil_info)
{
	int kcunit;
	uint64_t in_drain = 0;
	uint64_t out_drained = 0;

	if (cfil_info == NULL) {
		return;
	}

	CFIL_LOG(LOG_INFO, "");

	cfil_rw_lock_exclusive(&cfil_lck_rw);

	if (cfil_info->cfi_debug) {
		cfil_info_log(LOG_ERR, cfil_info, "CFIL: FREEING CFIL_INFO");
	}

	/* Unlink each attached entry from its filter's socket list */
	for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
		struct cfil_entry *entry;
		struct content_filter *cfc;

		entry = &cfil_info->cfi_entries[kcunit - 1];

		/* Don't be silly and try to detach twice */
		if (entry->cfe_filter == NULL) {
			continue;
		}

		cfc = content_filters[kcunit - 1];

		VERIFY(cfc == entry->cfe_filter);

		entry->cfe_filter = NULL;
		entry->cfe_necp_control_unit = 0;
		TAILQ_REMOVE(&cfc->cf_sock_entries, entry, cfe_link);
		cfc->cf_sock_count--;

		verify_content_filter(cfc);
	}

	cfil_sock_attached_count--;
	TAILQ_REMOVE(&cfil_sock_head, cfil_info, cfi_link);

	// Turn off stats reporting for cfil_info.
	cfil_info_stats_toggle(cfil_info, NULL, 0);

	/* Flush data still pending re-injection in either direction */
	out_drained += cfil_queue_drain(&cfil_info->cfi_snd.cfi_inject_q);
	in_drain += cfil_queue_drain(&cfil_info->cfi_rcv.cfi_inject_q);

	/* Flush each entry's pending and control queues */
	for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
		struct cfil_entry *entry;

		entry = &cfil_info->cfi_entries[kcunit - 1];
		out_drained += cfil_queue_drain(&entry->cfe_snd.cfe_pending_q);
		in_drain += cfil_queue_drain(&entry->cfe_rcv.cfe_pending_q);
		out_drained += cfil_queue_drain(&entry->cfe_snd.cfe_ctl_q);
		in_drain += cfil_queue_drain(&entry->cfe_rcv.cfe_ctl_q);
	}
	cfil_rw_unlock_exclusive(&cfil_lck_rw);

	if (out_drained) {
		OSIncrementAtomic(&cfil_stats.cfs_flush_out_free);
	}
	if (in_drain) {
		OSIncrementAtomic(&cfil_stats.cfs_flush_in_free);
	}

	zfree(cfil_info_zone, cfil_info);
}
2836
2837 /*
2838 * Received a verdict from userspace for a socket.
2839 * Perform any delayed operation if needed.
2840 */
2841 static void
cfil_sock_received_verdict(struct socket * so)2842 cfil_sock_received_verdict(struct socket *so)
2843 {
2844 if (so == NULL || so->so_cfil == NULL) {
2845 return;
2846 }
2847
2848 so->so_cfil->cfi_flags |= CFIF_INITIAL_VERDICT;
2849
2850 /*
2851 * If socket has already been connected, trigger
2852 * soisconnected now.
2853 */
2854 if (so->so_cfil->cfi_flags & CFIF_SOCKET_CONNECTED) {
2855 so->so_cfil->cfi_flags &= ~CFIF_SOCKET_CONNECTED;
2856 soisconnected(so);
2857 return;
2858 }
2859 }
2860
2861 /*
2862 * Entry point from Sockets layer
2863 * The socket is locked.
2864 *
2865 * Checks if a connected socket is subject to filter and
2866 * pending the initial verdict.
2867 */
2868 boolean_t
cfil_sock_connected_pending_verdict(struct socket * so)2869 cfil_sock_connected_pending_verdict(struct socket *so)
2870 {
2871 if (so == NULL || so->so_cfil == NULL) {
2872 return false;
2873 }
2874
2875 if (so->so_cfil->cfi_flags & CFIF_INITIAL_VERDICT) {
2876 return false;
2877 } else {
2878 /*
2879 * Remember that this protocol is already connected, so
2880 * we will trigger soisconnected() upon receipt of
2881 * initial verdict later.
2882 */
2883 so->so_cfil->cfi_flags |= CFIF_SOCKET_CONNECTED;
2884 return true;
2885 }
2886 }
2887
2888 /*
2889 * Entry point from Flow Divert
2890 * The socket is locked.
2891 *
2892 * Mark socket as DEAD if all CFIL data has been processed by filter(s).
2893 * Otherwise, delay the marking until all data has been processed.
2894 */
2895 boolean_t
cfil_sock_is_dead(struct socket * so)2896 cfil_sock_is_dead(struct socket *so)
2897 {
2898 struct inpcb *inp = NULL;
2899
2900 if (so == NULL) {
2901 return false;
2902 }
2903
2904 socket_lock_assert_owned(so);
2905
2906 if ((so->so_flags & SOF_CONTENT_FILTER) != 0) {
2907 int32_t pending_snd = cfil_sock_data_pending(&so->so_snd);
2908 int32_t pending_rcv = cfil_sock_data_pending(&so->so_rcv);
2909 if (pending_snd || pending_rcv) {
2910 SO_DELAYED_DEAD_SET(so, true)
2911 return false;
2912 }
2913 }
2914
2915 inp = sotoinpcb(so);
2916 if (inp != NULL) {
2917 inp->inp_state = INPCB_STATE_DEAD;
2918 inpcb_gc_sched(inp->inp_pcbinfo, INPCB_TIMER_FAST);
2919 SO_DELAYED_DEAD_SET(so, false)
2920 return true;
2921 }
2922 return false;
2923 }
2924
2925 /*
2926 * Entry point from tcp_timer.c
2927 * The socket is locked.
2928 *
2929 * Perform TCP FIN time wait handling if all CFIL data has been processed by filter(s).
2930 * Otherwise, delay until all data has been processed.
2931 */
2932 boolean_t
cfil_sock_tcp_add_time_wait(struct socket * so)2933 cfil_sock_tcp_add_time_wait(struct socket *so)
2934 {
2935 struct inpcb *inp = NULL;
2936 struct tcpcb *tp = NULL;
2937
2938 // Only handle TCP sockets
2939 if (so == NULL || !IS_TCP(so)) {
2940 return false;
2941 }
2942
2943 socket_lock_assert_owned(so);
2944
2945 if ((so->so_flags & SOF_CONTENT_FILTER) != 0) {
2946 int32_t pending_snd = cfil_sock_data_pending(&so->so_snd);
2947 int32_t pending_rcv = cfil_sock_data_pending(&so->so_rcv);
2948 if (pending_snd || pending_rcv) {
2949 SO_DELAYED_TCP_TIME_WAIT_SET(so, true)
2950 return false;
2951 }
2952 }
2953
2954 inp = sotoinpcb(so);
2955 tp = inp ? intotcpcb(inp) : NULL;
2956 if (tp != NULL) {
2957 add_to_time_wait_now(tp, 2 * tcp_msl);
2958 SO_DELAYED_TCP_TIME_WAIT_SET(so, false)
2959 return true;
2960 }
2961 return false;
2962 }
2963
2964 boolean_t
cfil_filter_present(void)2965 cfil_filter_present(void)
2966 {
2967 return cfil_active_count > 0;
2968 }
2969
2970 /*
2971 * Entry point from Sockets layer
2972 * The socket is locked.
2973 */
2974 errno_t
cfil_sock_attach(struct socket * so,struct sockaddr * local,struct sockaddr * remote,int dir)2975 cfil_sock_attach(struct socket *so, struct sockaddr *local, struct sockaddr *remote, int dir)
2976 {
2977 errno_t error = 0;
2978 uint32_t filter_control_unit;
2979 int debug = 0;
2980
2981 socket_lock_assert_owned(so);
2982
2983 if (so->so_flags1 & SOF1_FLOW_DIVERT_SKIP) {
2984 /*
2985 * This socket has already been evaluated (and ultimately skipped) by
2986 * flow divert, so it has also already been through content filter if there
2987 * is one.
2988 */
2989 goto done;
2990 }
2991
2992 /* Limit ourselves to TCP that are not MPTCP subflows */
2993 if (SKIP_FILTER_FOR_TCP_SOCKET(so)) {
2994 goto done;
2995 }
2996
2997 debug = DEBUG_FLOW(sotoinpcb(so), so, local, remote);
2998 if (debug) {
2999 CFIL_LOG(LOG_ERR, "CFIL: TCP (dir %d) - debug flow with port %d", dir, cfil_log_port);
3000 }
3001
3002 filter_control_unit = necp_socket_get_content_filter_control_unit(so);
3003 if (filter_control_unit == 0) {
3004 goto done;
3005 }
3006
3007 if (filter_control_unit == NECP_FILTER_UNIT_NO_FILTER) {
3008 goto done;
3009 }
3010 if ((filter_control_unit & NECP_MASK_USERSPACE_ONLY) != 0) {
3011 OSIncrementAtomic(&cfil_stats.cfs_sock_userspace_only);
3012 goto done;
3013 }
3014 if (cfil_active_count == 0) {
3015 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_in_vain);
3016 goto done;
3017 }
3018 if (so->so_cfil != NULL) {
3019 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_already);
3020 CFIL_LOG(LOG_ERR, "already attached");
3021 goto done;
3022 } else {
3023 cfil_info_alloc(so, NULL);
3024 if (so->so_cfil == NULL) {
3025 error = ENOMEM;
3026 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_no_mem);
3027 goto done;
3028 }
3029 so->so_cfil->cfi_dir = dir;
3030 so->so_cfil->cfi_filter_control_unit = filter_control_unit;
3031 so->so_cfil->cfi_filter_policy_gencount = necp_socket_get_policy_gencount(so);
3032 so->so_cfil->cfi_debug = debug;
3033 }
3034 if (cfil_info_attach_unit(so, filter_control_unit, so->so_cfil) == 0) {
3035 CFIL_LOG(LOG_ERR, "cfil_info_attach_unit(%u) failed",
3036 filter_control_unit);
3037 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_failed);
3038 goto done;
3039 }
3040 CFIL_LOG(LOG_INFO, "so %llx filter_control_unit %u sockID %llu <%llx>",
3041 (uint64_t)VM_KERNEL_ADDRPERM(so),
3042 filter_control_unit, so->so_cfil->cfi_sock_id, so->so_cfil->cfi_sock_id);
3043
3044 so->so_flags |= SOF_CONTENT_FILTER;
3045 OSIncrementAtomic(&cfil_stats.cfs_sock_attached);
3046
3047 /* Hold a reference on the socket */
3048 so->so_usecount++;
3049
3050 /*
3051 * Save passed addresses for attach event msg (in case resend
3052 * is needed.
3053 */
3054 if (remote != NULL && (remote->sa_len <= sizeof(union sockaddr_in_4_6))) {
3055 SOCKADDR_COPY(remote, SA(&so->so_cfil->cfi_so_attach_faddr), remote->sa_len);
3056 }
3057 if (local != NULL && (local->sa_len <= sizeof(union sockaddr_in_4_6))) {
3058 SOCKADDR_COPY(local, SA(&so->so_cfil->cfi_so_attach_laddr), local->sa_len);
3059 }
3060
3061 if (so->so_cfil->cfi_debug) {
3062 cfil_info_log(LOG_ERR, so->so_cfil, "CFIL: ADDED");
3063 }
3064
3065 error = cfil_dispatch_attach_event(so, so->so_cfil, 0, dir);
3066 /* We can recover from flow control or out of memory errors */
3067 if (error == ENOBUFS || error == ENOMEM) {
3068 error = 0;
3069 } else if (error != 0) {
3070 goto done;
3071 }
3072
3073 CFIL_INFO_VERIFY(so->so_cfil);
3074 done:
3075 return error;
3076 }
3077
3078 /*
3079 * Entry point from Sockets layer
3080 * The socket is locked.
3081 */
3082 errno_t
cfil_sock_detach(struct socket * so)3083 cfil_sock_detach(struct socket *so)
3084 {
3085 if (NEED_DGRAM_FLOW_TRACKING(so)) {
3086 return 0;
3087 }
3088
3089 if (so->so_cfil) {
3090 if (so->so_flags & SOF_CONTENT_FILTER) {
3091 so->so_flags &= ~SOF_CONTENT_FILTER;
3092 VERIFY(so->so_usecount > 0);
3093 so->so_usecount--;
3094 }
3095 CFIL_INFO_FREE(so->so_cfil);
3096 so->so_cfil = NULL;
3097 OSIncrementAtomic(&cfil_stats.cfs_sock_detached);
3098 }
3099 return 0;
3100 }
3101
3102 /*
3103 * Fill in the address info of an event message from either
3104 * the socket or passed in address info.
3105 */
3106 static void
cfil_fill_event_msg_addresses(struct soflow_hash_entry * entry,struct inpcb * inp,union sockaddr_in_4_6 * sin_src,union sockaddr_in_4_6 * sin_dst,boolean_t isIPv4,boolean_t outgoing)3107 cfil_fill_event_msg_addresses(struct soflow_hash_entry *entry, struct inpcb *inp,
3108 union sockaddr_in_4_6 *sin_src, union sockaddr_in_4_6 *sin_dst,
3109 boolean_t isIPv4, boolean_t outgoing)
3110 {
3111 if (isIPv4) {
3112 struct in_addr laddr = {0}, faddr = {0};
3113 u_int16_t lport = 0, fport = 0;
3114
3115 cfil_get_flow_address(entry, inp, &laddr, &faddr, &lport, &fport);
3116
3117 if (outgoing) {
3118 fill_ip_sockaddr_4_6(sin_src, laddr, lport);
3119 fill_ip_sockaddr_4_6(sin_dst, faddr, fport);
3120 } else {
3121 fill_ip_sockaddr_4_6(sin_src, faddr, fport);
3122 fill_ip_sockaddr_4_6(sin_dst, laddr, lport);
3123 }
3124 } else {
3125 struct in6_addr * __single laddr = NULL, * __single faddr = NULL;
3126 u_int16_t lport = 0, fport = 0;
3127 const u_int32_t lifscope = inp ? inp->inp_lifscope : IFSCOPE_UNKNOWN;
3128 const u_int32_t fifscope = inp ? inp->inp_fifscope : IFSCOPE_UNKNOWN;
3129
3130 cfil_get_flow_address_v6(entry, inp, &laddr, &faddr, &lport, &fport);
3131 if (outgoing) {
3132 fill_ip6_sockaddr_4_6(sin_src, laddr, lport, lifscope);
3133 fill_ip6_sockaddr_4_6(sin_dst, faddr, fport, fifscope);
3134 } else {
3135 fill_ip6_sockaddr_4_6(sin_src, faddr, fport, fifscope);
3136 fill_ip6_sockaddr_4_6(sin_dst, laddr, lport, lifscope);
3137 }
3138 }
3139 }
3140
/*
 * Sign an attach event message with the filter's crypto state.
 *
 * Builds the cfil_crypto_data payload from the fields already present
 * in the attach message (ids, pids, uuids, addresses) plus the remote
 * domain name as extra signed data, then writes the signature and its
 * length into the message.
 *
 * Returns true on success; false on bad arguments or signing failure
 * (in which case cfs_signature_length is reset to 0).
 */
static boolean_t
cfil_dispatch_attach_event_sign(cfil_crypto_state_t crypto_state,
    struct cfil_info *cfil_info,
    struct cfil_msg_sock_attached *msg)
{
	struct cfil_crypto_data data = {};
	struct iovec extra_data[1] = { { NULL, 0 } };

	if (crypto_state == NULL || msg == NULL || cfil_info == NULL) {
		return false;
	}

	data.sock_id = msg->cfs_msghdr.cfm_sock_id;
	data.direction = msg->cfs_conn_dir;

	data.pid = msg->cfs_pid;
	data.effective_pid = msg->cfs_e_pid;
	data.responsible_pid = msg->cfs_r_pid;
	uuid_copy(data.uuid, msg->cfs_uuid);
	uuid_copy(data.effective_uuid, msg->cfs_e_uuid);
	uuid_copy(data.responsible_uuid, msg->cfs_r_uuid);
	data.socketProtocol = msg->cfs_sock_protocol;
	/* Orient remote/local according to the connection direction */
	if (data.direction == CFS_CONNECTION_DIR_OUT) {
		data.remote.sin6 = msg->cfs_dst.sin6;
		data.local.sin6 = msg->cfs_src.sin6;
	} else {
		data.remote.sin6 = msg->cfs_src.sin6;
		data.local.sin6 = msg->cfs_dst.sin6;
	}

	/* Include the remote domain name, if any, as extra signed data */
	size_t len = strbuflen(msg->cfs_remote_domain_name, sizeof(msg->cfs_remote_domain_name));
	if (len > 0) {
		extra_data[0].iov_base = msg->cfs_remote_domain_name;
		extra_data[0].iov_len = len;
	}

	// At attach, if local address is already present, no need to re-sign subsequent data messages.
	if (!NULLADDRESS(data.local)) {
		cfil_info->cfi_isSignatureLatest = true;
	}

	msg->cfs_signature_length = sizeof(cfil_crypto_signature);
	if (cfil_crypto_sign_data(crypto_state, &data, extra_data, sizeof(extra_data) / sizeof(extra_data[0]), msg->cfs_signature, &msg->cfs_signature_length) != 0) {
		msg->cfs_signature_length = 0;
		CFIL_LOG(LOG_ERR, "CFIL: Failed to sign attached msg <sockID %llu <%llx>>",
		    msg->cfs_msghdr.cfm_sock_id, msg->cfs_msghdr.cfm_sock_id);
		return false;
	}

	return true;
}
3192
/*
 * Context passed through necp_with_inp_domain_name() to
 * cfil_sign_with_domain_name() when signing an event message.
 */
struct cfil_sign_parameters {
	cfil_crypto_state_t csp_state;          /* crypto state used for signing */
	struct cfil_crypto_data *csp_data;      /* payload to be signed */
	uint8_t * __indexable csp_signature;    /* out: signature buffer */
	uint32_t *csp_signature_size;           /* in/out: signature length */
};
3199
3200 static void
cfil_sign_with_domain_name(char * domain_name __null_terminated,void * ctx)3201 cfil_sign_with_domain_name(char *domain_name __null_terminated, void *ctx)
3202 {
3203 struct cfil_sign_parameters *parameters = (struct cfil_sign_parameters *)ctx;
3204 struct iovec extra_data[1] = { { NULL, 0 } };
3205
3206 if (parameters == NULL) {
3207 return;
3208 }
3209
3210 if (domain_name != NULL) {
3211 extra_data[0].iov_base = __unsafe_null_terminated_to_indexable(domain_name);
3212 extra_data[0].iov_len = strlen(domain_name);
3213 }
3214
3215 *(parameters->csp_signature_size) = sizeof(cfil_crypto_signature);
3216 if (cfil_crypto_sign_data(parameters->csp_state, parameters->csp_data,
3217 extra_data, sizeof(extra_data) / sizeof(extra_data[0]),
3218 parameters->csp_signature, parameters->csp_signature_size) != 0) {
3219 *(parameters->csp_signature_size) = 0;
3220 }
3221 }
3222
/*
 * Sign a data event message with the filter's crypto state.
 *
 * Gathers the process identity from the socket, the addresses from the
 * message, and signs via cfil_sign_with_domain_name() so the socket's
 * domain name (if known) is covered by the signature.
 *
 * Returns true on success; false on bad arguments or signing failure
 * (cfd_signature_length left at 0 by the signing callback).
 */
static boolean_t
cfil_dispatch_data_event_sign(cfil_crypto_state_t crypto_state,
    struct socket *so, struct cfil_info *cfil_info,
    struct cfil_msg_data_event *msg)
{
	struct cfil_crypto_data data = {};

	if (crypto_state == NULL || msg == NULL ||
	    so == NULL || cfil_info == NULL) {
		return false;
	}

	data.sock_id = cfil_info->cfi_sock_id;
	data.direction = cfil_info->cfi_dir;
	data.pid = so->last_pid;
	memcpy(data.uuid, so->last_uuid, sizeof(uuid_t));
	/* Report the delegated identity when the socket is delegated */
	if (so->so_flags & SOF_DELEGATED) {
		data.effective_pid = so->e_pid;
		memcpy(data.effective_uuid, so->e_uuid, sizeof(uuid_t));
	} else {
		data.effective_pid = so->last_pid;
		memcpy(data.effective_uuid, so->last_uuid, sizeof(uuid_t));
	}
#if defined(XNU_TARGET_OS_OSX)
	if (!uuid_is_null(so->so_ruuid)) {
		data.responsible_pid = so->so_rpid;
		memcpy(data.responsible_uuid, so->so_ruuid, sizeof(uuid_t));
	}
#endif
	data.socketProtocol = GET_SO_PROTO(so);

	/* Orient remote/local according to the connection direction */
	if (data.direction == CFS_CONNECTION_DIR_OUT) {
		data.remote.sin6 = msg->cfc_dst.sin6;
		data.local.sin6 = msg->cfc_src.sin6;
	} else {
		data.remote.sin6 = msg->cfc_src.sin6;
		data.local.sin6 = msg->cfc_dst.sin6;
	}

	// At first data, local address may show up for the first time, update address cache and
	// no need to re-sign subsequent data messages anymore.
	if (!NULLADDRESS(data.local)) {
		memcpy(&cfil_info->cfi_so_attach_laddr, &data.local, data.local.sa.sa_len);
		cfil_info->cfi_isSignatureLatest = true;
	}

	struct cfil_sign_parameters parameters = {
		.csp_state = crypto_state,
		.csp_data = &data,
		.csp_signature = msg->cfd_signature,
		.csp_signature_size = &msg->cfd_signature_length,
	};
	necp_with_inp_domain_name(so, &parameters, cfil_sign_with_domain_name);

	if (msg->cfd_signature_length == 0) {
		CFIL_LOG(LOG_ERR, "CFIL: Failed to sign data msg <sockID %llu <%llx>>",
		    msg->cfd_msghdr.cfm_sock_id, msg->cfd_msghdr.cfm_sock_id);
		return false;
	}

	return true;
}
3285
/*
 * Sign a socket-closed event message with the filter's crypto state.
 *
 * Gathers the process identity and byte counts from the socket and
 * cfil_info, reconstructs the flow addresses (from the hash entry for
 * UDP, or from the saved attach addresses for TCP), and signs via
 * cfil_sign_with_domain_name().
 *
 * Returns true on success; false on bad arguments or signing failure.
 */
static boolean_t
cfil_dispatch_closed_event_sign(cfil_crypto_state_t crypto_state,
    struct socket *so, struct cfil_info *cfil_info,
    struct cfil_msg_sock_closed *msg)
{
	struct cfil_crypto_data data = {};
	struct soflow_hash_entry hash_entry = {};
	struct soflow_hash_entry *hash_entry_ptr = NULL;
	struct inpcb *inp = (struct inpcb *)so->so_pcb;

	if (crypto_state == NULL || msg == NULL ||
	    so == NULL || inp == NULL || cfil_info == NULL) {
		return false;
	}

	data.sock_id = cfil_info->cfi_sock_id;
	data.direction = cfil_info->cfi_dir;

	data.pid = so->last_pid;
	memcpy(data.uuid, so->last_uuid, sizeof(uuid_t));
	/* Report the delegated identity when the socket is delegated */
	if (so->so_flags & SOF_DELEGATED) {
		data.effective_pid = so->e_pid;
		memcpy(data.effective_uuid, so->e_uuid, sizeof(uuid_t));
	} else {
		data.effective_pid = so->last_pid;
		memcpy(data.effective_uuid, so->last_uuid, sizeof(uuid_t));
	}
#if defined(XNU_TARGET_OS_OSX)
	if (!uuid_is_null(so->so_ruuid)) {
		data.responsible_pid = so->so_rpid;
		memcpy(data.responsible_uuid, so->so_ruuid, sizeof(uuid_t));
	}
#endif
	data.socketProtocol = GET_SO_PROTO(so);

	/*
	 * Fill in address info:
	 * For UDP, use the cfil_info hash entry directly.
	 * For TCP, compose an hash entry with the saved addresses.
	 */
	if (cfil_info->cfi_hash_entry != NULL) {
		hash_entry_ptr = cfil_info->cfi_hash_entry;
	} else if (cfil_info->cfi_so_attach_faddr.sa.sa_len > 0 ||
	    cfil_info->cfi_so_attach_laddr.sa.sa_len > 0) {
		soflow_fill_hash_entry_from_address(&hash_entry, TRUE, SA(&cfil_info->cfi_so_attach_laddr), FALSE);
		soflow_fill_hash_entry_from_address(&hash_entry, FALSE, SA(&cfil_info->cfi_so_attach_faddr), FALSE);
		hash_entry_ptr = &hash_entry;
	}
	if (hash_entry_ptr != NULL) {
		boolean_t outgoing = (cfil_info->cfi_dir == CFS_CONNECTION_DIR_OUT);
		union sockaddr_in_4_6 *src = outgoing ? &data.local : &data.remote;
		union sockaddr_in_4_6 *dst = outgoing ? &data.remote : &data.local;
		cfil_fill_event_msg_addresses(hash_entry_ptr, inp, src, dst, !IS_INP_V6(inp), outgoing);
	}

	/* Final traffic accounting is part of the signed payload */
	data.byte_count_in = cfil_info->cfi_byte_inbound_count;
	data.byte_count_out = cfil_info->cfi_byte_outbound_count;

	struct cfil_sign_parameters parameters = {
		.csp_state = crypto_state,
		.csp_data = &data,
		.csp_signature = msg->cfc_signature,
		.csp_signature_size = &msg->cfc_signature_length
	};
	necp_with_inp_domain_name(so, &parameters, cfil_sign_with_domain_name);

	if (msg->cfc_signature_length == 0) {
		CFIL_LOG(LOG_ERR, "CFIL: Failed to sign closed msg <sockID %llu <%llx>>",
		    msg->cfc_msghdr.cfm_sock_id, msg->cfc_msghdr.cfm_sock_id);
		return false;
	}

	return true;
}
3360
3361 static void
cfil_populate_attached_msg_domain_name(char * domain_name __null_terminated,void * ctx)3362 cfil_populate_attached_msg_domain_name(char *domain_name __null_terminated, void *ctx)
3363 {
3364 struct cfil_msg_sock_attached *msg_attached = (struct cfil_msg_sock_attached *)ctx;
3365
3366 if (msg_attached == NULL) {
3367 return;
3368 }
3369
3370 if (domain_name != NULL) {
3371 strlcpy(msg_attached->cfs_remote_domain_name, domain_name, sizeof(msg_attached->cfs_remote_domain_name));
3372 }
3373 }
3374
3375 static bool
cfil_copy_audit_token(pid_t pid,audit_token_t * buffer)3376 cfil_copy_audit_token(pid_t pid, audit_token_t *buffer)
3377 {
3378 bool success = false;
3379 proc_t p = proc_find(pid);
3380 if (p != PROC_NULL) {
3381 task_t __single t = proc_task(p);
3382 if (t != TASK_NULL) {
3383 audit_token_t audit_token = {};
3384 mach_msg_type_number_t count = TASK_AUDIT_TOKEN_COUNT;
3385 if (task_info(t, TASK_AUDIT_TOKEN, (task_info_t)&audit_token, &count) == KERN_SUCCESS) {
3386 memcpy(buffer, &audit_token, sizeof(audit_token_t));
3387 success = true;
3388 }
3389 }
3390 proc_rele(p);
3391 }
3392 return success;
3393 }
3394
/*
 * Send the CFM_OP_SOCKET_ATTACHED event for a socket to a filter agent.
 *
 * kcunit selects the filter entry; a kcunit of 0 means "the first
 * entry in control-unit order".  conn_dir is the connection direction
 * reported in the message.
 *
 * Called with the socket lock held.  Takes cfil_lck_rw shared, and
 * upgrades to exclusive only to set the filter's flow-control flag.
 *
 * Returns 0 on success (or when there is nothing to send), EINVAL for
 * a malformed socket, ENOBUFS when flow controlled (recoverable),
 * ENOMEM on allocation failure, or the ctl_enqueuedata() error.
 */
static int
cfil_dispatch_attach_event(struct socket *so, struct cfil_info *cfil_info,
    uint32_t kcunit, int conn_dir)
{
	errno_t error = 0;
	struct cfil_entry *entry = NULL;
	struct cfil_msg_sock_attached * __single msg_attached;
	struct content_filter *cfc = NULL;
	struct inpcb *inp = (struct inpcb *)so->so_pcb;
	struct soflow_hash_entry *hash_entry_ptr = NULL;
	struct soflow_hash_entry hash_entry;

	memset(&hash_entry, 0, sizeof(struct soflow_hash_entry));

	socket_lock_assert_owned(so);

	cfil_rw_lock_shared(&cfil_lck_rw);

	if (so->so_proto == NULL || so->so_proto->pr_domain == NULL) {
		error = EINVAL;
		goto done;
	}

	/* kcunit == 0 selects the first entry in control-unit order */
	if (kcunit == 0) {
		entry = SLIST_FIRST(&cfil_info->cfi_ordered_entries);
	} else {
		entry = &cfil_info->cfi_entries[kcunit - 1];
	}

	if (entry == NULL) {
		goto done;
	}

	cfc = entry->cfe_filter;
	if (cfc == NULL) {
		goto done;
	}

	/* Attach event is only sent once per filter */
	if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED)) {
		goto done;
	}

	/* Resolve the actual kcunit of the selected entry */
	if (kcunit == 0) {
		kcunit = CFI_ENTRY_KCUNIT(cfil_info, entry);
	}

	CFIL_LOG(LOG_INFO, "so %llx filter_control_unit %u kcunit %u",
	    (uint64_t)VM_KERNEL_ADDRPERM(so), entry->cfe_necp_control_unit, kcunit);

	/* Would be wasteful to try when flow controlled */
	if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
		error = ENOBUFS;
		goto done;
	}

	msg_attached = kalloc_data(sizeof(struct cfil_msg_sock_attached), Z_WAITOK);
	if (msg_attached == NULL) {
		error = ENOMEM;
		goto done;
	}

	bzero(msg_attached, sizeof(struct cfil_msg_sock_attached));
	msg_attached->cfs_msghdr.cfm_len = sizeof(struct cfil_msg_sock_attached);
	msg_attached->cfs_msghdr.cfm_version = CFM_VERSION_CURRENT;
	msg_attached->cfs_msghdr.cfm_type = CFM_TYPE_EVENT;
	msg_attached->cfs_msghdr.cfm_op = CFM_OP_SOCKET_ATTACHED;
	msg_attached->cfs_msghdr.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;

	msg_attached->cfs_sock_family = SOCK_DOM(so);
	msg_attached->cfs_sock_type = SOCK_TYPE(so);
	msg_attached->cfs_sock_protocol = GET_SO_PROTO(so);
	msg_attached->cfs_pid = so->last_pid;
	memcpy(msg_attached->cfs_uuid, so->last_uuid, sizeof(uuid_t));
	/* Report the delegated identity when the socket is delegated */
	if (so->so_flags & SOF_DELEGATED) {
		msg_attached->cfs_e_pid = so->e_pid;
		memcpy(msg_attached->cfs_e_uuid, so->e_uuid, sizeof(uuid_t));
	} else {
		msg_attached->cfs_e_pid = so->last_pid;
		memcpy(msg_attached->cfs_e_uuid, so->last_uuid, sizeof(uuid_t));
	}
#if defined(XNU_TARGET_OS_OSX)
	if (!uuid_is_null(so->so_ruuid)) {
		msg_attached->cfs_r_pid = so->so_rpid;
		memcpy(msg_attached->cfs_r_uuid, so->so_ruuid, sizeof(uuid_t));
	}
#endif
	/*
	 * Fill in address info:
	 * For UDP, use the cfil_info hash entry directly.
	 * For TCP, compose an hash entry with the saved addresses.
	 */
	if (cfil_info->cfi_hash_entry != NULL) {
		hash_entry_ptr = cfil_info->cfi_hash_entry;
	} else if (cfil_info->cfi_so_attach_faddr.sa.sa_len > 0 ||
	    cfil_info->cfi_so_attach_laddr.sa.sa_len > 0) {
		soflow_fill_hash_entry_from_address(&hash_entry, TRUE, SA(&cfil_info->cfi_so_attach_laddr), FALSE);
		soflow_fill_hash_entry_from_address(&hash_entry, FALSE, SA(&cfil_info->cfi_so_attach_faddr), FALSE);
		hash_entry_ptr = &hash_entry;
	}
	if (hash_entry_ptr != NULL) {
		cfil_fill_event_msg_addresses(hash_entry_ptr, inp,
		    &msg_attached->cfs_src, &msg_attached->cfs_dst,
		    !IS_INP_V6(inp), conn_dir == CFS_CONNECTION_DIR_OUT);
	}
	msg_attached->cfs_conn_dir = conn_dir;

	/* Best effort: a missing audit token is logged, not fatal */
	if (msg_attached->cfs_e_pid != 0) {
		if (!cfil_copy_audit_token(msg_attached->cfs_e_pid, (audit_token_t *)&msg_attached->cfs_audit_token)) {
			CFIL_LOG(LOG_ERR, "CFIL: Failed to get effective audit token for <sockID %llu <%llx>> ",
			    entry->cfe_cfil_info->cfi_sock_id, entry->cfe_cfil_info->cfi_sock_id);
		}
	}

	if (msg_attached->cfs_pid != 0) {
		if (msg_attached->cfs_pid == msg_attached->cfs_e_pid) {
			memcpy(&msg_attached->cfs_real_audit_token, &msg_attached->cfs_audit_token, sizeof(msg_attached->cfs_real_audit_token));
		} else if (!cfil_copy_audit_token(msg_attached->cfs_pid, (audit_token_t *)&msg_attached->cfs_real_audit_token)) {
			CFIL_LOG(LOG_ERR, "CFIL: Failed to get real audit token for <sockID %llu <%llx>> ",
			    entry->cfe_cfil_info->cfi_sock_id, entry->cfe_cfil_info->cfi_sock_id);
		}
	}

	necp_with_inp_domain_name(so, msg_attached, cfil_populate_attached_msg_domain_name);

	if (cfil_info->cfi_debug) {
		cfil_info_log(LOG_ERR, cfil_info, "CFIL: SENDING ATTACH UP");
	}

	/* Sign after the domain name is populated so it is covered */
	cfil_dispatch_attach_event_sign(entry->cfe_filter->cf_crypto_state, cfil_info, msg_attached);

	error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
	    entry->cfe_filter->cf_kcunit,
	    msg_attached,
	    sizeof(struct cfil_msg_sock_attached),
	    CTL_DATA_EOR);

	kfree_data(msg_attached, sizeof(struct cfil_msg_sock_attached));

	if (error != 0) {
		CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d", error);
		goto done;
	}
	microuptime(&entry->cfe_last_event);
	cfil_info->cfi_first_event.tv_sec = entry->cfe_last_event.tv_sec;
	cfil_info->cfi_first_event.tv_usec = entry->cfe_last_event.tv_usec;

	entry->cfe_flags |= CFEF_SENT_SOCK_ATTACHED;
	OSIncrementAtomic(&cfil_stats.cfs_attach_event_ok);
done:

	/* We can recover from flow control */
	if (error == ENOBUFS) {
		entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
		OSIncrementAtomic(&cfil_stats.cfs_attach_event_flow_control);

		/* Upgrade to exclusive to set the filter-wide flag */
		if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
			cfil_rw_lock_exclusive(&cfil_lck_rw);
		}

		cfc->cf_flags |= CFF_FLOW_CONTROLLED;

		cfil_rw_unlock_exclusive(&cfil_lck_rw);
	} else {
		if (error != 0) {
			OSIncrementAtomic(&cfil_stats.cfs_attach_event_fail);
		}

		cfil_rw_unlock_shared(&cfil_lck_rw);
	}
	return error;
}
3566
3567 static int
cfil_dispatch_disconnect_event(struct socket * so,struct cfil_info * cfil_info,uint32_t kcunit,int outgoing)3568 cfil_dispatch_disconnect_event(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing)
3569 {
3570 errno_t error = 0;
3571 struct mbuf *msg = NULL;
3572 struct cfil_entry *entry;
3573 struct cfe_buf *entrybuf;
3574 struct cfil_msg_hdr msg_disconnected;
3575 struct content_filter *cfc;
3576
3577 socket_lock_assert_owned(so);
3578
3579 cfil_rw_lock_shared(&cfil_lck_rw);
3580
3581 entry = &cfil_info->cfi_entries[kcunit - 1];
3582 if (outgoing) {
3583 entrybuf = &entry->cfe_snd;
3584 } else {
3585 entrybuf = &entry->cfe_rcv;
3586 }
3587
3588 cfc = entry->cfe_filter;
3589 if (cfc == NULL) {
3590 goto done;
3591 }
3592
3593 // Mark if this flow qualifies for immediate close.
3594 SET_NO_CLOSE_WAIT(sotoinpcb(so), cfil_info);
3595
3596 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
3597 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
3598
3599 /*
3600 * Send the disconnection event once
3601 */
3602 if ((outgoing && (entry->cfe_flags & CFEF_SENT_DISCONNECT_OUT)) ||
3603 (!outgoing && (entry->cfe_flags & CFEF_SENT_DISCONNECT_IN))) {
3604 CFIL_LOG(LOG_INFO, "so %llx disconnect already sent",
3605 (uint64_t)VM_KERNEL_ADDRPERM(so));
3606 goto done;
3607 }
3608
3609 /*
3610 * We're not disconnected as long as some data is waiting
3611 * to be delivered to the filter
3612 */
3613 if (outgoing && cfil_queue_empty(&entrybuf->cfe_ctl_q) == 0) {
3614 CFIL_LOG(LOG_INFO, "so %llx control queue not empty",
3615 (uint64_t)VM_KERNEL_ADDRPERM(so));
3616 error = EBUSY;
3617 goto done;
3618 }
3619 /* Would be wasteful to try when flow controlled */
3620 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
3621 error = ENOBUFS;
3622 goto done;
3623 }
3624
3625 if (cfil_info->cfi_debug) {
3626 const char * __null_terminated out = "CFIL: OUT - SENDING DISCONNECT UP";
3627 const char * __null_terminated in = "CFIL: IN - SENDING DISCONNECT UP";
3628 cfil_info_log(LOG_ERR, cfil_info, outgoing ? out : in);
3629 }
3630
3631 bzero(&msg_disconnected, sizeof(struct cfil_msg_hdr));
3632 msg_disconnected.cfm_len = sizeof(struct cfil_msg_hdr);
3633 msg_disconnected.cfm_version = CFM_VERSION_CURRENT;
3634 msg_disconnected.cfm_type = CFM_TYPE_EVENT;
3635 msg_disconnected.cfm_op = outgoing ? CFM_OP_DISCONNECT_OUT :
3636 CFM_OP_DISCONNECT_IN;
3637 msg_disconnected.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;
3638 error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
3639 entry->cfe_filter->cf_kcunit,
3640 &msg_disconnected,
3641 sizeof(struct cfil_msg_hdr),
3642 CTL_DATA_EOR);
3643 if (error != 0) {
3644 CFIL_LOG(LOG_ERR, "ctl_enqueuembuf() failed: %d", error);
3645 mbuf_freem(msg);
3646 goto done;
3647 }
3648 microuptime(&entry->cfe_last_event);
3649 CFI_ADD_TIME_LOG(cfil_info, &entry->cfe_last_event, &cfil_info->cfi_first_event, msg_disconnected.cfm_op);
3650
3651 /* Remember we have sent the disconnection message */
3652 if (outgoing) {
3653 entry->cfe_flags |= CFEF_SENT_DISCONNECT_OUT;
3654 OSIncrementAtomic(&cfil_stats.cfs_disconnect_out_event_ok);
3655 } else {
3656 entry->cfe_flags |= CFEF_SENT_DISCONNECT_IN;
3657 OSIncrementAtomic(&cfil_stats.cfs_disconnect_in_event_ok);
3658 }
3659 done:
3660 if (error == ENOBUFS) {
3661 entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
3662 OSIncrementAtomic(
3663 &cfil_stats.cfs_disconnect_event_flow_control);
3664
3665 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
3666 cfil_rw_lock_exclusive(&cfil_lck_rw);
3667 }
3668
3669 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
3670
3671 cfil_rw_unlock_exclusive(&cfil_lck_rw);
3672 } else {
3673 if (error != 0) {
3674 OSIncrementAtomic(
3675 &cfil_stats.cfs_disconnect_event_fail);
3676 }
3677
3678 cfil_rw_unlock_shared(&cfil_lck_rw);
3679 }
3680 return error;
3681 }
3682
/*
 * cfil_dispatch_closed_event()
 *
 * Send a one-time CFM_OP_SOCKET_CLOSED event for this flow up to the content
 * filter agent attached at kcunit, including the timestamped operation log
 * and byte counts accumulated on the flow.
 *
 * Returns 0 on success (or when there is nothing to send), ENOBUFS when the
 * kernel control socket is flow controlled (recoverable; CFF_FLOW_CONTROLLED
 * is set so the event is retried later), or another errno from
 * ctl_enqueuedata().
 *
 * Called with the socket lock held; takes cfil_lck_rw shared and upgrades to
 * exclusive only on the flow-control path.
 */
int
cfil_dispatch_closed_event(struct socket *so, struct cfil_info *cfil_info, int kcunit)
{
	struct cfil_entry *entry;
	struct cfil_msg_sock_closed msg_closed;
	errno_t error = 0;
	struct content_filter *cfc;
	struct inpcb *inp = NULL;

	socket_lock_assert_owned(so);

	cfil_rw_lock_shared(&cfil_lck_rw);

	entry = &cfil_info->cfi_entries[kcunit - 1];
	cfc = entry->cfe_filter;
	if (cfc == NULL) {
		/* Filter detached; nothing to notify */
		goto done;
	}

	CFIL_LOG(LOG_INFO, "so %llx kcunit %d",
	    (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);

	/* Would be wasteful to try when flow controlled */
	if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
		error = ENOBUFS;
		goto done;
	}
	/*
	 * Send a single closed message per filter
	 */
	if ((entry->cfe_flags & CFEF_SENT_SOCK_CLOSED) != 0) {
		goto done;
	}
	/* Never send "closed" for a flow the agent was never told about */
	if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) {
		goto done;
	}

	microuptime(&entry->cfe_last_event);
	CFI_ADD_TIME_LOG(cfil_info, &entry->cfe_last_event, &cfil_info->cfi_first_event, CFM_OP_SOCKET_CLOSED);

	/* Build the closed event, carrying the per-flow stats and op log */
	bzero(&msg_closed, sizeof(struct cfil_msg_sock_closed));
	msg_closed.cfc_msghdr.cfm_len = sizeof(struct cfil_msg_sock_closed);
	msg_closed.cfc_msghdr.cfm_version = CFM_VERSION_CURRENT;
	msg_closed.cfc_msghdr.cfm_type = CFM_TYPE_EVENT;
	msg_closed.cfc_msghdr.cfm_op = CFM_OP_SOCKET_CLOSED;
	msg_closed.cfc_msghdr.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;
	msg_closed.cfc_first_event.tv_sec = cfil_info->cfi_first_event.tv_sec;
	msg_closed.cfc_first_event.tv_usec = cfil_info->cfi_first_event.tv_usec;
	memcpy(msg_closed.cfc_op_time, cfil_info->cfi_op_time, sizeof(uint32_t) * CFI_MAX_TIME_LOG_ENTRY);
	memcpy(msg_closed.cfc_op_list, cfil_info->cfi_op_list, sizeof(unsigned char) * CFI_MAX_TIME_LOG_ENTRY);
	msg_closed.cfc_op_list_ctr = cfil_info->cfi_op_list_ctr;
	msg_closed.cfc_byte_inbound_count = cfil_info->cfi_byte_inbound_count;
	msg_closed.cfc_byte_outbound_count = cfil_info->cfi_byte_outbound_count;

	if (entry->cfe_laddr_sent == false) {
		/* cache it if necessary */
		if (cfil_info->cfi_so_attach_laddr.sa.sa_len == 0) {
			inp = cfil_info->cfi_so ? sotoinpcb(cfil_info->cfi_so) : NULL;
			if (inp != NULL) {
				/* Local address goes in src for outgoing flows, dst otherwise */
				boolean_t outgoing = (cfil_info->cfi_dir == CFS_CONNECTION_DIR_OUT);
				union sockaddr_in_4_6 *src = outgoing ? &cfil_info->cfi_so_attach_laddr : NULL;
				union sockaddr_in_4_6 *dst = outgoing ? NULL : &cfil_info->cfi_so_attach_laddr;
				cfil_fill_event_msg_addresses(cfil_info->cfi_hash_entry, inp,
				    src, dst, !IS_INP_V6(inp), outgoing);
			}
		}

		/* Include the local address only once per filter entry */
		if (cfil_info->cfi_so_attach_laddr.sa.sa_len != 0) {
			msg_closed.cfc_laddr.sin6 = cfil_info->cfi_so_attach_laddr.sin6;
			entry->cfe_laddr_sent = true;
		}
	}

	/* Sign the event so the agent can authenticate the flow identity */
	cfil_dispatch_closed_event_sign(entry->cfe_filter->cf_crypto_state, so, cfil_info, &msg_closed);

	if (cfil_info->cfi_debug) {
		cfil_info_log(LOG_ERR, cfil_info, "CFIL: SENDING CLOSED UP");
	}

	/* for debugging
	 * if (msg_closed.cfc_op_list_ctr > CFI_MAX_TIME_LOG_ENTRY) {
	 *      msg_closed.cfc_op_list_ctr = CFI_MAX_TIME_LOG_ENTRY; // just in case
	 * }
	 * for (unsigned int i = 0; i < msg_closed.cfc_op_list_ctr ; i++) {
	 *      CFIL_LOG(LOG_ERR, "MD: socket %llu event %2u, time + %u msec", msg_closed.cfc_msghdr.cfm_sock_id, (unsigned short)msg_closed.cfc_op_list[i], msg_closed.cfc_op_time[i]);
	 * }
	 */

	error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
	    entry->cfe_filter->cf_kcunit,
	    &msg_closed,
	    sizeof(struct cfil_msg_sock_closed),
	    CTL_DATA_EOR);
	if (error != 0) {
		CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d",
		    error);
		goto done;
	}

	entry->cfe_flags |= CFEF_SENT_SOCK_CLOSED;
	OSIncrementAtomic(&cfil_stats.cfs_closed_event_ok);
done:
	/* We can recover from flow control */
	if (error == ENOBUFS) {
		entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
		OSIncrementAtomic(&cfil_stats.cfs_closed_event_flow_control);

		/* Upgrade to exclusive to set the filter-wide flow-control flag */
		if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
			cfil_rw_lock_exclusive(&cfil_lck_rw);
		}

		cfc->cf_flags |= CFF_FLOW_CONTROLLED;

		cfil_rw_unlock_exclusive(&cfil_lck_rw);
	} else {
		if (error != 0) {
			OSIncrementAtomic(&cfil_stats.cfs_closed_event_fail);
		}

		cfil_rw_unlock_shared(&cfil_lck_rw);
	}

	return error;
}
3807
3808 static void
fill_ip6_sockaddr_4_6(union sockaddr_in_4_6 * sin46,struct in6_addr * ip6,u_int16_t port,uint32_t ifscope)3809 fill_ip6_sockaddr_4_6(union sockaddr_in_4_6 *sin46,
3810 struct in6_addr *ip6, u_int16_t port, uint32_t ifscope)
3811 {
3812 if (sin46 == NULL) {
3813 return;
3814 }
3815
3816 struct sockaddr_in6 *sin6 = &sin46->sin6;
3817
3818 sin6->sin6_family = AF_INET6;
3819 sin6->sin6_len = sizeof(*sin6);
3820 sin6->sin6_port = port;
3821 sin6->sin6_addr = *ip6;
3822 if (IN6_IS_SCOPE_EMBED(&sin6->sin6_addr)) {
3823 sin6->sin6_scope_id = ifscope;
3824 if (in6_embedded_scope) {
3825 in6_verify_ifscope(&sin6->sin6_addr, sin6->sin6_scope_id);
3826 if (sin6->sin6_addr.s6_addr16[1] != 0) {
3827 sin6->sin6_scope_id = ntohs(sin6->sin6_addr.s6_addr16[1]);
3828 sin6->sin6_addr.s6_addr16[1] = 0;
3829 }
3830 }
3831 }
3832 }
3833
3834 static void
fill_ip_sockaddr_4_6(union sockaddr_in_4_6 * sin46,struct in_addr ip,u_int16_t port)3835 fill_ip_sockaddr_4_6(union sockaddr_in_4_6 *sin46,
3836 struct in_addr ip, u_int16_t port)
3837 {
3838 if (sin46 == NULL) {
3839 return;
3840 }
3841
3842 struct sockaddr_in *sin = &sin46->sin;
3843
3844 sin->sin_family = AF_INET;
3845 sin->sin_len = sizeof(*sin);
3846 sin->sin_port = port;
3847 sin->sin_addr.s_addr = ip.s_addr;
3848 }
3849
3850 static void
cfil_get_flow_address_v6(struct soflow_hash_entry * entry,struct inpcb * inp,struct in6_addr ** laddr,struct in6_addr ** faddr,u_int16_t * lport,u_int16_t * fport)3851 cfil_get_flow_address_v6(struct soflow_hash_entry *entry, struct inpcb *inp,
3852 struct in6_addr **laddr, struct in6_addr **faddr,
3853 u_int16_t *lport, u_int16_t *fport)
3854 {
3855 if (entry != NULL) {
3856 *laddr = &entry->soflow_laddr.addr6;
3857 *faddr = &entry->soflow_faddr.addr6;
3858 *lport = entry->soflow_lport;
3859 *fport = entry->soflow_fport;
3860 } else {
3861 *laddr = &inp->in6p_laddr;
3862 *faddr = &inp->in6p_faddr;
3863 *lport = inp->inp_lport;
3864 *fport = inp->inp_fport;
3865 }
3866 }
3867
3868 static void
cfil_get_flow_address(struct soflow_hash_entry * entry,struct inpcb * inp,struct in_addr * laddr,struct in_addr * faddr,u_int16_t * lport,u_int16_t * fport)3869 cfil_get_flow_address(struct soflow_hash_entry *entry, struct inpcb *inp,
3870 struct in_addr *laddr, struct in_addr *faddr,
3871 u_int16_t *lport, u_int16_t *fport)
3872 {
3873 if (entry != NULL) {
3874 *laddr = entry->soflow_laddr.addr46.ia46_addr4;
3875 *faddr = entry->soflow_faddr.addr46.ia46_addr4;
3876 *lport = entry->soflow_lport;
3877 *fport = entry->soflow_fport;
3878 } else {
3879 *laddr = inp->inp_laddr;
3880 *faddr = inp->inp_faddr;
3881 *lport = inp->inp_lport;
3882 *fport = inp->inp_fport;
3883 }
3884 }
3885
3886 static int
cfil_dispatch_data_event(struct socket * so,struct cfil_info * cfil_info,uint32_t kcunit,int outgoing,struct mbuf * data,unsigned int copyoffset,unsigned int copylen)3887 cfil_dispatch_data_event(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing,
3888 struct mbuf *data, unsigned int copyoffset, unsigned int copylen)
3889 {
3890 errno_t error = 0;
3891 struct mbuf *copy = NULL;
3892 struct mbuf * __single msg = NULL;
3893 unsigned int one = 1;
3894 struct cfil_msg_data_event *data_req;
3895 struct inpcb *inp = (struct inpcb *)so->so_pcb;
3896 struct cfil_entry *entry;
3897 struct cfe_buf *entrybuf;
3898 struct content_filter *cfc;
3899 struct timeval tv;
3900 int inp_flags = 0;
3901
3902 cfil_rw_lock_shared(&cfil_lck_rw);
3903
3904 entry = &cfil_info->cfi_entries[kcunit - 1];
3905 if (outgoing) {
3906 entrybuf = &entry->cfe_snd;
3907 } else {
3908 entrybuf = &entry->cfe_rcv;
3909 }
3910
3911 cfc = entry->cfe_filter;
3912 if (cfc == NULL) {
3913 goto done;
3914 }
3915
3916 data = cfil_data_start(data);
3917 if (data == NULL) {
3918 CFIL_LOG(LOG_ERR, "No data start");
3919 goto done;
3920 }
3921
3922 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
3923 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
3924
3925 socket_lock_assert_owned(so);
3926
3927 /* Would be wasteful to try */
3928 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
3929 error = ENOBUFS;
3930 goto done;
3931 }
3932
3933 /* Make a copy of the data to pass to kernel control socket */
3934 copy = m_copym_mode(data, copyoffset, copylen, M_DONTWAIT, NULL, NULL,
3935 M_COPYM_NOOP_HDR);
3936 if (copy == NULL) {
3937 CFIL_LOG(LOG_ERR, "m_copym_mode() failed");
3938 error = ENOMEM;
3939 goto done;
3940 }
3941
3942 /* We need an mbuf packet for the message header */
3943 const size_t hdrsize = sizeof(struct cfil_msg_data_event);
3944 error = mbuf_allocpacket(MBUF_DONTWAIT, hdrsize, &one, &msg);
3945 if (error != 0) {
3946 CFIL_LOG(LOG_ERR, "mbuf_allocpacket() failed");
3947 m_freem(copy);
3948 /*
3949 * ENOBUFS is to indicate flow control
3950 */
3951 error = ENOMEM;
3952 goto done;
3953 }
3954 mbuf_setlen(msg, hdrsize);
3955 mbuf_pkthdr_setlen(msg, hdrsize + copylen);
3956 msg->m_next = copy;
3957 data_req = (struct cfil_msg_data_event *)mbuf_data(msg);
3958 bzero(data_req, hdrsize);
3959 data_req->cfd_msghdr.cfm_len = (uint32_t)hdrsize + copylen;
3960 data_req->cfd_msghdr.cfm_version = 1;
3961 data_req->cfd_msghdr.cfm_type = CFM_TYPE_EVENT;
3962 data_req->cfd_msghdr.cfm_op =
3963 outgoing ? CFM_OP_DATA_OUT : CFM_OP_DATA_IN;
3964 data_req->cfd_msghdr.cfm_sock_id =
3965 entry->cfe_cfil_info->cfi_sock_id;
3966 data_req->cfd_start_offset = entrybuf->cfe_peeked;
3967 data_req->cfd_end_offset = entrybuf->cfe_peeked + copylen;
3968 // The last_pid or e_pid is set here because a socket could have been
3969 // accepted by launchd and a new process spawned (with a new pid).
3970 // So the last pid associated with the socket is appended to the data event.
3971 // for a provider that is peeking bytes.
3972 if (so->so_flags & SOF_DELEGATED) {
3973 data_req->cfd_delegated_pid = so->e_pid;
3974 } else {
3975 data_req->cfd_delegated_pid = so->last_pid;
3976 }
3977 if (data_req->cfd_delegated_pid != 0) {
3978 if (!cfil_copy_audit_token(data_req->cfd_delegated_pid, (audit_token_t *)&data_req->cfd_delegated_audit_token)) {
3979 CFIL_LOG(LOG_ERR, "CFIL: Failed to get audit token for <sockID %llu <%llx>> ",
3980 entry->cfe_cfil_info->cfi_sock_id, entry->cfe_cfil_info->cfi_sock_id);
3981 }
3982 }
3983
3984 data_req->cfd_flags = 0;
3985 if (OPTIONAL_IP_HEADER(so)) {
3986 /*
3987 * For non-UDP/TCP traffic, indicate to filters if optional
3988 * IP header is present:
3989 * outgoing - indicate according to INP_HDRINCL flag
3990 * incoming - For IPv4 only, stripping of IP header is
3991 * optional. But for CFIL, we delay stripping
3992 * at rip_input. So CFIL always expects IP
3993 * frames. IP header will be stripped according
3994 * to INP_STRIPHDR flag later at reinjection.
3995 */
3996 if ((!outgoing && !IS_INP_V6(inp)) ||
3997 (outgoing && cfil_dgram_peek_socket_state(data, &inp_flags) && (inp_flags & INP_HDRINCL))) {
3998 data_req->cfd_flags |= CFD_DATA_FLAG_IP_HEADER;
3999 }
4000 }
4001
4002 /*
4003 * Copy address/port into event msg.
4004 * For non connected sockets need to copy addresses from passed
4005 * parameters
4006 */
4007 cfil_fill_event_msg_addresses(cfil_info->cfi_hash_entry, inp,
4008 &data_req->cfc_src, &data_req->cfc_dst,
4009 !IS_INP_V6(inp), outgoing);
4010
4011 if (cfil_info->cfi_debug && cfil_log_data) {
4012 cfil_info_log(LOG_ERR, cfil_info, "CFIL: SENDING DATA UP");
4013 }
4014
4015 if (cfil_info->cfi_isSignatureLatest == false) {
4016 cfil_dispatch_data_event_sign(entry->cfe_filter->cf_crypto_state, so, cfil_info, data_req);
4017 }
4018
4019 microuptime(&tv);
4020 CFI_ADD_TIME_LOG(cfil_info, &tv, &cfil_info->cfi_first_event, data_req->cfd_msghdr.cfm_op);
4021
4022 /* Pass the message to the content filter */
4023 error = ctl_enqueuembuf(entry->cfe_filter->cf_kcref,
4024 entry->cfe_filter->cf_kcunit,
4025 msg, CTL_DATA_EOR);
4026 if (error != 0) {
4027 CFIL_LOG(LOG_ERR, "ctl_enqueuembuf() failed: %d", error);
4028 mbuf_freem(msg);
4029 goto done;
4030 }
4031 entry->cfe_flags &= ~CFEF_FLOW_CONTROLLED;
4032 OSIncrementAtomic(&cfil_stats.cfs_data_event_ok);
4033
4034 if (cfil_info->cfi_debug && cfil_log_data) {
4035 CFIL_LOG(LOG_ERR, "CFIL: VERDICT ACTION: so %llx sockID %llu <%llx> outgoing %d: mbuf %llx copyoffset %u copylen %u (%s)",
4036 (uint64_t)VM_KERNEL_ADDRPERM(so), cfil_info->cfi_sock_id, cfil_info->cfi_sock_id, outgoing, (uint64_t)VM_KERNEL_ADDRPERM(data), copyoffset, copylen,
4037 data_req->cfd_flags & CFD_DATA_FLAG_IP_HEADER ? "IP HDR" : "NO IP HDR");
4038 }
4039
4040 done:
4041 if (error == ENOBUFS) {
4042 entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
4043 OSIncrementAtomic(
4044 &cfil_stats.cfs_data_event_flow_control);
4045
4046 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
4047 cfil_rw_lock_exclusive(&cfil_lck_rw);
4048 }
4049
4050 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
4051
4052 cfil_rw_unlock_exclusive(&cfil_lck_rw);
4053 } else {
4054 if (error != 0) {
4055 OSIncrementAtomic(&cfil_stats.cfs_data_event_fail);
4056 }
4057
4058 cfil_rw_unlock_shared(&cfil_lck_rw);
4059 }
4060 return error;
4061 }
4062
/*
 * cfil_data_service_ctl_q()
 *
 * Process the queue of data waiting to be delivered to the content filter
 * at kcunit: move fully-passed mbufs from the control queue to the pending
 * queue, dispatch data events for spans the filter still wants to peek at,
 * then service the pending queue and any deferred disconnect events.
 *
 * Returns 0 on success or an errno from event dispatch / pending-queue
 * service.  Called with the socket lock held.
 */
static int
cfil_data_service_ctl_q(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing)
{
	errno_t error = 0;
	struct mbuf *data, *tmp = NULL;
	unsigned int datalen = 0, copylen = 0, copyoffset = 0;
	struct cfil_entry *entry;
	struct cfe_buf *entrybuf;
	uint64_t currentoffset = 0;

	if (cfil_info == NULL) {
		return 0;
	}

	CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
	    (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);

	socket_lock_assert_owned(so);

	entry = &cfil_info->cfi_entries[kcunit - 1];
	if (outgoing) {
		entrybuf = &entry->cfe_snd;
	} else {
		entrybuf = &entry->cfe_rcv;
	}

	/* Send attached message if not yet done */
	if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) {
		error = cfil_dispatch_attach_event(so, cfil_info, CFI_ENTRY_KCUNIT(cfil_info, entry),
		    cfil_info->cfi_dir);
		if (error != 0) {
			/* We can recover from flow control */
			if (error == ENOBUFS || error == ENOMEM) {
				error = 0;
			}
			goto done;
		}
	} else if ((entry->cfe_flags & CFEF_DATA_START) == 0) {
		/* Attached but the agent has not started the data flow yet */
		OSIncrementAtomic(&cfil_stats.cfs_ctl_q_not_started);
		goto done;
	}

	if (cfil_info->cfi_debug && cfil_log_data) {
		CFIL_LOG(LOG_ERR, "CFIL: SERVICE CTL-Q: pass_offset %llu peeked %llu peek_offset %llu",
		    entrybuf->cfe_pass_offset,
		    entrybuf->cfe_peeked,
		    entrybuf->cfe_peek_offset);
	}

	/* Move all data that can pass */
	while ((data = cfil_queue_first(&entrybuf->cfe_ctl_q)) != NULL &&
	    entrybuf->cfe_ctl_q.q_start < entrybuf->cfe_pass_offset) {
		datalen = cfil_data_length(data, NULL, NULL);
		tmp = data;

		if (entrybuf->cfe_ctl_q.q_start + datalen <=
		    entrybuf->cfe_pass_offset) {
			/*
			 * The first mbuf can fully pass
			 */
			copylen = datalen;
		} else {
			/*
			 * The first mbuf can partially pass
			 */
			copylen = (unsigned int)(entrybuf->cfe_pass_offset - entrybuf->cfe_ctl_q.q_start);
		}
		VERIFY(copylen <= datalen);

		if (cfil_info->cfi_debug && cfil_log_data) {
			CFIL_LOG(LOG_ERR,
			    "CFIL: SERVICE CTL-Q PASSING: %llx first %llu peeked %llu pass %llu peek %llu"
			    "datalen %u copylen %u",
			    (uint64_t)VM_KERNEL_ADDRPERM(tmp),
			    entrybuf->cfe_ctl_q.q_start,
			    entrybuf->cfe_peeked,
			    entrybuf->cfe_pass_offset,
			    entrybuf->cfe_peek_offset,
			    datalen, copylen);
		}

		/*
		 * Data that passes has been peeked at explicitly or
		 * implicitly
		 */
		if (entrybuf->cfe_ctl_q.q_start + copylen >
		    entrybuf->cfe_peeked) {
			entrybuf->cfe_peeked =
			    entrybuf->cfe_ctl_q.q_start + copylen;
		}
		/*
		 * Stop on partial pass
		 */
		if (copylen < datalen) {
			break;
		}

		/* All good, move full data from ctl queue to pending queue */
		cfil_queue_remove(&entrybuf->cfe_ctl_q, data, datalen);

		cfil_queue_enqueue(&entrybuf->cfe_pending_q, data, datalen);
		if (outgoing) {
			OSAddAtomic64(datalen,
			    &cfil_stats.cfs_pending_q_out_enqueued);
		} else {
			OSAddAtomic64(datalen,
			    &cfil_stats.cfs_pending_q_in_enqueued);
		}
	}
	CFIL_INFO_VERIFY(cfil_info);
	if (tmp != NULL) {
		CFIL_LOG(LOG_DEBUG,
		    "%llx first %llu peeked %llu pass %llu peek %llu"
		    "datalen %u copylen %u",
		    (uint64_t)VM_KERNEL_ADDRPERM(tmp),
		    entrybuf->cfe_ctl_q.q_start,
		    entrybuf->cfe_peeked,
		    entrybuf->cfe_pass_offset,
		    entrybuf->cfe_peek_offset,
		    datalen, copylen);
	}
	tmp = NULL;

	/* Now deal with remaining data the filter wants to peek at */
	for (data = cfil_queue_first(&entrybuf->cfe_ctl_q),
	    currentoffset = entrybuf->cfe_ctl_q.q_start;
	    data != NULL && currentoffset < entrybuf->cfe_peek_offset;
	    data = cfil_queue_next(&entrybuf->cfe_ctl_q, data),
	    currentoffset += datalen) {
		datalen = cfil_data_length(data, NULL, NULL);
		tmp = data;

		/* We've already peeked at this mbuf */
		if (currentoffset + datalen <= entrybuf->cfe_peeked) {
			continue;
		}
		/*
		 * The data in the first mbuf may have been
		 * partially peeked at
		 */
		copyoffset = (unsigned int)(entrybuf->cfe_peeked - currentoffset);
		VERIFY(copyoffset < datalen);
		copylen = datalen - copyoffset;
		VERIFY(copylen <= datalen);
		/*
		 * Do not copy more than needed
		 */
		if (currentoffset + copyoffset + copylen >
		    entrybuf->cfe_peek_offset) {
			copylen = (unsigned int)(entrybuf->cfe_peek_offset -
			    (currentoffset + copyoffset));
		}

		if (cfil_info->cfi_debug && cfil_log_data) {
			CFIL_LOG(LOG_ERR,
			    "CFIL: SERVICE CTL-Q PEEKING: %llx current %llu peeked %llu pass %llu peek %llu "
			    "datalen %u copylen %u copyoffset %u",
			    (uint64_t)VM_KERNEL_ADDRPERM(tmp),
			    currentoffset,
			    entrybuf->cfe_peeked,
			    entrybuf->cfe_pass_offset,
			    entrybuf->cfe_peek_offset,
			    datalen, copylen, copyoffset);
		}

		/*
		 * Stop if there is nothing more to peek at
		 */
		if (copylen == 0) {
			break;
		}
		/*
		 * Let the filter get a peek at this span of data
		 */
		error = cfil_dispatch_data_event(so, cfil_info, kcunit,
		    outgoing, data, copyoffset, copylen);
		if (error != 0) {
			/* On error, leave data in ctl_q */
			break;
		}
		entrybuf->cfe_peeked += copylen;
		if (outgoing) {
			OSAddAtomic64(copylen,
			    &cfil_stats.cfs_ctl_q_out_peeked);
		} else {
			OSAddAtomic64(copylen,
			    &cfil_stats.cfs_ctl_q_in_peeked);
		}

		/* Stop when data could not be fully peeked at */
		if (copylen + copyoffset < datalen) {
			break;
		}
	}
	CFIL_INFO_VERIFY(cfil_info);
	if (tmp != NULL) {
		CFIL_LOG(LOG_DEBUG,
		    "%llx first %llu peeked %llu pass %llu peek %llu"
		    "datalen %u copylen %u copyoffset %u",
		    (uint64_t)VM_KERNEL_ADDRPERM(tmp),
		    currentoffset,
		    entrybuf->cfe_peeked,
		    entrybuf->cfe_pass_offset,
		    entrybuf->cfe_peek_offset,
		    datalen, copylen, copyoffset);
	}

	/*
	 * Process data that has passed the filter
	 */
	error = cfil_service_pending_queue(so, cfil_info, kcunit, outgoing);
	if (error != 0) {
		CFIL_LOG(LOG_ERR, "cfil_service_pending_queue() error %d",
		    error);
		goto done;
	}

	/*
	 * Dispatch disconnect events that could not be sent
	 */
	/* NOTE(review): cfil_info was already NULL-checked on entry; this
	 * re-check is defensive */
	if (cfil_info == NULL) {
		goto done;
	} else if (outgoing) {
		if ((cfil_info->cfi_flags & CFIF_SHUT_WR) &&
		    !(entry->cfe_flags & CFEF_SENT_DISCONNECT_OUT)) {
			cfil_dispatch_disconnect_event(so, cfil_info, kcunit, 1);
		}
	} else {
		if ((cfil_info->cfi_flags & CFIF_SHUT_RD) &&
		    !(entry->cfe_flags & CFEF_SENT_DISCONNECT_IN)) {
			cfil_dispatch_disconnect_event(so, cfil_info, kcunit, 0);
		}
	}

done:
	CFIL_LOG(LOG_DEBUG,
	    "first %llu peeked %llu pass %llu peek %llu",
	    entrybuf->cfe_ctl_q.q_start,
	    entrybuf->cfe_peeked,
	    entrybuf->cfe_pass_offset,
	    entrybuf->cfe_peek_offset);

	CFIL_INFO_VERIFY(cfil_info);
	return error;
}
4311
4312 /*
4313 * cfil_data_filter()
4314 *
4315 * Process data for a content filter installed on a socket
4316 */
4317 int
cfil_data_filter(struct socket * so,struct cfil_info * cfil_info,uint32_t kcunit,int outgoing,struct mbuf * data,uint32_t datalen)4318 cfil_data_filter(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing,
4319 struct mbuf *data, uint32_t datalen)
4320 {
4321 errno_t error = 0;
4322 struct cfil_entry *entry;
4323 struct cfe_buf *entrybuf;
4324
4325 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
4326 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
4327
4328 socket_lock_assert_owned(so);
4329
4330 entry = &cfil_info->cfi_entries[kcunit - 1];
4331 if (outgoing) {
4332 entrybuf = &entry->cfe_snd;
4333 } else {
4334 entrybuf = &entry->cfe_rcv;
4335 }
4336
4337 /* Are we attached to the filter? */
4338 if (entry->cfe_filter == NULL) {
4339 error = 0;
4340 goto done;
4341 }
4342
4343 /* Dispatch to filters */
4344 cfil_queue_enqueue(&entrybuf->cfe_ctl_q, data, datalen);
4345 if (outgoing) {
4346 OSAddAtomic64(datalen,
4347 &cfil_stats.cfs_ctl_q_out_enqueued);
4348 } else {
4349 OSAddAtomic64(datalen,
4350 &cfil_stats.cfs_ctl_q_in_enqueued);
4351 }
4352
4353 error = cfil_data_service_ctl_q(so, cfil_info, kcunit, outgoing);
4354 if (error != 0) {
4355 CFIL_LOG(LOG_ERR, "cfil_data_service_ctl_q() error %d",
4356 error);
4357 }
4358 /*
4359 * We have to return EJUSTRETURN in all cases to avoid double free
4360 * by socket layer
4361 */
4362 error = EJUSTRETURN;
4363 done:
4364 CFIL_INFO_VERIFY(cfil_info);
4365
4366 CFIL_LOG(LOG_INFO, "return %d", error);
4367 return error;
4368 }
4369
4370 static void
cfil_strip_ip_header(struct cfil_info * cfil_info,mbuf_t data,struct socket * so)4371 cfil_strip_ip_header(struct cfil_info *cfil_info, mbuf_t data, struct socket *so)
4372 {
4373 struct ip *ip = NULL;
4374 unsigned int hlen = 0;
4375 mbuf_t data_start = NULL;
4376 struct inpcb *inp = so ? sotoinpcb(so) : NULL;
4377
4378 if (inp && (inp->inp_flags & INP_STRIPHDR)) {
4379 data_start = cfil_data_start(data);
4380 if (data_start != NULL && (data_start->m_flags & M_PKTHDR)) {
4381 ip = mtod(data_start, struct ip *);
4382 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
4383
4384 if (cfil_info->cfi_debug && cfil_log_data) {
4385 CFIL_LOG(LOG_ERR, "CFIL: IPHDR STRIPPING: <so %llx>: <hlen %d m_len %d>",
4386 (uint64_t)VM_KERNEL_ADDRPERM(so),
4387 hlen, data_start->m_len);
4388 }
4389 VERIFY(hlen <= data_start->m_len);
4390 data_start->m_len -= hlen;
4391 data_start->m_pkthdr.len -= hlen;
4392 data_start->m_data += hlen;
4393 }
4394 }
4395 }
4396
/*
 * cfil_service_inject_queue() re-inject data that passed the
 * content filters
 *
 * Outgoing data is re-sent via sosend_reinject(); incoming data is marked
 * M_SKIPCFIL (so it is not filtered again) and appended directly to the
 * socket receive buffer.  Also performs the deferred work that was waiting
 * on this data: shutdown notification, close-wait wakeup, delayed socket
 * death and delayed TCP time-wait.
 *
 * Returns 0 or the error from the first failed re-injection (the remaining
 * data stays queued and the CFIF_RETRY_INJECT_* flag is set so it is retried
 * later).  Called with the socket lock held.
 */
static int
cfil_service_inject_queue(struct socket *so, struct cfil_info *cfil_info, int outgoing)
{
	mbuf_t data;
	unsigned int datalen;
	int mbcnt = 0;
	int mbnum = 0;
	errno_t error = 0;
	struct cfi_buf *cfi_buf;
	struct cfil_queue *inject_q;
	int need_rwakeup = 0;
	int count = 0;

	if (cfil_info == NULL) {
		return 0;
	}

	socket_lock_assert_owned(so);

	/* Nothing to inject into a defunct socket */
	if (so->so_state & SS_DEFUNCT) {
		return 0;
	}

	if (outgoing) {
		cfi_buf = &cfil_info->cfi_snd;
		cfil_info->cfi_flags &= ~CFIF_RETRY_INJECT_OUT;
	} else {
		cfi_buf = &cfil_info->cfi_rcv;
		cfil_info->cfi_flags &= ~CFIF_RETRY_INJECT_IN;
	}
	inject_q = &cfi_buf->cfi_inject_q;

	if (cfil_queue_empty(inject_q)) {
		return 0;
	}

	if (cfil_info->cfi_debug && cfil_log_data) {
		CFIL_LOG(LOG_ERR, "CFIL: SERVICE INJECT-Q: <so %llx> outgoing %d queue len %llu",
		    (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing, cfil_queue_len(inject_q));
	}

	while ((data = cfil_queue_first(inject_q)) != NULL) {
		datalen = cfil_data_length(data, &mbcnt, &mbnum);

		if (cfil_info->cfi_debug && cfil_log_data) {
			CFIL_LOG(LOG_ERR, "CFIL: SERVICE INJECT-Q: <so %llx> data %llx datalen %u (mbcnt %u)",
			    (uint64_t)VM_KERNEL_ADDRPERM(so), (uint64_t)VM_KERNEL_ADDRPERM(data), datalen, mbcnt);
		}

		/* Remove data from queue and adjust stats */
		cfil_queue_remove(inject_q, data, datalen);
		cfi_buf->cfi_pending_first += datalen;
		cfi_buf->cfi_pending_mbcnt -= mbcnt;
		cfi_buf->cfi_pending_mbnum -= mbnum;
		cfil_info_buf_verify(cfi_buf);

		if (outgoing) {
			error = sosend_reinject(so, NULL, data, NULL, 0);
			if (error != 0) {
				cfil_info_log(LOG_ERR, cfil_info, "CFIL: Error: sosend_reinject() failed");
				CFIL_LOG(LOG_ERR, "CFIL: sosend() failed %d", error);
				break;
			}
			// At least one injection succeeded, need to wake up pending threads.
			need_rwakeup = 1;
		} else {
			/* Prevent the data from being filtered a second time */
			data->m_flags |= M_SKIPCFIL;

			/*
			 * NOTE: We currently only support TCP, UDP, ICMP,
			 * ICMPv6 and RAWIP. For MPTCP and message TCP we'll
			 * need to call the appropriate sbappendxxx()
			 * of fix sock_inject_data_in()
			 */
			if (NEED_DGRAM_FLOW_TRACKING(so)) {
				if (OPTIONAL_IP_HEADER(so)) {
					cfil_strip_ip_header(cfil_info, data, so);
				}

				if (sbappendchain(&so->so_rcv, data)) {
					need_rwakeup = 1;
				}
			} else {
				if (sbappendstream(&so->so_rcv, data)) {
					need_rwakeup = 1;
				}
			}
		}

		if (outgoing) {
			OSAddAtomic64(datalen,
			    &cfil_stats.cfs_inject_q_out_passed);
		} else {
			OSAddAtomic64(datalen,
			    &cfil_stats.cfs_inject_q_in_passed);
		}

		count++;
	}

	if (cfil_info->cfi_debug && cfil_log_data) {
		CFIL_LOG(LOG_ERR, "CFIL: SERVICE INJECT-Q: <so %llx> injected %d",
		    (uint64_t)VM_KERNEL_ADDRPERM(so), count);
	}

	/* A single wakeup for several packets is more efficient */
	if (need_rwakeup) {
		if (outgoing == TRUE) {
			sowwakeup(so);
		} else {
			sorwakeup(so);
		}
	}

	if (error != 0 && cfil_info) {
		if (error == ENOBUFS) {
			OSIncrementAtomic(&cfil_stats.cfs_inject_q_nobufs);
		}
		if (error == ENOMEM) {
			OSIncrementAtomic(&cfil_stats.cfs_inject_q_nomem);
		}

		/* Leave a marker so the injection is retried later */
		if (outgoing) {
			cfil_info->cfi_flags |= CFIF_RETRY_INJECT_OUT;
			OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_fail);
		} else {
			cfil_info->cfi_flags |= CFIF_RETRY_INJECT_IN;
			OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_fail);
		}
	}

	/*
	 * Notify
	 */
	if (cfil_info && (cfil_info->cfi_flags & CFIF_SHUT_WR)) {
		cfil_sock_notify_shutdown(so, SHUT_WR);
		if (cfil_sock_data_pending(&so->so_snd) == 0) {
			soshutdownlock_final(so, SHUT_WR);
		}
	}
	if (cfil_info && (cfil_info->cfi_flags & CFIF_CLOSE_WAIT)) {
		if (cfil_filters_attached(so) == 0) {
			CFIL_LOG(LOG_INFO, "so %llx waking",
			    (uint64_t)VM_KERNEL_ADDRPERM(so));
			wakeup((caddr_t)cfil_info);
		}
	}

	if (SO_DELAYED_DEAD_GET(so)) {
		// Check to see if all data processed for this socket, if so mark it DEAD now.
		const bool is_dead = cfil_sock_is_dead(so);
		if (is_dead && cfil_info->cfi_debug) {
			cfil_info_log(LOG_ERR, cfil_info, "CFIL: Marked previoulsy delayed socket as DEAD");
		}
	}
	if (SO_DELAYED_TCP_TIME_WAIT_GET(so)) {
		// Check to see if all data processed for this socket, if so handle the TCP time wait now
		const bool is_added = cfil_sock_tcp_add_time_wait(so);
		if (is_added && cfil_info->cfi_debug) {
			cfil_info_log(LOG_ERR, cfil_info, "CFIL: Handled previously delayed socket for TCP time wait");
		}
	}

	CFIL_INFO_VERIFY(cfil_info);

	return error;
}
4568
/*
 * cfil_service_pending_queue()
 *
 * Move data that the filter at kcunit has passed (up to cfe_pass_offset) out
 * of its pending queue and hand it to the next filters in dispatch order; a
 * chunk is only released on an mbuf boundary.  Data that clears every filter
 * is enqueued on the inject queue for re-injection.  When the pending queue
 * is empty, just give the downstream filters a chance to service their own
 * control queues.
 *
 * Returns 0, or the first non-zero error from a downstream filter.
 * Called with the socket lock held.
 */
static int
cfil_service_pending_queue(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing)
{
	uint64_t passlen, curlen;
	mbuf_t data;
	unsigned int datalen;
	errno_t error = 0;
	struct cfil_entry *entry;
	struct cfe_buf *entrybuf;
	struct cfil_queue *pending_q;
	struct cfil_entry *iter_entry = NULL;

	CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
	    (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);

	socket_lock_assert_owned(so);

	entry = &cfil_info->cfi_entries[kcunit - 1];
	if (outgoing) {
		entrybuf = &entry->cfe_snd;
	} else {
		entrybuf = &entry->cfe_rcv;
	}

	pending_q = &entrybuf->cfe_pending_q;

	/* Number of queued bytes this filter has allowed to pass */
	passlen = entrybuf->cfe_pass_offset - pending_q->q_start;

	if (cfil_queue_empty(pending_q)) {
		/* Nothing queued here: let the downstream filters run */
		for (iter_entry = SLIST_NEXT(entry, cfe_order_link);
		    iter_entry != NULL;
		    iter_entry = SLIST_NEXT(iter_entry, cfe_order_link)) {
			error = cfil_data_service_ctl_q(so, cfil_info, CFI_ENTRY_KCUNIT(cfil_info, iter_entry), outgoing);
			/* 0 means passed so we can continue */
			if (error != 0) {
				break;
			}
		}
		goto done;
	}

	/*
	 * Locate the chunks of data that we can pass to the next filter
	 * A data chunk must be on mbuf boundaries
	 */
	curlen = 0;
	while ((data = cfil_queue_first(pending_q)) != NULL) {
		datalen = cfil_data_length(data, NULL, NULL);

		if (cfil_info->cfi_debug && cfil_log_data) {
			CFIL_LOG(LOG_ERR,
			    "CFIL: SERVICE PENDING-Q: data %llx datalen %u passlen %llu curlen %llu",
			    (uint64_t)VM_KERNEL_ADDRPERM(data), datalen,
			    passlen, curlen);
		}

		/* This mbuf extends past the pass offset: keep it queued */
		if (curlen + datalen > passlen) {
			break;
		}

		cfil_queue_remove(pending_q, data, datalen);

		curlen += datalen;

		/* Run the released chunk through the remaining filters */
		for (iter_entry = SLIST_NEXT(entry, cfe_order_link);
		    iter_entry != NULL;
		    iter_entry = SLIST_NEXT(iter_entry, cfe_order_link)) {
			error = cfil_data_filter(so, cfil_info, CFI_ENTRY_KCUNIT(cfil_info, iter_entry), outgoing,
			    data, datalen);
			/* 0 means passed so we can continue */
			if (error != 0) {
				break;
			}
		}
		/* When data has passed all filters, re-inject */
		if (error == 0) {
			if (outgoing) {
				cfil_queue_enqueue(
					&cfil_info->cfi_snd.cfi_inject_q,
					data, datalen);
				OSAddAtomic64(datalen,
				    &cfil_stats.cfs_inject_q_out_enqueued);
			} else {
				cfil_queue_enqueue(
					&cfil_info->cfi_rcv.cfi_inject_q,
					data, datalen);
				OSAddAtomic64(datalen,
				    &cfil_stats.cfs_inject_q_in_enqueued);
			}
		}
	}

done:
	CFIL_INFO_VERIFY(cfil_info);

	return error;
}
4666
/*
 * Record the updated pass and peek offsets received from the user space
 * filter agent for one filter unit/direction and, if anything changed,
 * service the control queue so data that is now allowed to pass can move
 * to the pending queue.
 *
 * Returns EJUSTRETURN when offsets were updated and the queues serviced,
 * EPIPE when the flow is marked for drop, 0 when there was nothing to do,
 * or an error from cfil_data_service_ctl_q().
 *
 * Side effect: marks the entry CFEF_CFIL_DETACHED once both directions
 * have passed everything (or the socket is in close-wait with drained
 * control queues), and wakes a thread blocked in cfil_sock_close_wait()
 * when the last attached filter detaches.
 */
int
cfil_update_data_offsets(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing,
    uint64_t pass_offset, uint64_t peek_offset)
{
	errno_t error = 0;
	struct cfil_entry *entry = NULL;
	struct cfe_buf *entrybuf;
	int updated = 0;

	CFIL_LOG(LOG_INFO, "pass %llu peek %llu", pass_offset, peek_offset);

	socket_lock_assert_owned(so);

	/* Nothing to update once cfil is no longer attached to the socket */
	if (cfil_info == NULL) {
		CFIL_LOG(LOG_ERR, "so %llx cfil detached",
		    (uint64_t)VM_KERNEL_ADDRPERM(so));
		error = 0;
		goto done;
	} else if (cfil_info->cfi_flags & CFIF_DROP) {
		/* Flow already condemned: report broken pipe */
		CFIL_LOG(LOG_ERR, "so %llx drop set",
		    (uint64_t)VM_KERNEL_ADDRPERM(so));
		error = EPIPE;
		goto done;
	}

	entry = &cfil_info->cfi_entries[kcunit - 1];
	if (outgoing) {
		entrybuf = &entry->cfe_snd;
	} else {
		entrybuf = &entry->cfe_rcv;
	}

	/* Record updated offsets for this content filter */
	if (pass_offset > entrybuf->cfe_pass_offset) {
		entrybuf->cfe_pass_offset = pass_offset;

		/* Maintain the invariant cfe_peek_offset >= cfe_pass_offset */
		if (entrybuf->cfe_peek_offset < entrybuf->cfe_pass_offset) {
			entrybuf->cfe_peek_offset = entrybuf->cfe_pass_offset;
		}
		updated = 1;
	} else {
		/* Offsets only move forward; ignore stale updates */
		CFIL_LOG(LOG_INFO, "pass_offset %llu <= cfe_pass_offset %llu",
		    pass_offset, entrybuf->cfe_pass_offset);
	}
	/* Filter does not want or need to see data that's allowed to pass */
	if (peek_offset > entrybuf->cfe_pass_offset &&
	    peek_offset > entrybuf->cfe_peek_offset) {
		entrybuf->cfe_peek_offset = peek_offset;
		updated = 1;
	}
	/* Nothing to do */
	if (updated == 0) {
		goto done;
	}

	/* Move data held in control queue to pending queue if needed */
	error = cfil_data_service_ctl_q(so, cfil_info, kcunit, outgoing);
	if (error != 0) {
		CFIL_LOG(LOG_ERR, "cfil_data_service_ctl_q() error %d",
		    error);
		goto done;
	}
	/* Signal the caller that the update was fully consumed here */
	error = EJUSTRETURN;

done:
	/*
	 * The filter is effectively detached when pass all from both sides
	 * or when the socket is closed and no more data is waiting
	 * to be delivered to the filter
	 */
	if (entry != NULL &&
	    ((entry->cfe_snd.cfe_pass_offset == CFM_MAX_OFFSET &&
	    entry->cfe_rcv.cfe_pass_offset == CFM_MAX_OFFSET) ||
	    ((cfil_info->cfi_flags & CFIF_CLOSE_WAIT) &&
	    cfil_queue_empty(&entry->cfe_snd.cfe_ctl_q) &&
	    cfil_queue_empty(&entry->cfe_rcv.cfe_ctl_q)))) {
		entry->cfe_flags |= CFEF_CFIL_DETACHED;

		if (cfil_info->cfi_debug) {
			const char * __null_terminated out = "CFIL: OUT - PASSED ALL - DETACH";
			const char * __null_terminated in = "CFIL: IN - PASSED ALL - DETACH";
			cfil_info_log(LOG_ERR, cfil_info, outgoing ? out : in);
		}

		CFIL_LOG(LOG_INFO, "so %llx detached %u",
		    (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
		/* Wake a close-in-progress waiter once the last filter is gone */
		if ((cfil_info->cfi_flags & CFIF_CLOSE_WAIT) &&
		    cfil_filters_attached(so) == 0) {
			if (cfil_info->cfi_debug) {
				cfil_info_log(LOG_ERR, cfil_info, "CFIL: WAKING");
			}
			CFIL_LOG(LOG_INFO, "so %llx waking",
			    (uint64_t)VM_KERNEL_ADDRPERM(so));
			wakeup((caddr_t)cfil_info);
		}
	}
	CFIL_INFO_VERIFY(cfil_info);
	CFIL_LOG(LOG_INFO, "return %d", error);
	return error;
}
4767
4768 /*
4769 * Update pass offset for socket when no data is pending
4770 */
4771 static int
cfil_set_socket_pass_offset(struct socket * so,struct cfil_info * cfil_info,int outgoing)4772 cfil_set_socket_pass_offset(struct socket *so, struct cfil_info *cfil_info, int outgoing)
4773 {
4774 struct cfi_buf *cfi_buf;
4775 struct cfil_entry *entry;
4776 struct cfe_buf *entrybuf;
4777 uint32_t kcunit;
4778 uint64_t pass_offset = 0;
4779 boolean_t first = true;
4780
4781 if (cfil_info == NULL) {
4782 return 0;
4783 }
4784
4785 if (cfil_info->cfi_debug && cfil_log_data) {
4786 CFIL_LOG(LOG_ERR, "so %llx outgoing %d",
4787 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);
4788 }
4789
4790 socket_lock_assert_owned(so);
4791
4792 if (outgoing) {
4793 cfi_buf = &cfil_info->cfi_snd;
4794 } else {
4795 cfi_buf = &cfil_info->cfi_rcv;
4796 }
4797
4798 if (cfil_info->cfi_debug && cfil_log_data) {
4799 CFIL_LOG(LOG_ERR, "CFIL: <so %llx, sockID %llu <%llx>> outgoing %d cfi_pending_first %llu cfi_pending_last %llu",
4800 (uint64_t)VM_KERNEL_ADDRPERM(so), cfil_info->cfi_sock_id, cfil_info->cfi_sock_id, outgoing,
4801 cfi_buf->cfi_pending_first, cfi_buf->cfi_pending_last);
4802 }
4803
4804 if (cfi_buf->cfi_pending_last - cfi_buf->cfi_pending_first == 0) {
4805 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
4806 entry = &cfil_info->cfi_entries[kcunit - 1];
4807
4808 /* Are we attached to a filter? */
4809 if (entry->cfe_filter == NULL) {
4810 continue;
4811 }
4812
4813 if (outgoing) {
4814 entrybuf = &entry->cfe_snd;
4815 } else {
4816 entrybuf = &entry->cfe_rcv;
4817 }
4818
4819 // Keep track of the smallest pass_offset among filters.
4820 if (first == true ||
4821 entrybuf->cfe_pass_offset < pass_offset) {
4822 pass_offset = entrybuf->cfe_pass_offset;
4823 first = false;
4824 }
4825 }
4826 cfi_buf->cfi_pass_offset = pass_offset;
4827 }
4828
4829 if (cfil_info->cfi_debug && cfil_log_data) {
4830 CFIL_LOG(LOG_ERR, "CFIL: <so %llx, sockID %llu <%llx>>, cfi_pass_offset %llu",
4831 (uint64_t)VM_KERNEL_ADDRPERM(so), cfil_info->cfi_sock_id, cfil_info->cfi_sock_id, cfi_buf->cfi_pass_offset);
4832 }
4833
4834 return 0;
4835 }
4836
4837 int
cfil_action_data_pass(struct socket * so,struct cfil_info * cfil_info,uint32_t kcunit,int outgoing,uint64_t pass_offset,uint64_t peek_offset)4838 cfil_action_data_pass(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing,
4839 uint64_t pass_offset, uint64_t peek_offset)
4840 {
4841 errno_t error = 0;
4842
4843 CFIL_LOG(LOG_INFO, "");
4844
4845 socket_lock_assert_owned(so);
4846
4847 error = cfil_acquire_sockbuf(so, cfil_info, outgoing);
4848 if (error != 0) {
4849 CFIL_LOG(LOG_INFO, "so %llx %s dropped",
4850 (uint64_t)VM_KERNEL_ADDRPERM(so),
4851 outgoing ? "out" : "in");
4852 goto release;
4853 }
4854
4855 error = cfil_update_data_offsets(so, cfil_info, kcunit, outgoing,
4856 pass_offset, peek_offset);
4857
4858 cfil_service_inject_queue(so, cfil_info, outgoing);
4859
4860 cfil_set_socket_pass_offset(so, cfil_info, outgoing);
4861 release:
4862 CFIL_INFO_VERIFY(cfil_info);
4863 cfil_release_sockbuf(so, outgoing);
4864
4865 return error;
4866 }
4867
4868
4869 static void
cfil_flush_queues(struct socket * so,struct cfil_info * cfil_info)4870 cfil_flush_queues(struct socket *so, struct cfil_info *cfil_info)
4871 {
4872 struct cfil_entry *entry;
4873 int kcunit;
4874 uint64_t drained;
4875
4876 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || cfil_info == NULL) {
4877 goto done;
4878 }
4879
4880 socket_lock_assert_owned(so);
4881
4882 /*
4883 * Flush the output queues and ignore errors as long as
4884 * we are attached
4885 */
4886 (void) cfil_acquire_sockbuf(so, cfil_info, 1);
4887 if (cfil_info != NULL) {
4888 drained = 0;
4889 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
4890 entry = &cfil_info->cfi_entries[kcunit - 1];
4891
4892 drained += cfil_queue_drain(&entry->cfe_snd.cfe_ctl_q);
4893 drained += cfil_queue_drain(&entry->cfe_snd.cfe_pending_q);
4894 }
4895 drained += cfil_queue_drain(&cfil_info->cfi_snd.cfi_inject_q);
4896
4897 if (drained) {
4898 if (cfil_info->cfi_flags & CFIF_DROP) {
4899 OSIncrementAtomic(
4900 &cfil_stats.cfs_flush_out_drop);
4901 } else {
4902 OSIncrementAtomic(
4903 &cfil_stats.cfs_flush_out_close);
4904 }
4905 }
4906 }
4907 cfil_release_sockbuf(so, 1);
4908
4909 /*
4910 * Flush the input queues
4911 */
4912 (void) cfil_acquire_sockbuf(so, cfil_info, 0);
4913 if (cfil_info != NULL) {
4914 drained = 0;
4915 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
4916 entry = &cfil_info->cfi_entries[kcunit - 1];
4917
4918 drained += cfil_queue_drain(
4919 &entry->cfe_rcv.cfe_ctl_q);
4920 drained += cfil_queue_drain(
4921 &entry->cfe_rcv.cfe_pending_q);
4922 }
4923 drained += cfil_queue_drain(&cfil_info->cfi_rcv.cfi_inject_q);
4924
4925 if (drained) {
4926 if (cfil_info->cfi_flags & CFIF_DROP) {
4927 OSIncrementAtomic(
4928 &cfil_stats.cfs_flush_in_drop);
4929 } else {
4930 OSIncrementAtomic(
4931 &cfil_stats.cfs_flush_in_close);
4932 }
4933 }
4934 }
4935 cfil_release_sockbuf(so, 0);
4936 done:
4937 CFIL_INFO_VERIFY(cfil_info);
4938 }
4939
4940 int
cfil_action_drop(struct socket * so,struct cfil_info * cfil_info,uint32_t kcunit)4941 cfil_action_drop(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit)
4942 {
4943 errno_t error = 0;
4944 struct cfil_entry *entry;
4945 struct proc *p;
4946
4947 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || cfil_info == NULL) {
4948 goto done;
4949 }
4950
4951 socket_lock_assert_owned(so);
4952
4953 entry = &cfil_info->cfi_entries[kcunit - 1];
4954
4955 /* Are we attached to the filter? */
4956 if (entry->cfe_filter == NULL) {
4957 goto done;
4958 }
4959
4960 cfil_info->cfi_flags |= CFIF_DROP;
4961
4962 p = current_proc();
4963
4964 /*
4965 * Force the socket to be marked defunct
4966 * (forcing fixed along with rdar://19391339)
4967 */
4968 if (so->so_flow_db == NULL) {
4969 error = sosetdefunct(p, so,
4970 SHUTDOWN_SOCKET_LEVEL_CONTENT_FILTER | SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL,
4971 FALSE);
4972
4973 /* Flush the socket buffer and disconnect */
4974 if (error == 0) {
4975 error = sodefunct(p, so,
4976 SHUTDOWN_SOCKET_LEVEL_CONTENT_FILTER | SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL);
4977 }
4978 }
4979
4980 /* The filter is done, mark as detached */
4981 entry->cfe_flags |= CFEF_CFIL_DETACHED;
4982
4983 if (cfil_info->cfi_debug) {
4984 cfil_info_log(LOG_ERR, cfil_info, "CFIL: DROP - DETACH");
4985 }
4986
4987 CFIL_LOG(LOG_INFO, "so %llx detached %u",
4988 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
4989
4990 /* Pending data needs to go */
4991 cfil_flush_queues(so, cfil_info);
4992
4993 if (cfil_info && (cfil_info->cfi_flags & CFIF_CLOSE_WAIT)) {
4994 if (cfil_filters_attached(so) == 0) {
4995 CFIL_LOG(LOG_INFO, "so %llx waking",
4996 (uint64_t)VM_KERNEL_ADDRPERM(so));
4997 wakeup((caddr_t)cfil_info);
4998 }
4999 }
5000 done:
5001 return error;
5002 }
5003
5004 int
cfil_action_bless_client(uint32_t kcunit,struct cfil_msg_hdr * msghdr)5005 cfil_action_bless_client(uint32_t kcunit, struct cfil_msg_hdr *msghdr)
5006 {
5007 errno_t error = 0;
5008 struct cfil_info * __single cfil_info = NULL;
5009
5010 bool cfil_attached = false;
5011 struct cfil_msg_bless_client *blessmsg = (struct cfil_msg_bless_client *)msghdr;
5012
5013 // Search and lock socket
5014 struct socket *so = cfil_socket_from_client_uuid(blessmsg->cfb_client_uuid, &cfil_attached);
5015 if (so == NULL) {
5016 error = ENOENT;
5017 } else {
5018 // The client gets a pass automatically
5019 cfil_info = (so->so_flow_db != NULL) ?
5020 soflow_db_get_feature_context(so->so_flow_db, msghdr->cfm_sock_id) : so->so_cfil;
5021
5022 if (cfil_attached) {
5023 if (cfil_info != NULL && cfil_info->cfi_debug) {
5024 cfil_info_log(LOG_ERR, cfil_info, "CFIL: VERDICT RECEIVED: BLESS");
5025 }
5026 cfil_sock_received_verdict(so);
5027 (void)cfil_action_data_pass(so, cfil_info, kcunit, 1, CFM_MAX_OFFSET, CFM_MAX_OFFSET);
5028 (void)cfil_action_data_pass(so, cfil_info, kcunit, 0, CFM_MAX_OFFSET, CFM_MAX_OFFSET);
5029 } else {
5030 so->so_flags1 |= SOF1_CONTENT_FILTER_SKIP;
5031 }
5032 socket_unlock(so, 1);
5033 }
5034
5035 return error;
5036 }
5037
5038 int
cfil_action_set_crypto_key(uint32_t kcunit,struct cfil_msg_hdr * msghdr)5039 cfil_action_set_crypto_key(uint32_t kcunit, struct cfil_msg_hdr *msghdr)
5040 {
5041 struct content_filter *cfc = NULL;
5042 cfil_crypto_state_t crypto_state = NULL;
5043 struct cfil_msg_set_crypto_key *keymsg = (struct cfil_msg_set_crypto_key *)msghdr;
5044
5045 CFIL_LOG(LOG_NOTICE, "");
5046
5047 if (kcunit > MAX_CONTENT_FILTER) {
5048 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
5049 kcunit, MAX_CONTENT_FILTER);
5050 return EINVAL;
5051 }
5052 crypto_state = cfil_crypto_init_client((uint8_t *)keymsg->crypto_key);
5053 if (crypto_state == NULL) {
5054 CFIL_LOG(LOG_ERR, "failed to initialize crypto state for unit %u)",
5055 kcunit);
5056 return EINVAL;
5057 }
5058
5059 cfil_rw_lock_exclusive(&cfil_lck_rw);
5060
5061 cfc = content_filters[kcunit - 1];
5062 if (cfc->cf_kcunit != kcunit) {
5063 CFIL_LOG(LOG_ERR, "bad unit info %u)",
5064 kcunit);
5065 cfil_rw_unlock_exclusive(&cfil_lck_rw);
5066 cfil_crypto_cleanup_state(crypto_state);
5067 return EINVAL;
5068 }
5069 if (cfc->cf_crypto_state != NULL) {
5070 cfil_crypto_cleanup_state(cfc->cf_crypto_state);
5071 cfc->cf_crypto_state = NULL;
5072 }
5073 cfc->cf_crypto_state = crypto_state;
5074
5075 cfil_rw_unlock_exclusive(&cfil_lck_rw);
5076 return 0;
5077 }
5078
5079 static int
cfil_update_entry_offsets(struct socket * so,struct cfil_info * cfil_info,int outgoing,unsigned int datalen)5080 cfil_update_entry_offsets(struct socket *so, struct cfil_info *cfil_info, int outgoing, unsigned int datalen)
5081 {
5082 struct cfil_entry *entry;
5083 struct cfe_buf *entrybuf;
5084 uint32_t kcunit;
5085
5086 CFIL_LOG(LOG_INFO, "so %llx outgoing %d datalen %u",
5087 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing, datalen);
5088
5089 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
5090 entry = &cfil_info->cfi_entries[kcunit - 1];
5091
5092 /* Are we attached to the filter? */
5093 if (entry->cfe_filter == NULL) {
5094 continue;
5095 }
5096
5097 if (outgoing) {
5098 entrybuf = &entry->cfe_snd;
5099 } else {
5100 entrybuf = &entry->cfe_rcv;
5101 }
5102
5103 entrybuf->cfe_ctl_q.q_start += datalen;
5104 if (entrybuf->cfe_pass_offset < entrybuf->cfe_ctl_q.q_start) {
5105 entrybuf->cfe_pass_offset = entrybuf->cfe_ctl_q.q_start;
5106 }
5107 entrybuf->cfe_peeked = entrybuf->cfe_ctl_q.q_start;
5108 if (entrybuf->cfe_peek_offset < entrybuf->cfe_pass_offset) {
5109 entrybuf->cfe_peek_offset = entrybuf->cfe_pass_offset;
5110 }
5111
5112 entrybuf->cfe_ctl_q.q_end += datalen;
5113
5114 entrybuf->cfe_pending_q.q_start += datalen;
5115 entrybuf->cfe_pending_q.q_end += datalen;
5116 }
5117 CFIL_INFO_VERIFY(cfil_info);
5118 return 0;
5119 }
5120
/*
 * Common data path for both directions: account for the new data, take
 * the fast path when everything up to this point is already allowed to
 * pass, otherwise present the data to each attached filter in order.
 *
 * Returns 0 when the data may proceed (no filter claimed it), EPIPE when
 * the flow is marked for drop (or a datagram was tail-dropped), ENOBUFS
 * when a datagram chain cannot be built, or an error from
 * cfil_data_filter().
 */
int
cfil_data_common(struct socket *so, struct cfil_info *cfil_info, int outgoing, struct sockaddr *to,
    struct mbuf *data, struct mbuf *control, uint32_t flags)
{
#pragma unused(to, control, flags)
	errno_t error = 0;
	unsigned int datalen;
	int mbcnt = 0;
	int mbnum = 0;
	int kcunit;
	struct cfi_buf *cfi_buf;
	struct mbuf *chain = NULL;

	/* Without cfil_info the socket is no longer filtered: let data pass */
	if (cfil_info == NULL) {
		CFIL_LOG(LOG_ERR, "so %llx cfil detached",
		    (uint64_t)VM_KERNEL_ADDRPERM(so));
		error = 0;
		goto done;
	} else if (cfil_info->cfi_flags & CFIF_DROP) {
		CFIL_LOG(LOG_ERR, "so %llx drop set",
		    (uint64_t)VM_KERNEL_ADDRPERM(so));
		error = EPIPE;
		goto done;
	}

	datalen = cfil_data_length(data, &mbcnt, &mbnum);

	/* Nothing to filter */
	if (datalen == 0) {
		error = 0;
		goto done;
	}

	/* Per-direction byte accounting */
	if (outgoing) {
		cfi_buf = &cfil_info->cfi_snd;
		cfil_info->cfi_byte_outbound_count += datalen;
	} else {
		cfi_buf = &cfil_info->cfi_rcv;
		cfil_info->cfi_byte_inbound_count += datalen;
	}

	/* Tentatively account the data as pending a filter decision */
	cfi_buf->cfi_pending_last += datalen;
	cfi_buf->cfi_pending_mbcnt += mbcnt;
	cfi_buf->cfi_pending_mbnum += mbnum;

	/* Datagrams: tail-drop when too many mbufs are held pending */
	if (NEED_DGRAM_FLOW_TRACKING(so)) {
		if (cfi_buf->cfi_pending_mbnum > cfil_udp_gc_mbuf_num_max ||
		    cfi_buf->cfi_pending_mbcnt > cfil_udp_gc_mbuf_cnt_max) {
			cfi_buf->cfi_tail_drop_cnt++;
			cfi_buf->cfi_pending_mbcnt -= mbcnt;
			cfi_buf->cfi_pending_mbnum -= mbnum;
			return EPIPE;
		}
	}

	cfil_info_buf_verify(cfi_buf);

	if (cfil_info->cfi_debug && cfil_log_data) {
		CFIL_LOG(LOG_ERR, "CFIL: QUEUEING DATA: <so %llx> %s: data %llx len %u flags 0x%x nextpkt %llx - cfi_pending_last %llu cfi_pending_mbcnt %u cfi_pass_offset %llu",
		    (uint64_t)VM_KERNEL_ADDRPERM(so),
		    outgoing ? "OUT" : "IN",
		    (uint64_t)VM_KERNEL_ADDRPERM(data), datalen, data->m_flags,
		    (uint64_t)VM_KERNEL_ADDRPERM(data->m_nextpkt),
		    cfi_buf->cfi_pending_last,
		    cfi_buf->cfi_pending_mbcnt,
		    cfi_buf->cfi_pass_offset);
	}

	/* Fast path when below pass offset */
	if (cfi_buf->cfi_pending_last <= cfi_buf->cfi_pass_offset) {
		cfil_update_entry_offsets(so, cfil_info, outgoing, datalen);
		if (cfil_info->cfi_debug && cfil_log_data) {
			CFIL_LOG(LOG_ERR, "CFIL: QUEUEING DATA: <so %llx> %s: FAST PATH",
			    (uint64_t)VM_KERNEL_ADDRPERM(so),
			    outgoing ? "OUT" : "IN");
		}
		// For incoming packets, see if we need to strip off ip header
		if (!outgoing && NEED_DGRAM_FLOW_TRACKING(so) && OPTIONAL_IP_HEADER(so)) {
			cfil_strip_ip_header(cfil_info, data, so);
		}
	} else {
		/* Slow path: offer the data to each attached filter in order */
		struct cfil_entry *iter_entry;
		SLIST_FOREACH(iter_entry, &cfil_info->cfi_ordered_entries, cfe_order_link) {
			// Is cfil attached to this filter?
			kcunit = CFI_ENTRY_KCUNIT(cfil_info, iter_entry);
			if (IS_ENTRY_ATTACHED(cfil_info, kcunit)) {
				if (NEED_DGRAM_FLOW_TRACKING(so) && chain == NULL) {
					/* Datagrams only:
					 * Chain addr (incoming only TDB), control (optional) and data into one chain.
					 * This full chain will be reinjected into socket after recieving verdict.
					 */
					(void) cfil_dgram_save_socket_state(cfil_info, data);
					chain = sbconcat_mbufs(NULL, outgoing ? NULL : to, data, control);
					if (chain == NULL) {
						return ENOBUFS;
					}
					data = chain;
				}
				error = cfil_data_filter(so, cfil_info, kcunit, outgoing, data,
				    datalen);
			}
			/* 0 means passed so continue with next filter */
			if (error != 0) {
				break;
			}
		}
	}

	/* Move cursor if no filter claimed the data */
	if (error == 0) {
		cfi_buf->cfi_pending_first += datalen;
		cfi_buf->cfi_pending_mbcnt -= mbcnt;
		cfi_buf->cfi_pending_mbnum -= mbnum;
		cfil_info_buf_verify(cfi_buf);
	}
done:
	CFIL_INFO_VERIFY(cfil_info);

	return error;
}
5240
/*
 * Callback from socket layer sosendxxx()
 *
 * Intercepts outgoing data before it reaches the protocol layer.
 * Returns 0 to let the data proceed, EPIPE when the flow must be
 * dropped, or an error from cfil_data_common().
 */
int
cfil_sock_data_out(struct socket *so, struct sockaddr *to,
    struct mbuf *data, struct mbuf *control, uint32_t flags, struct soflow_hash_entry *flow_entry)
{
	int error = 0;
	int new_filter_control_unit = 0;

	/* Datagram sockets are handled by the flow-tracking (UDP) path */
	if (NEED_DGRAM_FLOW_TRACKING(so)) {
		return cfil_sock_udp_handle_data(TRUE, so, NULL, to, data, control, flags, flow_entry);
	}

	if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
		/* Drop pre-existing TCP sockets if filter is enabled now */
		if (!DO_PRESERVE_CONNECTIONS && cfil_active_count > 0 && !SKIP_FILTER_FOR_TCP_SOCKET(so)) {
			new_filter_control_unit = necp_socket_get_content_filter_control_unit(so);
			if (new_filter_control_unit > 0) {
				CFIL_LOG(LOG_NOTICE, "CFIL: TCP(OUT) <so %llx> - filter state changed - dropped pre-existing flow", (uint64_t)VM_KERNEL_ADDRPERM(so));
				return EPIPE;
			}
		}
		return 0;
	}

	/* Drop pre-existing TCP sockets when filter state changed */
	new_filter_control_unit = necp_socket_get_content_filter_control_unit(so);
	if (new_filter_control_unit > 0 && new_filter_control_unit != so->so_cfil->cfi_filter_control_unit && !SKIP_FILTER_FOR_TCP_SOCKET(so)) {
		if (DO_PRESERVE_CONNECTIONS || (so->so_cfil->cfi_filter_policy_gencount == necp_socket_get_policy_gencount(so))) {
			// CFIL state has changed, but preserve the flow intentionally or if this is not a result of NECP policy change
			so->so_cfil->cfi_filter_control_unit = new_filter_control_unit;
		} else {
			CFIL_LOG(LOG_NOTICE, "CFIL: TCP(OUT) <so %llx> - filter state changed - dropped pre-existing flow (old state 0x%x new state 0x%x)",
			    (uint64_t)VM_KERNEL_ADDRPERM(so),
			    so->so_cfil->cfi_filter_control_unit, new_filter_control_unit);
			return EPIPE;
		}
	}

	/*
	 * Pass initial data for TFO.
	 */
	if (IS_INITIAL_TFO_DATA(so)) {
		return 0;
	}

	socket_lock_assert_owned(so);

	if (so->so_cfil->cfi_flags & CFIF_DROP) {
		CFIL_LOG(LOG_ERR, "so %llx drop set",
		    (uint64_t)VM_KERNEL_ADDRPERM(so));
		return EPIPE;
	}
	/* Control mbufs are not expected on this path; log and count them */
	if (control != NULL) {
		CFIL_LOG(LOG_ERR, "so %llx control",
		    (uint64_t)VM_KERNEL_ADDRPERM(so));
		OSIncrementAtomic(&cfil_stats.cfs_data_out_control);
	}
	/* Out-of-band data is logged and counted, then handled like any other */
	if ((flags & MSG_OOB)) {
		CFIL_LOG(LOG_ERR, "so %llx MSG_OOB",
		    (uint64_t)VM_KERNEL_ADDRPERM(so));
		OSIncrementAtomic(&cfil_stats.cfs_data_out_oob);
	}
	/*
	 * Abort if socket is defunct.
	 */
	if (so->so_flags & SOF_DEFUNCT) {
		return EPIPE;
	}
	/* The caller must hold the send socket buffer lock */
	if ((so->so_snd.sb_flags & SB_LOCK) == 0) {
		panic("so %p SB_LOCK not set", so);
	}

	/* Reentry from the content filter thread is not expected here */
	if (so->so_snd.sb_cfil_thread != NULL) {
		panic("%s sb_cfil_thread %p not NULL", __func__,
		    so->so_snd.sb_cfil_thread);
	}

	error = cfil_data_common(so, so->so_cfil, 1, to, data, control, flags);

	return error;
}
5324
5325 /*
5326 * Callback from socket layer sbappendxxx()
5327 */
5328 int
cfil_sock_data_in(struct socket * so,struct sockaddr * from,struct mbuf * data,struct mbuf * control,uint32_t flags,struct soflow_hash_entry * flow_entry)5329 cfil_sock_data_in(struct socket *so, struct sockaddr *from,
5330 struct mbuf *data, struct mbuf *control, uint32_t flags, struct soflow_hash_entry *flow_entry)
5331 {
5332 int error = 0;
5333 int new_filter_control_unit = 0;
5334
5335 if (NEED_DGRAM_FLOW_TRACKING(so)) {
5336 return cfil_sock_udp_handle_data(FALSE, so, NULL, from, data, control, flags, flow_entry);
5337 }
5338
5339 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
5340 /* Drop pre-existing TCP sockets if filter is enabled now */
5341 if (!DO_PRESERVE_CONNECTIONS && cfil_active_count > 0 && !SKIP_FILTER_FOR_TCP_SOCKET(so)) {
5342 new_filter_control_unit = necp_socket_get_content_filter_control_unit(so);
5343 if (new_filter_control_unit > 0) {
5344 CFIL_LOG(LOG_NOTICE, "CFIL: TCP(IN) <so %llx> - filter state changed - dropped pre-existing flow", (uint64_t)VM_KERNEL_ADDRPERM(so));
5345 return EPIPE;
5346 }
5347 }
5348 return 0;
5349 }
5350
5351 /* Drop pre-existing TCP sockets when filter state changed */
5352 new_filter_control_unit = necp_socket_get_content_filter_control_unit(so);
5353 if (new_filter_control_unit > 0 && new_filter_control_unit != so->so_cfil->cfi_filter_control_unit && !SKIP_FILTER_FOR_TCP_SOCKET(so)) {
5354 if (DO_PRESERVE_CONNECTIONS || (so->so_cfil->cfi_filter_policy_gencount == necp_socket_get_policy_gencount(so))) {
5355 // CFIL state has changed, but preserve the flow intentionally or if this is not a result of NECP policy change
5356 so->so_cfil->cfi_filter_control_unit = new_filter_control_unit;
5357 } else {
5358 CFIL_LOG(LOG_NOTICE, "CFIL: TCP(IN) <so %llx> - filter state changed - dropped pre-existing flow (old state 0x%x new state 0x%x)",
5359 (uint64_t)VM_KERNEL_ADDRPERM(so),
5360 so->so_cfil->cfi_filter_control_unit, new_filter_control_unit);
5361 return EPIPE;
5362 }
5363 }
5364
5365 /*
5366 * Pass initial data for TFO.
5367 */
5368 if (IS_INITIAL_TFO_DATA(so)) {
5369 return 0;
5370 }
5371
5372 socket_lock_assert_owned(so);
5373
5374 if (so->so_cfil->cfi_flags & CFIF_DROP) {
5375 CFIL_LOG(LOG_ERR, "so %llx drop set",
5376 (uint64_t)VM_KERNEL_ADDRPERM(so));
5377 return EPIPE;
5378 }
5379 if (control != NULL) {
5380 CFIL_LOG(LOG_ERR, "so %llx control",
5381 (uint64_t)VM_KERNEL_ADDRPERM(so));
5382 OSIncrementAtomic(&cfil_stats.cfs_data_in_control);
5383 }
5384 if (data->m_type == MT_OOBDATA) {
5385 CFIL_LOG(LOG_ERR, "so %llx MSG_OOB",
5386 (uint64_t)VM_KERNEL_ADDRPERM(so));
5387 OSIncrementAtomic(&cfil_stats.cfs_data_in_oob);
5388 }
5389 error = cfil_data_common(so, so->so_cfil, 0, from, data, control, flags);
5390
5391 return error;
5392 }
5393
5394 /*
5395 * Callback from socket layer soshutdownxxx()
5396 *
5397 * We may delay the shutdown write if there's outgoing data in process.
5398 *
5399 * There is no point in delaying the shutdown read because the process
5400 * indicated that it does not want to read anymore data.
5401 */
int
cfil_sock_shutdown(struct socket *so, int *how)
{
	int error = 0;

	/* Datagram sockets are handled by the flow-tracking (UDP) path */
	if (NEED_DGRAM_FLOW_TRACKING(so)) {
		return cfil_sock_udp_shutdown(so, how);
	}

	if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
		goto done;
	}

	socket_lock_assert_owned(so);

	CFIL_LOG(LOG_INFO, "so %llx how %d",
	    (uint64_t)VM_KERNEL_ADDRPERM(so), *how);

	/*
	 * Check the state of the socket before the content filter
	 */
	if (*how != SHUT_WR && (so->so_state & SS_CANTRCVMORE) != 0) {
		/* read already shut down */
		error = ENOTCONN;
		goto done;
	}
	if (*how != SHUT_RD && (so->so_state & SS_CANTSENDMORE) != 0) {
		/* write already shut down */
		error = ENOTCONN;
		goto done;
	}

	if ((so->so_cfil->cfi_flags & CFIF_DROP) != 0) {
		/* Flow already condemned; nothing to delay for */
		CFIL_LOG(LOG_ERR, "so %llx drop set",
		    (uint64_t)VM_KERNEL_ADDRPERM(so));
		goto done;
	}

	/*
	 * shutdown read: SHUT_RD or SHUT_RDWR
	 */
	if (*how != SHUT_WR) {
		/* Reject duplicate read shutdown */
		if (so->so_cfil->cfi_flags & CFIF_SHUT_RD) {
			error = ENOTCONN;
			goto done;
		}
		so->so_cfil->cfi_flags |= CFIF_SHUT_RD;
		cfil_sock_notify_shutdown(so, SHUT_RD);
	}
	/*
	 * shutdown write: SHUT_WR or SHUT_RDWR
	 */
	if (*how != SHUT_RD) {
		/* Reject duplicate write shutdown */
		if (so->so_cfil->cfi_flags & CFIF_SHUT_WR) {
			error = ENOTCONN;
			goto done;
		}
		so->so_cfil->cfi_flags |= CFIF_SHUT_WR;
		cfil_sock_notify_shutdown(so, SHUT_WR);
		/*
		 * When outgoing data is pending, we delay the shutdown at the
		 * protocol level until the content filters give the final
		 * verdict on the pending data.
		 */
		if (cfil_sock_data_pending(&so->so_snd) != 0) {
			/*
			 * When shutting down the read and write sides at once
			 * we can proceed to the final shutdown of the read
			 * side. Otherwise, we just return.
			 */
			if (*how == SHUT_WR) {
				/* Tell the caller not to shut the protocol down yet */
				error = EJUSTRETURN;
			} else if (*how == SHUT_RDWR) {
				/* Downgrade: only the read side proceeds now */
				*how = SHUT_RD;
			}
		}
	}
done:
	return error;
}
5482
5483 /*
5484 * This is called when the socket is closed and there is no more
5485 * opportunity for filtering
5486 */
5487 void
cfil_sock_is_closed(struct socket * so)5488 cfil_sock_is_closed(struct socket *so)
5489 {
5490 errno_t error = 0;
5491 int kcunit;
5492
5493 if (NEED_DGRAM_FLOW_TRACKING(so)) {
5494 cfil_sock_udp_is_closed(so);
5495 return;
5496 }
5497
5498 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
5499 return;
5500 }
5501
5502 CFIL_LOG(LOG_INFO, "so %llx", (uint64_t)VM_KERNEL_ADDRPERM(so));
5503
5504 socket_lock_assert_owned(so);
5505
5506 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
5507 /* Let the filters know of the closing */
5508 error = cfil_dispatch_closed_event(so, so->so_cfil, kcunit);
5509 }
5510
5511 /* Last chance to push passed data out */
5512 error = cfil_acquire_sockbuf(so, so->so_cfil, 1);
5513 if (error == 0) {
5514 cfil_service_inject_queue(so, so->so_cfil, 1);
5515 }
5516 cfil_release_sockbuf(so, 1);
5517
5518 if (so->so_cfil != NULL) {
5519 so->so_cfil->cfi_flags |= CFIF_SOCK_CLOSED;
5520 }
5521
5522 /* Pending data needs to go */
5523 cfil_flush_queues(so, so->so_cfil);
5524
5525 CFIL_INFO_VERIFY(so->so_cfil);
5526 }
5527
5528 /*
5529 * This is called when the socket is disconnected so let the filters
5530 * know about the disconnection and that no more data will come
5531 *
5532 * The how parameter has the same values as soshutown()
5533 */
5534 void
cfil_sock_notify_shutdown(struct socket * so,int how)5535 cfil_sock_notify_shutdown(struct socket *so, int how)
5536 {
5537 errno_t error = 0;
5538 int kcunit;
5539
5540 if (NEED_DGRAM_FLOW_TRACKING(so)) {
5541 cfil_sock_udp_notify_shutdown(so, how, 0, 0);
5542 return;
5543 }
5544
5545 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
5546 return;
5547 }
5548
5549 CFIL_LOG(LOG_INFO, "so %llx how %d",
5550 (uint64_t)VM_KERNEL_ADDRPERM(so), how);
5551
5552 socket_lock_assert_owned(so);
5553
5554 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
5555 /* Disconnect incoming side */
5556 if (how != SHUT_WR) {
5557 error = cfil_dispatch_disconnect_event(so, so->so_cfil, kcunit, 0);
5558 }
5559 /* Disconnect outgoing side */
5560 if (how != SHUT_RD) {
5561 error = cfil_dispatch_disconnect_event(so, so->so_cfil, kcunit, 1);
5562 }
5563 }
5564 }
5565
5566 static int
cfil_filters_attached(struct socket * so)5567 cfil_filters_attached(struct socket *so)
5568 {
5569 struct cfil_entry *entry;
5570 uint32_t kcunit;
5571 int attached = 0;
5572
5573 if (NEED_DGRAM_FLOW_TRACKING(so)) {
5574 return cfil_filters_udp_attached(so, FALSE);
5575 }
5576
5577 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
5578 return 0;
5579 }
5580
5581 socket_lock_assert_owned(so);
5582
5583 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
5584 entry = &so->so_cfil->cfi_entries[kcunit - 1];
5585
5586 /* Are we attached to the filter? */
5587 if (entry->cfe_filter == NULL) {
5588 continue;
5589 }
5590 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) {
5591 continue;
5592 }
5593 if ((entry->cfe_flags & CFEF_CFIL_DETACHED) != 0) {
5594 continue;
5595 }
5596 attached = 1;
5597 break;
5598 }
5599
5600 return attached;
5601 }
5602
5603 /*
5604 * This is called when the socket is closed and we are waiting for
5605 * the filters to gives the final pass or drop
5606 */
void
cfil_sock_close_wait(struct socket *so)
{
	lck_mtx_t *mutex_held;
	struct timespec ts;
	int error;

	/* Datagram sockets are handled by the flow-tracking (UDP) path */
	if (NEED_DGRAM_FLOW_TRACKING(so)) {
		cfil_sock_udp_close_wait(so);
		return;
	}

	if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
		return;
	}

	// This flow does not need to wait for close ack from user-space
	if (IS_NO_CLOSE_WAIT(so->so_cfil)) {
		if (so->so_cfil->cfi_debug) {
			cfil_info_log(LOG_ERR, so->so_cfil, "CFIL: SKIP CLOSE WAIT");
		}
		return;
	}

	CFIL_LOG(LOG_INFO, "so %llx", (uint64_t)VM_KERNEL_ADDRPERM(so));

	/* msleep() below needs the mutex protecting this socket */
	if (so->so_proto->pr_getlock != NULL) {
		mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
	} else {
		mutex_held = so->so_proto->pr_domain->dom_mtx;
	}
	LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);

	while (cfil_filters_attached(so)) {
		/*
		 * Notify the filters we are going away so they can detach
		 */
		cfil_sock_notify_shutdown(so, SHUT_RDWR);

		/*
		 * Make sure we need to wait after the filter are notified
		 * of the disconnection
		 */
		if (cfil_filters_attached(so) == 0) {
			break;
		}

		CFIL_LOG(LOG_INFO, "so %llx waiting",
		    (uint64_t)VM_KERNEL_ADDRPERM(so));

		/* cfil_close_wait_timeout is in milliseconds */
		ts.tv_sec = cfil_close_wait_timeout / 1000;
		ts.tv_nsec = (cfil_close_wait_timeout % 1000) *
		    NSEC_PER_USEC * 1000;

		OSIncrementAtomic(&cfil_stats.cfs_close_wait);
		so->so_cfil->cfi_flags |= CFIF_CLOSE_WAIT;
		/* Sleep until the last filter detaches (wakeup) or timeout */
		error = msleep((caddr_t)so->so_cfil, mutex_held,
		    PSOCK | PCATCH, "cfil_sock_close_wait", &ts);

		// Woke up from sleep, validate if cfil_info is still valid
		if (so->so_cfil == NULL) {
			// cfil_info is not valid, do not continue
			return;
		}

		so->so_cfil->cfi_flags &= ~CFIF_CLOSE_WAIT;

		CFIL_LOG(LOG_NOTICE, "so %llx timed out %d",
		    (uint64_t)VM_KERNEL_ADDRPERM(so), (error != 0));

		/*
		 * Force close in case of timeout
		 */
		if (error != 0) {
			OSIncrementAtomic(&cfil_stats.cfs_close_wait_timeout);
			break;
		}
	}
}
5686
5687 /*
5688 * Returns the size of the data held by the content filter by using
5689 */
5690 int32_t
cfil_sock_data_pending(struct sockbuf * sb)5691 cfil_sock_data_pending(struct sockbuf *sb)
5692 {
5693 struct socket *so = sb->sb_so;
5694 uint64_t pending = 0;
5695
5696 if (NEED_DGRAM_FLOW_TRACKING(so)) {
5697 return cfil_sock_udp_data_pending(sb, FALSE);
5698 }
5699
5700 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil != NULL) {
5701 struct cfi_buf *cfi_buf;
5702
5703 socket_lock_assert_owned(so);
5704
5705 if ((sb->sb_flags & SB_RECV) == 0) {
5706 cfi_buf = &so->so_cfil->cfi_snd;
5707 } else {
5708 cfi_buf = &so->so_cfil->cfi_rcv;
5709 }
5710
5711 pending = cfi_buf->cfi_pending_last -
5712 cfi_buf->cfi_pending_first;
5713
5714 /*
5715 * If we are limited by the "chars of mbufs used" roughly
5716 * adjust so we won't overcommit
5717 */
5718 if (pending > (uint64_t)cfi_buf->cfi_pending_mbcnt) {
5719 pending = cfi_buf->cfi_pending_mbcnt;
5720 }
5721 }
5722
5723 VERIFY(pending < INT32_MAX);
5724
5725 return (int32_t)(pending);
5726 }
5727
5728 /*
5729 * Return the socket buffer space used by data being held by content filters
5730 * so processes won't clog the socket buffer
5731 */
5732 int32_t
cfil_sock_data_space(struct sockbuf * sb)5733 cfil_sock_data_space(struct sockbuf *sb)
5734 {
5735 struct socket *so = sb->sb_so;
5736 uint64_t pending = 0;
5737
5738 if (NEED_DGRAM_FLOW_TRACKING(so)) {
5739 return cfil_sock_udp_data_pending(sb, TRUE);
5740 }
5741
5742 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil != NULL &&
5743 so->so_snd.sb_cfil_thread != current_thread()) {
5744 struct cfi_buf *cfi_buf;
5745
5746 socket_lock_assert_owned(so);
5747
5748 if ((sb->sb_flags & SB_RECV) == 0) {
5749 cfi_buf = &so->so_cfil->cfi_snd;
5750 } else {
5751 cfi_buf = &so->so_cfil->cfi_rcv;
5752 }
5753
5754 pending = cfi_buf->cfi_pending_last -
5755 cfi_buf->cfi_pending_first;
5756
5757 /*
5758 * If we are limited by the "chars of mbufs used" roughly
5759 * adjust so we won't overcommit
5760 */
5761 if ((uint64_t)cfi_buf->cfi_pending_mbcnt > pending) {
5762 pending = cfi_buf->cfi_pending_mbcnt;
5763 }
5764
5765 VERIFY(pending < INT32_MAX);
5766 }
5767
5768 return (int32_t)(pending);
5769 }
5770
5771 /*
5772 * A callback from the socket and protocol layer when data becomes
5773 * available in the socket buffer to give a chance for the content filter
5774 * to re-inject data that was held back
5775 */
5776 void
cfil_sock_buf_update(struct sockbuf * sb)5777 cfil_sock_buf_update(struct sockbuf *sb)
5778 {
5779 int outgoing;
5780 int error;
5781 struct socket *so = sb->sb_so;
5782
5783 if (NEED_DGRAM_FLOW_TRACKING(so)) {
5784 cfil_sock_udp_buf_update(sb);
5785 return;
5786 }
5787
5788 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
5789 return;
5790 }
5791
5792 if (!cfil_sbtrim) {
5793 return;
5794 }
5795
5796 socket_lock_assert_owned(so);
5797
5798 if ((sb->sb_flags & SB_RECV) == 0) {
5799 if ((so->so_cfil->cfi_flags & CFIF_RETRY_INJECT_OUT) == 0) {
5800 return;
5801 }
5802 outgoing = 1;
5803 OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_retry);
5804 } else {
5805 if ((so->so_cfil->cfi_flags & CFIF_RETRY_INJECT_IN) == 0) {
5806 return;
5807 }
5808 outgoing = 0;
5809 OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_retry);
5810 }
5811
5812 CFIL_LOG(LOG_NOTICE, "so %llx outgoing %d",
5813 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);
5814
5815 error = cfil_acquire_sockbuf(so, so->so_cfil, outgoing);
5816 if (error == 0) {
5817 cfil_service_inject_queue(so, so->so_cfil, outgoing);
5818 }
5819 cfil_release_sockbuf(so, outgoing);
5820 }
5821
5822 int
sysctl_cfil_filter_list(struct sysctl_oid * oidp,void * arg1,int arg2,struct sysctl_req * req)5823 sysctl_cfil_filter_list(struct sysctl_oid *oidp, void *arg1, int arg2,
5824 struct sysctl_req *req)
5825 {
5826 #pragma unused(oidp, arg1, arg2)
5827 int error = 0;
5828 size_t len = 0;
5829 u_int32_t i;
5830
5831 /* Read only */
5832 if (req->newptr != USER_ADDR_NULL) {
5833 return EPERM;
5834 }
5835
5836 cfil_rw_lock_shared(&cfil_lck_rw);
5837
5838 for (i = 0; i < MAX_CONTENT_FILTER; i++) {
5839 struct cfil_filter_stat filter_stat;
5840 struct content_filter *cfc = content_filters[i];
5841
5842 if (cfc == NULL) {
5843 continue;
5844 }
5845
5846 /* If just asking for the size */
5847 if (req->oldptr == USER_ADDR_NULL) {
5848 len += sizeof(struct cfil_filter_stat);
5849 continue;
5850 }
5851
5852 bzero(&filter_stat, sizeof(struct cfil_filter_stat));
5853 filter_stat.cfs_len = sizeof(struct cfil_filter_stat);
5854 filter_stat.cfs_filter_id = cfc->cf_kcunit;
5855 filter_stat.cfs_flags = cfc->cf_flags;
5856 filter_stat.cfs_sock_count = cfc->cf_sock_count;
5857 filter_stat.cfs_necp_control_unit = cfc->cf_necp_control_unit;
5858
5859 error = SYSCTL_OUT(req, &filter_stat,
5860 sizeof(struct cfil_filter_stat));
5861 if (error != 0) {
5862 break;
5863 }
5864 }
5865 /* If just asking for the size */
5866 if (req->oldptr == USER_ADDR_NULL) {
5867 req->oldidx = len;
5868 }
5869
5870 cfil_rw_unlock_shared(&cfil_lck_rw);
5871
5872 if (cfil_log_level >= LOG_DEBUG) {
5873 if (req->oldptr != USER_ADDR_NULL) {
5874 for (i = 1; i <= MAX_CONTENT_FILTER; i++) {
5875 cfil_filter_show(i);
5876 }
5877 }
5878 }
5879
5880 return error;
5881 }
5882
/*
 * Sysctl handler: copy out one cfil_sock_stat per attached socket,
 * including per-filter entry statistics for each direction.
 */
static int
sysctl_cfil_sock_list(struct sysctl_oid *oidp, void *arg1, int arg2,
    struct sysctl_req *req)
{
#pragma unused(oidp, arg1, arg2)
	int error = 0;
	u_int32_t i;
	struct cfil_info *cfi;

	/* Read only */
	if (req->newptr != USER_ADDR_NULL) {
		return EPERM;
	}

	cfil_rw_lock_shared(&cfil_lck_rw);

	/*
	 * If just asking for the size,
	 */
	if (req->oldptr == USER_ADDR_NULL) {
		req->oldidx = cfil_sock_attached_count *
		    sizeof(struct cfil_sock_stat);
		/* Bump the length in case new sockets gets attached */
		req->oldidx += req->oldidx >> 3;
		goto done;
	}

	/* Walk every attached socket and emit its statistics */
	TAILQ_FOREACH(cfi, &cfil_sock_head, cfi_link) {
		struct cfil_entry *entry;
		struct cfil_sock_stat stat;
		struct socket *so = cfi->cfi_so;

		bzero(&stat, sizeof(struct cfil_sock_stat));
		stat.cfs_len = sizeof(struct cfil_sock_stat);
		stat.cfs_sock_id = cfi->cfi_sock_id;
		stat.cfs_flags = cfi->cfi_flags;

		/* Socket identity (pids/uuids) only if the socket is still valid */
		if (so != NULL && so->so_proto != NULL && so->so_proto->pr_domain != NULL) {
			stat.cfs_pid = so->last_pid;
			memcpy(stat.cfs_uuid, so->last_uuid,
			    sizeof(uuid_t));
			/* Effective identity differs when the socket is delegated */
			if (so->so_flags & SOF_DELEGATED) {
				stat.cfs_e_pid = so->e_pid;
				memcpy(stat.cfs_e_uuid, so->e_uuid,
				    sizeof(uuid_t));
			} else {
				stat.cfs_e_pid = so->last_pid;
				memcpy(stat.cfs_e_uuid, so->last_uuid,
				    sizeof(uuid_t));
			}

			stat.cfs_sock_family = SOCK_DOM(so);
			stat.cfs_sock_type = SOCK_TYPE(so);
			stat.cfs_sock_protocol = GET_SO_PROTO(so);
		}

		/* Send-side pending/injection accounting */
		stat.cfs_snd.cbs_pending_first =
		    cfi->cfi_snd.cfi_pending_first;
		stat.cfs_snd.cbs_pending_last =
		    cfi->cfi_snd.cfi_pending_last;
		stat.cfs_snd.cbs_inject_q_len =
		    cfil_queue_len(&cfi->cfi_snd.cfi_inject_q);
		stat.cfs_snd.cbs_pass_offset =
		    cfi->cfi_snd.cfi_pass_offset;

		/* Receive-side pending/injection accounting */
		stat.cfs_rcv.cbs_pending_first =
		    cfi->cfi_rcv.cfi_pending_first;
		stat.cfs_rcv.cbs_pending_last =
		    cfi->cfi_rcv.cfi_pending_last;
		stat.cfs_rcv.cbs_inject_q_len =
		    cfil_queue_len(&cfi->cfi_rcv.cfi_inject_q);
		stat.cfs_rcv.cbs_pass_offset =
		    cfi->cfi_rcv.cfi_pass_offset;

		/* Per-filter entry statistics (one slot per possible filter) */
		for (i = 0; i < MAX_CONTENT_FILTER; i++) {
			struct cfil_entry_stat *estat;
			struct cfe_buf *ebuf;
			struct cfe_buf_stat *sbuf;

			entry = &cfi->cfi_entries[i];

			estat = &stat.ces_entries[i];

			estat->ces_len = sizeof(struct cfil_entry_stat);
			/* filter id 0 means this slot is unattached */
			estat->ces_filter_id = entry->cfe_filter ?
			    entry->cfe_filter->cf_kcunit : 0;
			estat->ces_flags = entry->cfe_flags;
			estat->ces_necp_control_unit =
			    entry->cfe_necp_control_unit;

			estat->ces_last_event.tv_sec =
			    (int64_t)entry->cfe_last_event.tv_sec;
			estat->ces_last_event.tv_usec =
			    (int64_t)entry->cfe_last_event.tv_usec;

			estat->ces_last_action.tv_sec =
			    (int64_t)entry->cfe_last_action.tv_sec;
			estat->ces_last_action.tv_usec =
			    (int64_t)entry->cfe_last_action.tv_usec;

			/* Send-side queue offsets for this filter entry */
			ebuf = &entry->cfe_snd;
			sbuf = &estat->ces_snd;
			sbuf->cbs_pending_first =
			    cfil_queue_offset_first(&ebuf->cfe_pending_q);
			sbuf->cbs_pending_last =
			    cfil_queue_offset_last(&ebuf->cfe_pending_q);
			sbuf->cbs_ctl_first =
			    cfil_queue_offset_first(&ebuf->cfe_ctl_q);
			sbuf->cbs_ctl_last =
			    cfil_queue_offset_last(&ebuf->cfe_ctl_q);
			sbuf->cbs_pass_offset = ebuf->cfe_pass_offset;
			sbuf->cbs_peek_offset = ebuf->cfe_peek_offset;
			sbuf->cbs_peeked = ebuf->cfe_peeked;

			/* Receive-side queue offsets for this filter entry */
			ebuf = &entry->cfe_rcv;
			sbuf = &estat->ces_rcv;
			sbuf->cbs_pending_first =
			    cfil_queue_offset_first(&ebuf->cfe_pending_q);
			sbuf->cbs_pending_last =
			    cfil_queue_offset_last(&ebuf->cfe_pending_q);
			sbuf->cbs_ctl_first =
			    cfil_queue_offset_first(&ebuf->cfe_ctl_q);
			sbuf->cbs_ctl_last =
			    cfil_queue_offset_last(&ebuf->cfe_ctl_q);
			sbuf->cbs_pass_offset = ebuf->cfe_pass_offset;
			sbuf->cbs_peek_offset = ebuf->cfe_peek_offset;
			sbuf->cbs_peeked = ebuf->cfe_peeked;
		}
		error = SYSCTL_OUT(req, &stat,
		    sizeof(struct cfil_sock_stat));
		if (error != 0) {
			break;
		}
	}
done:
	cfil_rw_unlock_shared(&cfil_lck_rw);

	if (cfil_log_level >= LOG_DEBUG) {
		if (req->oldptr != USER_ADDR_NULL) {
			cfil_info_show();
		}
	}

	return error;
}
6028
6029 /*
6030 * UDP Socket Support
6031 */
6032 static void
cfil_hash_entry_log(int level,struct socket * so,struct soflow_hash_entry * entry,uint64_t sockId,const char * msg)6033 cfil_hash_entry_log(int level, struct socket *so, struct soflow_hash_entry *entry, uint64_t sockId, const char* msg)
6034 {
6035 char local[MAX_IPv6_STR_LEN + 6];
6036 char remote[MAX_IPv6_STR_LEN + 6];
6037 const void *addr;
6038
6039 // No sock or not UDP, no-op
6040 if (so == NULL || entry == NULL) {
6041 return;
6042 }
6043
6044 local[0] = remote[0] = 0x0;
6045
6046 switch (entry->soflow_family) {
6047 case AF_INET6:
6048 addr = &entry->soflow_laddr.addr6;
6049 inet_ntop(AF_INET6, addr, local, sizeof(local));
6050 addr = &entry->soflow_faddr.addr6;
6051 inet_ntop(AF_INET6, addr, remote, sizeof(local));
6052 break;
6053 case AF_INET:
6054 addr = &entry->soflow_laddr.addr46.ia46_addr4.s_addr;
6055 inet_ntop(AF_INET, addr, local, sizeof(local));
6056 addr = &entry->soflow_faddr.addr46.ia46_addr4.s_addr;
6057 inet_ntop(AF_INET, addr, remote, sizeof(local));
6058 break;
6059 default:
6060 return;
6061 }
6062
6063 CFIL_LOG(level, "<%s>: <%s(%d) so %llx cfil %p, entry %p, sockID %llu <%llx> feat_ctxt_id <%llu> lport %d fport %d laddr %s faddr %s hash %X",
6064 msg,
6065 IS_UDP(so) ? "UDP" : "proto", GET_SO_PROTO(so),
6066 (uint64_t)VM_KERNEL_ADDRPERM(so), entry->soflow_feat_ctxt, entry, sockId, sockId, entry->soflow_feat_ctxt_id,
6067 ntohs(entry->soflow_lport), ntohs(entry->soflow_fport), local, remote,
6068 entry->soflow_flowhash);
6069 }
6070
6071 static void
cfil_inp_log(int level,struct socket * so,const char * msg)6072 cfil_inp_log(int level, struct socket *so, const char* msg)
6073 {
6074 struct inpcb *inp = NULL;
6075 struct sockaddr_in *sin = NULL;
6076 struct sockaddr_in6 *sin6 = NULL;
6077 char local[MAX_IPv6_STR_LEN + 6];
6078 char remote[MAX_IPv6_STR_LEN + 6];
6079 ushort lport = 0;
6080 ushort fport = 0;
6081 const void *addr;
6082
6083 if (so == NULL) {
6084 return;
6085 }
6086
6087 inp = sotoinpcb(so);
6088 if (inp == NULL) {
6089 return;
6090 }
6091
6092 local[0] = remote[0] = 0x0;
6093
6094 if (inp->inp_vflag & INP_IPV6) {
6095 addr = &inp->in6p_laddr.s6_addr32;
6096 inet_ntop(AF_INET6, addr, local, sizeof(local));
6097 addr = &inp->in6p_faddr.s6_addr32;
6098 inet_ntop(AF_INET6, addr, remote, sizeof(remote));
6099 } else {
6100 addr = &inp->inp_laddr.s_addr;
6101 inet_ntop(AF_INET, addr, local, sizeof(local));
6102 addr = &inp->inp_faddr.s_addr;
6103 inet_ntop(AF_INET, addr, remote, sizeof(remote));
6104 }
6105 lport = inp->inp_lport;
6106 fport = inp->inp_fport;
6107
6108 if (so->so_cfil && so->so_cfil->cfi_so_attach_faddr.sa.sa_len > 0) {
6109 if (so->so_cfil->cfi_so_attach_faddr.sa.sa_family == AF_INET6) {
6110 sin6 = SIN6(&so->so_cfil->cfi_so_attach_faddr.sa);
6111 addr = &sin6->sin6_addr;
6112 inet_ntop(AF_INET6, addr, remote, sizeof(remote));
6113 fport = sin6->sin6_port;
6114 } else if (so->so_cfil->cfi_so_attach_faddr.sa.sa_family == AF_INET) {
6115 sin = SIN(&so->so_cfil->cfi_so_attach_faddr.sa);
6116 addr = &sin->sin_addr.s_addr;
6117 inet_ntop(AF_INET, addr, remote, sizeof(remote));
6118 fport = sin->sin_port;
6119 }
6120 }
6121 if (so->so_cfil && so->so_cfil->cfi_so_attach_laddr.sa.sa_len > 0) {
6122 if (so->so_cfil->cfi_so_attach_laddr.sa.sa_family == AF_INET6) {
6123 sin6 = SIN6(&so->so_cfil->cfi_so_attach_laddr.sa);
6124 addr = &sin6->sin6_addr;
6125 inet_ntop(AF_INET6, addr, local, sizeof(remote));
6126 fport = sin6->sin6_port;
6127 } else if (so->so_cfil->cfi_so_attach_laddr.sa.sa_family == AF_INET) {
6128 sin = SIN(&so->so_cfil->cfi_so_attach_laddr.sa);
6129 addr = &sin->sin_addr.s_addr;
6130 inet_ntop(AF_INET, addr, local, sizeof(remote));
6131 fport = sin->sin_port;
6132 }
6133 }
6134
6135 if (so->so_cfil != NULL) {
6136 CFIL_LOG(level, "<%s>: <%s so %llx cfil %p - flags 0x%x 0x%x, sockID %llu <%llx>> lport %d fport %d laddr %s faddr %s",
6137 msg, IS_UDP(so) ? "UDP" : "TCP",
6138 (uint64_t)VM_KERNEL_ADDRPERM(so), so->so_cfil, inp->inp_flags, inp->inp_socket->so_flags, so->so_cfil->cfi_sock_id, so->so_cfil->cfi_sock_id,
6139 ntohs(lport), ntohs(fport), local, remote);
6140 } else {
6141 CFIL_LOG(level, "<%s>: <%s so %llx - flags 0x%x 0x%x> lport %d fport %d laddr %s faddr %s",
6142 msg, IS_UDP(so) ? "UDP" : "TCP",
6143 (uint64_t)VM_KERNEL_ADDRPERM(so), inp->inp_flags, inp->inp_socket->so_flags,
6144 ntohs(lport), ntohs(fport), local, remote);
6145 }
6146 }
6147
6148 static void
cfil_info_log(int level,struct cfil_info * cfil_info,const char * msg)6149 cfil_info_log(int level, struct cfil_info *cfil_info, const char* msg)
6150 {
6151 if (cfil_info == NULL) {
6152 return;
6153 }
6154
6155 if (cfil_info->cfi_hash_entry != NULL) {
6156 cfil_hash_entry_log(level, cfil_info->cfi_so, cfil_info->cfi_hash_entry, cfil_info->cfi_sock_id, msg);
6157 } else {
6158 cfil_inp_log(level, cfil_info->cfi_so, msg);
6159 }
6160 }
6161
/*
 * Detach a cfil_info from its datagram flow hash entry and release the
 * per-flow socket use count taken at attach time.
 */
static void
cfil_sock_udp_unlink_flow(struct socket *so, struct soflow_hash_entry *hash_entry, struct cfil_info *cfil_info)
{
	if (so == NULL || hash_entry == NULL || cfil_info == NULL) {
		return;
	}

	/* Each attached flow holds a use count on the socket; drop ours */
	if (so->so_flags & SOF_CONTENT_FILTER) {
		VERIFY(so->so_usecount > 0);
		so->so_usecount--;
	}

	// Hold exclusive lock before clearing cfil_info hash entry link
	cfil_rw_lock_exclusive(&cfil_lck_rw);

	cfil_info->cfi_hash_entry = NULL;

	if (cfil_info->cfi_debug) {
		CFIL_LOG(LOG_ERR, "CFIL <%s>: <so %llx> - use count %d",
		    IS_UDP(so) ? "UDP" : "TCP", (uint64_t)VM_KERNEL_ADDRPERM(so), so->so_usecount);
	}

	cfil_rw_unlock_exclusive(&cfil_lck_rw);
}
6186
6187 bool
check_port(struct sockaddr * addr,u_short port)6188 check_port(struct sockaddr *addr, u_short port)
6189 {
6190 struct sockaddr_in *sin = NULL;
6191 struct sockaddr_in6 *sin6 = NULL;
6192
6193 if (addr == NULL || port == 0) {
6194 return FALSE;
6195 }
6196
6197 switch (addr->sa_family) {
6198 case AF_INET:
6199 sin = SIN(addr);
6200 if (sin->sin_len < sizeof(*sin)) {
6201 return FALSE;
6202 }
6203 if (port == ntohs(sin->sin_port)) {
6204 return TRUE;
6205 }
6206 break;
6207 case AF_INET6:
6208 sin6 = SIN6(addr);
6209 if (sin6->sin6_len < sizeof(*sin6)) {
6210 return FALSE;
6211 }
6212 if (port == ntohs(sin6->sin6_port)) {
6213 return TRUE;
6214 }
6215 break;
6216 default:
6217 break;
6218 }
6219 return FALSE;
6220 }
6221
6222 cfil_sock_id_t
cfil_sock_id_from_datagram_socket(struct socket * so,struct sockaddr * local,struct sockaddr * remote)6223 cfil_sock_id_from_datagram_socket(struct socket *so, struct sockaddr *local, struct sockaddr *remote)
6224 {
6225 socket_lock_assert_owned(so);
6226
6227 if (so->so_flow_db == NULL) {
6228 return CFIL_SOCK_ID_NONE;
6229 }
6230 return (cfil_sock_id_t)soflow_db_get_feature_context_id(so->so_flow_db, local, remote);
6231 }
6232
/*
 * Return the cfil_info for a datagram flow, creating and attaching one if
 * the flow has none yet. Returns NULL when the flow must be dropped
 * (filter state changed, allocation/attach failure, or fatal attach
 * event dispatch error).
 */
static struct cfil_info *
cfil_sock_udp_get_info(struct socket *so, uint32_t filter_control_unit, bool outgoing, struct soflow_hash_entry *hash_entry,
    struct sockaddr *local, struct sockaddr *remote)
{
	int new_filter_control_unit = 0;
	struct cfil_info *cfil_info = NULL;

	errno_t error = 0;
	socket_lock_assert_owned(so);

	if (hash_entry == NULL || hash_entry->soflow_db == NULL) {
		return NULL;
	}

	/* Flow already has a cfil_info linked as its feature context */
	if (hash_entry->soflow_feat_ctxt != NULL && hash_entry->soflow_feat_ctxt_id != 0) {
		/* Drop pre-existing UDP flow if filter state changed */
		cfil_info = (struct cfil_info *) hash_entry->soflow_feat_ctxt;
		new_filter_control_unit = necp_socket_get_content_filter_control_unit(so);
		if (new_filter_control_unit > 0 &&
		    new_filter_control_unit != cfil_info->cfi_filter_control_unit) {
			if (DO_PRESERVE_CONNECTIONS || (cfil_info->cfi_filter_policy_gencount == necp_socket_get_policy_gencount(so))) {
				// CFIL state has changed, but preserve the flow intentionally or if this is not a result of NECP policy change
				cfil_info->cfi_filter_control_unit = new_filter_control_unit;
			} else {
				CFIL_LOG(LOG_NOTICE, "CFIL: UDP(%s) <so %llx> - filter state changed - dropped pre-existing flow (old state 0x%x new state 0x%x)",
				    outgoing ? "OUT" : "IN", (uint64_t)VM_KERNEL_ADDRPERM(so),
				    cfil_info->cfi_filter_control_unit, new_filter_control_unit);
				return NULL;
			}
		}
		return cfil_info;
	}

	/* First datagram on this flow: allocate fresh cfil state */
	cfil_info = cfil_info_alloc(so, hash_entry);
	if (cfil_info == NULL) {
		CFIL_LOG(LOG_ERR, "CFIL: <so %llx> UDP failed to alloc cfil_info", (uint64_t)VM_KERNEL_ADDRPERM(so));
		OSIncrementAtomic(&cfil_stats.cfs_sock_attach_no_mem);
		return NULL;
	}
	cfil_info->cfi_filter_control_unit = filter_control_unit;
	cfil_info->cfi_dir = outgoing ? CFS_CONNECTION_DIR_OUT : CFS_CONNECTION_DIR_IN;
	cfil_info->cfi_debug = DEBUG_FLOW(sotoinpcb(so), so, local, remote);
	if (cfil_info->cfi_debug) {
		CFIL_LOG(LOG_ERR, "CFIL: <so %llx> UDP (outgoing %d) - debug flow with port %d", (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing, cfil_log_port);
		CFIL_LOG(LOG_ERR, "CFIL: <so %llx> UDP so_gencnt %llx entry flowhash %x cfil %p sockID %llu <%llx>",
		    (uint64_t)VM_KERNEL_ADDRPERM(so), so->so_gencnt, hash_entry->soflow_flowhash, cfil_info, cfil_info->cfi_sock_id, cfil_info->cfi_sock_id);
	}

	/* Attach the flow to the filter unit; free the info on failure */
	if (cfil_info_attach_unit(so, filter_control_unit, cfil_info) == 0) {
		CFIL_INFO_FREE(cfil_info);
		CFIL_LOG(LOG_ERR, "CFIL: <so %llx> UDP cfil_info_attach_unit(%u) failed",
		    (uint64_t)VM_KERNEL_ADDRPERM(so), filter_control_unit);
		OSIncrementAtomic(&cfil_stats.cfs_sock_attach_failed);
		return NULL;
	}

	if (cfil_info->cfi_debug) {
		CFIL_LOG(LOG_ERR, "CFIL: UDP <so %llx> filter_control_unit %u sockID %llu <%llx> attached",
		    (uint64_t)VM_KERNEL_ADDRPERM(so),
		    filter_control_unit, cfil_info->cfi_sock_id, cfil_info->cfi_sock_id);
	}

	so->so_flags |= SOF_CONTENT_FILTER;
	OSIncrementAtomic(&cfil_stats.cfs_sock_attached);

	/* Hold a reference on the socket for each flow */
	so->so_usecount++;

	/* link cfil_info to flow */
	hash_entry->soflow_feat_ctxt = cfil_info;
	hash_entry->soflow_feat_ctxt_id = cfil_info->cfi_sock_id;

	if (cfil_info->cfi_debug) {
		cfil_info_log(LOG_ERR, cfil_info, "CFIL: ADDED");
	}

	/* Tell user-space filter agents about the new flow */
	error = cfil_dispatch_attach_event(so, cfil_info, 0,
	    outgoing ? CFS_CONNECTION_DIR_OUT : CFS_CONNECTION_DIR_IN);
	/* We can recover from flow control or out of memory errors */
	if (error != 0 && error != ENOBUFS && error != ENOMEM) {
		CFIL_LOG(LOG_ERR, "CFIL: UDP <so %llx> cfil_dispatch_attach_event failed <error %d>",
		    (uint64_t)VM_KERNEL_ADDRPERM(so), error);
		return NULL;
	}

	CFIL_INFO_VERIFY(cfil_info);
	return cfil_info;
}
6321
/*
 * Entry point for datagram data on a filtered socket. Finds (or creates)
 * the per-flow cfil_info and hands the data to the common filtering path.
 * Returns EPIPE when the flow cannot be filtered and must be dropped.
 */
errno_t
cfil_sock_udp_handle_data(bool outgoing, struct socket *so,
    struct sockaddr *local, struct sockaddr *remote,
    struct mbuf *data, struct mbuf *control, uint32_t flags,
    struct soflow_hash_entry *hash_entry)
{
#pragma unused(outgoing, so, local, remote, data, control, flags)
	errno_t error = 0;
	uint32_t filter_control_unit;
	struct cfil_info *cfil_info = NULL;

	socket_lock_assert_owned(so);

	/* No filter agent connected: pass the data through */
	if (cfil_active_count == 0) {
		CFIL_LOG(LOG_DEBUG, "CFIL: UDP no active filter");
		OSIncrementAtomic(&cfil_stats.cfs_sock_attach_in_vain);
		return error;
	}

	// Socket has been blessed
	if ((so->so_flags1 & SOF1_CONTENT_FILTER_SKIP) != 0) {
		return error;
	}

	/* NECP decides which filter control unit (if any) applies */
	filter_control_unit = necp_socket_get_content_filter_control_unit(so);
	if (filter_control_unit == 0) {
		CFIL_LOG(LOG_DEBUG, "CFIL: UDP failed to get control unit");
		return error;
	}

	if (filter_control_unit == NECP_FILTER_UNIT_NO_FILTER) {
		return error;
	}

	if ((filter_control_unit & NECP_MASK_USERSPACE_ONLY) != 0) {
		CFIL_LOG(LOG_DEBUG, "CFIL: UDP user space only");
		OSIncrementAtomic(&cfil_stats.cfs_sock_userspace_only);
		return error;
	}

	if (hash_entry == NULL) {
		CFIL_LOG(LOG_ERR, "CFIL: <so %llx> NULL soflow_hash_entry", (uint64_t)VM_KERNEL_ADDRPERM(so));
		return EPIPE;
	}

	if (hash_entry->soflow_db == NULL) {
		CFIL_LOG(LOG_ERR, "CFIL: <so %llx> NULL soflow_hash_entry db", (uint64_t)VM_KERNEL_ADDRPERM(so));
		return EPIPE;
	}

	/* Find or create the cfil state for this flow */
	cfil_info = cfil_sock_udp_get_info(so, filter_control_unit, outgoing, hash_entry, local, remote);
	if (cfil_info == NULL) {
		return EPIPE;
	}
	// Update last used timestamp, this is for flow Idle TO

	if (cfil_info->cfi_debug) {
		cfil_info_log(LOG_ERR, cfil_info, "CFIL: Got flow");
	}

	/* A filter already decided to drop this flow */
	if (cfil_info->cfi_flags & CFIF_DROP) {
		if (cfil_info->cfi_debug) {
			cfil_info_log(LOG_ERR, cfil_info, "CFIL: UDP DROP");
		}
		return EPIPE;
	}
	if (control != NULL) {
		OSIncrementAtomic(&cfil_stats.cfs_data_in_control);
	}
	if (data->m_type == MT_OOBDATA) {
		CFIL_LOG(LOG_ERR, "so %llx MSG_OOB",
		    (uint64_t)VM_KERNEL_ADDRPERM(so));
		OSIncrementAtomic(&cfil_stats.cfs_data_in_oob);
	}

	/* Shared TCP/UDP path: queue, peek and dispatch data events */
	error = cfil_data_common(so, cfil_info, outgoing, remote, data, control, flags);

	return error;
}
6401
/* Context passed to cfil_filters_udp_attached_per_flow() via soflow_db_apply() */
struct cfil_udp_attached_context {
	bool need_wait;        /* in: wait (msleep) on the first attached flow */
	lck_mtx_t *mutex_held; /* in: socket mutex handed to msleep() */
	int attached;          /* out: set to 1 when any flow is still attached */
};
6407
/*
 * Per-flow callback for cfil_filters_udp_attached(): report whether this
 * flow still has an attached filter and, when requested, sleep until the
 * flow finishes or the close-wait timeout fires. Returning false stops
 * the soflow_db_apply() iteration.
 */
static bool
cfil_filters_udp_attached_per_flow(struct socket *so,
    struct soflow_hash_entry *hash_entry,
    void *context)
{
	struct cfil_udp_attached_context *apply_context = NULL;
	struct cfil_info * __single cfil_info = NULL;
	struct cfil_entry *entry = NULL;
	uint64_t sock_flow_id = 0;
	struct timespec ts;
	errno_t error = 0;
	int kcunit;

	if (hash_entry->soflow_feat_ctxt == NULL || context == NULL) {
		return true;
	}

	cfil_info = hash_entry->soflow_feat_ctxt;
	apply_context = (struct cfil_udp_attached_context *)context;

	for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
		entry = &cfil_info->cfi_entries[kcunit - 1];

		/* Are we attached to the filter? */
		if (entry->cfe_filter == NULL) {
			continue;
		}

		if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) {
			continue;
		}
		if ((entry->cfe_flags & CFEF_CFIL_DETACHED) != 0) {
			continue;
		}

		if (apply_context->need_wait == TRUE) {
			if (cfil_info->cfi_debug) {
				cfil_info_log(LOG_ERR, cfil_info, "CFIL: UDP PER-FLOW WAIT FOR FLOW TO FINISH");
			}

			/* cfil_close_wait_timeout is in milliseconds */
			ts.tv_sec = cfil_close_wait_timeout / 1000;
			ts.tv_nsec = (cfil_close_wait_timeout % 1000) * NSEC_PER_USEC * 1000;

			OSIncrementAtomic(&cfil_stats.cfs_close_wait);
			cfil_info->cfi_flags |= CFIF_CLOSE_WAIT;
			/* Remember the flow id to revalidate the info after sleeping */
			sock_flow_id = cfil_info->cfi_sock_id;

			error = msleep((caddr_t)cfil_info, apply_context->mutex_held,
			    PSOCK | PCATCH, "cfil_filters_udp_attached_per_flow", &ts);

			// Woke up from sleep, validate if cfil_info is still valid
			if (so->so_flow_db == NULL ||
			    (cfil_info != soflow_db_get_feature_context(so->so_flow_db, sock_flow_id))) {
				// cfil_info is not valid, do not continue
				return false;
			}

			cfil_info->cfi_flags &= ~CFIF_CLOSE_WAIT;

			if (cfil_info->cfi_debug) {
				cfil_info_log(LOG_ERR, cfil_info, "CFIL: UDP PER-FLOW WAIT FOR FLOW DONE");
			}

			/*
			 * Force close in case of timeout
			 */
			if (error != 0) {
				OSIncrementAtomic(&cfil_stats.cfs_close_wait_timeout);

				if (cfil_info->cfi_debug) {
					cfil_info_log(LOG_ERR, cfil_info, "CFIL: UDP PER-FLOW WAIT FOR FLOW TIMED OUT, FORCE DETACH");
				}

				entry->cfe_flags |= CFEF_CFIL_DETACHED;
				return false;
			}
		}
		/* Found an attached flow; no need to visit the rest */
		apply_context->attached = 1;
		return false;
	}
	return true;
}
6490
6491 /*
6492 * Go through all UDP flows for specified socket and returns TRUE if
6493 * any flow is still attached. If need_wait is TRUE, wait on first
6494 * attached flow.
6495 */
6496 static int
cfil_filters_udp_attached(struct socket * so,bool need_wait)6497 cfil_filters_udp_attached(struct socket *so, bool need_wait)
6498 {
6499 struct cfil_udp_attached_context apply_context = { 0 };
6500 lck_mtx_t *mutex_held;
6501
6502 socket_lock_assert_owned(so);
6503
6504 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_flow_db != NULL) {
6505 if (so->so_proto->pr_getlock != NULL) {
6506 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
6507 } else {
6508 mutex_held = so->so_proto->pr_domain->dom_mtx;
6509 }
6510 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
6511
6512 apply_context.need_wait = need_wait;
6513 apply_context.mutex_held = mutex_held;
6514 soflow_db_apply(so->so_flow_db, cfil_filters_udp_attached_per_flow, (void *)&apply_context);
6515 }
6516
6517 return apply_context.attached;
6518 }
6519
/* Context passed to cfil_sock_udp_data_pending_per_flow() via soflow_db_apply() */
struct cfil_udp_data_pending_context {
	struct sockbuf *sb;     /* in: socket buffer selecting the direction */
	uint64_t total_pending; /* out: sum of pending bytes over all flows */
};
6524
6525 static bool
cfil_sock_udp_data_pending_per_flow(struct socket * so,struct soflow_hash_entry * hash_entry,void * context)6526 cfil_sock_udp_data_pending_per_flow(struct socket *so,
6527 struct soflow_hash_entry *hash_entry,
6528 void *context)
6529 {
6530 #pragma unused(so)
6531 struct cfil_udp_data_pending_context *apply_context = NULL;
6532 struct cfil_info * __single cfil_info = NULL;
6533 struct cfi_buf *cfi_buf;
6534
6535 uint64_t pending = 0;
6536
6537 if (hash_entry->soflow_feat_ctxt == NULL || context == NULL) {
6538 return true;
6539 }
6540
6541 cfil_info = hash_entry->soflow_feat_ctxt;
6542 apply_context = (struct cfil_udp_data_pending_context *)context;
6543
6544 if (apply_context->sb == NULL) {
6545 return true;
6546 }
6547
6548 if ((apply_context->sb->sb_flags & SB_RECV) == 0) {
6549 cfi_buf = &cfil_info->cfi_snd;
6550 } else {
6551 cfi_buf = &cfil_info->cfi_rcv;
6552 }
6553
6554 pending = cfi_buf->cfi_pending_last - cfi_buf->cfi_pending_first;
6555 /*
6556 * If we are limited by the "chars of mbufs used" roughly
6557 * adjust so we won't overcommit
6558 */
6559 if ((uint64_t)cfi_buf->cfi_pending_mbcnt > pending) {
6560 pending = cfi_buf->cfi_pending_mbcnt;
6561 }
6562
6563 apply_context->total_pending += pending;
6564 return true;
6565 }
6566
6567 int32_t
cfil_sock_udp_data_pending(struct sockbuf * sb,bool check_thread)6568 cfil_sock_udp_data_pending(struct sockbuf *sb, bool check_thread)
6569 {
6570 struct cfil_udp_data_pending_context apply_context = { 0 };
6571 struct socket *so = sb->sb_so;
6572
6573 socket_lock_assert_owned(so);
6574
6575 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_flow_db != NULL &&
6576 (check_thread == FALSE || so->so_snd.sb_cfil_thread != current_thread())) {
6577 apply_context.sb = sb;
6578 soflow_db_apply(so->so_flow_db, cfil_sock_udp_data_pending_per_flow, (void *)&apply_context);
6579
6580 VERIFY(apply_context.total_pending < INT32_MAX);
6581 }
6582
6583 return (int32_t)(apply_context.total_pending);
6584 }
6585
/* Context passed to cfil_sock_udp_notify_shutdown_per_flow() via soflow_db_apply() */
struct cfil_udp_notify_shutdown_context {
	int how;        /* in: SHUT_RD / SHUT_WR / SHUT_RDWR */
	int drop_flag;  /* in: cfi_flags bit marking a dropped flow */
	int shut_flag;  /* in: cfi_flags bit marking an already-shut flow */
	int done_count; /* out: number of flows already dropped or newly shut */
};
6592
/*
 * Per-flow callback: mark the flow as shut and dispatch disconnect events
 * to every filter unit for the directions implied by 'how'. Flows already
 * dropped or already shut only bump/skip the done count.
 */
static bool
cfil_sock_udp_notify_shutdown_per_flow(struct socket *so,
    struct soflow_hash_entry *hash_entry,
    void *context)
{
	struct cfil_udp_notify_shutdown_context *apply_context = NULL;
	struct cfil_info * __single cfil_info = NULL;
	errno_t error = 0;
	int kcunit;

	if (hash_entry->soflow_feat_ctxt == NULL || context == NULL) {
		return true;
	}

	cfil_info = hash_entry->soflow_feat_ctxt;
	apply_context = (struct cfil_udp_notify_shutdown_context *)context;

	// This flow is marked as DROP
	if (cfil_info->cfi_flags & apply_context->drop_flag) {
		apply_context->done_count++;
		return true;
	}

	// This flow has been shut already, skip
	if (cfil_info->cfi_flags & apply_context->shut_flag) {
		return true;
	}
	// Mark flow as shut
	cfil_info->cfi_flags |= apply_context->shut_flag;
	apply_context->done_count++;

	/* Notify every filter unit for the requested direction(s) */
	for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
		/* Disconnect incoming side */
		if (apply_context->how != SHUT_WR) {
			error = cfil_dispatch_disconnect_event(so, cfil_info, kcunit, 0);
		}
		/* Disconnect outgoing side */
		if (apply_context->how != SHUT_RD) {
			error = cfil_dispatch_disconnect_event(so, cfil_info, kcunit, 1);
		}
	}

	if (cfil_info->cfi_debug) {
		cfil_info_log(LOG_ERR, cfil_info, "CFIL: UDP PER-FLOW NOTIFY_SHUTDOWN");
	}

	return true;
}
6641
6642 int
cfil_sock_udp_notify_shutdown(struct socket * so,int how,int drop_flag,int shut_flag)6643 cfil_sock_udp_notify_shutdown(struct socket *so, int how, int drop_flag, int shut_flag)
6644 {
6645 struct cfil_udp_notify_shutdown_context apply_context = { 0 };
6646 errno_t error = 0;
6647
6648 socket_lock_assert_owned(so);
6649
6650 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_flow_db != NULL) {
6651 apply_context.how = how;
6652 apply_context.drop_flag = drop_flag;
6653 apply_context.shut_flag = shut_flag;
6654
6655 soflow_db_apply(so->so_flow_db, cfil_sock_udp_notify_shutdown_per_flow, (void *)&apply_context);
6656 }
6657
6658 if (apply_context.done_count == 0) {
6659 error = ENOTCONN;
6660 }
6661 return error;
6662 }
6663
/*
 * Content-filter hook for shutdown(2) on a UDP socket.
 *
 * Validates the current socket state, notifies the filters of the
 * read and/or write shutdown, and — when outgoing data is still held
 * by the filters — delays the protocol-level write shutdown:
 *   - returns EJUSTRETURN for SHUT_WR so the caller skips the
 *     protocol shutdown entirely;
 *   - rewrites *how from SHUT_RDWR to SHUT_RD so only the read side
 *     proceeds now (the write side completes once the filters issue
 *     their verdict on the pending data).
 * Returns 0 when the caller may proceed with *how as (possibly)
 * rewritten, or an errno value.
 */
int
cfil_sock_udp_shutdown(struct socket *so, int *how)
{
	int error = 0;

	/* Not filtered: nothing to do, let shutdown proceed normally */
	if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || (so->so_flow_db == NULL)) {
		goto done;
	}

	socket_lock_assert_owned(so);

	CFIL_LOG(LOG_INFO, "so %llx how %d",
	    (uint64_t)VM_KERNEL_ADDRPERM(so), *how);

	/*
	 * Check the state of the socket before the content filter
	 */
	if (*how != SHUT_WR && (so->so_state & SS_CANTRCVMORE) != 0) {
		/* read already shut down */
		error = ENOTCONN;
		goto done;
	}
	if (*how != SHUT_RD && (so->so_state & SS_CANTSENDMORE) != 0) {
		/* write already shut down */
		error = ENOTCONN;
		goto done;
	}

	/*
	 * shutdown read: SHUT_RD or SHUT_RDWR
	 */
	if (*how != SHUT_WR) {
		error = cfil_sock_udp_notify_shutdown(so, SHUT_RD, CFIF_DROP, CFIF_SHUT_RD);
		if (error != 0) {
			goto done;
		}
	}
	/*
	 * shutdown write: SHUT_WR or SHUT_RDWR
	 */
	if (*how != SHUT_RD) {
		error = cfil_sock_udp_notify_shutdown(so, SHUT_WR, CFIF_DROP, CFIF_SHUT_WR);
		if (error != 0) {
			goto done;
		}

		/*
		 * When outgoing data is pending, we delay the shutdown at the
		 * protocol level until the content filters give the final
		 * verdict on the pending data.
		 */
		if (cfil_sock_data_pending(&so->so_snd) != 0) {
			/*
			 * When shutting down the read and write sides at once
			 * we can proceed to the final shutdown of the read
			 * side. Otherwise, we just return.
			 */
			if (*how == SHUT_WR) {
				error = EJUSTRETURN;
			} else if (*how == SHUT_RDWR) {
				*how = SHUT_RD;
			}
		}
	}
done:
	return error;
}
6731
/*
 * Block a closing UDP socket until the content filters have detached:
 * repeatedly notify the filters of the SHUT_RDWR so they can detach,
 * then re-check attachment.
 */
void
cfil_sock_udp_close_wait(struct socket *so)
{
	socket_lock_assert_owned(so);

	while (cfil_filters_udp_attached(so, FALSE)) {
		/*
		 * Notify the filters we are going away so they can detach
		 */
		cfil_sock_udp_notify_shutdown(so, SHUT_RDWR, 0, 0);

		/*
		 * Make sure we need to wait after the filter are notified
		 * of the disconnection
		 */
		/*
		 * NOTE(review): the kNeedWait=TRUE variant presumably sleeps
		 * waiting for detach before returning — confirm against
		 * cfil_filters_udp_attached(); otherwise this loop would spin.
		 */
		if (cfil_filters_udp_attached(so, TRUE) == 0) {
			break;
		}
	}
}
6752
/*
 * Per-flow callback for cfil_sock_udp_is_closed(): tells every content
 * filter the flow is closing, makes a last attempt to inject already
 * passed outgoing data, marks the flow closed and flushes whatever is
 * still queued. Always returns true so iteration continues.
 */
static bool
cfil_sock_udp_is_closed_per_flow(struct socket *so,
    struct soflow_hash_entry *hash_entry,
    void *context)
{
#pragma unused(context)
	struct cfil_info * __single cfil_info = NULL;
	errno_t error = 0;
	int kcunit;

	/* Skip flows without an attached cfil context */
	if (hash_entry->soflow_feat_ctxt == NULL) {
		return true;
	}

	cfil_info = hash_entry->soflow_feat_ctxt;

	for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
		/* Let the filters know of the closing */
		error = cfil_dispatch_closed_event(so, cfil_info, kcunit);
	}

	/* Last chance to push passed data out */
	error = cfil_acquire_sockbuf(so, cfil_info, 1);
	if (error == 0) {
		cfil_service_inject_queue(so, cfil_info, 1);
	}
	cfil_release_sockbuf(so, 1);

	cfil_info->cfi_flags |= CFIF_SOCK_CLOSED;

	/* Pending data needs to go */
	cfil_flush_queues(so, cfil_info);

	CFIL_INFO_VERIFY(cfil_info);

	if (cfil_info->cfi_debug) {
		cfil_info_log(LOG_ERR, cfil_info, "CFIL: UDP PER-FLOW IS_CLOSED");
	}

	return true;
}
6794
6795 void
cfil_sock_udp_is_closed(struct socket * so)6796 cfil_sock_udp_is_closed(struct socket *so)
6797 {
6798 socket_lock_assert_owned(so);
6799
6800 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_flow_db != NULL) {
6801 soflow_db_apply(so->so_flow_db, cfil_sock_udp_is_closed_per_flow, NULL);
6802 }
6803 }
6804
/*
 * Per-flow callback for cfil_sock_udp_buf_update(): when a previous
 * inject attempt was deferred (CFIF_RETRY_INJECT_IN/OUT), retry
 * injecting the passed data into the socket buffer now that space may
 * be available. The direction is derived from the sockbuf passed in
 * "context" (SB_RECV set = incoming). Always returns true to keep
 * iterating.
 */
static bool
cfil_sock_udp_buf_update_per_flow(struct socket *so,
    struct soflow_hash_entry *hash_entry,
    void *context)
{
	struct cfil_info * __single cfil_info = NULL;
	struct sockbuf *sb = NULL;
	errno_t error = 0;
	int outgoing;

	/* Skip flows without an attached cfil context */
	if (hash_entry->soflow_feat_ctxt == NULL || context == NULL) {
		return true;
	}

	cfil_info = hash_entry->soflow_feat_ctxt;
	sb = (struct sockbuf *) context;

	if ((sb->sb_flags & SB_RECV) == 0) {
		/* Send buffer: only retry if an outgoing inject was deferred */
		if ((cfil_info->cfi_flags & CFIF_RETRY_INJECT_OUT) == 0) {
			return true;
		}
		outgoing = 1;
		OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_retry);
	} else {
		/* Receive buffer: only retry if an incoming inject was deferred */
		if ((cfil_info->cfi_flags & CFIF_RETRY_INJECT_IN) == 0) {
			return true;
		}
		outgoing = 0;
		OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_retry);
	}

	CFIL_LOG(LOG_NOTICE, "so %llx outgoing %d",
	    (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);

	error = cfil_acquire_sockbuf(so, cfil_info, outgoing);
	if (error == 0) {
		cfil_service_inject_queue(so, cfil_info, outgoing);
	}
	cfil_release_sockbuf(so, outgoing);
	return true;
}
6846
6847 void
cfil_sock_udp_buf_update(struct sockbuf * sb)6848 cfil_sock_udp_buf_update(struct sockbuf *sb)
6849 {
6850 struct socket *so = sb->sb_so;
6851
6852 socket_lock_assert_owned(so);
6853
6854 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_flow_db != NULL) {
6855 if (!cfil_sbtrim) {
6856 return;
6857 }
6858 soflow_db_apply(so->so_flow_db, cfil_sock_udp_buf_update_per_flow, (void *)sb);
6859 }
6860 }
6861
6862 void
cfil_filter_show(u_int32_t kcunit)6863 cfil_filter_show(u_int32_t kcunit)
6864 {
6865 struct content_filter *cfc = NULL;
6866 struct cfil_entry *entry;
6867 int count = 0;
6868
6869 if (kcunit > MAX_CONTENT_FILTER) {
6870 return;
6871 }
6872
6873 cfil_rw_lock_shared(&cfil_lck_rw);
6874
6875 if (content_filters[kcunit - 1] == NULL) {
6876 cfil_rw_unlock_shared(&cfil_lck_rw);
6877 return;
6878 }
6879 cfc = content_filters[kcunit - 1];
6880
6881 CFIL_LOG(LOG_DEBUG, "CFIL: FILTER SHOW: Filter <unit %d, entry count %d> flags <%lx>:",
6882 kcunit, cfc->cf_sock_count, (unsigned long)cfc->cf_flags);
6883 if (cfc->cf_flags & CFF_DETACHING) {
6884 CFIL_LOG(LOG_DEBUG, "CFIL: FILTER SHOW:-DETACHING");
6885 }
6886 if (cfc->cf_flags & CFF_ACTIVE) {
6887 CFIL_LOG(LOG_DEBUG, "CFIL: FILTER SHOW:-ACTIVE");
6888 }
6889 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
6890 CFIL_LOG(LOG_DEBUG, "CFIL: FILTER SHOW:-FLOW CONTROLLED");
6891 }
6892
6893 TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) {
6894 if (entry->cfe_cfil_info && entry->cfe_cfil_info->cfi_so) {
6895 struct cfil_info *cfil_info = entry->cfe_cfil_info;
6896
6897 count++;
6898
6899 if (entry->cfe_flags & CFEF_CFIL_DETACHED) {
6900 cfil_info_log(LOG_DEBUG, cfil_info, "CFIL: FILTER SHOW:-DETACHED");
6901 } else {
6902 cfil_info_log(LOG_DEBUG, cfil_info, "CFIL: FILTER SHOW:-ATTACHED");
6903 }
6904 }
6905 }
6906
6907 CFIL_LOG(LOG_DEBUG, "CFIL: FILTER SHOW:Filter - total entries shown: %d", count);
6908
6909 cfil_rw_unlock_shared(&cfil_lck_rw);
6910 }
6911
/*
 * Debug helper: log every attached cfil_info on the global list along
 * with a breakdown of its cfi_flags, at LOG_DEBUG level.
 */
void
cfil_info_show(void)
{
	struct cfil_info *cfil_info;
	int count = 0;

	cfil_rw_lock_shared(&cfil_lck_rw);

	CFIL_LOG(LOG_DEBUG, "CFIL: INFO SHOW:count %d", cfil_sock_attached_count);

	TAILQ_FOREACH(cfil_info, &cfil_sock_head, cfi_link) {
		count++;

		cfil_info_log(LOG_DEBUG, cfil_info, "CFIL: INFO SHOW");

		/* Decode each flag bit individually for readability */
		if (cfil_info->cfi_flags & CFIF_DROP) {
			CFIL_LOG(LOG_DEBUG, "CFIL: INFO FLAG - DROP");
		}
		if (cfil_info->cfi_flags & CFIF_CLOSE_WAIT) {
			CFIL_LOG(LOG_DEBUG, "CFIL: INFO FLAG - CLOSE_WAIT");
		}
		if (cfil_info->cfi_flags & CFIF_SOCK_CLOSED) {
			CFIL_LOG(LOG_DEBUG, "CFIL: INFO FLAG - SOCK_CLOSED");
		}
		if (cfil_info->cfi_flags & CFIF_RETRY_INJECT_IN) {
			CFIL_LOG(LOG_DEBUG, "CFIL: INFO FLAG - RETRY_INJECT_IN");
		}
		if (cfil_info->cfi_flags & CFIF_RETRY_INJECT_OUT) {
			CFIL_LOG(LOG_DEBUG, "CFIL: INFO FLAG - RETRY_INJECT_OUT");
		}
		if (cfil_info->cfi_flags & CFIF_SHUT_WR) {
			CFIL_LOG(LOG_DEBUG, "CFIL: INFO FLAG - SHUT_WR");
		}
		if (cfil_info->cfi_flags & CFIF_SHUT_RD) {
			CFIL_LOG(LOG_DEBUG, "CFIL: INFO FLAG - SHUT_RD");
		}
	}

	CFIL_LOG(LOG_DEBUG, "CFIL: INFO SHOW:total cfil_info shown: %d", count);

	cfil_rw_unlock_shared(&cfil_lck_rw);
}
6954
6955 bool
cfil_info_action_timed_out(struct cfil_info * cfil_info,int timeout)6956 cfil_info_action_timed_out(struct cfil_info *cfil_info, int timeout)
6957 {
6958 struct cfil_entry *entry;
6959 struct timeval current_tv;
6960 struct timeval diff_time;
6961
6962 if (cfil_info == NULL) {
6963 return false;
6964 }
6965
6966 /*
6967 * If we have queued up more data than passed offset and we haven't received
6968 * an action from user space for a while (the user space filter might have crashed),
6969 * return action timed out.
6970 */
6971 if (cfil_info->cfi_snd.cfi_pending_last > cfil_info->cfi_snd.cfi_pass_offset ||
6972 cfil_info->cfi_rcv.cfi_pending_last > cfil_info->cfi_rcv.cfi_pass_offset) {
6973 microuptime(¤t_tv);
6974
6975 for (int kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
6976 entry = &cfil_info->cfi_entries[kcunit - 1];
6977
6978 if (entry->cfe_filter == NULL) {
6979 continue;
6980 }
6981
6982 if (cfil_info->cfi_snd.cfi_pending_last > entry->cfe_snd.cfe_pass_offset ||
6983 cfil_info->cfi_rcv.cfi_pending_last > entry->cfe_rcv.cfe_pass_offset) {
6984 // haven't gotten an action from this filter, check timeout
6985 timersub(¤t_tv, &entry->cfe_last_action, &diff_time);
6986 if (diff_time.tv_sec >= timeout) {
6987 if (cfil_info->cfi_debug) {
6988 cfil_info_log(LOG_ERR, cfil_info, "CFIL: flow ACTION timeout expired");
6989 }
6990 return true;
6991 }
6992 }
6993 }
6994 }
6995 return false;
6996 }
6997
/*
 * Returns true when the flow has had to tail-drop queued mbufs in
 * either direction, i.e. its pending queues exceeded the configured
 * thresholds and the flow should be garbage collected.
 */
bool
cfil_info_buffer_threshold_exceeded(struct cfil_info *cfil_info)
{
	if (cfil_info == NULL) {
		return false;
	}

	/*
	 * Clean up flow if it exceeded queue thresholds
	 */
	if (cfil_info->cfi_snd.cfi_tail_drop_cnt ||
	    cfil_info->cfi_rcv.cfi_tail_drop_cnt) {
		if (cfil_info->cfi_debug) {
			/*
			 * NOTE(review): the "count"/"bytes" labels map to
			 * cfil_udp_gc_mbuf_num_max / cfil_udp_gc_mbuf_cnt_max —
			 * verify the label order matches those tunables' meaning.
			 */
			CFIL_LOG(LOG_ERR, "CFIL: queue threshold exceeded:mbuf max < count: %d bytes: %d > tail drop count < OUT: %d IN: %d > ",
			    cfil_udp_gc_mbuf_num_max,
			    cfil_udp_gc_mbuf_cnt_max,
			    cfil_info->cfi_snd.cfi_tail_drop_cnt,
			    cfil_info->cfi_rcv.cfi_tail_drop_cnt);
			cfil_info_log(LOG_ERR, cfil_info, "CFIL: queue threshold exceeded");
		}
		return true;
	}

	return false;
}
7023
7024 static bool
cfil_dgram_gc_needed(struct socket * so,struct soflow_hash_entry * hash_entry,u_int64_t current_time)7025 cfil_dgram_gc_needed(struct socket *so, struct soflow_hash_entry *hash_entry, u_int64_t current_time)
7026 {
7027 #pragma unused(current_time)
7028 struct cfil_info *cfil_info = NULL;
7029
7030 if (so == NULL || hash_entry == NULL || hash_entry->soflow_feat_ctxt == NULL) {
7031 return false;
7032 }
7033 cfil_info = (struct cfil_info *) hash_entry->soflow_feat_ctxt;
7034
7035 cfil_rw_lock_shared(&cfil_lck_rw);
7036
7037 if (cfil_info_action_timed_out(cfil_info, UDP_FLOW_GC_ACTION_TO) ||
7038 cfil_info_buffer_threshold_exceeded(cfil_info)) {
7039 if (cfil_info->cfi_debug) {
7040 cfil_info_log(LOG_ERR, cfil_info, "CFIL: UDP PER-FLOW GC NEEDED");
7041 }
7042 cfil_rw_unlock_shared(&cfil_lck_rw);
7043 return true;
7044 }
7045
7046 cfil_rw_unlock_shared(&cfil_lck_rw);
7047 return false;
7048 }
7049
/*
 * Garbage-collect one datagram flow: dispatch closed events to every
 * content filter, unlink the cfil context from the flow and free it.
 * Returns true on success (false only for invalid arguments).
 */
static bool
cfil_dgram_gc_perform(struct socket *so, struct soflow_hash_entry *hash_entry)
{
	struct cfil_info *cfil_info = NULL;

	if (so == NULL || hash_entry == NULL || hash_entry->soflow_feat_ctxt == NULL) {
		return false;
	}
	cfil_info = (struct cfil_info *) hash_entry->soflow_feat_ctxt;

	if (cfil_info->cfi_debug) {
		cfil_info_log(LOG_ERR, cfil_info, "CFIL: UDP PER-FLOW GC PERFORM");
	}

	for (int kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
		/* Let the filters know of the closing */
		cfil_dispatch_closed_event(so, cfil_info, kcunit);
	}
	cfil_sock_udp_unlink_flow(so, hash_entry, cfil_info);
	CFIL_INFO_FREE(cfil_info);
	OSIncrementAtomic(&cfil_stats.cfs_sock_detached);
	return true;
}
7073
7074 static bool
cfil_dgram_detach_entry(struct socket * so,struct soflow_hash_entry * hash_entry)7075 cfil_dgram_detach_entry(struct socket *so, struct soflow_hash_entry *hash_entry)
7076 {
7077 struct cfil_info *cfil_info = NULL;
7078
7079 if (hash_entry == NULL || hash_entry->soflow_feat_ctxt == NULL) {
7080 return true;
7081 }
7082 cfil_info = (struct cfil_info *) hash_entry->soflow_feat_ctxt;
7083
7084 if (cfil_info->cfi_debug) {
7085 cfil_info_log(LOG_ERR, cfil_info, "CFIL: DGRAM DETACH ENTRY");
7086 }
7087
7088 cfil_sock_udp_unlink_flow(so, hash_entry, cfil_info);
7089 CFIL_INFO_FREE(cfil_info);
7090 OSIncrementAtomic(&cfil_stats.cfs_sock_detached);
7091
7092 return true;
7093 }
7094
7095 static bool
cfil_dgram_detach_db(struct socket * so,struct soflow_db * db)7096 cfil_dgram_detach_db(struct socket *so, struct soflow_db *db)
7097 {
7098 #pragma unused(db)
7099 if (so && so->so_flags & SOF_CONTENT_FILTER) {
7100 so->so_flags &= ~SOF_CONTENT_FILTER;
7101 CFIL_LOG(LOG_DEBUG, "CFIL: DGRAM DETACH DB <so %llx>", (uint64_t)VM_KERNEL_ADDRPERM(so));
7102 }
7103 return true;
7104 }
7105
/*
 * Snapshot socket state into an mbuf tag so the state can travel with
 * the packet: state-change counter, socket options, inp flags and the
 * flow's faddr (as sockaddr_in or sockaddr_in6 depending on family).
 * Returns the tag prepended to "m", or NULL if the preconditions fail
 * or the tag allocation (M_DONTWAIT) fails.
 */
struct m_tag *
cfil_dgram_save_socket_state(struct cfil_info *cfil_info, struct mbuf *m)
{
	struct m_tag *tag = NULL;
	struct cfil_tag *ctag = NULL;
	struct soflow_hash_entry *hash_entry = NULL;
	struct inpcb *inp = NULL;

	/* Need a live socket, a flow entry, and a packet header mbuf */
	if (cfil_info == NULL || cfil_info->cfi_so == NULL ||
	    cfil_info->cfi_hash_entry == NULL || m == NULL || !(m->m_flags & M_PKTHDR)) {
		return NULL;
	}

	inp = sotoinpcb(cfil_info->cfi_so);

	/* Allocate a tag */
	tag = m_tag_create(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_CFIL_UDP,
	    sizeof(struct cfil_tag), M_DONTWAIT, m);

	if (tag) {
		ctag = (struct cfil_tag *)(tag->m_tag_data);
		ctag->cfil_so_state_change_cnt = cfil_info->cfi_so->so_state_change_cnt;
		ctag->cfil_so_options = cfil_info->cfi_so->so_options;
		ctag->cfil_inp_flags = inp ? inp->inp_flags : 0;

		/* Record the flow's foreign address in the matching family format */
		hash_entry = cfil_info->cfi_hash_entry;
		if (hash_entry->soflow_family == AF_INET6) {
			fill_ip6_sockaddr_4_6(&ctag->cfil_faddr,
			    &hash_entry->soflow_faddr.addr6,
			    hash_entry->soflow_fport, hash_entry->soflow_faddr6_ifscope);
		} else if (hash_entry->soflow_family == AF_INET) {
			fill_ip_sockaddr_4_6(&ctag->cfil_faddr,
			    hash_entry->soflow_faddr.addr46.ia46_addr4,
			    hash_entry->soflow_fport);
		}
		m_tag_prepend(m, tag);
		return tag;
	}
	return NULL;
}
7146
/*
 * Retrieve and remove the cfil state tag from an mbuf, copying out any
 * of the requested fields (each out-parameter may be NULL). The tag is
 * unlinked from the mbuf and returned; ownership transfers to the
 * caller, who must free it. Returns NULL if the mbuf carries no tag.
 */
struct m_tag *
cfil_dgram_get_socket_state(struct mbuf *m, uint32_t *state_change_cnt, uint32_t *options,
    struct sockaddr **faddr, int *inp_flags)
{
	struct m_tag *tag = NULL;
	struct cfil_tag *ctag = NULL;

	tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_CFIL_UDP);
	if (tag) {
		ctag = (struct cfil_tag *)(tag->m_tag_data);
		if (state_change_cnt) {
			*state_change_cnt = ctag->cfil_so_state_change_cnt;
		}
		if (options) {
			*options = ctag->cfil_so_options;
		}
		if (faddr) {
			/* Points into the tag — valid only while the tag lives */
			*faddr = SA(&ctag->cfil_faddr);
		}
		if (inp_flags) {
			*inp_flags = ctag->cfil_inp_flags;
		}

		/*
		 * Unlink tag and hand it over to caller.
		 * Note that caller will be responsible to free it.
		 */
		m_tag_unlink(m, tag);
		return tag;
	}
	return NULL;
}
7179
7180 boolean_t
cfil_dgram_peek_socket_state(struct mbuf * m,int * inp_flags)7181 cfil_dgram_peek_socket_state(struct mbuf *m, int *inp_flags)
7182 {
7183 struct m_tag *tag = NULL;
7184 struct cfil_tag *ctag = NULL;
7185
7186 tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_CFIL_UDP);
7187 if (tag) {
7188 ctag = (struct cfil_tag *)(tag->m_tag_data);
7189 if (inp_flags) {
7190 *inp_flags = ctag->cfil_inp_flags;
7191 }
7192 return true;
7193 }
7194 return false;
7195 }
7196
/*
 * Send a batched stats report (stats_count entries already staged in
 * "buffer") to the filter at unit "kcunit" over its kernel control
 * socket.
 *
 * Locking: called with cfil_lck_rw held shared. On ENOBUFS the lock is
 * upgraded to exclusive to set CFF_FLOW_CONTROLLED, then downgraded
 * back to shared before returning — callers keep holding shared.
 *
 * Returns 0 on success or if there is nothing to send / no such
 * filter; otherwise the ctl_enqueuedata() error.
 */
static int
cfil_dispatch_stats_event_locked(int kcunit, struct cfil_stats_report_buffer *buffer, uint32_t stats_count)
{
	struct content_filter *cfc = NULL;
	errno_t error = 0;
	size_t msgsize = 0;

	if (buffer == NULL || stats_count == 0) {
		return error;
	}

	if (kcunit > MAX_CONTENT_FILTER) {
		return error;
	}

	cfc = content_filters[kcunit - 1];
	if (cfc == NULL) {
		return error;
	}

	/* Would be wasteful to try */
	if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
		error = ENOBUFS;
		goto done;
	}

	/* Fill in the message header for this batch */
	msgsize = sizeof(struct cfil_msg_stats_report) + (sizeof(struct cfil_msg_sock_stats) * stats_count);
	buffer->msghdr.cfm_len = (uint32_t)msgsize;
	buffer->msghdr.cfm_version = 1;
	buffer->msghdr.cfm_type = CFM_TYPE_EVENT;
	buffer->msghdr.cfm_op = CFM_OP_STATS;
	buffer->msghdr.cfm_sock_id = 0;
	buffer->count = stats_count;

	if (cfil_log_stats) {
		CFIL_LOG(LOG_DEBUG, "STATS (kcunit %d): msg size %lu - %lu %lu %lu",
		    kcunit,
		    (unsigned long)msgsize,
		    (unsigned long)sizeof(struct cfil_msg_stats_report),
		    (unsigned long)sizeof(struct cfil_msg_sock_stats),
		    (unsigned long)stats_count);
	}

	/*
	 * NOTE(review): the full buffer size is enqueued rather than msgsize;
	 * presumably the receiver trusts cfm_len for the payload length —
	 * confirm against the user space agent protocol.
	 */
	error = ctl_enqueuedata(cfc->cf_kcref, cfc->cf_kcunit,
	    buffer,
	    sizeof(struct cfil_stats_report_buffer),
	    CTL_DATA_EOR);
	if (error != 0) {
		CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed:%d", error);
		goto done;
	}
	OSIncrementAtomic(&cfil_stats.cfs_stats_event_ok);

	if (cfil_log_stats) {
		CFIL_LOG(LOG_DEBUG, "CFIL: STATS REPORT:send msg to %d", kcunit);
	}
done:

	if (error == ENOBUFS) {
		OSIncrementAtomic(
			&cfil_stats.cfs_stats_event_flow_control);

		/* Upgrade to exclusive to mutate the filter flags */
		if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
			cfil_rw_lock_exclusive(&cfil_lck_rw);
		}

		cfc->cf_flags |= CFF_FLOW_CONTROLLED;

		cfil_rw_lock_exclusive_to_shared(&cfil_lck_rw);
	} else if (error != 0) {
		OSIncrementAtomic(&cfil_stats.cfs_stats_event_fail);
	}

	return error;
}
7272
7273 static void
cfil_stats_report_thread_sleep(bool forever)7274 cfil_stats_report_thread_sleep(bool forever)
7275 {
7276 if (cfil_log_stats) {
7277 CFIL_LOG(LOG_DEBUG, "CFIL: STATS COLLECTION SLEEP");
7278 }
7279
7280 if (forever) {
7281 (void) assert_wait((event_t) &cfil_sock_attached_stats_count,
7282 THREAD_INTERRUPTIBLE);
7283 } else {
7284 uint64_t deadline = 0;
7285 nanoseconds_to_absolutetime(CFIL_STATS_REPORT_RUN_INTERVAL_NSEC, &deadline);
7286 clock_absolutetime_interval_to_deadline(deadline, &deadline);
7287
7288 (void) assert_wait_deadline(&cfil_sock_attached_stats_count,
7289 THREAD_INTERRUPTIBLE, deadline);
7290 }
7291 }
7292
/*
 * Entry point of the dedicated stats-report kernel thread: names the
 * thread, arms the first timed sleep and blocks with cfil_stats_report
 * as the continuation. Never returns.
 */
static void
cfil_stats_report_thread_func(void *v, wait_result_t w)
{
#pragma unused(v, w)

	ASSERT(cfil_stats_report_thread == current_thread());
	thread_set_thread_name(current_thread(), "CFIL_STATS_REPORT");

	// Kick off gc shortly
	cfil_stats_report_thread_sleep(false);
	thread_block_parameter((thread_continue_t) cfil_stats_report, NULL);
	/* NOTREACHED */
}
7306
7307 static bool
cfil_stats_collect_flow_stats_for_filter(int kcunit,struct cfil_info * cfil_info,struct cfil_entry * entry,struct timeval current_tv)7308 cfil_stats_collect_flow_stats_for_filter(int kcunit,
7309 struct cfil_info *cfil_info,
7310 struct cfil_entry *entry,
7311 struct timeval current_tv)
7312 {
7313 struct cfil_stats_report_buffer *buffer = NULL;
7314 struct cfil_msg_sock_stats *flow_array = NULL;
7315 struct cfil_msg_sock_stats *stats = NULL;
7316 struct inpcb *inp = NULL;
7317 struct timeval diff_time;
7318 uint64_t diff_time_usecs;
7319 int index = 0;
7320
7321 if (entry->cfe_stats_report_frequency == 0) {
7322 return false;
7323 }
7324
7325 buffer = global_cfil_stats_report_buffers[kcunit - 1];
7326 if (buffer == NULL) {
7327 CFIL_LOG(LOG_ERR, "CFIL: STATS: no buffer");
7328 return false;
7329 }
7330
7331 timersub(¤t_tv, &entry->cfe_stats_report_ts, &diff_time);
7332 diff_time_usecs = (diff_time.tv_sec * USEC_PER_SEC) + diff_time.tv_usec;
7333
7334 if (cfil_info->cfi_debug && cfil_log_stats) {
7335 CFIL_LOG(LOG_ERR, "CFIL: STATS REPORT - elapsed time - ts %llu %llu cur ts %llu %llu diff %llu %llu(usecs %llu) @freq %llu usecs sockID %llu <%llx>",
7336 (unsigned long long)entry->cfe_stats_report_ts.tv_sec,
7337 (unsigned long long)entry->cfe_stats_report_ts.tv_usec,
7338 (unsigned long long)current_tv.tv_sec,
7339 (unsigned long long)current_tv.tv_usec,
7340 (unsigned long long)diff_time.tv_sec,
7341 (unsigned long long)diff_time.tv_usec,
7342 (unsigned long long)diff_time_usecs,
7343 (unsigned long long)((entry->cfe_stats_report_frequency * NSEC_PER_MSEC) / NSEC_PER_USEC),
7344 cfil_info->cfi_sock_id, cfil_info->cfi_sock_id);
7345 }
7346
7347 // Compare elapsed time in usecs
7348 if (diff_time_usecs >= (entry->cfe_stats_report_frequency * NSEC_PER_MSEC) / NSEC_PER_USEC) {
7349 if (cfil_info->cfi_debug && cfil_log_stats) {
7350 CFIL_LOG(LOG_ERR, "CFIL: STATS REPORT - in %llu reported %llu",
7351 cfil_info->cfi_byte_inbound_count,
7352 entry->cfe_byte_inbound_count_reported);
7353 CFIL_LOG(LOG_ERR, "CFIL: STATS REPORT - out %llu reported %llu",
7354 cfil_info->cfi_byte_outbound_count,
7355 entry->cfe_byte_outbound_count_reported);
7356 }
7357 // Check if flow has new bytes that have not been reported
7358 if (entry->cfe_byte_inbound_count_reported < cfil_info->cfi_byte_inbound_count ||
7359 entry->cfe_byte_outbound_count_reported < cfil_info->cfi_byte_outbound_count) {
7360 flow_array = (struct cfil_msg_sock_stats *)&buffer->stats;
7361 index = global_cfil_stats_counts[kcunit - 1];
7362
7363 stats = &flow_array[index];
7364 stats->cfs_sock_id = cfil_info->cfi_sock_id;
7365 stats->cfs_byte_inbound_count = cfil_info->cfi_byte_inbound_count;
7366 stats->cfs_byte_outbound_count = cfil_info->cfi_byte_outbound_count;
7367
7368 if (entry->cfe_laddr_sent == false) {
7369 /* cache it if necessary */
7370 if (cfil_info->cfi_so_attach_laddr.sa.sa_len == 0) {
7371 inp = cfil_info->cfi_so ? sotoinpcb(cfil_info->cfi_so) : NULL;
7372 if (inp != NULL) {
7373 boolean_t outgoing = (cfil_info->cfi_dir == CFS_CONNECTION_DIR_OUT);
7374 union sockaddr_in_4_6 *src = outgoing ? &cfil_info->cfi_so_attach_laddr : NULL;
7375 union sockaddr_in_4_6 *dst = outgoing ? NULL : &cfil_info->cfi_so_attach_laddr;
7376 cfil_fill_event_msg_addresses(cfil_info->cfi_hash_entry, inp,
7377 src, dst, !IS_INP_V6(inp), outgoing);
7378 }
7379 }
7380
7381 if (cfil_info->cfi_so_attach_laddr.sa.sa_len != 0) {
7382 stats->cfs_laddr.sin6 = cfil_info->cfi_so_attach_laddr.sin6;
7383 entry->cfe_laddr_sent = true;
7384 }
7385 }
7386
7387 global_cfil_stats_counts[kcunit - 1]++;
7388
7389 entry->cfe_stats_report_ts = current_tv;
7390 entry->cfe_byte_inbound_count_reported = cfil_info->cfi_byte_inbound_count;
7391 entry->cfe_byte_outbound_count_reported = cfil_info->cfi_byte_outbound_count;
7392 if (cfil_info->cfi_debug && cfil_log_stats) {
7393 cfil_info_log(LOG_ERR, cfil_info, "CFIL: STATS COLLECTED");
7394 }
7395 CFI_ADD_TIME_LOG(cfil_info, ¤t_tv, &cfil_info->cfi_first_event, CFM_OP_STATS);
7396 return true;
7397 }
7398 }
7399 return false;
7400 }
7401
7402 static void
cfil_stats_report(void * v,wait_result_t w)7403 cfil_stats_report(void *v, wait_result_t w)
7404 {
7405 #pragma unused(v, w)
7406
7407 struct cfil_info *cfil_info = NULL;
7408 struct cfil_entry *entry = NULL;
7409 struct timeval current_tv;
7410 uint32_t flow_count = 0;
7411 uint64_t saved_next_sock_id = 0; // Next sock id to be reported for next loop
7412 bool flow_reported = false;
7413
7414 if (cfil_log_stats) {
7415 CFIL_LOG(LOG_DEBUG, "CFIL: STATS COLLECTION RUNNING");
7416 }
7417
7418 do {
7419 // Collect all sock ids of flows that has new stats
7420 cfil_rw_lock_shared(&cfil_lck_rw);
7421
7422 if (cfil_sock_attached_stats_count == 0) {
7423 if (cfil_log_stats) {
7424 CFIL_LOG(LOG_DEBUG, "CFIL: STATS: no flow");
7425 }
7426 cfil_rw_unlock_shared(&cfil_lck_rw);
7427 goto go_sleep;
7428 }
7429
7430 for (int kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
7431 if (global_cfil_stats_report_buffers[kcunit - 1] != NULL) {
7432 memset(global_cfil_stats_report_buffers[kcunit - 1], 0, sizeof(struct cfil_stats_report_buffer));
7433 }
7434 global_cfil_stats_counts[kcunit - 1] = 0;
7435 }
7436
7437 microuptime(¤t_tv);
7438 flow_count = 0;
7439
7440 TAILQ_FOREACH(cfil_info, &cfil_sock_head_stats, cfi_link_stats) {
7441 if (saved_next_sock_id != 0 &&
7442 saved_next_sock_id == cfil_info->cfi_sock_id) {
7443 // Here is where we left off previously, start accumulating
7444 saved_next_sock_id = 0;
7445 }
7446
7447 if (saved_next_sock_id == 0) {
7448 if (flow_count >= CFIL_STATS_REPORT_MAX_COUNT) {
7449 // Examine a fixed number of flows each round. Remember the current flow
7450 // so we can start from here for next loop
7451 saved_next_sock_id = cfil_info->cfi_sock_id;
7452 break;
7453 }
7454
7455 flow_reported = false;
7456 for (int kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
7457 entry = &cfil_info->cfi_entries[kcunit - 1];
7458 if (entry->cfe_filter == NULL) {
7459 if (cfil_info->cfi_debug && cfil_log_stats) {
7460 CFIL_LOG(LOG_ERR, "CFIL: STATS REPORT - so %llx no filter",
7461 cfil_info->cfi_so ? (uint64_t)VM_KERNEL_ADDRPERM(cfil_info->cfi_so) : 0);
7462 }
7463 continue;
7464 }
7465
7466 if ((entry->cfe_stats_report_frequency > 0) &&
7467 cfil_stats_collect_flow_stats_for_filter(kcunit, cfil_info, entry, current_tv) == true) {
7468 flow_reported = true;
7469 }
7470 }
7471 if (flow_reported == true) {
7472 flow_count++;
7473 }
7474 }
7475 }
7476
7477 if (flow_count > 0) {
7478 if (cfil_log_stats) {
7479 CFIL_LOG(LOG_DEBUG, "CFIL: STATS reporting for %d flows", flow_count);
7480 }
7481 for (int kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
7482 if (global_cfil_stats_report_buffers[kcunit - 1] != NULL &&
7483 global_cfil_stats_counts[kcunit - 1] > 0) {
7484 cfil_dispatch_stats_event_locked(kcunit,
7485 global_cfil_stats_report_buffers[kcunit - 1],
7486 global_cfil_stats_counts[kcunit - 1]);
7487 }
7488 }
7489 } else {
7490 cfil_rw_unlock_shared(&cfil_lck_rw);
7491 goto go_sleep;
7492 }
7493
7494 cfil_rw_unlock_shared(&cfil_lck_rw);
7495
7496 // Loop again if we haven't finished the whole cfil_info list
7497 } while (saved_next_sock_id != 0);
7498
7499 go_sleep:
7500
7501 // Sleep forever (until waken up) if no more flow to report
7502 cfil_rw_lock_shared(&cfil_lck_rw);
7503 cfil_stats_report_thread_sleep(cfil_sock_attached_stats_count == 0 ? true : false);
7504 cfil_rw_unlock_shared(&cfil_lck_rw);
7505 thread_block_parameter((thread_continue_t) cfil_stats_report, NULL);
7506 /* NOTREACHED */
7507 }
7508