1 /*
2 * Copyright (c) 2013-2022 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 /*
25 * THEORY OF OPERATION
26 *
27 * The socket content filter subsystem provides a way for user space agents to
28 * make filtering decisions based on the content of the data being sent and
29 * received by INET/INET6 sockets.
30 *
31 * A content filter user space agents gets a copy of the data and the data is
32 * also kept in kernel buffer until the user space agents makes a pass or drop
33 * decision. This unidirectional flow of content avoids unnecessary data copies
34 * back to the kernel.
35 *
36 * A user space filter agent opens a kernel control socket with the name
37 * CONTENT_FILTER_CONTROL_NAME to attach to the socket content filter subsystem.
38 * When connected, a "struct content_filter" is created and set as the
39 * "unitinfo" of the corresponding kernel control socket instance.
40 *
41 * The socket content filter subsystem exchanges messages with the user space
42 * filter agent until an ultimate pass or drop decision is made by the
43 * user space filter agent.
44 *
45 * It should be noted that messages about many INET/INET6 sockets can be multiplexed
46 * over a single kernel control socket.
47 *
48 * Notes:
49 * - The current implementation supports all INET/INET6 sockets (i.e. TCP,
50 * UDP, ICMP, etc).
51 * - The current implementation supports up to two simultaneous content filters
52 * for iOS devices and eight simultaneous content filters for OSX.
53 *
54 *
55 * NECP FILTER CONTROL UNIT
56 *
57 * A user space filter agent uses the Network Extension Control Policy (NECP)
58 * database to specify which INET/INET6 sockets need to be filtered. The NECP
59 * criteria may be based on a variety of properties like user ID or proc UUID.
60 *
61 * The NECP "filter control unit" is used by the socket content filter subsystem
62 * to deliver the relevant INET/INET6 content information to the appropriate
63 * user space filter agent via its kernel control socket instance.
64 * This works as follows:
65 *
66 * 1) The user space filter agent specifies an NECP filter control unit when
67 * in adds its filtering rules to the NECP database.
68 *
69 * 2) The user space filter agent also sets its NECP filter control unit on the
70 * content filter kernel control socket via the socket option
71 * CFIL_OPT_NECP_CONTROL_UNIT.
72 *
73 * 3) The NECP database is consulted to find out if a given INET/INET6 socket
74 * needs to be subjected to content filtering and returns the corresponding
75 * NECP filter control unit -- the NECP filter control unit is actually
76 * stored in the INET/INET6 socket structure so the NECP lookup is really simple.
77 *
78 * 4) The NECP filter control unit is then used to find the corresponding
79 * kernel control socket instance.
80 *
81 * Note: NECP currently supports a single filter control unit per INET/INET6 socket
82 * but this restriction may be soon lifted.
83 *
84 *
85 * THE MESSAGING PROTOCOL
86 *
87 * The socket content filter subsystem and a user space filter agent
88 * communicate over the kernel control socket via an asynchronous
89 * messaging protocol (this is not a request-response protocol).
90 * The socket content filter subsystem sends event messages to the user
91 * space filter agent about the INET/INET6 sockets it is interested to filter.
92 * The user space filter agent sends action messages to either allow
93 * data to pass or to disallow the data flow (and drop the connection).
94 *
95 * All messages over a content filter kernel control socket share the same
96 * common header of type "struct cfil_msg_hdr". The message type tells if
97 * it's a event message "CFM_TYPE_EVENT" or a action message "CFM_TYPE_ACTION".
98 * The message header field "cfm_sock_id" identifies a given INET/INET6 flow.
99 * For TCP, flows are per-socket. For UDP and other datagrame protocols, there
100 * could be multiple flows per socket.
101 *
102 * Note the message header length field may be padded for alignment and can
103 * be larger than the actual content of the message.
104 * The field "cfm_op" describe the kind of event or action.
105 *
106 * Here are the kinds of content filter events:
107 * - CFM_OP_SOCKET_ATTACHED: a new INET/INET6 socket is being filtered
108 * - CFM_OP_SOCKET_CLOSED: A INET/INET6 socket is closed
109 * - CFM_OP_DATA_OUT: A span of data is being sent on a INET/INET6 socket
110 * - CFM_OP_DATA_IN: A span of data is being or received on a INET/INET6 socket
111 *
112 *
113 * EVENT MESSAGES
114 *
115 * The CFM_OP_DATA_OUT and CFM_OP_DATA_IN event messages contains a span of
116 * data that is being sent or received. The position of this span of data
117 * in the data flow is described by a set of start and end offsets. These
118 * are absolute 64 bits offsets. The first byte sent (or received) starts
119 * at offset 0 and ends at offset 1. The length of the content data
120 * is given by the difference between the end offset and the start offset.
121 *
122 * After a CFM_OP_SOCKET_ATTACHED is delivered, CFM_OP_DATA_OUT and
123 * CFM_OP_DATA_OUT events are not delivered until a CFM_OP_DATA_UPDATE
124 * action message is sent by the user space filter agent.
125 *
126 * Note: absolute 64 bits offsets should be large enough for the foreseeable
127 * future. A 64-bits counter will wrap after 468 years at 10 Gbit/sec:
128 * 2E64 / ((10E9 / 8) * 60 * 60 * 24 * 365.25) = 467.63
129 *
130 * They are two kinds of primary content filter actions:
131 * - CFM_OP_DATA_UPDATE: to update pass or peek offsets for each direction.
132 * - CFM_OP_DROP: to shutdown socket and disallow further data flow
133 *
134 * There is also an action to mark a given client flow as already filtered
135 * at a higher level, CFM_OP_BLESS_CLIENT.
136 *
137 *
138 * ACTION MESSAGES
139 *
140 * The CFM_OP_DATA_UPDATE action messages let the user space filter
141 * agent allow data to flow up to the specified pass offset -- there
142 * is a pass offset for outgoing data and a pass offset for incoming data.
143 * When a new INET/INET6 socket is attached to the content filter and a flow is
144 * created, each pass offset is initially set to 0 so no data is allowed to pass by
145 * default. When the pass offset is set to CFM_MAX_OFFSET via a CFM_OP_DATA_UPDATE
146 * then the data flow becomes unrestricted.
147 *
148 * Note that pass offsets can only be incremented. A CFM_OP_DATA_UPDATE message
149 * with a pass offset smaller than the pass offset of a previous
150 * CFM_OP_DATA_UPDATE message is silently ignored.
151 *
152 * A user space filter agent also uses CFM_OP_DATA_UPDATE action messages
153 * to tell the kernel how much data it wants to see by using the peek offsets.
154 * Just like pass offsets, there is a peek offset for each direction.
155 * When a new INET/INET6 flow is created, each peek offset is initially set to 0
156 * so no CFM_OP_DATA_OUT and CFM_OP_DATA_IN event messages are dispatched by default
157 * until a CFM_OP_DATA_UPDATE action message with a greater than 0 peek offset is sent
158 * by the user space filter agent. When the peek offset is set to CFM_MAX_OFFSET via
159 * a CFM_OP_DATA_UPDATE then the flow of update data events becomes unrestricted.
160 *
161 * Note that peek offsets cannot be smaller than the corresponding pass offset.
162 * Also a peek offsets cannot be smaller than the corresponding end offset
163 * of the last CFM_OP_DATA_OUT/CFM_OP_DATA_IN message dispatched. Trying
164 * to set a too small peek value is silently ignored.
165 *
166 *
167 * PER FLOW "struct cfil_info"
168 *
169 * As soon as a INET/INET6 socket gets attached to a content filter, a
170 * "struct cfil_info" is created to hold the content filtering state for this
171 * socket. For UDP and other datagram protocols, as soon as traffic is seen for
172 * each new flow identified by its 4-tuple of source address/port and destination
173 * address/port, a "struct cfil_info" is created. Each datagram socket may
174 * have multiple flows maintained in a hash table of "struct cfil_info" entries.
175 *
176 * The content filtering state is made of the following information
177 * for each direction:
178 * - The current pass offset;
179 * - The first and last offsets of the data pending, waiting for a filtering
180 * decision;
181 * - The inject queue for data that passed the filters and that needs
182 * to be re-injected;
183 * - A content filter specific state in a set of "struct cfil_entry"
184 *
185 *
186 * CONTENT FILTER STATE "struct cfil_entry"
187 *
188 * The "struct cfil_entry" maintains the information most relevant to the
189 * message handling over a kernel control socket with a user space filter agent.
190 *
191 * The "struct cfil_entry" holds the NECP filter control unit that corresponds
192 * to the kernel control socket unit it corresponds to and also has a pointer
193 * to the corresponding "struct content_filter".
194 *
195 * For each direction, "struct cfil_entry" maintains the following information:
196 * - The pass offset
197 * - The peek offset
198 * - The offset of the last data peeked at by the filter
199 * - A queue of data that's waiting to be delivered to the user space filter
200 * agent on the kernel control socket
201 * - A queue of data for which event messages have been sent on the kernel
202 * control socket and are pending for a filtering decision.
203 *
204 *
205 * CONTENT FILTER QUEUES
206 *
207 * Data that is being filtered is steered away from the INET/INET6 socket buffer
208 * and instead will sit in one of three content filter queues until the data
209 * can be re-injected into the INET/INET6 socket buffer.
210 *
211 * A content filter queue is represented by "struct cfil_queue" that contains
212 * a list of mbufs and the start and end offset of the data span of
213 * the list of mbufs.
214 *
215 * The data moves into the three content filter queues according to this
216 * sequence:
217 * a) The "cfe_ctl_q" of "struct cfil_entry"
218 * b) The "cfe_pending_q" of "struct cfil_entry"
219 * c) The "cfi_inject_q" of "struct cfil_info"
220 *
221 * Note: The sequence (a),(b) may be repeated several times if there is more
222 * than one content filter attached to the INET/INET6 socket.
223 *
224 * The "cfe_ctl_q" queue holds data than cannot be delivered to the
225 * kernel conntrol socket for two reasons:
226 * - The peek offset is less that the end offset of the mbuf data
227 * - The kernel control socket is flow controlled
228 *
229 * The "cfe_pending_q" queue holds data for which CFM_OP_DATA_OUT or
230 * CFM_OP_DATA_IN have been successfully dispatched to the kernel control
231 * socket and are waiting for a pass action message fromn the user space
232 * filter agent. An mbuf length must be fully allowed to pass to be removed
233 * from the cfe_pending_q.
234 *
235 * The "cfi_inject_q" queue holds data that has been fully allowed to pass
236 * by the user space filter agent and that needs to be re-injected into the
237 * INET/INET6 socket.
238 *
239 *
240 * IMPACT ON FLOW CONTROL
241 *
242 * An essential aspect of the content filer subsystem is to minimize the
243 * impact on flow control of the INET/INET6 sockets being filtered.
244 *
245 * The processing overhead of the content filtering may have an effect on
246 * flow control by adding noticeable delays and cannot be eliminated --
247 * care must be taken by the user space filter agent to minimize the
248 * processing delays.
249 *
250 * The amount of data being filtered is kept in buffers while waiting for
251 * a decision by the user space filter agent. This amount of data pending
252 * needs to be subtracted from the amount of data available in the
253 * corresponding INET/INET6 socket buffer. This is done by modifying
254 * sbspace() and tcp_sbspace() to account for amount of data pending
255 * in the content filter.
256 *
257 *
258 * LOCKING STRATEGY
259 *
260 * The global state of content filter subsystem is protected by a single
261 * read-write lock "cfil_lck_rw". The data flow can be done with the
262 * cfil read-write lock held as shared so it can be re-entered from multiple
263 * threads.
264 *
265 * The per INET/INET6 socket content filterstate -- "struct cfil_info" -- is
266 * protected by the socket lock.
267 *
268 * A INET/INET6 socket lock cannot be taken while the cfil read-write lock
269 * is held. That's why we have some sequences where we drop the cfil read-write
270 * lock before taking the INET/INET6 lock.
271 *
272 * It is also important to lock the INET/INET6 socket buffer while the content
273 * filter is modifying the amount of pending data. Otherwise the calculations
274 * in sbspace() and tcp_sbspace() could be wrong.
275 *
276 * The "cfil_lck_rw" protects "struct content_filter" and also the fields
277 * "cfe_link" and "cfe_filter" of "struct cfil_entry".
278 *
279 * Actually "cfe_link" and "cfe_filter" are protected by both by
280 * "cfil_lck_rw" and the socket lock: they may be modified only when
281 * "cfil_lck_rw" is exclusive and the socket is locked.
282 *
283 * To read the other fields of "struct content_filter" we have to take
284 * "cfil_lck_rw" in shared mode.
285 *
286 * DATAGRAM SPECIFICS:
287 *
288 * The socket content filter supports all INET/INET6 protocols. However
289 * the treatments for TCP sockets and for datagram (UDP, ICMP, etc) sockets
290 * are slightly different.
291 *
292 * Each datagram socket may have multiple flows. Each flow is identified
293 * by the flow's source address/port and destination address/port tuple
294 * and is represented as a "struct cfil_info" entry. For each socket,
295 * a hash table is used to maintain the collection of flows under that socket.
296 *
297 * Each datagram flow is uniquely identified by it's "struct cfil_info" cfi_sock_id.
298 * The highest 32-bits of the cfi_sock_id contains the socket's so_gencnt. This portion
299 * of the cfi_sock_id is used locate the socket during socket lookup. The lowest 32-bits
300 * of the cfi_sock_id contains a hash of the flow's 4-tuple. This portion of the cfi_sock_id
301 * is used as the hash value for the flow hash table lookup within the parent socket.
302 *
303 * Since datagram sockets may not be connected, flow states may not be maintained in the
304 * socket structures and thus have to be saved for each packet. These saved states will be
305 * used for both outgoing and incoming reinjections. For outgoing packets, destination
306 * address/port as well as the current socket states will be saved. During reinjection,
307 * these saved states will be used instead. For incoming packets, control and address
308 * mbufs will be chained to the data. During reinjection, the whole chain will be queued
309 * onto the incoming socket buffer.
310 *
311 * LIMITATIONS
312 *
313 * - Support all INET/INET6 sockets, such as TCP, UDP, ICMP, etc
314 *
315 * - Does not support TCP unordered messages
316 */
317
318 /*
319 * TO DO LIST
320 *
321 * Deal with OOB
322 *
323 */
324
325 #include <sys/types.h>
326 #include <sys/kern_control.h>
327 #include <sys/queue.h>
328 #include <sys/domain.h>
329 #include <sys/protosw.h>
330 #include <sys/syslog.h>
331 #include <sys/systm.h>
332 #include <sys/param.h>
333 #include <sys/mbuf.h>
334
335 #include <kern/locks.h>
336 #include <kern/zalloc.h>
337 #include <kern/debug.h>
338
339 #include <net/ntstat.h>
340 #include <net/content_filter.h>
341 #include <net/content_filter_crypto.h>
342
343 #define _IP_VHL
344 #include <netinet/ip.h>
345 #include <netinet/in_pcb.h>
346 #include <netinet/tcp.h>
347 #include <netinet/tcp_var.h>
348 #include <netinet/udp.h>
349 #include <netinet/udp_var.h>
350 #include <kern/socket_flows.h>
351
352 #include <string.h>
353 #include <libkern/libkern.h>
354 #include <kern/sched_prim.h>
355 #include <kern/task.h>
356 #include <mach/task_info.h>
357
358 #include <net/sockaddr_utils.h>
359
360 #define MAX_CONTENT_FILTER 8
361
362 extern int tcp_msl;
363 extern struct inpcbinfo ripcbinfo;
364 struct cfil_entry;
365
366 /*
367 * The structure content_filter represents a user space content filter
368 * It's created and associated with a kernel control socket instance
369 */
370 struct content_filter {
371 kern_ctl_ref cf_kcref;
372 u_int32_t cf_kcunit;
373 u_int32_t cf_flags;
374
375 uint32_t cf_necp_control_unit;
376
377 uint32_t cf_sock_count;
378 TAILQ_HEAD(, cfil_entry) cf_sock_entries;
379
380 cfil_crypto_state_t cf_crypto_state;
381 };
382
383 #define CFF_ACTIVE 0x01
384 #define CFF_DETACHING 0x02
385 #define CFF_FLOW_CONTROLLED 0x04
386 #define CFF_PRESERVE_CONNECTIONS 0x08
387
388 struct content_filter *content_filters[MAX_CONTENT_FILTER];
389 uint32_t cfil_active_count = 0; /* Number of active content filters */
390 uint32_t cfil_sock_attached_count = 0; /* Number of sockets attachements */
391 uint32_t cfil_sock_attached_stats_count = 0; /* Number of sockets requested periodic stats report */
392 uint32_t cfil_close_wait_timeout = 1000; /* in milliseconds */
393
394 static kern_ctl_ref cfil_kctlref = NULL;
395
396 static LCK_GRP_DECLARE(cfil_lck_grp, "content filter");
397 static LCK_RW_DECLARE(cfil_lck_rw, &cfil_lck_grp);
398
399 #define CFIL_RW_LCK_MAX 8
400
401 int cfil_rw_nxt_lck = 0;
402 void* cfil_rw_lock_history[CFIL_RW_LCK_MAX];
403
404 int cfil_rw_nxt_unlck = 0;
405 void* cfil_rw_unlock_history[CFIL_RW_LCK_MAX];
406
407 static KALLOC_TYPE_DEFINE(content_filter_zone, struct content_filter, NET_KT_DEFAULT);
408
409 MBUFQ_HEAD(cfil_mqhead);
410
411 struct cfil_queue {
412 uint64_t q_start; /* offset of first byte in queue */
413 uint64_t q_end; /* offset of last byte in queue */
414 struct cfil_mqhead q_mq;
415 };
416
417 /*
418 * struct cfil_entry
419 *
420 * The is one entry per content filter
421 */
422 struct cfil_entry {
423 TAILQ_ENTRY(cfil_entry) cfe_link;
424 SLIST_ENTRY(cfil_entry) cfe_order_link;
425 struct content_filter *cfe_filter;
426
427 struct cfil_info *cfe_cfil_info;
428 uint32_t cfe_flags;
429 uint32_t cfe_necp_control_unit;
430 struct timeval cfe_last_event; /* To user space */
431 struct timeval cfe_last_action; /* From user space */
432 uint64_t cfe_byte_inbound_count_reported; /* stats already been reported */
433 uint64_t cfe_byte_outbound_count_reported; /* stats already been reported */
434 struct timeval cfe_stats_report_ts; /* Timestamp for last stats report */
435 uint32_t cfe_stats_report_frequency; /* Interval for stats report in msecs */
436 boolean_t cfe_laddr_sent;
437
438 struct cfe_buf {
439 /*
440 * cfe_pending_q holds data that has been delivered to
441 * the filter and for which we are waiting for an action
442 */
443 struct cfil_queue cfe_pending_q;
444 /*
445 * This queue is for data that has not be delivered to
446 * the content filter (new data, pass peek or flow control)
447 */
448 struct cfil_queue cfe_ctl_q;
449
450 uint64_t cfe_pass_offset;
451 uint64_t cfe_peek_offset;
452 uint64_t cfe_peeked;
453 } cfe_snd, cfe_rcv;
454 };
455
456 #define CFEF_CFIL_ATTACHED 0x0001 /* was attached to filter */
457 #define CFEF_SENT_SOCK_ATTACHED 0x0002 /* sock attach event was sent */
458 #define CFEF_DATA_START 0x0004 /* can send data event */
459 #define CFEF_FLOW_CONTROLLED 0x0008 /* wait for flow control lift */
460 #define CFEF_SENT_DISCONNECT_IN 0x0010 /* event was sent */
461 #define CFEF_SENT_DISCONNECT_OUT 0x0020 /* event was sent */
462 #define CFEF_SENT_SOCK_CLOSED 0x0040 /* closed event was sent */
463 #define CFEF_CFIL_DETACHED 0x0080 /* filter was detached */
464
465
466 #define CFI_ADD_TIME_LOG(cfil, t1, t0, op) \
467 struct timeval64 _tdiff; \
468 size_t offset = (cfil)->cfi_op_list_ctr; \
469 if (offset < CFI_MAX_TIME_LOG_ENTRY) { \
470 timersub(t1, t0, &_tdiff); \
471 (cfil)->cfi_op_time[offset] = (uint32_t)(_tdiff.tv_sec * 1000 + _tdiff.tv_usec / 1000); \
472 (cfil)->cfi_op_list[offset] = (unsigned char)op; \
473 (cfil)->cfi_op_list_ctr ++; \
474 }
475
476 /*
477 * struct cfil_info
478 *
479 * There is a struct cfil_info per socket
480 */
481 struct cfil_info {
482 TAILQ_ENTRY(cfil_info) cfi_link;
483 TAILQ_ENTRY(cfil_info) cfi_link_stats;
484 struct socket *cfi_so;
485 uint64_t cfi_flags;
486 uint64_t cfi_sock_id;
487 struct timeval64 cfi_first_event;
488 uint32_t cfi_op_list_ctr;
489 uint32_t cfi_op_time[CFI_MAX_TIME_LOG_ENTRY]; /* time interval in microseconds since first event */
490 unsigned char cfi_op_list[CFI_MAX_TIME_LOG_ENTRY];
491 union sockaddr_in_4_6 cfi_so_attach_faddr; /* faddr at the time of attach */
492 union sockaddr_in_4_6 cfi_so_attach_laddr; /* laddr at the time of attach */
493
494 int cfi_dir;
495 uint64_t cfi_byte_inbound_count;
496 uint64_t cfi_byte_outbound_count;
497
498 boolean_t cfi_isSignatureLatest; /* Indicates if signature covers latest flow attributes */
499 u_int32_t cfi_filter_control_unit;
500 u_int32_t cfi_debug;
501 struct cfi_buf {
502 /*
503 * cfi_pending_first and cfi_pending_last describe the total
504 * amount of data outstanding for all the filters on
505 * this socket and data in the flow queue
506 * cfi_pending_mbcnt counts in sballoc() "chars of mbufs used"
507 */
508 uint64_t cfi_pending_first;
509 uint64_t cfi_pending_last;
510 uint32_t cfi_pending_mbcnt;
511 uint32_t cfi_pending_mbnum;
512 uint32_t cfi_tail_drop_cnt;
513 /*
514 * cfi_pass_offset is the minimum of all the filters
515 */
516 uint64_t cfi_pass_offset;
517 /*
518 * cfi_inject_q holds data that needs to be re-injected
519 * into the socket after filtering and that can
520 * be queued because of flow control
521 */
522 struct cfil_queue cfi_inject_q;
523 } cfi_snd, cfi_rcv;
524
525 struct cfil_entry cfi_entries[MAX_CONTENT_FILTER];
526 struct soflow_hash_entry *cfi_hash_entry;
527 SLIST_HEAD(, cfil_entry) cfi_ordered_entries;
528 os_refcnt_t cfi_ref_count;
529 } __attribute__((aligned(8)));
530
531 #define CFIF_DROP 0x0001 /* drop action applied */
532 #define CFIF_CLOSE_WAIT 0x0002 /* waiting for filter to close */
533 #define CFIF_SOCK_CLOSED 0x0004 /* socket is closed */
534 #define CFIF_RETRY_INJECT_IN 0x0010 /* inject in failed */
535 #define CFIF_RETRY_INJECT_OUT 0x0020 /* inject out failed */
536 #define CFIF_SHUT_WR 0x0040 /* shutdown write */
537 #define CFIF_SHUT_RD 0x0080 /* shutdown read */
538 #define CFIF_SOCKET_CONNECTED 0x0100 /* socket is connected */
539 #define CFIF_INITIAL_VERDICT 0x0200 /* received initial verdict */
540 #define CFIF_NO_CLOSE_WAIT 0x0400 /* do not wait to close */
541 #define CFIF_SO_DELAYED_DEAD 0x0800 /* Delayed socket DEAD marking */
542 #define CFIF_SO_DELAYED_TCP_TIME_WAIT 0x1000 /* Delayed TCP FIN TIME WAIT */
543
544 #define CFI_MASK_GENCNT 0xFFFFFFFF00000000 /* upper 32 bits */
545 #define CFI_SHIFT_GENCNT 32
546 #define CFI_MASK_FLOWHASH 0x00000000FFFFFFFF /* lower 32 bits */
547 #define CFI_SHIFT_FLOWHASH 0
548
549 #define CFI_ENTRY_KCUNIT(i, e) ((uint32_t)(((e) - &((i)->cfi_entries[0])) + 1))
550
551 static KALLOC_TYPE_DEFINE(cfil_info_zone, struct cfil_info, NET_KT_DEFAULT);
552
553 TAILQ_HEAD(cfil_sock_head, cfil_info) cfil_sock_head;
554 TAILQ_HEAD(cfil_sock_head_stats, cfil_info) cfil_sock_head_stats;
555
556 #define CFIL_QUEUE_VERIFY(x) if (cfil_debug) cfil_queue_verify(x)
557 #define CFIL_INFO_VERIFY(x) if (cfil_debug) cfil_info_verify(x)
558
559 /*
560 * UDP Socket Support
561 */
562 #define IS_ICMP(so) (so && (SOCK_CHECK_TYPE(so, SOCK_RAW) || SOCK_CHECK_TYPE(so, SOCK_DGRAM)) && \
563 (SOCK_CHECK_PROTO(so, IPPROTO_ICMP) || SOCK_CHECK_PROTO(so, IPPROTO_ICMPV6)))
564 #define IS_RAW(so) (so && SOCK_CHECK_TYPE(so, SOCK_RAW) && SOCK_CHECK_PROTO(so, IPPROTO_RAW))
565
566 #define OPTIONAL_IP_HEADER(so) (!IS_TCP(so) && !IS_UDP(so))
567 #define GET_SO_PROTOCOL(so) (so ? SOCK_PROTO(so) : IPPROTO_IP)
568 #define GET_SO_INP_PROTOCOL(so) ((so && sotoinpcb(so)) ? sotoinpcb(so)->inp_ip_p : IPPROTO_IP)
569 #define GET_SO_PROTO(so) ((GET_SO_PROTOCOL(so) != IPPROTO_IP) ? GET_SO_PROTOCOL(so) : GET_SO_INP_PROTOCOL(so))
570 #define IS_INP_V6(inp) (inp && (inp->inp_vflag & INP_IPV6))
571
572 #define UNCONNECTED(inp) (inp && (((inp->inp_vflag & INP_IPV4) && (inp->inp_faddr.s_addr == INADDR_ANY)) || \
573 ((inp->inp_vflag & INP_IPV6) && IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr))))
574 #define IS_ENTRY_ATTACHED(cfil_info, kcunit) (cfil_info != NULL && (kcunit <= MAX_CONTENT_FILTER) && \
575 cfil_info->cfi_entries[kcunit - 1].cfe_filter != NULL)
576 #define IS_DNS(local, remote) (check_port(local, 53) || check_port(remote, 53) || check_port(local, 5353) || check_port(remote, 5353))
577 #define IS_INITIAL_TFO_DATA(so) (so && (so->so_flags1 & SOF1_PRECONNECT_DATA) && (so->so_state & SS_ISCONNECTING))
578 #define NULLADDRESS(addr) ((addr.sa.sa_len == 0) || \
579 (addr.sa.sa_family == AF_INET && addr.sin.sin_addr.s_addr == 0) || \
580 (addr.sa.sa_family == AF_INET6 && IN6_IS_ADDR_UNSPECIFIED(&addr.sin6.sin6_addr)))
581
582 #define SKIP_FILTER_FOR_TCP_SOCKET(so) \
583 (so == NULL || \
584 (!SOCK_CHECK_DOM(so, PF_INET) && !SOCK_CHECK_DOM(so, PF_INET6)) || \
585 !SOCK_CHECK_TYPE(so, SOCK_STREAM) || \
586 !SOCK_CHECK_PROTO(so, IPPROTO_TCP) || \
587 (so->so_flags & SOF_MP_SUBFLOW) != 0 || \
588 (so->so_flags1 & SOF1_CONTENT_FILTER_SKIP) != 0)
589
590 /*
591 * Special handling for 0.0.0.0-faddr TCP flows. This flows will be changed to loopback addr by TCP and
592 * may result in an immediate TCP RESET and socket close. This leads to CFIL blocking the owner thread for
593 * 1 sec waiting for ack from user-space provider (ack recevied by CFIL but socket already removed from
594 * global socket list). To avoid this, identify these flows and do not perform the close-wait blocking.
595 * These flows are identified as destined to Loopback address and were disconnected shortly after connect
596 * (before initial-verdict received).
597 */
598 #define IS_LOOPBACK_FADDR(inp) \
599 (inp && ((IS_INP_V6(inp) && IN6_IS_ADDR_LOOPBACK(&inp->in6p_faddr)) || (ntohl(inp->inp_faddr.s_addr) == INADDR_LOOPBACK)))
600
601 #define SET_NO_CLOSE_WAIT(inp, cfil_info) \
602 if (inp && cfil_info && !(cfil_info->cfi_flags & CFIF_INITIAL_VERDICT) && IS_LOOPBACK_FADDR(inp)) { \
603 cfil_info->cfi_flags |= CFIF_NO_CLOSE_WAIT; \
604 }
605
606 #define IS_NO_CLOSE_WAIT(cfil_info) (cfil_info && (cfil_info->cfi_flags & CFIF_NO_CLOSE_WAIT))
607
608 os_refgrp_decl(static, cfil_refgrp, "CFILRefGroup", NULL);
609
610 #define CFIL_INFO_FREE(cfil_info) \
611 if (cfil_info && (os_ref_release(&cfil_info->cfi_ref_count) == 0)) { \
612 cfil_info_free(cfil_info); \
613 }
614
615 #define SOCKET_PID(so) ((so->so_flags & SOF_DELEGATED) ? so->e_pid : so->last_pid)
616 #define MATCH_PID(so) (so && (cfil_log_pid == SOCKET_PID(so)))
617 #define MATCH_PORT(inp, local, remote) \
618 ((inp && ntohs(inp->inp_lport) == cfil_log_port) || (inp && ntohs(inp->inp_fport) == cfil_log_port) || \
619 check_port(local, cfil_log_port) || check_port(remote, cfil_log_port))
620 #define MATCH_PROTO(so) (GET_SO_PROTO(so) == cfil_log_proto)
621
622 #define DEBUG_FLOW(inp, so, local, remote) \
623 ((cfil_log_port && MATCH_PORT(inp, local, remote)) || (cfil_log_pid && MATCH_PID(so)) || (cfil_log_proto && MATCH_PROTO(so)))
624
625 #define SO_DELAYED_DEAD_SET(so, set) \
626 if (so->so_cfil) { \
627 if (set) { \
628 so->so_cfil->cfi_flags |= CFIF_SO_DELAYED_DEAD; \
629 } else { \
630 so->so_cfil->cfi_flags &= ~CFIF_SO_DELAYED_DEAD; \
631 } \
632 } else if (so->so_flow_db) { \
633 if (set) { \
634 so->so_flow_db->soflow_db_flags |= SOFLOWF_SO_DELAYED_DEAD; \
635 } else { \
636 so->so_flow_db->soflow_db_flags &= ~SOFLOWF_SO_DELAYED_DEAD; \
637 } \
638 }
639
640 #define SO_DELAYED_DEAD_GET(so) \
641 (so->so_cfil ? (so->so_cfil->cfi_flags & CFIF_SO_DELAYED_DEAD) : \
642 (so->so_flow_db) ? (so->so_flow_db->soflow_db_flags & SOFLOWF_SO_DELAYED_DEAD) : false)
643
644 #define SO_DELAYED_TCP_TIME_WAIT_SET(so, set) \
645 if (so->so_cfil) { \
646 if (set) { \
647 so->so_cfil->cfi_flags |= CFIF_SO_DELAYED_TCP_TIME_WAIT; \
648 } else { \
649 so->so_cfil->cfi_flags &= ~CFIF_SO_DELAYED_TCP_TIME_WAIT; \
650 } \
651 }
652
653 #define SO_DELAYED_TCP_TIME_WAIT_GET(so) \
654 (so->so_cfil ? (so->so_cfil->cfi_flags & CFIF_SO_DELAYED_TCP_TIME_WAIT) : false)
655
656 /*
657 * Periodic Statistics Report:
658 */
659 static struct thread *cfil_stats_report_thread;
660 #define CFIL_STATS_REPORT_INTERVAL_MIN_MSEC 500 // Highest report frequency
661 #define CFIL_STATS_REPORT_RUN_INTERVAL_NSEC (CFIL_STATS_REPORT_INTERVAL_MIN_MSEC * NSEC_PER_MSEC)
662 #define CFIL_STATS_REPORT_MAX_COUNT 50 // Max stats to be reported per run
663
664 /* This buffer must have same layout as struct cfil_msg_stats_report */
665 struct cfil_stats_report_buffer {
666 struct cfil_msg_hdr msghdr;
667 uint32_t count;
668 struct cfil_msg_sock_stats stats[CFIL_STATS_REPORT_MAX_COUNT];
669 };
670 static struct cfil_stats_report_buffer *global_cfil_stats_report_buffers[MAX_CONTENT_FILTER];
671 static uint32_t global_cfil_stats_counts[MAX_CONTENT_FILTER];
672
673 /*
674 * UDP Garbage Collection:
675 */
676 #define UDP_FLOW_GC_ACTION_TO 10 // Flow Action Timeout (no action from user space) in seconds
677 #define UDP_FLOW_GC_MAX_COUNT 100 // Max UDP flows to be handled per run
678
679 /*
680 * UDP flow queue thresholds
681 */
682 #define UDP_FLOW_GC_MBUF_CNT_MAX (2 << MBSHIFT) // Max mbuf byte count in flow queue (2MB)
683 #define UDP_FLOW_GC_MBUF_NUM_MAX (UDP_FLOW_GC_MBUF_CNT_MAX >> MCLSHIFT) // Max mbuf count in flow queue (1K)
684 #define UDP_FLOW_GC_MBUF_SHIFT 5 // Shift to get 1/32 of platform limits
685 /*
686 * UDP flow queue threshold globals:
687 */
688 static unsigned int cfil_udp_gc_mbuf_num_max = UDP_FLOW_GC_MBUF_NUM_MAX;
689 static unsigned int cfil_udp_gc_mbuf_cnt_max = UDP_FLOW_GC_MBUF_CNT_MAX;
690
691 /*
692 * CFIL specific mbuf tag:
693 * Save state of socket at the point of data entry into cfil.
694 * Use saved state for reinjection at protocol layer.
695 */
696 struct cfil_tag {
697 union sockaddr_in_4_6 cfil_faddr;
698 uint32_t cfil_so_state_change_cnt;
699 uint32_t cfil_so_options;
700 int cfil_inp_flags;
701 };
702
703 /*
704 * Global behavior flags:
705 */
706 #define CFIL_BEHAVIOR_FLAG_PRESERVE_CONNECTIONS 0x00000001
707 static uint32_t cfil_behavior_flags = 0;
708
709 #define DO_PRESERVE_CONNECTIONS (cfil_behavior_flags & CFIL_BEHAVIOR_FLAG_PRESERVE_CONNECTIONS)
710
711 /*
712 * Statistics
713 */
714
715 struct cfil_stats cfil_stats;
716
717 /*
718 * For troubleshooting
719 */
720 int cfil_log_level = LOG_ERR;
721 int cfil_log_port = 0;
722 int cfil_log_pid = 0;
723 int cfil_log_proto = 0;
724 int cfil_log_data = 0;
725 int cfil_log_stats = 0;
726 int cfil_debug = 1;
727
728 /*
729 * Sysctls for logs and statistics
730 */
731 static int sysctl_cfil_filter_list(struct sysctl_oid *, void *, int,
732 struct sysctl_req *);
733 static int sysctl_cfil_sock_list(struct sysctl_oid *, void *, int,
734 struct sysctl_req *);
735
736 SYSCTL_NODE(_net, OID_AUTO, cfil, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "cfil");
737
738 SYSCTL_INT(_net_cfil, OID_AUTO, log, CTLFLAG_RW | CTLFLAG_LOCKED,
739 &cfil_log_level, 0, "");
740
741 SYSCTL_INT(_net_cfil, OID_AUTO, log_port, CTLFLAG_RW | CTLFLAG_LOCKED,
742 &cfil_log_port, 0, "");
743
744 SYSCTL_INT(_net_cfil, OID_AUTO, log_pid, CTLFLAG_RW | CTLFLAG_LOCKED,
745 &cfil_log_pid, 0, "");
746
747 SYSCTL_INT(_net_cfil, OID_AUTO, log_proto, CTLFLAG_RW | CTLFLAG_LOCKED,
748 &cfil_log_proto, 0, "");
749
750 SYSCTL_INT(_net_cfil, OID_AUTO, log_data, CTLFLAG_RW | CTLFLAG_LOCKED,
751 &cfil_log_data, 0, "");
752
753 SYSCTL_INT(_net_cfil, OID_AUTO, log_stats, CTLFLAG_RW | CTLFLAG_LOCKED,
754 &cfil_log_stats, 0, "");
755
756 SYSCTL_INT(_net_cfil, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
757 &cfil_debug, 0, "");
758
759 SYSCTL_UINT(_net_cfil, OID_AUTO, sock_attached_count, CTLFLAG_RD | CTLFLAG_LOCKED,
760 &cfil_sock_attached_count, 0, "");
761
762 SYSCTL_UINT(_net_cfil, OID_AUTO, active_count, CTLFLAG_RD | CTLFLAG_LOCKED,
763 &cfil_active_count, 0, "");
764
765 SYSCTL_UINT(_net_cfil, OID_AUTO, close_wait_timeout, CTLFLAG_RW | CTLFLAG_LOCKED,
766 &cfil_close_wait_timeout, 0, "");
767
768 SYSCTL_UINT(_net_cfil, OID_AUTO, behavior_flags, CTLFLAG_RW | CTLFLAG_LOCKED,
769 &cfil_behavior_flags, 0, "");
770
771 static int cfil_sbtrim = 1;
772 SYSCTL_UINT(_net_cfil, OID_AUTO, sbtrim, CTLFLAG_RW | CTLFLAG_LOCKED,
773 &cfil_sbtrim, 0, "");
774
775 SYSCTL_PROC(_net_cfil, OID_AUTO, filter_list, CTLFLAG_RD | CTLFLAG_LOCKED,
776 0, 0, sysctl_cfil_filter_list, "S,cfil_filter_stat", "");
777
778 SYSCTL_PROC(_net_cfil, OID_AUTO, sock_list, CTLFLAG_RD | CTLFLAG_LOCKED,
779 0, 0, sysctl_cfil_sock_list, "S,cfil_sock_stat", "");
780
781 SYSCTL_STRUCT(_net_cfil, OID_AUTO, stats, CTLFLAG_RD | CTLFLAG_LOCKED,
782 &cfil_stats, cfil_stats, "");
783
784 /*
785 * Forward declaration to appease the compiler
786 */
787 static int cfil_action_data_pass(struct socket *, struct cfil_info *, uint32_t, int,
788 uint64_t, uint64_t);
789 static int cfil_action_drop(struct socket *, struct cfil_info *, uint32_t);
790 static int cfil_action_bless_client(uint32_t, struct cfil_msg_hdr *);
791 static int cfil_action_set_crypto_key(uint32_t, struct cfil_msg_hdr *);
792 static int cfil_dispatch_closed_event(struct socket *, struct cfil_info *, int);
793 static int cfil_data_common(struct socket *, struct cfil_info *, int, struct sockaddr *,
794 struct mbuf *, struct mbuf *, uint32_t);
795 static int cfil_data_filter(struct socket *, struct cfil_info *, uint32_t, int,
796 struct mbuf *, uint32_t);
797 static void fill_ip_sockaddr_4_6(union sockaddr_in_4_6 *,
798 struct in_addr, u_int16_t);
799 static void fill_ip6_sockaddr_4_6(union sockaddr_in_4_6 *,
800 struct in6_addr *, u_int16_t, uint32_t);
801
802 static int cfil_dispatch_attach_event(struct socket *, struct cfil_info *, uint32_t, int);
803 static void cfil_info_free(struct cfil_info *);
804 static struct cfil_info * cfil_info_alloc(struct socket *, struct soflow_hash_entry *);
805 static int cfil_info_attach_unit(struct socket *, uint32_t, struct cfil_info *);
806 static struct socket * cfil_socket_from_sock_id(cfil_sock_id_t, bool);
807 static struct socket * cfil_socket_from_client_uuid(uuid_t, bool *);
808 static int cfil_service_pending_queue(struct socket *, struct cfil_info *, uint32_t, int);
809 static int cfil_data_service_ctl_q(struct socket *, struct cfil_info *, uint32_t, int);
810 static void cfil_info_verify(struct cfil_info *);
811 static int cfil_update_data_offsets(struct socket *, struct cfil_info *, uint32_t, int,
812 uint64_t, uint64_t);
813 static int cfil_acquire_sockbuf(struct socket *, struct cfil_info *, int);
814 static void cfil_release_sockbuf(struct socket *, int);
815 static int cfil_filters_attached(struct socket *);
816
817 static void cfil_rw_lock_exclusive(lck_rw_t *);
818 static void cfil_rw_unlock_exclusive(lck_rw_t *);
819 static void cfil_rw_lock_shared(lck_rw_t *);
820 static void cfil_rw_unlock_shared(lck_rw_t *);
821 static boolean_t cfil_rw_lock_shared_to_exclusive(lck_rw_t *);
822 static void cfil_rw_lock_exclusive_to_shared(lck_rw_t *);
823
824 static unsigned int cfil_data_length(struct mbuf *, int *, int *);
825 static struct cfil_info *cfil_sock_udp_get_info(struct socket *, uint32_t, bool, struct soflow_hash_entry *, struct sockaddr *, struct sockaddr *);
826 static errno_t cfil_sock_udp_handle_data(bool, struct socket *, struct sockaddr *, struct sockaddr *,
827 struct mbuf *, struct mbuf *, uint32_t, struct soflow_hash_entry *);
828 static int32_t cfil_sock_udp_data_pending(struct sockbuf *, bool);
829 static void cfil_sock_udp_is_closed(struct socket *);
830 static int cfil_sock_udp_notify_shutdown(struct socket *, int, int, int);
831 static int cfil_sock_udp_shutdown(struct socket *, int *);
832 static void cfil_sock_udp_close_wait(struct socket *);
833 static void cfil_sock_udp_buf_update(struct sockbuf *);
834 static int cfil_filters_udp_attached(struct socket *, bool);
835 static void cfil_get_flow_address_v6(struct soflow_hash_entry *, struct inpcb *,
836 struct in6_addr **, struct in6_addr **,
837 u_int16_t *, u_int16_t *);
838 static void cfil_get_flow_address(struct soflow_hash_entry *, struct inpcb *,
839 struct in_addr *, struct in_addr *,
840 u_int16_t *, u_int16_t *);
841 static void cfil_info_log(int, struct cfil_info *, const char *);
842 void cfil_filter_show(u_int32_t);
843 void cfil_info_show(void);
844 bool cfil_info_action_timed_out(struct cfil_info *, int);
845 bool cfil_info_buffer_threshold_exceeded(struct cfil_info *);
846 struct m_tag *cfil_dgram_save_socket_state(struct cfil_info *, struct mbuf *);
847 boolean_t cfil_dgram_peek_socket_state(struct mbuf *m, int *inp_flags);
848 static void cfil_sock_received_verdict(struct socket *so);
849 static void cfil_fill_event_msg_addresses(struct soflow_hash_entry *, struct inpcb *,
850 union sockaddr_in_4_6 *, union sockaddr_in_4_6 *,
851 boolean_t, boolean_t);
852 static void cfil_stats_report_thread_func(void *, wait_result_t);
853 static void cfil_stats_report(void *v, wait_result_t w);
854 static bool cfil_dgram_gc_needed(struct socket *, struct soflow_hash_entry *, u_int64_t);
855 static bool cfil_dgram_gc_perform(struct socket *, struct soflow_hash_entry *);
856 static bool cfil_dgram_detach_entry(struct socket *, struct soflow_hash_entry *);
857 static bool cfil_dgram_detach_db(struct socket *, struct soflow_db *);
858 bool check_port(struct sockaddr *, u_short);
859
860 /*
861 * Content filter global read write lock
862 */
863
864 static void
cfil_rw_lock_exclusive(lck_rw_t * lck)865 cfil_rw_lock_exclusive(lck_rw_t *lck)
866 {
867 void * __single lr_saved;
868
869 lr_saved = __unsafe_forge_single(void *, __builtin_return_address(0));
870
871 lck_rw_lock_exclusive(lck);
872
873 cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
874 cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
875 }
876
877 static void
cfil_rw_unlock_exclusive(lck_rw_t * lck)878 cfil_rw_unlock_exclusive(lck_rw_t *lck)
879 {
880 void * __single lr_saved;
881
882 lr_saved = __unsafe_forge_single(void *, __builtin_return_address(0));
883
884 lck_rw_unlock_exclusive(lck);
885
886 cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
887 cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
888 }
889
890 static void
cfil_rw_lock_shared(lck_rw_t * lck)891 cfil_rw_lock_shared(lck_rw_t *lck)
892 {
893 void * __single lr_saved;
894
895 lr_saved = __unsafe_forge_single(void *, __builtin_return_address(0));
896
897 lck_rw_lock_shared(lck);
898
899 cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
900 cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
901 }
902
903 static void
cfil_rw_unlock_shared(lck_rw_t * lck)904 cfil_rw_unlock_shared(lck_rw_t *lck)
905 {
906 void * __single lr_saved;
907
908 lr_saved = __unsafe_forge_single(void *, __builtin_return_address(0));
909
910 lck_rw_unlock_shared(lck);
911
912 cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
913 cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
914 }
915
916 static boolean_t
cfil_rw_lock_shared_to_exclusive(lck_rw_t * lck)917 cfil_rw_lock_shared_to_exclusive(lck_rw_t *lck)
918 {
919 boolean_t upgraded;
920 void * __single lr_saved;
921
922 lr_saved = __unsafe_forge_single(void *, __builtin_return_address(0));
923
924 upgraded = lck_rw_lock_shared_to_exclusive(lck);
925 if (upgraded) {
926 cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
927 cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
928 }
929 return upgraded;
930 }
931
932 static void
cfil_rw_lock_exclusive_to_shared(lck_rw_t * lck)933 cfil_rw_lock_exclusive_to_shared(lck_rw_t *lck)
934 {
935 void * __single lr_saved;
936
937 lr_saved = __unsafe_forge_single(void *, __builtin_return_address(0));
938
939 lck_rw_lock_exclusive_to_shared(lck);
940
941 cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
942 cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
943 }
944
945 static void
cfil_rw_lock_assert_held(lck_rw_t * lck,int exclusive)946 cfil_rw_lock_assert_held(lck_rw_t *lck, int exclusive)
947 {
948 #if !MACH_ASSERT
949 #pragma unused(lck, exclusive)
950 #endif
951 LCK_RW_ASSERT(lck,
952 exclusive ? LCK_RW_ASSERT_EXCLUSIVE : LCK_RW_ASSERT_HELD);
953 }
954
955 /*
956 * Return the number of bytes in the mbuf chain using the same
957 * method as m_length() or sballoc()
958 *
959 * Returns data len - starting from PKT start
960 * - retmbcnt - optional param to get total mbuf bytes in chain
961 * - retmbnum - optional param to get number of mbufs in chain
962 */
963 static unsigned int
cfil_data_length(struct mbuf * m,int * retmbcnt,int * retmbnum)964 cfil_data_length(struct mbuf *m, int *retmbcnt, int *retmbnum)
965 {
966 struct mbuf *m0;
967 unsigned int pktlen = 0;
968 int mbcnt;
969 int mbnum;
970
971 // Locate M_PKTHDR and mark as start of data if present
972 for (m0 = m; m0 != NULL; m0 = m0->m_next) {
973 if (m0->m_flags & M_PKTHDR) {
974 m = m0;
975 break;
976 }
977 }
978
979 if (retmbcnt == NULL && retmbnum == NULL) {
980 return m_length(m);
981 }
982
983 pktlen = 0;
984 mbcnt = 0;
985 mbnum = 0;
986 for (m0 = m; m0 != NULL; m0 = m0->m_next) {
987 pktlen += m0->m_len;
988 mbnum++;
989 mbcnt += _MSIZE;
990 if (m0->m_flags & M_EXT) {
991 mbcnt += m0->m_ext.ext_size;
992 }
993 }
994 if (retmbcnt) {
995 *retmbcnt = mbcnt;
996 }
997 if (retmbnum) {
998 *retmbnum = mbnum;
999 }
1000 return pktlen;
1001 }
1002
1003 static struct mbuf *
cfil_data_start(struct mbuf * m)1004 cfil_data_start(struct mbuf *m)
1005 {
1006 struct mbuf *m0;
1007
1008 // Locate M_PKTHDR and use it as start of data if present
1009 for (m0 = m; m0 != NULL; m0 = m0->m_next) {
1010 if (m0->m_flags & M_PKTHDR) {
1011 return m0;
1012 }
1013 }
1014 return m;
1015 }
1016
1017 /*
1018 * Common mbuf queue utilities
1019 */
1020
1021 static inline void
cfil_queue_init(struct cfil_queue * cfq)1022 cfil_queue_init(struct cfil_queue *cfq)
1023 {
1024 cfq->q_start = 0;
1025 cfq->q_end = 0;
1026 MBUFQ_INIT(&cfq->q_mq);
1027 }
1028
1029 static inline uint64_t
cfil_queue_drain(struct cfil_queue * cfq)1030 cfil_queue_drain(struct cfil_queue *cfq)
1031 {
1032 uint64_t drained = cfq->q_start - cfq->q_end;
1033 cfq->q_start = 0;
1034 cfq->q_end = 0;
1035 MBUFQ_DRAIN(&cfq->q_mq);
1036
1037 return drained;
1038 }
1039
1040 /* Return 1 when empty, 0 otherwise */
1041 static inline int
cfil_queue_empty(struct cfil_queue * cfq)1042 cfil_queue_empty(struct cfil_queue *cfq)
1043 {
1044 return MBUFQ_EMPTY(&cfq->q_mq);
1045 }
1046
1047 static inline uint64_t
cfil_queue_offset_first(struct cfil_queue * cfq)1048 cfil_queue_offset_first(struct cfil_queue *cfq)
1049 {
1050 return cfq->q_start;
1051 }
1052
1053 static inline uint64_t
cfil_queue_offset_last(struct cfil_queue * cfq)1054 cfil_queue_offset_last(struct cfil_queue *cfq)
1055 {
1056 return cfq->q_end;
1057 }
1058
1059 static inline uint64_t
cfil_queue_len(struct cfil_queue * cfq)1060 cfil_queue_len(struct cfil_queue *cfq)
1061 {
1062 return cfq->q_end - cfq->q_start;
1063 }
1064
1065 /*
1066 * Routines to verify some fundamental assumptions
1067 */
1068
1069 static void
cfil_queue_verify(struct cfil_queue * cfq)1070 cfil_queue_verify(struct cfil_queue *cfq)
1071 {
1072 mbuf_t chain;
1073 mbuf_t m;
1074 mbuf_t n;
1075 uint64_t queuesize = 0;
1076
1077 /* Verify offset are ordered */
1078 VERIFY(cfq->q_start <= cfq->q_end);
1079
1080 /*
1081 * When queue is empty, the offsets are equal otherwise the offsets
1082 * are different
1083 */
1084 VERIFY((MBUFQ_EMPTY(&cfq->q_mq) && cfq->q_start == cfq->q_end) ||
1085 (!MBUFQ_EMPTY(&cfq->q_mq) &&
1086 cfq->q_start != cfq->q_end));
1087
1088 MBUFQ_FOREACH(chain, &cfq->q_mq) {
1089 size_t chainsize = 0;
1090 m = chain;
1091 unsigned int mlen = cfil_data_length(m, NULL, NULL);
1092 // skip the addr and control stuff if present
1093 m = cfil_data_start(m);
1094
1095 if (m == NULL ||
1096 m == (void *)M_TAG_FREE_PATTERN ||
1097 m->m_next == (void *)M_TAG_FREE_PATTERN ||
1098 m->m_nextpkt == (void *)M_TAG_FREE_PATTERN) {
1099 panic("%s - mq %p is free at %p", __func__,
1100 &cfq->q_mq, m);
1101 }
1102 for (n = m; n != NULL; n = n->m_next) {
1103 if (!m_has_mtype(n, MTF_DATA | MTF_HEADER | MTF_OOBDATA)) {
1104 panic("%s - %p unsupported type %u", __func__,
1105 n, n->m_type);
1106 }
1107 chainsize += n->m_len;
1108 }
1109 if (mlen != chainsize) {
1110 panic("%s - %p m_length() %u != chainsize %lu",
1111 __func__, m, mlen, chainsize);
1112 }
1113 queuesize += chainsize;
1114 }
1115 OS_ANALYZER_SUPPRESS("81031590") if (queuesize != cfq->q_end - cfq->q_start) {
1116 panic("%s - %p queuesize %llu != offsetdiffs %llu", __func__,
1117 m, queuesize, cfq->q_end - cfq->q_start);
1118 }
1119 }
1120
1121 static void
cfil_queue_enqueue(struct cfil_queue * cfq,mbuf_t m,size_t len)1122 cfil_queue_enqueue(struct cfil_queue *cfq, mbuf_t m, size_t len)
1123 {
1124 CFIL_QUEUE_VERIFY(cfq);
1125
1126 MBUFQ_ENQUEUE(&cfq->q_mq, m);
1127 cfq->q_end += len;
1128
1129 CFIL_QUEUE_VERIFY(cfq);
1130 }
1131
1132 static void
cfil_queue_remove(struct cfil_queue * cfq,mbuf_t m,size_t len)1133 cfil_queue_remove(struct cfil_queue *cfq, mbuf_t m, size_t len)
1134 {
1135 CFIL_QUEUE_VERIFY(cfq);
1136
1137 VERIFY(cfil_data_length(m, NULL, NULL) == len);
1138
1139 MBUFQ_REMOVE(&cfq->q_mq, m);
1140 MBUFQ_NEXT(m) = NULL;
1141 cfq->q_start += len;
1142
1143 CFIL_QUEUE_VERIFY(cfq);
1144 }
1145
1146 static mbuf_t
cfil_queue_first(struct cfil_queue * cfq)1147 cfil_queue_first(struct cfil_queue *cfq)
1148 {
1149 return MBUFQ_FIRST(&cfq->q_mq);
1150 }
1151
1152 static mbuf_t
cfil_queue_next(struct cfil_queue * cfq,mbuf_t m)1153 cfil_queue_next(struct cfil_queue *cfq, mbuf_t m)
1154 {
1155 #pragma unused(cfq)
1156 return MBUFQ_NEXT(m);
1157 }
1158
1159 static void
cfil_entry_buf_verify(struct cfe_buf * cfe_buf)1160 cfil_entry_buf_verify(struct cfe_buf *cfe_buf)
1161 {
1162 CFIL_QUEUE_VERIFY(&cfe_buf->cfe_ctl_q);
1163 CFIL_QUEUE_VERIFY(&cfe_buf->cfe_pending_q);
1164
1165 /* Verify the queues are ordered so that pending is before ctl */
1166 VERIFY(cfe_buf->cfe_ctl_q.q_start >= cfe_buf->cfe_pending_q.q_end);
1167
1168 /* The peek offset cannot be less than the pass offset */
1169 VERIFY(cfe_buf->cfe_peek_offset >= cfe_buf->cfe_pass_offset);
1170
1171 /* Make sure we've updated the offset we peeked at */
1172 VERIFY(cfe_buf->cfe_ctl_q.q_start <= cfe_buf->cfe_peeked);
1173 }
1174
1175 static void
cfil_entry_verify(struct cfil_entry * entry)1176 cfil_entry_verify(struct cfil_entry *entry)
1177 {
1178 cfil_entry_buf_verify(&entry->cfe_snd);
1179 cfil_entry_buf_verify(&entry->cfe_rcv);
1180 }
1181
1182 static void
cfil_info_buf_verify(struct cfi_buf * cfi_buf)1183 cfil_info_buf_verify(struct cfi_buf *cfi_buf)
1184 {
1185 CFIL_QUEUE_VERIFY(&cfi_buf->cfi_inject_q);
1186
1187 VERIFY(cfi_buf->cfi_pending_first <= cfi_buf->cfi_pending_last);
1188 }
1189
1190 static void
cfil_info_verify(struct cfil_info * cfil_info)1191 cfil_info_verify(struct cfil_info *cfil_info)
1192 {
1193 int i;
1194
1195 if (cfil_info == NULL) {
1196 return;
1197 }
1198
1199 cfil_info_buf_verify(&cfil_info->cfi_snd);
1200 cfil_info_buf_verify(&cfil_info->cfi_rcv);
1201
1202 for (i = 0; i < MAX_CONTENT_FILTER; i++) {
1203 cfil_entry_verify(&cfil_info->cfi_entries[i]);
1204 }
1205 }
1206
1207 static void
verify_content_filter(struct content_filter * cfc)1208 verify_content_filter(struct content_filter *cfc)
1209 {
1210 struct cfil_entry *entry;
1211 uint32_t count = 0;
1212
1213 VERIFY(cfc->cf_sock_count >= 0);
1214
1215 TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) {
1216 count++;
1217 VERIFY(cfc == entry->cfe_filter);
1218 }
1219 VERIFY(count == cfc->cf_sock_count);
1220 }
1221
1222 /*
1223 * Kernel control socket callbacks
1224 */
1225 static errno_t
cfil_ctl_connect(kern_ctl_ref kctlref,struct sockaddr_ctl * sac,void ** unitinfo)1226 cfil_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
1227 void **unitinfo)
1228 {
1229 errno_t error = 0;
1230 struct content_filter * __single cfc = NULL;
1231
1232 CFIL_LOG(LOG_NOTICE, "");
1233
1234 cfc = zalloc_flags(content_filter_zone, Z_WAITOK | Z_ZERO | Z_NOFAIL);
1235
1236 cfil_rw_lock_exclusive(&cfil_lck_rw);
1237
1238 if (sac->sc_unit == 0 || sac->sc_unit > MAX_CONTENT_FILTER) {
1239 CFIL_LOG(LOG_ERR, "bad sc_unit %u", sac->sc_unit);
1240 error = EINVAL;
1241 } else if (content_filters[sac->sc_unit - 1] != NULL) {
1242 CFIL_LOG(LOG_ERR, "sc_unit %u in use", sac->sc_unit);
1243 error = EADDRINUSE;
1244 } else {
1245 /*
1246 * kernel control socket kcunit numbers start at 1
1247 */
1248 content_filters[sac->sc_unit - 1] = cfc;
1249
1250 cfc->cf_kcref = kctlref;
1251 cfc->cf_kcunit = sac->sc_unit;
1252 TAILQ_INIT(&cfc->cf_sock_entries);
1253
1254 *unitinfo = cfc;
1255 cfil_active_count++;
1256
1257 if (cfil_active_count == 1) {
1258 soflow_feat_set_functions(cfil_dgram_gc_needed, cfil_dgram_gc_perform,
1259 cfil_dgram_detach_entry, cfil_dgram_detach_db);
1260 }
1261
1262 // Allocate periodic stats buffer for this filter
1263 if (global_cfil_stats_report_buffers[cfc->cf_kcunit - 1] == NULL) {
1264 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1265
1266 struct cfil_stats_report_buffer * __single buf;
1267
1268 buf = kalloc_type(struct cfil_stats_report_buffer,
1269 Z_WAITOK | Z_ZERO | Z_NOFAIL);
1270
1271 cfil_rw_lock_exclusive(&cfil_lck_rw);
1272
1273 /* Another thread may have won the race */
1274 if (global_cfil_stats_report_buffers[cfc->cf_kcunit - 1] != NULL) {
1275 kfree_type(struct cfil_stats_report_buffer, buf);
1276 } else {
1277 global_cfil_stats_report_buffers[cfc->cf_kcunit - 1] = buf;
1278 }
1279 }
1280 }
1281 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1282
1283 if (error != 0 && cfc != NULL) {
1284 zfree(content_filter_zone, cfc);
1285 }
1286
1287 if (error == 0) {
1288 OSIncrementAtomic(&cfil_stats.cfs_ctl_connect_ok);
1289 } else {
1290 OSIncrementAtomic(&cfil_stats.cfs_ctl_connect_fail);
1291 }
1292
1293 CFIL_LOG(LOG_INFO, "return %d cfil_active_count %u kcunit %u",
1294 error, cfil_active_count, sac->sc_unit);
1295
1296 return error;
1297 }
1298
1299 static void
cfil_update_behavior_flags(void)1300 cfil_update_behavior_flags(void)
1301 {
1302 struct content_filter *cfc = NULL;
1303
1304 // Update global flag
1305 bool preserve_connections = false;
1306 for (int i = 0; i < MAX_CONTENT_FILTER; i++) {
1307 cfc = content_filters[i];
1308 if (cfc != NULL) {
1309 if (cfc->cf_flags & CFF_PRESERVE_CONNECTIONS) {
1310 preserve_connections = true;
1311 } else {
1312 preserve_connections = false;
1313 break;
1314 }
1315 }
1316 }
1317 if (preserve_connections == true) {
1318 cfil_behavior_flags |= CFIL_BEHAVIOR_FLAG_PRESERVE_CONNECTIONS;
1319 } else {
1320 cfil_behavior_flags &= ~CFIL_BEHAVIOR_FLAG_PRESERVE_CONNECTIONS;
1321 }
1322 CFIL_LOG(LOG_INFO, "CFIL Preserve Connections - %s",
1323 (cfil_behavior_flags & CFIL_BEHAVIOR_FLAG_PRESERVE_CONNECTIONS) ? "On" : "Off");
1324 }
1325
1326 static errno_t
cfil_ctl_disconnect(kern_ctl_ref kctlref,u_int32_t kcunit,void * unitinfo)1327 cfil_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo)
1328 {
1329 #pragma unused(kctlref)
1330 errno_t error = 0;
1331 struct content_filter * __single cfc;
1332 struct cfil_entry *entry;
1333 uint64_t sock_flow_id = 0;
1334
1335 CFIL_LOG(LOG_NOTICE, "");
1336
1337 if (kcunit > MAX_CONTENT_FILTER) {
1338 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1339 kcunit, MAX_CONTENT_FILTER);
1340 error = EINVAL;
1341 goto done;
1342 }
1343
1344 cfc = (struct content_filter *)unitinfo;
1345 if (cfc == NULL) {
1346 goto done;
1347 }
1348
1349 cfil_rw_lock_exclusive(&cfil_lck_rw);
1350 if (content_filters[kcunit - 1] != cfc || cfc->cf_kcunit != kcunit) {
1351 CFIL_LOG(LOG_ERR, "bad unit info %u)",
1352 kcunit);
1353 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1354 goto done;
1355 }
1356 cfc->cf_flags |= CFF_DETACHING;
1357 /*
1358 * Remove all sockets from the filter
1359 */
1360 while ((entry = TAILQ_FIRST(&cfc->cf_sock_entries)) != NULL) {
1361 cfil_rw_lock_assert_held(&cfil_lck_rw, 1);
1362
1363 verify_content_filter(cfc);
1364 /*
1365 * Accept all outstanding data by pushing to next filter
1366 * or back to socket
1367 *
1368 * TBD: Actually we should make sure all data has been pushed
1369 * back to socket
1370 */
1371 if (entry->cfe_cfil_info && entry->cfe_cfil_info->cfi_so) {
1372 struct cfil_info *cfil_info = entry->cfe_cfil_info;
1373 struct socket *so = cfil_info->cfi_so;
1374 sock_flow_id = cfil_info->cfi_sock_id;
1375
1376 /* Need to let data flow immediately */
1377 entry->cfe_flags |= CFEF_SENT_SOCK_ATTACHED |
1378 CFEF_DATA_START;
1379
1380 // Before we release global lock, retain the cfil_info -
1381 // We attempt to retain a valid cfil_info to prevent any deallocation until
1382 // we are done. Abort retain if cfil_info has already entered the free code path.
1383 if (cfil_info == NULL || os_ref_retain_try(&cfil_info->cfi_ref_count) == false) {
1384 // Failing to retain cfil_info means detach is in progress already,
1385 // remove entry from filter list and move on.
1386 entry->cfe_filter = NULL;
1387 entry->cfe_necp_control_unit = 0;
1388 TAILQ_REMOVE(&cfc->cf_sock_entries, entry, cfe_link);
1389 cfc->cf_sock_count--;
1390 continue;
1391 }
1392
1393 /*
1394 * Respect locking hierarchy
1395 */
1396 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1397
1398 // Search for socket from cfil_info sock_flow_id and lock so
1399 so = cfil_socket_from_sock_id(sock_flow_id, false);
1400 if (so == NULL || so != cfil_info->cfi_so) {
1401 cfil_rw_lock_exclusive(&cfil_lck_rw);
1402
1403 // Socket has already been disconnected and removed from socket list.
1404 // Remove entry from filter list and move on.
1405 if (entry == TAILQ_FIRST(&cfc->cf_sock_entries)) {
1406 entry->cfe_filter = NULL;
1407 entry->cfe_necp_control_unit = 0;
1408 TAILQ_REMOVE(&cfc->cf_sock_entries, entry, cfe_link);
1409 cfc->cf_sock_count--;
1410 }
1411
1412 goto release_cfil_info;
1413 }
1414
1415 /*
1416 * When cfe_filter is NULL the filter is detached
1417 * and the entry has been removed from cf_sock_entries
1418 */
1419 if ((so->so_cfil == NULL && so->so_flow_db == NULL) || entry->cfe_filter == NULL) {
1420 cfil_rw_lock_exclusive(&cfil_lck_rw);
1421 goto release;
1422 }
1423
1424 (void) cfil_action_data_pass(so, cfil_info, kcunit, 1,
1425 CFM_MAX_OFFSET,
1426 CFM_MAX_OFFSET);
1427
1428 (void) cfil_action_data_pass(so, cfil_info, kcunit, 0,
1429 CFM_MAX_OFFSET,
1430 CFM_MAX_OFFSET);
1431
1432 cfil_rw_lock_exclusive(&cfil_lck_rw);
1433
1434 /*
1435 * Check again to make sure if the cfil_info is still valid
1436 * as the socket may have been unlocked when when calling
1437 * cfil_acquire_sockbuf()
1438 */
1439 if (entry->cfe_filter == NULL ||
1440 (so->so_cfil == NULL && soflow_db_get_feature_context(so->so_flow_db, sock_flow_id) == NULL)) {
1441 goto release;
1442 }
1443
1444 /* The filter is now detached */
1445 entry->cfe_flags |= CFEF_CFIL_DETACHED;
1446
1447 if (cfil_info->cfi_debug) {
1448 cfil_info_log(LOG_ERR, cfil_info, "CFIL: FILTER DISCONNECTED");
1449 }
1450
1451 CFIL_LOG(LOG_NOTICE, "so %llx detached %u",
1452 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
1453 if ((cfil_info->cfi_flags & CFIF_CLOSE_WAIT) &&
1454 cfil_filters_attached(so) == 0) {
1455 CFIL_LOG(LOG_NOTICE, "so %llx waking",
1456 (uint64_t)VM_KERNEL_ADDRPERM(so));
1457 wakeup((caddr_t)cfil_info);
1458 }
1459
1460 /*
1461 * Remove the filter entry from the content filter
1462 * but leave the rest of the state intact as the queues
1463 * may not be empty yet
1464 */
1465 entry->cfe_filter = NULL;
1466 entry->cfe_necp_control_unit = 0;
1467
1468 TAILQ_REMOVE(&cfc->cf_sock_entries, entry, cfe_link);
1469 cfc->cf_sock_count--;
1470
1471 // This is the last filter disconnecting, clear the cfil_info
1472 // saved control unit so we will be able to drop this flow if
1473 // a new filter get installed.
1474 if (cfil_active_count == 1) {
1475 cfil_info->cfi_filter_control_unit = 0;
1476 }
1477 release:
1478 socket_unlock(so, 1);
1479
1480 release_cfil_info:
1481 /*
1482 * Release reference on cfil_info. To avoid double locking,
1483 * temporarily unlock in case it has been detached and we
1484 * end up freeing it which will take the global lock again.
1485 */
1486 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1487 CFIL_INFO_FREE(cfil_info);
1488 cfil_rw_lock_exclusive(&cfil_lck_rw);
1489 }
1490 }
1491 verify_content_filter(cfc);
1492
1493 /* Free the stats buffer for this filter */
1494 if (global_cfil_stats_report_buffers[cfc->cf_kcunit - 1] != NULL) {
1495 kfree_type(struct cfil_stats_report_buffer,
1496 global_cfil_stats_report_buffers[cfc->cf_kcunit - 1]);
1497 global_cfil_stats_report_buffers[cfc->cf_kcunit - 1] = NULL;
1498 }
1499 VERIFY(cfc->cf_sock_count == 0);
1500
1501 /*
1502 * Make filter inactive
1503 */
1504 content_filters[kcunit - 1] = NULL;
1505 cfil_active_count--;
1506 cfil_update_behavior_flags();
1507 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1508
1509 if (cfc->cf_crypto_state != NULL) {
1510 cfil_crypto_cleanup_state(cfc->cf_crypto_state);
1511 cfc->cf_crypto_state = NULL;
1512 }
1513
1514 zfree(content_filter_zone, cfc);
1515 done:
1516 if (error == 0) {
1517 OSIncrementAtomic(&cfil_stats.cfs_ctl_disconnect_ok);
1518 } else {
1519 OSIncrementAtomic(&cfil_stats.cfs_ctl_disconnect_fail);
1520 }
1521
1522 CFIL_LOG(LOG_INFO, "return %d cfil_active_count %u kcunit %u",
1523 error, cfil_active_count, kcunit);
1524
1525 return error;
1526 }
1527
1528 /*
1529 * cfil_acquire_sockbuf()
1530 *
1531 * Prevent any other thread from acquiring the sockbuf
1532 * We use sb_cfil_thread as a semaphore to prevent other threads from
1533 * messing with the sockbuf -- see sblock()
1534 * Note: We do not set SB_LOCK here because the thread may check or modify
1535 * SB_LOCK several times until it calls cfil_release_sockbuf() -- currently
1536 * sblock(), sbunlock() or sodefunct()
1537 */
1538 static int
cfil_acquire_sockbuf(struct socket * so,struct cfil_info * cfil_info,int outgoing)1539 cfil_acquire_sockbuf(struct socket *so, struct cfil_info *cfil_info, int outgoing)
1540 {
1541 thread_t __single tp = current_thread();
1542 struct sockbuf *sb = outgoing ? &so->so_snd : &so->so_rcv;
1543 lck_mtx_t *mutex_held;
1544 int error = 0;
1545
1546 /*
1547 * Wait until no thread is holding the sockbuf and other content
1548 * filter threads have released the sockbuf
1549 */
1550 while ((sb->sb_flags & SB_LOCK) ||
1551 (sb->sb_cfil_thread != NULL && sb->sb_cfil_thread != tp)) {
1552 if (so->so_proto->pr_getlock != NULL) {
1553 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
1554 } else {
1555 mutex_held = so->so_proto->pr_domain->dom_mtx;
1556 }
1557
1558 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
1559
1560 sb->sb_wantlock++;
1561 VERIFY(sb->sb_wantlock != 0);
1562
1563 msleep(&sb->sb_flags, mutex_held, PSOCK, "cfil_acquire_sockbuf",
1564 NULL);
1565
1566 VERIFY(sb->sb_wantlock != 0);
1567 sb->sb_wantlock--;
1568 }
1569 /*
1570 * Use reference count for repetitive calls on same thread
1571 */
1572 if (sb->sb_cfil_refs == 0) {
1573 VERIFY(sb->sb_cfil_thread == NULL);
1574 VERIFY((sb->sb_flags & SB_LOCK) == 0);
1575
1576 sb->sb_cfil_thread = tp;
1577 sb->sb_flags |= SB_LOCK;
1578 }
1579 sb->sb_cfil_refs++;
1580
1581 /* We acquire the socket buffer when we need to cleanup */
1582 if (cfil_info == NULL) {
1583 CFIL_LOG(LOG_ERR, "so %llx cfil detached",
1584 (uint64_t)VM_KERNEL_ADDRPERM(so));
1585 error = 0;
1586 } else if (cfil_info->cfi_flags & CFIF_DROP) {
1587 CFIL_LOG(LOG_ERR, "so %llx drop set",
1588 (uint64_t)VM_KERNEL_ADDRPERM(so));
1589 error = EPIPE;
1590 }
1591
1592 return error;
1593 }
1594
1595 static void
cfil_release_sockbuf(struct socket * so,int outgoing)1596 cfil_release_sockbuf(struct socket *so, int outgoing)
1597 {
1598 struct sockbuf *sb = outgoing ? &so->so_snd : &so->so_rcv;
1599 thread_t __single tp = current_thread();
1600
1601 socket_lock_assert_owned(so);
1602
1603 if (sb->sb_cfil_thread != NULL && sb->sb_cfil_thread != tp) {
1604 panic("%s sb_cfil_thread %p not current %p", __func__,
1605 sb->sb_cfil_thread, tp);
1606 }
1607 /*
1608 * Don't panic if we are defunct because SB_LOCK has
1609 * been cleared by sodefunct()
1610 */
1611 if (!(so->so_flags & SOF_DEFUNCT) && !(sb->sb_flags & SB_LOCK)) {
1612 panic("%s SB_LOCK not set on %p", __func__,
1613 sb);
1614 }
1615 /*
1616 * We can unlock when the thread unwinds to the last reference
1617 */
1618 sb->sb_cfil_refs--;
1619 if (sb->sb_cfil_refs == 0) {
1620 sb->sb_cfil_thread = NULL;
1621 sb->sb_flags &= ~SB_LOCK;
1622
1623 if (sb->sb_wantlock > 0) {
1624 wakeup(&sb->sb_flags);
1625 }
1626 }
1627 }
1628
1629 cfil_sock_id_t
cfil_sock_id_from_socket(struct socket * so)1630 cfil_sock_id_from_socket(struct socket *so)
1631 {
1632 if ((so->so_flags & SOF_CONTENT_FILTER) && so->so_cfil) {
1633 return so->so_cfil->cfi_sock_id;
1634 } else {
1635 return CFIL_SOCK_ID_NONE;
1636 }
1637 }
1638
1639 /*
1640 * cfil_socket_safe_lock -
1641 * This routine attempts to lock the socket safely.
1642 *
1643 * The passed in pcbinfo is assumed to be locked and must be unlocked once the
1644 * inp state is safeguarded and before we attempt to lock/unlock the socket.
1645 * This is to prevent getting blocked by socket_lock() while holding the pcbinfo
1646 * lock, avoiding potential deadlock with other processes contending for the same
1647 * resources. This is also to avoid double locking the pcbinfo for rip sockets
1648 * since rip_unlock() will lock ripcbinfo if it needs to dispose inpcb when
1649 * so_usecount is 0.
1650 */
1651 static bool
cfil_socket_safe_lock(struct inpcb * inp,struct inpcbinfo * pcbinfo)1652 cfil_socket_safe_lock(struct inpcb *inp, struct inpcbinfo *pcbinfo)
1653 {
1654 struct socket *so = NULL;
1655
1656 VERIFY(pcbinfo != NULL);
1657
1658 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
1659 // Safeguarded the inp state, unlock pcbinfo before locking socket.
1660 lck_rw_done(&pcbinfo->ipi_lock);
1661
1662 so = inp->inp_socket;
1663 socket_lock(so, 1);
1664 if (in_pcb_checkstate(inp, WNT_RELEASE, 1) != WNT_STOPUSING) {
1665 return true;
1666 }
1667 } else {
1668 // Failed to safeguarded the inp state, unlock pcbinfo and abort.
1669 lck_rw_done(&pcbinfo->ipi_lock);
1670 }
1671
1672 if (so) {
1673 socket_unlock(so, 1);
1674 }
1675 return false;
1676 }
1677
1678 static struct socket *
cfil_socket_from_sock_id(cfil_sock_id_t cfil_sock_id,bool udp_only)1679 cfil_socket_from_sock_id(cfil_sock_id_t cfil_sock_id, bool udp_only)
1680 {
1681 struct socket *so = NULL;
1682 u_int64_t gencnt = cfil_sock_id >> 32;
1683 u_int32_t flowhash = (u_int32_t)(cfil_sock_id & 0x0ffffffff);
1684 struct inpcb *inp = NULL;
1685 struct inpcbinfo *pcbinfo = NULL;
1686
1687 if (udp_only) {
1688 goto find_udp;
1689 }
1690
1691 pcbinfo = &tcbinfo;
1692 lck_rw_lock_shared(&pcbinfo->ipi_lock);
1693 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1694 if (inp->inp_state != INPCB_STATE_DEAD &&
1695 inp->inp_socket != NULL &&
1696 inp->inp_flowhash == flowhash &&
1697 (inp->inp_socket->so_gencnt & 0x0ffffffff) == gencnt &&
1698 inp->inp_socket->so_cfil != NULL) {
1699 if (cfil_socket_safe_lock(inp, pcbinfo)) {
1700 so = inp->inp_socket;
1701 }
1702 /* pcbinfo is already unlocked, we are done. */
1703 goto done;
1704 }
1705 }
1706 lck_rw_done(&pcbinfo->ipi_lock);
1707 if (so != NULL) {
1708 goto done;
1709 }
1710
1711 find_udp:
1712
1713 pcbinfo = &udbinfo;
1714 lck_rw_lock_shared(&pcbinfo->ipi_lock);
1715 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1716 if (inp->inp_state != INPCB_STATE_DEAD &&
1717 inp->inp_socket != NULL &&
1718 inp->inp_socket->so_flow_db != NULL &&
1719 (inp->inp_socket->so_gencnt & 0x0ffffffff) == gencnt) {
1720 if (cfil_socket_safe_lock(inp, pcbinfo)) {
1721 so = inp->inp_socket;
1722 }
1723 /* pcbinfo is already unlocked, we are done. */
1724 goto done;
1725 }
1726 }
1727 lck_rw_done(&pcbinfo->ipi_lock);
1728 if (so != NULL) {
1729 goto done;
1730 }
1731
1732 pcbinfo = &ripcbinfo;
1733 lck_rw_lock_shared(&pcbinfo->ipi_lock);
1734 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1735 if (inp->inp_state != INPCB_STATE_DEAD &&
1736 inp->inp_socket != NULL &&
1737 inp->inp_socket->so_flow_db != NULL &&
1738 (inp->inp_socket->so_gencnt & 0x0ffffffff) == gencnt) {
1739 if (cfil_socket_safe_lock(inp, pcbinfo)) {
1740 so = inp->inp_socket;
1741 }
1742 /* pcbinfo is already unlocked, we are done. */
1743 goto done;
1744 }
1745 }
1746 lck_rw_done(&pcbinfo->ipi_lock);
1747
1748 done:
1749 if (so == NULL) {
1750 OSIncrementAtomic(&cfil_stats.cfs_sock_id_not_found);
1751 CFIL_LOG(LOG_DEBUG,
1752 "no socket for sock_id %llx gencnt %llx flowhash %x",
1753 cfil_sock_id, gencnt, flowhash);
1754 }
1755
1756 return so;
1757 }
1758
1759 static struct socket *
cfil_socket_from_client_uuid(uuid_t necp_client_uuid,bool * cfil_attached)1760 cfil_socket_from_client_uuid(uuid_t necp_client_uuid, bool *cfil_attached)
1761 {
1762 struct socket *so = NULL;
1763 struct inpcb *inp = NULL;
1764 struct inpcbinfo *pcbinfo = &tcbinfo;
1765
1766 lck_rw_lock_shared(&pcbinfo->ipi_lock);
1767 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1768 if (inp->inp_state != INPCB_STATE_DEAD &&
1769 inp->inp_socket != NULL &&
1770 uuid_compare(inp->necp_client_uuid, necp_client_uuid) == 0) {
1771 *cfil_attached = (inp->inp_socket->so_cfil != NULL);
1772 if (cfil_socket_safe_lock(inp, pcbinfo)) {
1773 so = inp->inp_socket;
1774 }
1775 /* pcbinfo is already unlocked, we are done. */
1776 goto done;
1777 }
1778 }
1779 lck_rw_done(&pcbinfo->ipi_lock);
1780 if (so != NULL) {
1781 goto done;
1782 }
1783
1784 pcbinfo = &udbinfo;
1785 lck_rw_lock_shared(&pcbinfo->ipi_lock);
1786 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1787 if (inp->inp_state != INPCB_STATE_DEAD &&
1788 inp->inp_socket != NULL &&
1789 uuid_compare(inp->necp_client_uuid, necp_client_uuid) == 0) {
1790 *cfil_attached = (inp->inp_socket->so_flow_db != NULL);
1791 if (cfil_socket_safe_lock(inp, pcbinfo)) {
1792 so = inp->inp_socket;
1793 }
1794 /* pcbinfo is already unlocked, we are done. */
1795 goto done;
1796 }
1797 }
1798 lck_rw_done(&pcbinfo->ipi_lock);
1799
1800 done:
1801 return so;
1802 }
1803
1804 static void
cfil_info_stats_toggle(struct cfil_info * cfil_info,struct cfil_entry * entry,uint32_t report_frequency)1805 cfil_info_stats_toggle(struct cfil_info *cfil_info, struct cfil_entry *entry, uint32_t report_frequency)
1806 {
1807 struct cfil_info *cfil = NULL;
1808 Boolean found = FALSE;
1809 int kcunit;
1810
1811 if (cfil_info == NULL) {
1812 return;
1813 }
1814
1815 if (report_frequency) {
1816 if (entry == NULL) {
1817 return;
1818 }
1819
1820 // Update stats reporting frequency.
1821 if (entry->cfe_stats_report_frequency != report_frequency) {
1822 entry->cfe_stats_report_frequency = report_frequency;
1823 if (entry->cfe_stats_report_frequency < CFIL_STATS_REPORT_INTERVAL_MIN_MSEC) {
1824 entry->cfe_stats_report_frequency = CFIL_STATS_REPORT_INTERVAL_MIN_MSEC;
1825 }
1826 microuptime(&entry->cfe_stats_report_ts);
1827
1828 // Insert cfil_info into list only if it is not in yet.
1829 TAILQ_FOREACH(cfil, &cfil_sock_head_stats, cfi_link_stats) {
1830 if (cfil == cfil_info) {
1831 return;
1832 }
1833 }
1834
1835 TAILQ_INSERT_TAIL(&cfil_sock_head_stats, cfil_info, cfi_link_stats);
1836
1837 // Wake up stats thread if this is first flow added
1838 if (cfil_sock_attached_stats_count == 0) {
1839 thread_wakeup((caddr_t)&cfil_sock_attached_stats_count);
1840 }
1841 cfil_sock_attached_stats_count++;
1842
1843 if (cfil_info->cfi_debug && cfil_log_stats) {
1844 CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED - STATS FLOW INSERTED: <so %llx sockID %llu <%llx>> stats frequency %d msecs",
1845 cfil_info->cfi_so ? (uint64_t)VM_KERNEL_ADDRPERM(cfil_info->cfi_so) : 0,
1846 cfil_info->cfi_sock_id, cfil_info->cfi_sock_id,
1847 entry->cfe_stats_report_frequency);
1848 }
1849 }
1850 } else {
1851 // Turn off stats reporting for this filter.
1852 if (entry != NULL) {
1853 // Already off, no change.
1854 if (entry->cfe_stats_report_frequency == 0) {
1855 return;
1856 }
1857
1858 entry->cfe_stats_report_frequency = 0;
1859 // If cfil_info still has filter(s) asking for stats, no need to remove from list.
1860 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
1861 if (cfil_info->cfi_entries[kcunit - 1].cfe_stats_report_frequency > 0) {
1862 return;
1863 }
1864 }
1865 }
1866
1867 // No more filter asking for stats for this cfil_info, remove from list.
1868 if (!TAILQ_EMPTY(&cfil_sock_head_stats)) {
1869 found = FALSE;
1870 TAILQ_FOREACH(cfil, &cfil_sock_head_stats, cfi_link_stats) {
1871 if (cfil == cfil_info) {
1872 found = TRUE;
1873 break;
1874 }
1875 }
1876 if (found) {
1877 cfil_sock_attached_stats_count--;
1878 TAILQ_REMOVE(&cfil_sock_head_stats, cfil_info, cfi_link_stats);
1879 if (cfil_info->cfi_debug && cfil_log_stats) {
1880 CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED - STATS FLOW DELETED: <so %llx sockID %llu <%llx>> stats frequency reset",
1881 cfil_info->cfi_so ? (uint64_t)VM_KERNEL_ADDRPERM(cfil_info->cfi_so) : 0,
1882 cfil_info->cfi_sock_id, cfil_info->cfi_sock_id);
1883 }
1884 }
1885 }
1886 }
1887 }
1888
1889 static errno_t
cfil_ctl_send(kern_ctl_ref kctlref,u_int32_t kcunit,void * unitinfo,mbuf_t m,int flags)1890 cfil_ctl_send(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo, mbuf_t m,
1891 int flags)
1892 {
1893 #pragma unused(kctlref, flags)
1894 errno_t error = 0;
1895 struct cfil_msg_hdr *msghdr;
1896 struct content_filter *cfc = (struct content_filter *)unitinfo;
1897 struct socket *so;
1898 struct cfil_msg_action * __single action_msg;
1899 struct cfil_entry *entry;
1900 struct cfil_info * __single cfil_info = NULL;
1901 unsigned int data_len = 0;
1902
1903 CFIL_LOG(LOG_INFO, "");
1904
1905 if (cfc == NULL) {
1906 CFIL_LOG(LOG_ERR, "no unitinfo");
1907 error = EINVAL;
1908 goto done;
1909 }
1910
1911 if (kcunit > MAX_CONTENT_FILTER) {
1912 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1913 kcunit, MAX_CONTENT_FILTER);
1914 error = EINVAL;
1915 goto done;
1916 }
1917 if (m == NULL) {
1918 CFIL_LOG(LOG_ERR, "null mbuf");
1919 error = EINVAL;
1920 goto done;
1921 }
1922 data_len = m_length(m);
1923
1924 if (data_len < sizeof(struct cfil_msg_hdr)) {
1925 CFIL_LOG(LOG_ERR, "too short %u", data_len);
1926 error = EINVAL;
1927 goto done;
1928 }
1929 msghdr = mtod(m, struct cfil_msg_hdr *);
1930 if (msghdr->cfm_version != CFM_VERSION_CURRENT) {
1931 CFIL_LOG(LOG_ERR, "bad version %u", msghdr->cfm_version);
1932 error = EINVAL;
1933 goto done;
1934 }
1935 if (msghdr->cfm_type != CFM_TYPE_ACTION) {
1936 CFIL_LOG(LOG_ERR, "bad type %u", msghdr->cfm_type);
1937 error = EINVAL;
1938 goto done;
1939 }
1940 if (msghdr->cfm_len > data_len) {
1941 CFIL_LOG(LOG_ERR, "bad length %u", msghdr->cfm_len);
1942 error = EINVAL;
1943 goto done;
1944 }
1945
1946 /* Validate action operation */
1947 switch (msghdr->cfm_op) {
1948 case CFM_OP_DATA_UPDATE:
1949 OSIncrementAtomic(
1950 &cfil_stats.cfs_ctl_action_data_update);
1951 break;
1952 case CFM_OP_DROP:
1953 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_drop);
1954 break;
1955 case CFM_OP_BLESS_CLIENT:
1956 if (msghdr->cfm_len != sizeof(struct cfil_msg_bless_client)) {
1957 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_len);
1958 error = EINVAL;
1959 CFIL_LOG(LOG_ERR, "bad len: %u for op %u",
1960 msghdr->cfm_len,
1961 msghdr->cfm_op);
1962 goto done;
1963 }
1964 error = cfil_action_bless_client(kcunit, msghdr);
1965 goto done;
1966 case CFM_OP_SET_CRYPTO_KEY:
1967 if (msghdr->cfm_len != sizeof(struct cfil_msg_set_crypto_key)) {
1968 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_len);
1969 error = EINVAL;
1970 CFIL_LOG(LOG_ERR, "bad len: %u for op %u",
1971 msghdr->cfm_len,
1972 msghdr->cfm_op);
1973 goto done;
1974 }
1975 error = cfil_action_set_crypto_key(kcunit, msghdr);
1976 goto done;
1977 default:
1978 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_op);
1979 CFIL_LOG(LOG_ERR, "bad op %u", msghdr->cfm_op);
1980 error = EINVAL;
1981 goto done;
1982 }
1983 if (msghdr->cfm_len != sizeof(struct cfil_msg_action)) {
1984 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_len);
1985 error = EINVAL;
1986 CFIL_LOG(LOG_ERR, "bad len: %u for op %u",
1987 msghdr->cfm_len,
1988 msghdr->cfm_op);
1989 goto done;
1990 }
1991 cfil_rw_lock_shared(&cfil_lck_rw);
1992 if (cfc != (void *)content_filters[kcunit - 1]) {
1993 CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
1994 kcunit);
1995 error = EINVAL;
1996 cfil_rw_unlock_shared(&cfil_lck_rw);
1997 goto done;
1998 }
1999 cfil_rw_unlock_shared(&cfil_lck_rw);
2000
2001 // Search for socket (TCP+UDP and lock so)
2002 so = cfil_socket_from_sock_id(msghdr->cfm_sock_id, false);
2003 if (so == NULL) {
2004 CFIL_LOG(LOG_NOTICE, "bad sock_id %llx",
2005 msghdr->cfm_sock_id);
2006 error = EINVAL;
2007 goto done;
2008 }
2009
2010 cfil_info = so->so_flow_db != NULL ?
2011 soflow_db_get_feature_context(so->so_flow_db, msghdr->cfm_sock_id) : so->so_cfil;
2012
2013 // We should not obtain global lock here in order to avoid deadlock down the path.
2014 // But we attempt to retain a valid cfil_info to prevent any deallocation until
2015 // we are done. Abort retain if cfil_info has already entered the free code path.
2016 if (cfil_info && os_ref_retain_try(&cfil_info->cfi_ref_count) == false) {
2017 socket_unlock(so, 1);
2018 goto done;
2019 }
2020
2021 if (cfil_info == NULL) {
2022 CFIL_LOG(LOG_NOTICE, "so %llx <id %llu> not attached",
2023 (uint64_t)VM_KERNEL_ADDRPERM(so), msghdr->cfm_sock_id);
2024 error = EINVAL;
2025 goto unlock;
2026 } else if (cfil_info->cfi_flags & CFIF_DROP) {
2027 CFIL_LOG(LOG_NOTICE, "so %llx drop set",
2028 (uint64_t)VM_KERNEL_ADDRPERM(so));
2029 error = EINVAL;
2030 goto unlock;
2031 }
2032
2033 if (cfil_info->cfi_debug) {
2034 cfil_info_log(LOG_ERR, cfil_info, "CFIL: RECEIVED MSG FROM FILTER");
2035 }
2036
2037 entry = &cfil_info->cfi_entries[kcunit - 1];
2038 if (entry->cfe_filter == NULL) {
2039 CFIL_LOG(LOG_NOTICE, "so %llx no filter",
2040 (uint64_t)VM_KERNEL_ADDRPERM(so));
2041 error = EINVAL;
2042 goto unlock;
2043 }
2044
2045 if (entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) {
2046 entry->cfe_flags |= CFEF_DATA_START;
2047 } else {
2048 CFIL_LOG(LOG_ERR,
2049 "so %llx attached not sent for %u",
2050 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
2051 error = EINVAL;
2052 goto unlock;
2053 }
2054
2055 microuptime(&entry->cfe_last_action);
2056 CFI_ADD_TIME_LOG(cfil_info, &entry->cfe_last_action, &cfil_info->cfi_first_event, msghdr->cfm_op);
2057
2058 action_msg = (struct cfil_msg_action *)msghdr;
2059
2060 switch (msghdr->cfm_op) {
2061 case CFM_OP_DATA_UPDATE:
2062
2063 if (cfil_info->cfi_debug) {
2064 cfil_info_log(LOG_ERR, cfil_info, "CFIL: RECEIVED CFM_OP_DATA_UPDATE");
2065 CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED: <so %llx sockID %llu <%llx>> <IN peek:%llu pass:%llu, OUT peek:%llu pass:%llu>",
2066 (uint64_t)VM_KERNEL_ADDRPERM(so),
2067 cfil_info->cfi_sock_id, cfil_info->cfi_sock_id,
2068 action_msg->cfa_in_peek_offset, action_msg->cfa_in_pass_offset,
2069 action_msg->cfa_out_peek_offset, action_msg->cfa_out_pass_offset);
2070 }
2071
2072 /*
2073 * Received verdict, at this point we know this
2074 * socket connection is allowed. Unblock thread
2075 * immediately before proceeding to process the verdict.
2076 */
2077 cfil_sock_received_verdict(so);
2078
2079 if (action_msg->cfa_out_peek_offset != 0 ||
2080 action_msg->cfa_out_pass_offset != 0) {
2081 error = cfil_action_data_pass(so, cfil_info, kcunit, 1,
2082 action_msg->cfa_out_pass_offset,
2083 action_msg->cfa_out_peek_offset);
2084 }
2085 if (error == EJUSTRETURN) {
2086 error = 0;
2087 }
2088 if (error != 0) {
2089 break;
2090 }
2091 if (action_msg->cfa_in_peek_offset != 0 ||
2092 action_msg->cfa_in_pass_offset != 0) {
2093 error = cfil_action_data_pass(so, cfil_info, kcunit, 0,
2094 action_msg->cfa_in_pass_offset,
2095 action_msg->cfa_in_peek_offset);
2096 }
2097 if (error == EJUSTRETURN) {
2098 error = 0;
2099 }
2100
2101 // Toggle stats reporting according to received verdict.
2102 cfil_rw_lock_exclusive(&cfil_lck_rw);
2103 cfil_info_stats_toggle(cfil_info, entry, action_msg->cfa_stats_frequency);
2104 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2105
2106 break;
2107
2108 case CFM_OP_DROP:
2109 if (cfil_info->cfi_debug) {
2110 cfil_info_log(LOG_ERR, cfil_info, "CFIL: RECEIVED CFM_OP_DROP");
2111 CFIL_LOG(LOG_ERR, "CFIL: VERDICT DROP RECEIVED: <so %llx sockID %llu <%llx>> <IN peek:%llu pass:%llu, OUT peek:%llu pass:%llu>",
2112 (uint64_t)VM_KERNEL_ADDRPERM(so),
2113 cfil_info->cfi_sock_id, cfil_info->cfi_sock_id,
2114 action_msg->cfa_in_peek_offset, action_msg->cfa_in_pass_offset,
2115 action_msg->cfa_out_peek_offset, action_msg->cfa_out_pass_offset);
2116 }
2117
2118 error = cfil_action_drop(so, cfil_info, kcunit);
2119 cfil_sock_received_verdict(so);
2120 break;
2121
2122 default:
2123 error = EINVAL;
2124 break;
2125 }
2126 unlock:
2127 CFIL_INFO_FREE(cfil_info)
2128 socket_unlock(so, 1);
2129 done:
2130 mbuf_freem(m);
2131
2132 if (error == 0) {
2133 OSIncrementAtomic(&cfil_stats.cfs_ctl_send_ok);
2134 } else {
2135 OSIncrementAtomic(&cfil_stats.cfs_ctl_send_bad);
2136 }
2137
2138 return error;
2139 }
2140
2141 static errno_t
cfil_ctl_getopt(kern_ctl_ref kctlref,u_int32_t kcunit,void * unitinfo,int opt,void * data,size_t * len)2142 cfil_ctl_getopt(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo,
2143 int opt, void *data, size_t *len)
2144 {
2145 #pragma unused(kctlref, opt)
2146 struct cfil_info * __single cfil_info = NULL;
2147 errno_t error = 0;
2148 struct content_filter *cfc = (struct content_filter *)unitinfo;
2149
2150 CFIL_LOG(LOG_NOTICE, "");
2151
2152 if (cfc == NULL) {
2153 CFIL_LOG(LOG_ERR, "no unitinfo");
2154 return EINVAL;
2155 }
2156
2157 cfil_rw_lock_shared(&cfil_lck_rw);
2158
2159 if (kcunit > MAX_CONTENT_FILTER) {
2160 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
2161 kcunit, MAX_CONTENT_FILTER);
2162 error = EINVAL;
2163 goto done;
2164 }
2165 if (cfc != (void *)content_filters[kcunit - 1]) {
2166 CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
2167 kcunit);
2168 error = EINVAL;
2169 goto done;
2170 }
2171 switch (opt) {
2172 case CFIL_OPT_NECP_CONTROL_UNIT:
2173 if (*len < sizeof(uint32_t)) {
2174 CFIL_LOG(LOG_ERR, "len too small %lu", *len);
2175 error = EINVAL;
2176 goto done;
2177 }
2178 if (data != NULL) {
2179 *(uint32_t *)data = cfc->cf_necp_control_unit;
2180 }
2181 break;
2182 case CFIL_OPT_PRESERVE_CONNECTIONS:
2183 if (*len < sizeof(uint32_t)) {
2184 CFIL_LOG(LOG_ERR, "CFIL_OPT_PRESERVE_CONNECTIONS len too small %lu", *len);
2185 error = EINVAL;
2186 goto done;
2187 }
2188 if (data != NULL) {
2189 *(uint32_t *)data = (cfc->cf_flags & CFF_PRESERVE_CONNECTIONS) ? true : false;
2190 }
2191 break;
2192 case CFIL_OPT_GET_SOCKET_INFO:
2193 if (*len != sizeof(struct cfil_opt_sock_info)) {
2194 CFIL_LOG(LOG_ERR, "len does not match %lu", *len);
2195 error = EINVAL;
2196 goto done;
2197 }
2198 if (data == NULL) {
2199 CFIL_LOG(LOG_ERR, "data not passed");
2200 error = EINVAL;
2201 goto done;
2202 }
2203
2204 struct cfil_opt_sock_info *sock_info =
2205 (struct cfil_opt_sock_info *) data;
2206
2207 // Unlock here so that we never hold both cfil_lck_rw and the
2208 // socket_lock at the same time. Otherwise, this can deadlock
2209 // because soclose() takes the socket_lock and then exclusive
2210 // cfil_lck_rw and we require the opposite order.
2211
2212 // WARNING: Be sure to never use anything protected
2213 // by cfil_lck_rw beyond this point.
2214 // WARNING: Be sure to avoid fallthrough and
2215 // goto return_already_unlocked from this branch.
2216 cfil_rw_unlock_shared(&cfil_lck_rw);
2217
2218 // Search (TCP+UDP) and lock socket
2219 struct socket *sock =
2220 cfil_socket_from_sock_id(sock_info->cfs_sock_id, false);
2221 if (sock == NULL) {
2222 CFIL_LOG(LOG_ERR, "CFIL: GET_SOCKET_INFO failed: bad sock_id %llu",
2223 sock_info->cfs_sock_id);
2224 error = ENOENT;
2225 goto return_already_unlocked;
2226 }
2227
2228 cfil_info = (sock->so_flow_db != NULL) ?
2229 soflow_db_get_feature_context(sock->so_flow_db, sock_info->cfs_sock_id) : sock->so_cfil;
2230
2231 if (cfil_info == NULL) {
2232 CFIL_LOG(LOG_INFO, "CFIL: GET_SOCKET_INFO failed: so %llx not attached, cannot fetch info",
2233 (uint64_t)VM_KERNEL_ADDRPERM(sock));
2234 error = EINVAL;
2235 socket_unlock(sock, 1);
2236 goto return_already_unlocked;
2237 }
2238
2239 if (sock->so_proto == NULL || sock->so_proto->pr_domain == NULL) {
2240 CFIL_LOG(LOG_INFO, "CFIL: GET_SOCKET_INFO failed: so %llx NULL so_proto / pr_domain",
2241 (uint64_t)VM_KERNEL_ADDRPERM(sock));
2242 error = EINVAL;
2243 socket_unlock(sock, 1);
2244 goto return_already_unlocked;
2245 }
2246
2247 // Fill out family, type, and protocol
2248 sock_info->cfs_sock_family = SOCK_DOM(sock);
2249 sock_info->cfs_sock_type = SOCK_TYPE(sock);
2250 sock_info->cfs_sock_protocol = GET_SO_PROTO(sock);
2251
2252 // Source and destination addresses
2253 struct inpcb *inp = sotoinpcb(sock);
2254 if (inp->inp_vflag & INP_IPV6) {
2255 struct in6_addr * __single laddr = NULL, * __single faddr = NULL;
2256 u_int16_t lport = 0, fport = 0;
2257
2258 cfil_get_flow_address_v6(cfil_info->cfi_hash_entry, inp,
2259 &laddr, &faddr, &lport, &fport);
2260 fill_ip6_sockaddr_4_6(&sock_info->cfs_local, laddr, lport, inp->inp_lifscope);
2261 fill_ip6_sockaddr_4_6(&sock_info->cfs_remote, faddr, fport, inp->inp_fifscope);
2262 } else if (inp->inp_vflag & INP_IPV4) {
2263 struct in_addr laddr = {.s_addr = 0}, faddr = {.s_addr = 0};
2264 u_int16_t lport = 0, fport = 0;
2265
2266 cfil_get_flow_address(cfil_info->cfi_hash_entry, inp,
2267 &laddr, &faddr, &lport, &fport);
2268 fill_ip_sockaddr_4_6(&sock_info->cfs_local, laddr, lport);
2269 fill_ip_sockaddr_4_6(&sock_info->cfs_remote, faddr, fport);
2270 }
2271
2272 // Set the pid info
2273 sock_info->cfs_pid = sock->last_pid;
2274 memcpy(sock_info->cfs_uuid, sock->last_uuid, sizeof(uuid_t));
2275
2276 if (sock->so_flags & SOF_DELEGATED) {
2277 sock_info->cfs_e_pid = sock->e_pid;
2278 memcpy(sock_info->cfs_e_uuid, sock->e_uuid, sizeof(uuid_t));
2279 }
2280 #if defined(XNU_TARGET_OS_OSX)
2281 else if (!uuid_is_null(sock->so_ruuid)) {
2282 sock_info->cfs_e_pid = sock->so_rpid;
2283 memcpy(sock_info->cfs_e_uuid, sock->so_ruuid, sizeof(uuid_t));
2284 }
2285 #endif
2286 else {
2287 sock_info->cfs_e_pid = sock->last_pid;
2288 memcpy(sock_info->cfs_e_uuid, sock->last_uuid, sizeof(uuid_t));
2289 }
2290
2291 socket_unlock(sock, 1);
2292
2293 goto return_already_unlocked;
2294 default:
2295 error = ENOPROTOOPT;
2296 break;
2297 }
2298 done:
2299 cfil_rw_unlock_shared(&cfil_lck_rw);
2300
2301 return error;
2302
2303 return_already_unlocked:
2304
2305 return error;
2306 }
2307
2308 static errno_t
cfil_ctl_setopt(kern_ctl_ref kctlref,u_int32_t kcunit,void * unitinfo,int opt,void * data,size_t len)2309 cfil_ctl_setopt(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo,
2310 int opt, void *data, size_t len)
2311 {
2312 #pragma unused(kctlref, opt)
2313 errno_t error = 0;
2314 struct content_filter *cfc = (struct content_filter *)unitinfo;
2315
2316 CFIL_LOG(LOG_NOTICE, "");
2317
2318 if (cfc == NULL) {
2319 CFIL_LOG(LOG_ERR, "no unitinfo");
2320 return EINVAL;
2321 }
2322
2323 cfil_rw_lock_exclusive(&cfil_lck_rw);
2324
2325 if (kcunit > MAX_CONTENT_FILTER) {
2326 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
2327 kcunit, MAX_CONTENT_FILTER);
2328 error = EINVAL;
2329 goto done;
2330 }
2331 if (cfc != (void *)content_filters[kcunit - 1]) {
2332 CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
2333 kcunit);
2334 error = EINVAL;
2335 goto done;
2336 }
2337 switch (opt) {
2338 case CFIL_OPT_NECP_CONTROL_UNIT:
2339 if (len < sizeof(uint32_t)) {
2340 CFIL_LOG(LOG_ERR, "CFIL_OPT_NECP_CONTROL_UNIT "
2341 "len too small %lu", len);
2342 error = EINVAL;
2343 goto done;
2344 }
2345 if (cfc->cf_necp_control_unit != 0) {
2346 CFIL_LOG(LOG_ERR, "CFIL_OPT_NECP_CONTROL_UNIT "
2347 "already set %u",
2348 cfc->cf_necp_control_unit);
2349 error = EINVAL;
2350 goto done;
2351 }
2352 cfc->cf_necp_control_unit = *(uint32_t *)data;
2353 break;
2354 case CFIL_OPT_PRESERVE_CONNECTIONS:
2355 if (len < sizeof(uint32_t)) {
2356 CFIL_LOG(LOG_ERR, "CFIL_OPT_PRESERVE_CONNECTIONS "
2357 "len too small %lu", len);
2358 error = EINVAL;
2359 goto done;
2360 }
2361 uint32_t preserve_connections = *((uint32_t *)data);
2362 CFIL_LOG(LOG_INFO, "CFIL_OPT_PRESERVE_CONNECTIONS got %d (kcunit %d)", preserve_connections, kcunit);
2363 if (preserve_connections) {
2364 cfc->cf_flags |= CFF_PRESERVE_CONNECTIONS;
2365 } else {
2366 cfc->cf_flags &= ~CFF_PRESERVE_CONNECTIONS;
2367 }
2368
2369 cfil_update_behavior_flags();
2370 break;
2371 default:
2372 error = ENOPROTOOPT;
2373 break;
2374 }
2375 done:
2376 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2377
2378 return error;
2379 }
2380
2381
2382 static void
cfil_ctl_rcvd(kern_ctl_ref kctlref,u_int32_t kcunit,void * unitinfo,int flags)2383 cfil_ctl_rcvd(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo, int flags)
2384 {
2385 #pragma unused(kctlref, flags)
2386 struct content_filter *cfc = (struct content_filter *)unitinfo;
2387 struct socket *so = NULL;
2388 int error;
2389 struct cfil_entry *entry;
2390 struct cfil_info *cfil_info = NULL;
2391
2392 CFIL_LOG(LOG_INFO, "");
2393
2394 if (cfc == NULL) {
2395 CFIL_LOG(LOG_ERR, "no unitinfo");
2396 OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
2397 return;
2398 }
2399
2400 if (kcunit > MAX_CONTENT_FILTER) {
2401 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
2402 kcunit, MAX_CONTENT_FILTER);
2403 OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
2404 return;
2405 }
2406 cfil_rw_lock_shared(&cfil_lck_rw);
2407 if (cfc != (void *)content_filters[kcunit - 1]) {
2408 CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
2409 kcunit);
2410 OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
2411 goto done;
2412 }
2413 /* Let's assume the flow control is lifted */
2414 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
2415 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
2416 cfil_rw_lock_exclusive(&cfil_lck_rw);
2417 }
2418
2419 cfc->cf_flags &= ~CFF_FLOW_CONTROLLED;
2420
2421 cfil_rw_lock_exclusive_to_shared(&cfil_lck_rw);
2422 LCK_RW_ASSERT(&cfil_lck_rw, LCK_RW_ASSERT_SHARED);
2423 }
2424 /*
2425 * Flow control will be raised again as soon as an entry cannot enqueue
2426 * to the kernel control socket
2427 */
2428 while ((cfc->cf_flags & CFF_FLOW_CONTROLLED) == 0) {
2429 verify_content_filter(cfc);
2430
2431 cfil_rw_lock_assert_held(&cfil_lck_rw, 0);
2432
2433 /* Find an entry that is flow controlled */
2434 TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) {
2435 if (entry->cfe_cfil_info == NULL ||
2436 entry->cfe_cfil_info->cfi_so == NULL) {
2437 continue;
2438 }
2439 if ((entry->cfe_flags & CFEF_FLOW_CONTROLLED) == 0) {
2440 continue;
2441 }
2442 }
2443 if (entry == NULL) {
2444 break;
2445 }
2446
2447 OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_flow_lift);
2448
2449 cfil_info = entry->cfe_cfil_info;
2450 so = cfil_info->cfi_so;
2451
2452 if (cfil_info == NULL || os_ref_retain_try(&cfil_info->cfi_ref_count) == false) {
2453 break;
2454 }
2455
2456 cfil_rw_unlock_shared(&cfil_lck_rw);
2457 socket_lock(so, 1);
2458
2459 do {
2460 error = cfil_acquire_sockbuf(so, cfil_info, 1);
2461 if (error == 0) {
2462 error = cfil_data_service_ctl_q(so, cfil_info, kcunit, 1);
2463 }
2464 cfil_release_sockbuf(so, 1);
2465 if (error != 0) {
2466 break;
2467 }
2468
2469 error = cfil_acquire_sockbuf(so, cfil_info, 0);
2470 if (error == 0) {
2471 error = cfil_data_service_ctl_q(so, cfil_info, kcunit, 0);
2472 }
2473 cfil_release_sockbuf(so, 0);
2474 } while (0);
2475
2476 CFIL_INFO_FREE(cfil_info);
2477 socket_lock_assert_owned(so);
2478 socket_unlock(so, 1);
2479
2480 cfil_rw_lock_shared(&cfil_lck_rw);
2481 }
2482 done:
2483 cfil_rw_unlock_shared(&cfil_lck_rw);
2484 }
2485
2486 struct cflil_tag_container {
2487 struct m_tag cfil_m_tag;
2488 struct cfil_tag cfil_tag;
2489 };
2490
2491 static struct m_tag *
m_tag_kalloc_cfil_udp(u_int32_t id,u_int16_t type,uint16_t len,int wait)2492 m_tag_kalloc_cfil_udp(u_int32_t id, u_int16_t type, uint16_t len, int wait)
2493 {
2494 struct cflil_tag_container *tag_container;
2495 struct m_tag *tag = NULL;
2496
2497 assert3u(id, ==, KERNEL_MODULE_TAG_ID);
2498 assert3u(type, ==, KERNEL_TAG_TYPE_CFIL_UDP);
2499 assert3u(len, ==, sizeof(struct cfil_tag));
2500
2501 if (len != sizeof(struct cfil_tag)) {
2502 return NULL;
2503 }
2504
2505 tag_container = kalloc_type(struct cflil_tag_container, wait | M_ZERO);
2506 if (tag_container != NULL) {
2507 tag = &tag_container->cfil_m_tag;
2508
2509 assert3p(tag, ==, tag_container);
2510
2511 M_TAG_INIT(tag, id, type, len, &tag_container->cfil_tag, NULL);
2512 }
2513
2514 return tag;
2515 }
2516
2517 static void
m_tag_kfree_cfil_udp(struct m_tag * tag)2518 m_tag_kfree_cfil_udp(struct m_tag *tag)
2519 {
2520 struct cflil_tag_container * __single tag_container = (struct cflil_tag_container *)tag;
2521
2522 kfree_type(struct cflil_tag_container, tag_container);
2523 }
2524
2525 void
cfil_register_m_tag(void)2526 cfil_register_m_tag(void)
2527 {
2528 errno_t error = 0;
2529
2530 error = m_register_internal_tag_type(KERNEL_TAG_TYPE_CFIL_UDP, sizeof(struct cfil_tag),
2531 m_tag_kalloc_cfil_udp, m_tag_kfree_cfil_udp);
2532
2533 assert3u(error, ==, 0);
2534 }
2535
2536 void
cfil_init(void)2537 cfil_init(void)
2538 {
2539 struct kern_ctl_reg kern_ctl;
2540 errno_t error = 0;
2541 unsigned int mbuf_limit = 0;
2542
2543 CFIL_LOG(LOG_NOTICE, "");
2544
2545 /*
2546 * Compile time verifications
2547 */
2548 _CASSERT(CFIL_MAX_FILTER_COUNT == MAX_CONTENT_FILTER);
2549 _CASSERT(sizeof(struct cfil_filter_stat) % sizeof(uint32_t) == 0);
2550 _CASSERT(sizeof(struct cfil_entry_stat) % sizeof(uint32_t) == 0);
2551 _CASSERT(sizeof(struct cfil_sock_stat) % sizeof(uint32_t) == 0);
2552
2553 /*
2554 * Runtime time verifications
2555 */
2556 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_in_enqueued,
2557 sizeof(uint32_t)));
2558 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_out_enqueued,
2559 sizeof(uint32_t)));
2560 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_in_peeked,
2561 sizeof(uint32_t)));
2562 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_out_peeked,
2563 sizeof(uint32_t)));
2564
2565 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_pending_q_in_enqueued,
2566 sizeof(uint32_t)));
2567 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_pending_q_out_enqueued,
2568 sizeof(uint32_t)));
2569
2570 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_in_enqueued,
2571 sizeof(uint32_t)));
2572 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_out_enqueued,
2573 sizeof(uint32_t)));
2574 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_in_passed,
2575 sizeof(uint32_t)));
2576 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_out_passed,
2577 sizeof(uint32_t)));
2578
2579 /*
2580 * Allocate locks
2581 */
2582 TAILQ_INIT(&cfil_sock_head);
2583 TAILQ_INIT(&cfil_sock_head_stats);
2584
2585 /*
2586 * Register kernel control
2587 */
2588 bzero(&kern_ctl, sizeof(kern_ctl));
2589 strlcpy(kern_ctl.ctl_name, CONTENT_FILTER_CONTROL_NAME,
2590 sizeof(kern_ctl.ctl_name));
2591 kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED | CTL_FLAG_REG_EXTENDED;
2592 kern_ctl.ctl_sendsize = 512 * 1024; /* enough? */
2593 kern_ctl.ctl_recvsize = 512 * 1024; /* enough? */
2594 kern_ctl.ctl_connect = cfil_ctl_connect;
2595 kern_ctl.ctl_disconnect = cfil_ctl_disconnect;
2596 kern_ctl.ctl_send = cfil_ctl_send;
2597 kern_ctl.ctl_getopt = cfil_ctl_getopt;
2598 kern_ctl.ctl_setopt = cfil_ctl_setopt;
2599 kern_ctl.ctl_rcvd = cfil_ctl_rcvd;
2600 error = ctl_register(&kern_ctl, &cfil_kctlref);
2601 if (error != 0) {
2602 CFIL_LOG(LOG_ERR, "ctl_register failed: %d", error);
2603 return;
2604 }
2605
2606 // Spawn thread for statistics reporting
2607 if (kernel_thread_start(cfil_stats_report_thread_func, NULL,
2608 &cfil_stats_report_thread) != KERN_SUCCESS) {
2609 panic_plain("%s: Can't create statistics report thread", __func__);
2610 /* NOTREACHED */
2611 }
2612 /* this must not fail */
2613 VERIFY(cfil_stats_report_thread != NULL);
2614
2615 // Set UDP per-flow mbuf thresholds to 1/32 of platform max
2616 mbuf_limit = MAX(UDP_FLOW_GC_MBUF_CNT_MAX, (nmbclusters << MCLSHIFT) >> UDP_FLOW_GC_MBUF_SHIFT);
2617 cfil_udp_gc_mbuf_num_max = (mbuf_limit >> MCLSHIFT);
2618 cfil_udp_gc_mbuf_cnt_max = mbuf_limit;
2619
2620 memset(&global_cfil_stats_report_buffers, 0, sizeof(global_cfil_stats_report_buffers));
2621 }
2622
2623 struct cfil_info *
cfil_info_alloc(struct socket * so,struct soflow_hash_entry * hash_entry)2624 cfil_info_alloc(struct socket *so, struct soflow_hash_entry *hash_entry)
2625 {
2626 int kcunit;
2627 struct cfil_info *cfil_info = NULL;
2628 struct inpcb *inp = sotoinpcb(so);
2629
2630 CFIL_LOG(LOG_INFO, "");
2631
2632 socket_lock_assert_owned(so);
2633
2634 cfil_info = zalloc_flags(cfil_info_zone, Z_WAITOK | Z_ZERO | Z_NOFAIL);
2635 os_ref_init(&cfil_info->cfi_ref_count, &cfil_refgrp);
2636
2637 cfil_queue_init(&cfil_info->cfi_snd.cfi_inject_q);
2638 cfil_queue_init(&cfil_info->cfi_rcv.cfi_inject_q);
2639
2640 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
2641 struct cfil_entry *entry;
2642
2643 entry = &cfil_info->cfi_entries[kcunit - 1];
2644 entry->cfe_cfil_info = cfil_info;
2645
2646 /* Initialize the filter entry */
2647 entry->cfe_filter = NULL;
2648 entry->cfe_flags = 0;
2649 entry->cfe_necp_control_unit = 0;
2650 entry->cfe_snd.cfe_pass_offset = 0;
2651 entry->cfe_snd.cfe_peek_offset = 0;
2652 entry->cfe_snd.cfe_peeked = 0;
2653 entry->cfe_rcv.cfe_pass_offset = 0;
2654 entry->cfe_rcv.cfe_peek_offset = 0;
2655 entry->cfe_rcv.cfe_peeked = 0;
2656 /*
2657 * Timestamp the last action to avoid pre-maturely
2658 * triggering garbage collection
2659 */
2660 microuptime(&entry->cfe_last_action);
2661
2662 cfil_queue_init(&entry->cfe_snd.cfe_pending_q);
2663 cfil_queue_init(&entry->cfe_rcv.cfe_pending_q);
2664 cfil_queue_init(&entry->cfe_snd.cfe_ctl_q);
2665 cfil_queue_init(&entry->cfe_rcv.cfe_ctl_q);
2666 }
2667
2668 cfil_rw_lock_exclusive(&cfil_lck_rw);
2669
2670 /*
2671 * Create a cfi_sock_id that's not the socket pointer!
2672 */
2673
2674 if (hash_entry == NULL) {
2675 // This is the TCP case, cfil_info is tracked per socket
2676 if (inp->inp_flowhash == 0) {
2677 inp_calc_flowhash(inp);
2678 ASSERT(inp->inp_flowhash != 0);
2679 }
2680
2681 so->so_cfil = cfil_info;
2682 cfil_info->cfi_so = so;
2683 cfil_info->cfi_sock_id =
2684 ((so->so_gencnt << 32) | inp->inp_flowhash);
2685 } else {
2686 // This is the UDP case, cfil_info is tracked in per-socket hash
2687 cfil_info->cfi_so = so;
2688 cfil_info->cfi_hash_entry = hash_entry;
2689 cfil_info->cfi_sock_id = ((so->so_gencnt << 32) | (hash_entry->soflow_flowhash & 0xffffffff));
2690 }
2691
2692 TAILQ_INSERT_TAIL(&cfil_sock_head, cfil_info, cfi_link);
2693 SLIST_INIT(&cfil_info->cfi_ordered_entries);
2694
2695 cfil_sock_attached_count++;
2696
2697 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2698
2699 if (cfil_info != NULL) {
2700 OSIncrementAtomic(&cfil_stats.cfs_cfi_alloc_ok);
2701 } else {
2702 OSIncrementAtomic(&cfil_stats.cfs_cfi_alloc_fail);
2703 }
2704
2705 return cfil_info;
2706 }
2707
2708 int
cfil_info_attach_unit(struct socket * so,uint32_t filter_control_unit,struct cfil_info * cfil_info)2709 cfil_info_attach_unit(struct socket *so, uint32_t filter_control_unit, struct cfil_info *cfil_info)
2710 {
2711 int kcunit;
2712 int attached = 0;
2713
2714 CFIL_LOG(LOG_INFO, "");
2715
2716 socket_lock_assert_owned(so);
2717
2718 cfil_rw_lock_exclusive(&cfil_lck_rw);
2719
2720 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
2721 struct content_filter *cfc = content_filters[kcunit - 1];
2722 struct cfil_entry *entry;
2723 struct cfil_entry *iter_entry;
2724 struct cfil_entry *iter_prev;
2725
2726 if (cfc == NULL) {
2727 continue;
2728 }
2729 if (!(cfc->cf_necp_control_unit & filter_control_unit)) {
2730 continue;
2731 }
2732
2733 entry = &cfil_info->cfi_entries[kcunit - 1];
2734
2735 entry->cfe_filter = cfc;
2736 entry->cfe_necp_control_unit = cfc->cf_necp_control_unit;
2737 TAILQ_INSERT_TAIL(&cfc->cf_sock_entries, entry, cfe_link);
2738 cfc->cf_sock_count++;
2739
2740 /* Insert the entry into the list ordered by control unit */
2741 iter_prev = NULL;
2742 SLIST_FOREACH(iter_entry, &cfil_info->cfi_ordered_entries, cfe_order_link) {
2743 if (entry->cfe_necp_control_unit < iter_entry->cfe_necp_control_unit) {
2744 break;
2745 }
2746 iter_prev = iter_entry;
2747 }
2748
2749 if (iter_prev == NULL) {
2750 SLIST_INSERT_HEAD(&cfil_info->cfi_ordered_entries, entry, cfe_order_link);
2751 } else {
2752 SLIST_INSERT_AFTER(iter_prev, entry, cfe_order_link);
2753 }
2754
2755 verify_content_filter(cfc);
2756 attached = 1;
2757 entry->cfe_flags |= CFEF_CFIL_ATTACHED;
2758 }
2759
2760 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2761
2762 return attached;
2763 }
2764
2765 static void
cfil_info_free(struct cfil_info * cfil_info)2766 cfil_info_free(struct cfil_info *cfil_info)
2767 {
2768 int kcunit;
2769 uint64_t in_drain = 0;
2770 uint64_t out_drained = 0;
2771
2772 if (cfil_info == NULL) {
2773 return;
2774 }
2775
2776 CFIL_LOG(LOG_INFO, "");
2777
2778 cfil_rw_lock_exclusive(&cfil_lck_rw);
2779
2780 if (cfil_info->cfi_debug) {
2781 cfil_info_log(LOG_ERR, cfil_info, "CFIL: FREEING CFIL_INFO");
2782 }
2783
2784 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
2785 struct cfil_entry *entry;
2786 struct content_filter *cfc;
2787
2788 entry = &cfil_info->cfi_entries[kcunit - 1];
2789
2790 /* Don't be silly and try to detach twice */
2791 if (entry->cfe_filter == NULL) {
2792 continue;
2793 }
2794
2795 cfc = content_filters[kcunit - 1];
2796
2797 VERIFY(cfc == entry->cfe_filter);
2798
2799 entry->cfe_filter = NULL;
2800 entry->cfe_necp_control_unit = 0;
2801 TAILQ_REMOVE(&cfc->cf_sock_entries, entry, cfe_link);
2802 cfc->cf_sock_count--;
2803
2804 verify_content_filter(cfc);
2805 }
2806
2807 cfil_sock_attached_count--;
2808 TAILQ_REMOVE(&cfil_sock_head, cfil_info, cfi_link);
2809
2810 // Turn off stats reporting for cfil_info.
2811 cfil_info_stats_toggle(cfil_info, NULL, 0);
2812
2813 out_drained += cfil_queue_drain(&cfil_info->cfi_snd.cfi_inject_q);
2814 in_drain += cfil_queue_drain(&cfil_info->cfi_rcv.cfi_inject_q);
2815
2816 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
2817 struct cfil_entry *entry;
2818
2819 entry = &cfil_info->cfi_entries[kcunit - 1];
2820 out_drained += cfil_queue_drain(&entry->cfe_snd.cfe_pending_q);
2821 in_drain += cfil_queue_drain(&entry->cfe_rcv.cfe_pending_q);
2822 out_drained += cfil_queue_drain(&entry->cfe_snd.cfe_ctl_q);
2823 in_drain += cfil_queue_drain(&entry->cfe_rcv.cfe_ctl_q);
2824 }
2825 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2826
2827 if (out_drained) {
2828 OSIncrementAtomic(&cfil_stats.cfs_flush_out_free);
2829 }
2830 if (in_drain) {
2831 OSIncrementAtomic(&cfil_stats.cfs_flush_in_free);
2832 }
2833
2834 zfree(cfil_info_zone, cfil_info);
2835 }
2836
2837 /*
2838 * Received a verdict from userspace for a socket.
2839 * Perform any delayed operation if needed.
2840 */
2841 static void
cfil_sock_received_verdict(struct socket * so)2842 cfil_sock_received_verdict(struct socket *so)
2843 {
2844 if (so == NULL || so->so_cfil == NULL) {
2845 return;
2846 }
2847
2848 so->so_cfil->cfi_flags |= CFIF_INITIAL_VERDICT;
2849
2850 /*
2851 * If socket has already been connected, trigger
2852 * soisconnected now.
2853 */
2854 if (so->so_cfil->cfi_flags & CFIF_SOCKET_CONNECTED) {
2855 so->so_cfil->cfi_flags &= ~CFIF_SOCKET_CONNECTED;
2856 soisconnected(so);
2857 return;
2858 }
2859 }
2860
2861 /*
2862 * Entry point from Sockets layer
2863 * The socket is locked.
2864 *
2865 * Checks if a connected socket is subject to filter and
2866 * pending the initial verdict.
2867 */
2868 boolean_t
cfil_sock_connected_pending_verdict(struct socket * so)2869 cfil_sock_connected_pending_verdict(struct socket *so)
2870 {
2871 if (so == NULL || so->so_cfil == NULL) {
2872 return false;
2873 }
2874
2875 if (so->so_cfil->cfi_flags & CFIF_INITIAL_VERDICT) {
2876 return false;
2877 } else {
2878 /*
2879 * Remember that this protocol is already connected, so
2880 * we will trigger soisconnected() upon receipt of
2881 * initial verdict later.
2882 */
2883 so->so_cfil->cfi_flags |= CFIF_SOCKET_CONNECTED;
2884 return true;
2885 }
2886 }
2887
2888 /*
2889 * Entry point from Flow Divert
2890 * The socket is locked.
2891 *
2892 * Mark socket as DEAD if all CFIL data has been processed by filter(s).
2893 * Otherwise, delay the marking until all data has been processed.
2894 */
2895 boolean_t
cfil_sock_is_dead(struct socket * so)2896 cfil_sock_is_dead(struct socket *so)
2897 {
2898 struct inpcb *inp = NULL;
2899
2900 if (so == NULL) {
2901 return false;
2902 }
2903
2904 socket_lock_assert_owned(so);
2905
2906 if ((so->so_flags & SOF_CONTENT_FILTER) != 0) {
2907 int32_t pending_snd = cfil_sock_data_pending(&so->so_snd);
2908 int32_t pending_rcv = cfil_sock_data_pending(&so->so_rcv);
2909 if (pending_snd || pending_rcv) {
2910 SO_DELAYED_DEAD_SET(so, true)
2911 return false;
2912 }
2913 }
2914
2915 inp = sotoinpcb(so);
2916 if (inp != NULL) {
2917 inp->inp_state = INPCB_STATE_DEAD;
2918 inpcb_gc_sched(inp->inp_pcbinfo, INPCB_TIMER_FAST);
2919 SO_DELAYED_DEAD_SET(so, false)
2920 return true;
2921 }
2922 return false;
2923 }
2924
2925 /*
2926 * Entry point from tcp_timer.c
2927 * The socket is locked.
2928 *
2929 * Perform TCP FIN time wait handling if all CFIL data has been processed by filter(s).
2930 * Otherwise, delay until all data has been processed.
2931 */
2932 boolean_t
cfil_sock_tcp_add_time_wait(struct socket * so)2933 cfil_sock_tcp_add_time_wait(struct socket *so)
2934 {
2935 struct inpcb *inp = NULL;
2936 struct tcpcb *tp = NULL;
2937
2938 // Only handle TCP sockets
2939 if (so == NULL || !IS_TCP(so)) {
2940 return false;
2941 }
2942
2943 socket_lock_assert_owned(so);
2944
2945 if ((so->so_flags & SOF_CONTENT_FILTER) != 0) {
2946 int32_t pending_snd = cfil_sock_data_pending(&so->so_snd);
2947 int32_t pending_rcv = cfil_sock_data_pending(&so->so_rcv);
2948 if (pending_snd || pending_rcv) {
2949 SO_DELAYED_TCP_TIME_WAIT_SET(so, true)
2950 return false;
2951 }
2952 }
2953
2954 inp = sotoinpcb(so);
2955 tp = inp ? intotcpcb(inp) : NULL;
2956 if (tp != NULL) {
2957 add_to_time_wait_now(tp, 2 * tcp_msl);
2958 SO_DELAYED_TCP_TIME_WAIT_SET(so, false)
2959 return true;
2960 }
2961 return false;
2962 }
2963
2964 boolean_t
cfil_filter_present(void)2965 cfil_filter_present(void)
2966 {
2967 return cfil_active_count > 0;
2968 }
2969
2970 /*
2971 * Entry point from Sockets layer
2972 * The socket is locked.
2973 */
2974 errno_t
cfil_sock_attach(struct socket * so,struct sockaddr * local,struct sockaddr * remote,int dir)2975 cfil_sock_attach(struct socket *so, struct sockaddr *local, struct sockaddr *remote, int dir)
2976 {
2977 errno_t error = 0;
2978 uint32_t filter_control_unit;
2979 int debug = 0;
2980
2981 socket_lock_assert_owned(so);
2982
2983 if (so->so_flags1 & SOF1_FLOW_DIVERT_SKIP) {
2984 /*
2985 * This socket has already been evaluated (and ultimately skipped) by
2986 * flow divert, so it has also already been through content filter if there
2987 * is one.
2988 */
2989 goto done;
2990 }
2991
2992 /* Limit ourselves to TCP that are not MPTCP subflows */
2993 if (SKIP_FILTER_FOR_TCP_SOCKET(so)) {
2994 goto done;
2995 }
2996
2997 debug = DEBUG_FLOW(sotoinpcb(so), so, local, remote);
2998 if (debug) {
2999 CFIL_LOG(LOG_ERR, "CFIL: TCP (dir %d) - debug flow with port %d", dir, cfil_log_port);
3000 }
3001
3002 filter_control_unit = necp_socket_get_content_filter_control_unit(so);
3003 if (filter_control_unit == 0) {
3004 goto done;
3005 }
3006
3007 if (filter_control_unit == NECP_FILTER_UNIT_NO_FILTER) {
3008 goto done;
3009 }
3010 if ((filter_control_unit & NECP_MASK_USERSPACE_ONLY) != 0) {
3011 OSIncrementAtomic(&cfil_stats.cfs_sock_userspace_only);
3012 goto done;
3013 }
3014 if (cfil_active_count == 0) {
3015 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_in_vain);
3016 goto done;
3017 }
3018 if (so->so_cfil != NULL) {
3019 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_already);
3020 CFIL_LOG(LOG_ERR, "already attached");
3021 goto done;
3022 } else {
3023 cfil_info_alloc(so, NULL);
3024 if (so->so_cfil == NULL) {
3025 error = ENOMEM;
3026 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_no_mem);
3027 goto done;
3028 }
3029 so->so_cfil->cfi_dir = dir;
3030 so->so_cfil->cfi_filter_control_unit = filter_control_unit;
3031 so->so_cfil->cfi_debug = debug;
3032 }
3033 if (cfil_info_attach_unit(so, filter_control_unit, so->so_cfil) == 0) {
3034 CFIL_LOG(LOG_ERR, "cfil_info_attach_unit(%u) failed",
3035 filter_control_unit);
3036 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_failed);
3037 goto done;
3038 }
3039 CFIL_LOG(LOG_INFO, "so %llx filter_control_unit %u sockID %llu <%llx>",
3040 (uint64_t)VM_KERNEL_ADDRPERM(so),
3041 filter_control_unit, so->so_cfil->cfi_sock_id, so->so_cfil->cfi_sock_id);
3042
3043 so->so_flags |= SOF_CONTENT_FILTER;
3044 OSIncrementAtomic(&cfil_stats.cfs_sock_attached);
3045
3046 /* Hold a reference on the socket */
3047 so->so_usecount++;
3048
3049 /*
3050 * Save passed addresses for attach event msg (in case resend
3051 * is needed.
3052 */
3053 if (remote != NULL && (remote->sa_len <= sizeof(union sockaddr_in_4_6))) {
3054 SOCKADDR_COPY(remote, SA(&so->so_cfil->cfi_so_attach_faddr), remote->sa_len);
3055 }
3056 if (local != NULL && (local->sa_len <= sizeof(union sockaddr_in_4_6))) {
3057 SOCKADDR_COPY(local, SA(&so->so_cfil->cfi_so_attach_laddr), local->sa_len);
3058 }
3059
3060 if (so->so_cfil->cfi_debug) {
3061 cfil_info_log(LOG_ERR, so->so_cfil, "CFIL: ADDED");
3062 }
3063
3064 error = cfil_dispatch_attach_event(so, so->so_cfil, 0, dir);
3065 /* We can recover from flow control or out of memory errors */
3066 if (error == ENOBUFS || error == ENOMEM) {
3067 error = 0;
3068 } else if (error != 0) {
3069 goto done;
3070 }
3071
3072 CFIL_INFO_VERIFY(so->so_cfil);
3073 done:
3074 return error;
3075 }
3076
3077 /*
3078 * Entry point from Sockets layer
3079 * The socket is locked.
3080 */
3081 errno_t
cfil_sock_detach(struct socket * so)3082 cfil_sock_detach(struct socket *so)
3083 {
3084 if (NEED_DGRAM_FLOW_TRACKING(so)) {
3085 return 0;
3086 }
3087
3088 if (so->so_cfil) {
3089 if (so->so_flags & SOF_CONTENT_FILTER) {
3090 so->so_flags &= ~SOF_CONTENT_FILTER;
3091 VERIFY(so->so_usecount > 0);
3092 so->so_usecount--;
3093 }
3094 CFIL_INFO_FREE(so->so_cfil);
3095 so->so_cfil = NULL;
3096 OSIncrementAtomic(&cfil_stats.cfs_sock_detached);
3097 }
3098 return 0;
3099 }
3100
3101 /*
3102 * Fill in the address info of an event message from either
3103 * the socket or passed in address info.
3104 */
3105 static void
cfil_fill_event_msg_addresses(struct soflow_hash_entry * entry,struct inpcb * inp,union sockaddr_in_4_6 * sin_src,union sockaddr_in_4_6 * sin_dst,boolean_t isIPv4,boolean_t outgoing)3106 cfil_fill_event_msg_addresses(struct soflow_hash_entry *entry, struct inpcb *inp,
3107 union sockaddr_in_4_6 *sin_src, union sockaddr_in_4_6 *sin_dst,
3108 boolean_t isIPv4, boolean_t outgoing)
3109 {
3110 if (isIPv4) {
3111 struct in_addr laddr = {0}, faddr = {0};
3112 u_int16_t lport = 0, fport = 0;
3113
3114 cfil_get_flow_address(entry, inp, &laddr, &faddr, &lport, &fport);
3115
3116 if (outgoing) {
3117 fill_ip_sockaddr_4_6(sin_src, laddr, lport);
3118 fill_ip_sockaddr_4_6(sin_dst, faddr, fport);
3119 } else {
3120 fill_ip_sockaddr_4_6(sin_src, faddr, fport);
3121 fill_ip_sockaddr_4_6(sin_dst, laddr, lport);
3122 }
3123 } else {
3124 struct in6_addr * __single laddr = NULL, * __single faddr = NULL;
3125 u_int16_t lport = 0, fport = 0;
3126 const u_int32_t lifscope = inp ? inp->inp_lifscope : IFSCOPE_UNKNOWN;
3127 const u_int32_t fifscope = inp ? inp->inp_fifscope : IFSCOPE_UNKNOWN;
3128
3129 cfil_get_flow_address_v6(entry, inp, &laddr, &faddr, &lport, &fport);
3130 if (outgoing) {
3131 fill_ip6_sockaddr_4_6(sin_src, laddr, lport, lifscope);
3132 fill_ip6_sockaddr_4_6(sin_dst, faddr, fport, fifscope);
3133 } else {
3134 fill_ip6_sockaddr_4_6(sin_src, faddr, fport, fifscope);
3135 fill_ip6_sockaddr_4_6(sin_dst, laddr, lport, lifscope);
3136 }
3137 }
3138 }
3139
3140 static boolean_t
cfil_dispatch_attach_event_sign(cfil_crypto_state_t crypto_state,struct cfil_info * cfil_info,struct cfil_msg_sock_attached * msg)3141 cfil_dispatch_attach_event_sign(cfil_crypto_state_t crypto_state,
3142 struct cfil_info *cfil_info,
3143 struct cfil_msg_sock_attached *msg)
3144 {
3145 struct cfil_crypto_data data = {};
3146 struct iovec extra_data[1] = { { NULL, 0 } };
3147
3148 if (crypto_state == NULL || msg == NULL || cfil_info == NULL) {
3149 return false;
3150 }
3151
3152 data.sock_id = msg->cfs_msghdr.cfm_sock_id;
3153 data.direction = msg->cfs_conn_dir;
3154
3155 data.pid = msg->cfs_pid;
3156 data.effective_pid = msg->cfs_e_pid;
3157 uuid_copy(data.uuid, msg->cfs_uuid);
3158 uuid_copy(data.effective_uuid, msg->cfs_e_uuid);
3159 data.socketProtocol = msg->cfs_sock_protocol;
3160 if (data.direction == CFS_CONNECTION_DIR_OUT) {
3161 data.remote.sin6 = msg->cfs_dst.sin6;
3162 data.local.sin6 = msg->cfs_src.sin6;
3163 } else {
3164 data.remote.sin6 = msg->cfs_src.sin6;
3165 data.local.sin6 = msg->cfs_dst.sin6;
3166 }
3167
3168 size_t len = strbuflen(msg->cfs_remote_domain_name, sizeof(msg->cfs_remote_domain_name));
3169 if (len > 0) {
3170 extra_data[0].iov_base = msg->cfs_remote_domain_name;
3171 extra_data[0].iov_len = len;
3172 }
3173
3174 // At attach, if local address is already present, no need to re-sign subsequent data messages.
3175 if (!NULLADDRESS(data.local)) {
3176 cfil_info->cfi_isSignatureLatest = true;
3177 }
3178
3179 msg->cfs_signature_length = sizeof(cfil_crypto_signature);
3180 if (cfil_crypto_sign_data(crypto_state, &data, extra_data, sizeof(extra_data) / sizeof(extra_data[0]), msg->cfs_signature, &msg->cfs_signature_length) != 0) {
3181 msg->cfs_signature_length = 0;
3182 CFIL_LOG(LOG_ERR, "CFIL: Failed to sign attached msg <sockID %llu <%llx>>",
3183 msg->cfs_msghdr.cfm_sock_id, msg->cfs_msghdr.cfm_sock_id);
3184 return false;
3185 }
3186
3187 return true;
3188 }
3189
3190 struct cfil_sign_parameters {
3191 cfil_crypto_state_t csp_state;
3192 struct cfil_crypto_data *csp_data;
3193 uint8_t * __indexable csp_signature;
3194 uint32_t *csp_signature_size;
3195 };
3196
3197 static void
cfil_sign_with_domain_name(char * domain_name __null_terminated,void * ctx)3198 cfil_sign_with_domain_name(char *domain_name __null_terminated, void *ctx)
3199 {
3200 struct cfil_sign_parameters *parameters = (struct cfil_sign_parameters *)ctx;
3201 struct iovec extra_data[1] = { { NULL, 0 } };
3202
3203 if (parameters == NULL) {
3204 return;
3205 }
3206
3207 if (domain_name != NULL) {
3208 extra_data[0].iov_base = __unsafe_null_terminated_to_indexable(domain_name);
3209 extra_data[0].iov_len = strlen(domain_name);
3210 }
3211
3212 *(parameters->csp_signature_size) = sizeof(cfil_crypto_signature);
3213 if (cfil_crypto_sign_data(parameters->csp_state, parameters->csp_data,
3214 extra_data, sizeof(extra_data) / sizeof(extra_data[0]),
3215 parameters->csp_signature, parameters->csp_signature_size) != 0) {
3216 *(parameters->csp_signature_size) = 0;
3217 }
3218 }
3219
3220 static boolean_t
cfil_dispatch_data_event_sign(cfil_crypto_state_t crypto_state,struct socket * so,struct cfil_info * cfil_info,struct cfil_msg_data_event * msg)3221 cfil_dispatch_data_event_sign(cfil_crypto_state_t crypto_state,
3222 struct socket *so, struct cfil_info *cfil_info,
3223 struct cfil_msg_data_event *msg)
3224 {
3225 struct cfil_crypto_data data = {};
3226
3227 if (crypto_state == NULL || msg == NULL ||
3228 so == NULL || cfil_info == NULL) {
3229 return false;
3230 }
3231
3232 data.sock_id = cfil_info->cfi_sock_id;
3233 data.direction = cfil_info->cfi_dir;
3234 data.pid = so->last_pid;
3235 memcpy(data.uuid, so->last_uuid, sizeof(uuid_t));
3236 if (so->so_flags & SOF_DELEGATED) {
3237 data.effective_pid = so->e_pid;
3238 memcpy(data.effective_uuid, so->e_uuid, sizeof(uuid_t));
3239 }
3240 #if defined(XNU_TARGET_OS_OSX)
3241 else if (!uuid_is_null(so->so_ruuid)) {
3242 data.effective_pid = so->so_rpid;
3243 memcpy(data.effective_uuid, so->so_ruuid, sizeof(uuid_t));
3244 }
3245 #endif
3246 else {
3247 data.effective_pid = so->last_pid;
3248 memcpy(data.effective_uuid, so->last_uuid, sizeof(uuid_t));
3249 }
3250 data.socketProtocol = GET_SO_PROTO(so);
3251
3252 if (data.direction == CFS_CONNECTION_DIR_OUT) {
3253 data.remote.sin6 = msg->cfc_dst.sin6;
3254 data.local.sin6 = msg->cfc_src.sin6;
3255 } else {
3256 data.remote.sin6 = msg->cfc_src.sin6;
3257 data.local.sin6 = msg->cfc_dst.sin6;
3258 }
3259
3260 // At first data, local address may show up for the first time, update address cache and
3261 // no need to re-sign subsequent data messages anymore.
3262 if (!NULLADDRESS(data.local)) {
3263 memcpy(&cfil_info->cfi_so_attach_laddr, &data.local, data.local.sa.sa_len);
3264 cfil_info->cfi_isSignatureLatest = true;
3265 }
3266
3267 struct cfil_sign_parameters parameters = {
3268 .csp_state = crypto_state,
3269 .csp_data = &data,
3270 .csp_signature = msg->cfd_signature,
3271 .csp_signature_size = &msg->cfd_signature_length,
3272 };
3273 necp_with_inp_domain_name(so, ¶meters, cfil_sign_with_domain_name);
3274
3275 if (msg->cfd_signature_length == 0) {
3276 CFIL_LOG(LOG_ERR, "CFIL: Failed to sign data msg <sockID %llu <%llx>>",
3277 msg->cfd_msghdr.cfm_sock_id, msg->cfd_msghdr.cfm_sock_id);
3278 return false;
3279 }
3280
3281 return true;
3282 }
3283
3284 static boolean_t
cfil_dispatch_closed_event_sign(cfil_crypto_state_t crypto_state,struct socket * so,struct cfil_info * cfil_info,struct cfil_msg_sock_closed * msg)3285 cfil_dispatch_closed_event_sign(cfil_crypto_state_t crypto_state,
3286 struct socket *so, struct cfil_info *cfil_info,
3287 struct cfil_msg_sock_closed *msg)
3288 {
3289 struct cfil_crypto_data data = {};
3290 struct soflow_hash_entry hash_entry = {};
3291 struct soflow_hash_entry *hash_entry_ptr = NULL;
3292 struct inpcb *inp = (struct inpcb *)so->so_pcb;
3293
3294 if (crypto_state == NULL || msg == NULL ||
3295 so == NULL || inp == NULL || cfil_info == NULL) {
3296 return false;
3297 }
3298
3299 data.sock_id = cfil_info->cfi_sock_id;
3300 data.direction = cfil_info->cfi_dir;
3301
3302 data.pid = so->last_pid;
3303 memcpy(data.uuid, so->last_uuid, sizeof(uuid_t));
3304 if (so->so_flags & SOF_DELEGATED) {
3305 data.effective_pid = so->e_pid;
3306 memcpy(data.effective_uuid, so->e_uuid, sizeof(uuid_t));
3307 }
3308 #if defined(XNU_TARGET_OS_OSX)
3309 else if (!uuid_is_null(so->so_ruuid)) {
3310 data.effective_pid = so->so_rpid;
3311 memcpy(data.effective_uuid, so->so_ruuid, sizeof(uuid_t));
3312 }
3313 #endif
3314 else {
3315 data.effective_pid = so->last_pid;
3316 memcpy(data.effective_uuid, so->last_uuid, sizeof(uuid_t));
3317 }
3318 data.socketProtocol = GET_SO_PROTO(so);
3319
3320 /*
3321 * Fill in address info:
3322 * For UDP, use the cfil_info hash entry directly.
3323 * For TCP, compose an hash entry with the saved addresses.
3324 */
3325 if (cfil_info->cfi_hash_entry != NULL) {
3326 hash_entry_ptr = cfil_info->cfi_hash_entry;
3327 } else if (cfil_info->cfi_so_attach_faddr.sa.sa_len > 0 ||
3328 cfil_info->cfi_so_attach_laddr.sa.sa_len > 0) {
3329 soflow_fill_hash_entry_from_address(&hash_entry, TRUE, SA(&cfil_info->cfi_so_attach_laddr), FALSE);
3330 soflow_fill_hash_entry_from_address(&hash_entry, FALSE, SA(&cfil_info->cfi_so_attach_faddr), FALSE);
3331 hash_entry_ptr = &hash_entry;
3332 }
3333 if (hash_entry_ptr != NULL) {
3334 boolean_t outgoing = (cfil_info->cfi_dir == CFS_CONNECTION_DIR_OUT);
3335 union sockaddr_in_4_6 *src = outgoing ? &data.local : &data.remote;
3336 union sockaddr_in_4_6 *dst = outgoing ? &data.remote : &data.local;
3337 cfil_fill_event_msg_addresses(hash_entry_ptr, inp, src, dst, !IS_INP_V6(inp), outgoing);
3338 }
3339
3340 data.byte_count_in = cfil_info->cfi_byte_inbound_count;
3341 data.byte_count_out = cfil_info->cfi_byte_outbound_count;
3342
3343 struct cfil_sign_parameters parameters = {
3344 .csp_state = crypto_state,
3345 .csp_data = &data,
3346 .csp_signature = msg->cfc_signature,
3347 .csp_signature_size = &msg->cfc_signature_length
3348 };
3349 necp_with_inp_domain_name(so, ¶meters, cfil_sign_with_domain_name);
3350
3351 if (msg->cfc_signature_length == 0) {
3352 CFIL_LOG(LOG_ERR, "CFIL: Failed to sign closed msg <sockID %llu <%llx>>",
3353 msg->cfc_msghdr.cfm_sock_id, msg->cfc_msghdr.cfm_sock_id);
3354 return false;
3355 }
3356
3357 return true;
3358 }
3359
3360 static void
cfil_populate_attached_msg_domain_name(char * domain_name __null_terminated,void * ctx)3361 cfil_populate_attached_msg_domain_name(char *domain_name __null_terminated, void *ctx)
3362 {
3363 struct cfil_msg_sock_attached *msg_attached = (struct cfil_msg_sock_attached *)ctx;
3364
3365 if (msg_attached == NULL) {
3366 return;
3367 }
3368
3369 if (domain_name != NULL) {
3370 strlcpy(msg_attached->cfs_remote_domain_name, domain_name, sizeof(msg_attached->cfs_remote_domain_name));
3371 }
3372 }
3373
3374 static bool
cfil_copy_audit_token(pid_t pid,audit_token_t * buffer)3375 cfil_copy_audit_token(pid_t pid, audit_token_t *buffer)
3376 {
3377 bool success = false;
3378 proc_t p = proc_find(pid);
3379 if (p != PROC_NULL) {
3380 task_t __single t = proc_task(p);
3381 if (t != TASK_NULL) {
3382 audit_token_t audit_token = {};
3383 mach_msg_type_number_t count = TASK_AUDIT_TOKEN_COUNT;
3384 if (task_info(t, TASK_AUDIT_TOKEN, (task_info_t)&audit_token, &count) == KERN_SUCCESS) {
3385 memcpy(buffer, &audit_token, sizeof(audit_token_t));
3386 success = true;
3387 }
3388 }
3389 proc_rele(p);
3390 }
3391 return success;
3392 }
3393
3394 static int
cfil_dispatch_attach_event(struct socket * so,struct cfil_info * cfil_info,uint32_t kcunit,int conn_dir)3395 cfil_dispatch_attach_event(struct socket *so, struct cfil_info *cfil_info,
3396 uint32_t kcunit, int conn_dir)
3397 {
3398 errno_t error = 0;
3399 struct cfil_entry *entry = NULL;
3400 struct cfil_msg_sock_attached * __single msg_attached;
3401 struct content_filter *cfc = NULL;
3402 struct inpcb *inp = (struct inpcb *)so->so_pcb;
3403 struct soflow_hash_entry *hash_entry_ptr = NULL;
3404 struct soflow_hash_entry hash_entry;
3405
3406 memset(&hash_entry, 0, sizeof(struct soflow_hash_entry));
3407
3408 socket_lock_assert_owned(so);
3409
3410 cfil_rw_lock_shared(&cfil_lck_rw);
3411
3412 if (so->so_proto == NULL || so->so_proto->pr_domain == NULL) {
3413 error = EINVAL;
3414 goto done;
3415 }
3416
3417 if (kcunit == 0) {
3418 entry = SLIST_FIRST(&cfil_info->cfi_ordered_entries);
3419 } else {
3420 entry = &cfil_info->cfi_entries[kcunit - 1];
3421 }
3422
3423 if (entry == NULL) {
3424 goto done;
3425 }
3426
3427 cfc = entry->cfe_filter;
3428 if (cfc == NULL) {
3429 goto done;
3430 }
3431
3432 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED)) {
3433 goto done;
3434 }
3435
3436 if (kcunit == 0) {
3437 kcunit = CFI_ENTRY_KCUNIT(cfil_info, entry);
3438 }
3439
3440 CFIL_LOG(LOG_INFO, "so %llx filter_control_unit %u kcunit %u",
3441 (uint64_t)VM_KERNEL_ADDRPERM(so), entry->cfe_necp_control_unit, kcunit);
3442
3443 /* Would be wasteful to try when flow controlled */
3444 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
3445 error = ENOBUFS;
3446 goto done;
3447 }
3448
3449 msg_attached = kalloc_data(sizeof(struct cfil_msg_sock_attached), Z_WAITOK);
3450 if (msg_attached == NULL) {
3451 error = ENOMEM;
3452 goto done;
3453 }
3454
3455 bzero(msg_attached, sizeof(struct cfil_msg_sock_attached));
3456 msg_attached->cfs_msghdr.cfm_len = sizeof(struct cfil_msg_sock_attached);
3457 msg_attached->cfs_msghdr.cfm_version = CFM_VERSION_CURRENT;
3458 msg_attached->cfs_msghdr.cfm_type = CFM_TYPE_EVENT;
3459 msg_attached->cfs_msghdr.cfm_op = CFM_OP_SOCKET_ATTACHED;
3460 msg_attached->cfs_msghdr.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;
3461
3462 msg_attached->cfs_sock_family = SOCK_DOM(so);
3463 msg_attached->cfs_sock_type = SOCK_TYPE(so);
3464 msg_attached->cfs_sock_protocol = GET_SO_PROTO(so);
3465 msg_attached->cfs_pid = so->last_pid;
3466 memcpy(msg_attached->cfs_uuid, so->last_uuid, sizeof(uuid_t));
3467 if (so->so_flags & SOF_DELEGATED) {
3468 msg_attached->cfs_e_pid = so->e_pid;
3469 memcpy(msg_attached->cfs_e_uuid, so->e_uuid, sizeof(uuid_t));
3470 }
3471 #if defined(XNU_TARGET_OS_OSX)
3472 else if (!uuid_is_null(so->so_ruuid)) {
3473 msg_attached->cfs_e_pid = so->so_rpid;
3474 memcpy(msg_attached->cfs_e_uuid, so->so_ruuid, sizeof(uuid_t));
3475 }
3476 #endif
3477 else {
3478 msg_attached->cfs_e_pid = so->last_pid;
3479 memcpy(msg_attached->cfs_e_uuid, so->last_uuid, sizeof(uuid_t));
3480 }
3481
3482 /*
3483 * Fill in address info:
3484 * For UDP, use the cfil_info hash entry directly.
3485 * For TCP, compose an hash entry with the saved addresses.
3486 */
3487 if (cfil_info->cfi_hash_entry != NULL) {
3488 hash_entry_ptr = cfil_info->cfi_hash_entry;
3489 } else if (cfil_info->cfi_so_attach_faddr.sa.sa_len > 0 ||
3490 cfil_info->cfi_so_attach_laddr.sa.sa_len > 0) {
3491 soflow_fill_hash_entry_from_address(&hash_entry, TRUE, SA(&cfil_info->cfi_so_attach_laddr), FALSE);
3492 soflow_fill_hash_entry_from_address(&hash_entry, FALSE, SA(&cfil_info->cfi_so_attach_faddr), FALSE);
3493 hash_entry_ptr = &hash_entry;
3494 }
3495 if (hash_entry_ptr != NULL) {
3496 cfil_fill_event_msg_addresses(hash_entry_ptr, inp,
3497 &msg_attached->cfs_src, &msg_attached->cfs_dst,
3498 !IS_INP_V6(inp), conn_dir == CFS_CONNECTION_DIR_OUT);
3499 }
3500 msg_attached->cfs_conn_dir = conn_dir;
3501
3502 if (msg_attached->cfs_e_pid != 0) {
3503 if (!cfil_copy_audit_token(msg_attached->cfs_e_pid, (audit_token_t *)&msg_attached->cfs_audit_token)) {
3504 CFIL_LOG(LOG_ERR, "CFIL: Failed to get effective audit token for <sockID %llu <%llx>> ",
3505 entry->cfe_cfil_info->cfi_sock_id, entry->cfe_cfil_info->cfi_sock_id);
3506 }
3507 }
3508
3509 if (msg_attached->cfs_pid != 0) {
3510 if (msg_attached->cfs_pid == msg_attached->cfs_e_pid) {
3511 memcpy(&msg_attached->cfs_real_audit_token, &msg_attached->cfs_audit_token, sizeof(msg_attached->cfs_real_audit_token));
3512 } else if (!cfil_copy_audit_token(msg_attached->cfs_pid, (audit_token_t *)&msg_attached->cfs_real_audit_token)) {
3513 CFIL_LOG(LOG_ERR, "CFIL: Failed to get real audit token for <sockID %llu <%llx>> ",
3514 entry->cfe_cfil_info->cfi_sock_id, entry->cfe_cfil_info->cfi_sock_id);
3515 }
3516 }
3517
3518 necp_with_inp_domain_name(so, msg_attached, cfil_populate_attached_msg_domain_name);
3519
3520 if (cfil_info->cfi_debug) {
3521 cfil_info_log(LOG_ERR, cfil_info, "CFIL: SENDING ATTACH UP");
3522 }
3523
3524 cfil_dispatch_attach_event_sign(entry->cfe_filter->cf_crypto_state, cfil_info, msg_attached);
3525
3526 error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
3527 entry->cfe_filter->cf_kcunit,
3528 msg_attached,
3529 sizeof(struct cfil_msg_sock_attached),
3530 CTL_DATA_EOR);
3531
3532 kfree_data(msg_attached, sizeof(struct cfil_msg_sock_attached));
3533
3534 if (error != 0) {
3535 CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d", error);
3536 goto done;
3537 }
3538 microuptime(&entry->cfe_last_event);
3539 cfil_info->cfi_first_event.tv_sec = entry->cfe_last_event.tv_sec;
3540 cfil_info->cfi_first_event.tv_usec = entry->cfe_last_event.tv_usec;
3541
3542 entry->cfe_flags |= CFEF_SENT_SOCK_ATTACHED;
3543 OSIncrementAtomic(&cfil_stats.cfs_attach_event_ok);
3544 done:
3545
3546 /* We can recover from flow control */
3547 if (error == ENOBUFS) {
3548 entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
3549 OSIncrementAtomic(&cfil_stats.cfs_attach_event_flow_control);
3550
3551 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
3552 cfil_rw_lock_exclusive(&cfil_lck_rw);
3553 }
3554
3555 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
3556
3557 cfil_rw_unlock_exclusive(&cfil_lck_rw);
3558 } else {
3559 if (error != 0) {
3560 OSIncrementAtomic(&cfil_stats.cfs_attach_event_fail);
3561 }
3562
3563 cfil_rw_unlock_shared(&cfil_lck_rw);
3564 }
3565 return error;
3566 }
3567
3568 static int
cfil_dispatch_disconnect_event(struct socket * so,struct cfil_info * cfil_info,uint32_t kcunit,int outgoing)3569 cfil_dispatch_disconnect_event(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing)
3570 {
3571 errno_t error = 0;
3572 struct mbuf *msg = NULL;
3573 struct cfil_entry *entry;
3574 struct cfe_buf *entrybuf;
3575 struct cfil_msg_hdr msg_disconnected;
3576 struct content_filter *cfc;
3577
3578 socket_lock_assert_owned(so);
3579
3580 cfil_rw_lock_shared(&cfil_lck_rw);
3581
3582 entry = &cfil_info->cfi_entries[kcunit - 1];
3583 if (outgoing) {
3584 entrybuf = &entry->cfe_snd;
3585 } else {
3586 entrybuf = &entry->cfe_rcv;
3587 }
3588
3589 cfc = entry->cfe_filter;
3590 if (cfc == NULL) {
3591 goto done;
3592 }
3593
3594 // Mark if this flow qualifies for immediate close.
3595 SET_NO_CLOSE_WAIT(sotoinpcb(so), cfil_info);
3596
3597 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
3598 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
3599
3600 /*
3601 * Send the disconnection event once
3602 */
3603 if ((outgoing && (entry->cfe_flags & CFEF_SENT_DISCONNECT_OUT)) ||
3604 (!outgoing && (entry->cfe_flags & CFEF_SENT_DISCONNECT_IN))) {
3605 CFIL_LOG(LOG_INFO, "so %llx disconnect already sent",
3606 (uint64_t)VM_KERNEL_ADDRPERM(so));
3607 goto done;
3608 }
3609
3610 /*
3611 * We're not disconnected as long as some data is waiting
3612 * to be delivered to the filter
3613 */
3614 if (outgoing && cfil_queue_empty(&entrybuf->cfe_ctl_q) == 0) {
3615 CFIL_LOG(LOG_INFO, "so %llx control queue not empty",
3616 (uint64_t)VM_KERNEL_ADDRPERM(so));
3617 error = EBUSY;
3618 goto done;
3619 }
3620 /* Would be wasteful to try when flow controlled */
3621 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
3622 error = ENOBUFS;
3623 goto done;
3624 }
3625
3626 if (cfil_info->cfi_debug) {
3627 const char * __null_terminated out = "CFIL: OUT - SENDING DISCONNECT UP";
3628 const char * __null_terminated in = "CFIL: IN - SENDING DISCONNECT UP";
3629 cfil_info_log(LOG_ERR, cfil_info, outgoing ? out : in);
3630 }
3631
3632 bzero(&msg_disconnected, sizeof(struct cfil_msg_hdr));
3633 msg_disconnected.cfm_len = sizeof(struct cfil_msg_hdr);
3634 msg_disconnected.cfm_version = CFM_VERSION_CURRENT;
3635 msg_disconnected.cfm_type = CFM_TYPE_EVENT;
3636 msg_disconnected.cfm_op = outgoing ? CFM_OP_DISCONNECT_OUT :
3637 CFM_OP_DISCONNECT_IN;
3638 msg_disconnected.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;
3639 error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
3640 entry->cfe_filter->cf_kcunit,
3641 &msg_disconnected,
3642 sizeof(struct cfil_msg_hdr),
3643 CTL_DATA_EOR);
3644 if (error != 0) {
3645 CFIL_LOG(LOG_ERR, "ctl_enqueuembuf() failed: %d", error);
3646 mbuf_freem(msg);
3647 goto done;
3648 }
3649 microuptime(&entry->cfe_last_event);
3650 CFI_ADD_TIME_LOG(cfil_info, &entry->cfe_last_event, &cfil_info->cfi_first_event, msg_disconnected.cfm_op);
3651
3652 /* Remember we have sent the disconnection message */
3653 if (outgoing) {
3654 entry->cfe_flags |= CFEF_SENT_DISCONNECT_OUT;
3655 OSIncrementAtomic(&cfil_stats.cfs_disconnect_out_event_ok);
3656 } else {
3657 entry->cfe_flags |= CFEF_SENT_DISCONNECT_IN;
3658 OSIncrementAtomic(&cfil_stats.cfs_disconnect_in_event_ok);
3659 }
3660 done:
3661 if (error == ENOBUFS) {
3662 entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
3663 OSIncrementAtomic(
3664 &cfil_stats.cfs_disconnect_event_flow_control);
3665
3666 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
3667 cfil_rw_lock_exclusive(&cfil_lck_rw);
3668 }
3669
3670 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
3671
3672 cfil_rw_unlock_exclusive(&cfil_lck_rw);
3673 } else {
3674 if (error != 0) {
3675 OSIncrementAtomic(
3676 &cfil_stats.cfs_disconnect_event_fail);
3677 }
3678
3679 cfil_rw_unlock_shared(&cfil_lck_rw);
3680 }
3681 return error;
3682 }
3683
3684 int
cfil_dispatch_closed_event(struct socket * so,struct cfil_info * cfil_info,int kcunit)3685 cfil_dispatch_closed_event(struct socket *so, struct cfil_info *cfil_info, int kcunit)
3686 {
3687 struct cfil_entry *entry;
3688 struct cfil_msg_sock_closed msg_closed;
3689 errno_t error = 0;
3690 struct content_filter *cfc;
3691 struct inpcb *inp = NULL;
3692
3693 socket_lock_assert_owned(so);
3694
3695 cfil_rw_lock_shared(&cfil_lck_rw);
3696
3697 entry = &cfil_info->cfi_entries[kcunit - 1];
3698 cfc = entry->cfe_filter;
3699 if (cfc == NULL) {
3700 goto done;
3701 }
3702
3703 CFIL_LOG(LOG_INFO, "so %llx kcunit %d",
3704 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
3705
3706 /* Would be wasteful to try when flow controlled */
3707 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
3708 error = ENOBUFS;
3709 goto done;
3710 }
3711 /*
3712 * Send a single closed message per filter
3713 */
3714 if ((entry->cfe_flags & CFEF_SENT_SOCK_CLOSED) != 0) {
3715 goto done;
3716 }
3717 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) {
3718 goto done;
3719 }
3720
3721 microuptime(&entry->cfe_last_event);
3722 CFI_ADD_TIME_LOG(cfil_info, &entry->cfe_last_event, &cfil_info->cfi_first_event, CFM_OP_SOCKET_CLOSED);
3723
3724 bzero(&msg_closed, sizeof(struct cfil_msg_sock_closed));
3725 msg_closed.cfc_msghdr.cfm_len = sizeof(struct cfil_msg_sock_closed);
3726 msg_closed.cfc_msghdr.cfm_version = CFM_VERSION_CURRENT;
3727 msg_closed.cfc_msghdr.cfm_type = CFM_TYPE_EVENT;
3728 msg_closed.cfc_msghdr.cfm_op = CFM_OP_SOCKET_CLOSED;
3729 msg_closed.cfc_msghdr.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;
3730 msg_closed.cfc_first_event.tv_sec = cfil_info->cfi_first_event.tv_sec;
3731 msg_closed.cfc_first_event.tv_usec = cfil_info->cfi_first_event.tv_usec;
3732 memcpy(msg_closed.cfc_op_time, cfil_info->cfi_op_time, sizeof(uint32_t) * CFI_MAX_TIME_LOG_ENTRY);
3733 memcpy(msg_closed.cfc_op_list, cfil_info->cfi_op_list, sizeof(unsigned char) * CFI_MAX_TIME_LOG_ENTRY);
3734 msg_closed.cfc_op_list_ctr = cfil_info->cfi_op_list_ctr;
3735 msg_closed.cfc_byte_inbound_count = cfil_info->cfi_byte_inbound_count;
3736 msg_closed.cfc_byte_outbound_count = cfil_info->cfi_byte_outbound_count;
3737
3738 if (entry->cfe_laddr_sent == false) {
3739 /* cache it if necessary */
3740 if (cfil_info->cfi_so_attach_laddr.sa.sa_len == 0) {
3741 inp = cfil_info->cfi_so ? sotoinpcb(cfil_info->cfi_so) : NULL;
3742 if (inp != NULL) {
3743 boolean_t outgoing = (cfil_info->cfi_dir == CFS_CONNECTION_DIR_OUT);
3744 union sockaddr_in_4_6 *src = outgoing ? &cfil_info->cfi_so_attach_laddr : NULL;
3745 union sockaddr_in_4_6 *dst = outgoing ? NULL : &cfil_info->cfi_so_attach_laddr;
3746 cfil_fill_event_msg_addresses(cfil_info->cfi_hash_entry, inp,
3747 src, dst, !IS_INP_V6(inp), outgoing);
3748 }
3749 }
3750
3751 if (cfil_info->cfi_so_attach_laddr.sa.sa_len != 0) {
3752 msg_closed.cfc_laddr.sin6 = cfil_info->cfi_so_attach_laddr.sin6;
3753 entry->cfe_laddr_sent = true;
3754 }
3755 }
3756
3757 cfil_dispatch_closed_event_sign(entry->cfe_filter->cf_crypto_state, so, cfil_info, &msg_closed);
3758
3759 if (cfil_info->cfi_debug) {
3760 cfil_info_log(LOG_ERR, cfil_info, "CFIL: SENDING CLOSED UP");
3761 }
3762
3763 /* for debugging
3764 * if (msg_closed.cfc_op_list_ctr > CFI_MAX_TIME_LOG_ENTRY) {
3765 * msg_closed.cfc_op_list_ctr = CFI_MAX_TIME_LOG_ENTRY; // just in case
3766 * }
3767 * for (unsigned int i = 0; i < msg_closed.cfc_op_list_ctr ; i++) {
3768 * CFIL_LOG(LOG_ERR, "MD: socket %llu event %2u, time + %u msec", msg_closed.cfc_msghdr.cfm_sock_id, (unsigned short)msg_closed.cfc_op_list[i], msg_closed.cfc_op_time[i]);
3769 * }
3770 */
3771
3772 error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
3773 entry->cfe_filter->cf_kcunit,
3774 &msg_closed,
3775 sizeof(struct cfil_msg_sock_closed),
3776 CTL_DATA_EOR);
3777 if (error != 0) {
3778 CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d",
3779 error);
3780 goto done;
3781 }
3782
3783 entry->cfe_flags |= CFEF_SENT_SOCK_CLOSED;
3784 OSIncrementAtomic(&cfil_stats.cfs_closed_event_ok);
3785 done:
3786 /* We can recover from flow control */
3787 if (error == ENOBUFS) {
3788 entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
3789 OSIncrementAtomic(&cfil_stats.cfs_closed_event_flow_control);
3790
3791 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
3792 cfil_rw_lock_exclusive(&cfil_lck_rw);
3793 }
3794
3795 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
3796
3797 cfil_rw_unlock_exclusive(&cfil_lck_rw);
3798 } else {
3799 if (error != 0) {
3800 OSIncrementAtomic(&cfil_stats.cfs_closed_event_fail);
3801 }
3802
3803 cfil_rw_unlock_shared(&cfil_lck_rw);
3804 }
3805
3806 return error;
3807 }
3808
3809 static void
fill_ip6_sockaddr_4_6(union sockaddr_in_4_6 * sin46,struct in6_addr * ip6,u_int16_t port,uint32_t ifscope)3810 fill_ip6_sockaddr_4_6(union sockaddr_in_4_6 *sin46,
3811 struct in6_addr *ip6, u_int16_t port, uint32_t ifscope)
3812 {
3813 if (sin46 == NULL) {
3814 return;
3815 }
3816
3817 struct sockaddr_in6 *sin6 = &sin46->sin6;
3818
3819 sin6->sin6_family = AF_INET6;
3820 sin6->sin6_len = sizeof(*sin6);
3821 sin6->sin6_port = port;
3822 sin6->sin6_addr = *ip6;
3823 if (IN6_IS_SCOPE_EMBED(&sin6->sin6_addr)) {
3824 sin6->sin6_scope_id = ifscope;
3825 if (in6_embedded_scope) {
3826 in6_verify_ifscope(&sin6->sin6_addr, sin6->sin6_scope_id);
3827 if (sin6->sin6_addr.s6_addr16[1] != 0) {
3828 sin6->sin6_scope_id = ntohs(sin6->sin6_addr.s6_addr16[1]);
3829 sin6->sin6_addr.s6_addr16[1] = 0;
3830 }
3831 }
3832 }
3833 }
3834
3835 static void
fill_ip_sockaddr_4_6(union sockaddr_in_4_6 * sin46,struct in_addr ip,u_int16_t port)3836 fill_ip_sockaddr_4_6(union sockaddr_in_4_6 *sin46,
3837 struct in_addr ip, u_int16_t port)
3838 {
3839 if (sin46 == NULL) {
3840 return;
3841 }
3842
3843 struct sockaddr_in *sin = &sin46->sin;
3844
3845 sin->sin_family = AF_INET;
3846 sin->sin_len = sizeof(*sin);
3847 sin->sin_port = port;
3848 sin->sin_addr.s_addr = ip.s_addr;
3849 }
3850
3851 static void
cfil_get_flow_address_v6(struct soflow_hash_entry * entry,struct inpcb * inp,struct in6_addr ** laddr,struct in6_addr ** faddr,u_int16_t * lport,u_int16_t * fport)3852 cfil_get_flow_address_v6(struct soflow_hash_entry *entry, struct inpcb *inp,
3853 struct in6_addr **laddr, struct in6_addr **faddr,
3854 u_int16_t *lport, u_int16_t *fport)
3855 {
3856 if (entry != NULL) {
3857 *laddr = &entry->soflow_laddr.addr6;
3858 *faddr = &entry->soflow_faddr.addr6;
3859 *lport = entry->soflow_lport;
3860 *fport = entry->soflow_fport;
3861 } else {
3862 *laddr = &inp->in6p_laddr;
3863 *faddr = &inp->in6p_faddr;
3864 *lport = inp->inp_lport;
3865 *fport = inp->inp_fport;
3866 }
3867 }
3868
3869 static void
cfil_get_flow_address(struct soflow_hash_entry * entry,struct inpcb * inp,struct in_addr * laddr,struct in_addr * faddr,u_int16_t * lport,u_int16_t * fport)3870 cfil_get_flow_address(struct soflow_hash_entry *entry, struct inpcb *inp,
3871 struct in_addr *laddr, struct in_addr *faddr,
3872 u_int16_t *lport, u_int16_t *fport)
3873 {
3874 if (entry != NULL) {
3875 *laddr = entry->soflow_laddr.addr46.ia46_addr4;
3876 *faddr = entry->soflow_faddr.addr46.ia46_addr4;
3877 *lport = entry->soflow_lport;
3878 *fport = entry->soflow_fport;
3879 } else {
3880 *laddr = inp->inp_laddr;
3881 *faddr = inp->inp_faddr;
3882 *lport = inp->inp_lport;
3883 *fport = inp->inp_fport;
3884 }
3885 }
3886
3887 static int
cfil_dispatch_data_event(struct socket * so,struct cfil_info * cfil_info,uint32_t kcunit,int outgoing,struct mbuf * data,unsigned int copyoffset,unsigned int copylen)3888 cfil_dispatch_data_event(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing,
3889 struct mbuf *data, unsigned int copyoffset, unsigned int copylen)
3890 {
3891 errno_t error = 0;
3892 struct mbuf *copy = NULL;
3893 struct mbuf * __single msg = NULL;
3894 unsigned int one = 1;
3895 struct cfil_msg_data_event *data_req;
3896 struct inpcb *inp = (struct inpcb *)so->so_pcb;
3897 struct cfil_entry *entry;
3898 struct cfe_buf *entrybuf;
3899 struct content_filter *cfc;
3900 struct timeval tv;
3901 int inp_flags = 0;
3902
3903 cfil_rw_lock_shared(&cfil_lck_rw);
3904
3905 entry = &cfil_info->cfi_entries[kcunit - 1];
3906 if (outgoing) {
3907 entrybuf = &entry->cfe_snd;
3908 } else {
3909 entrybuf = &entry->cfe_rcv;
3910 }
3911
3912 cfc = entry->cfe_filter;
3913 if (cfc == NULL) {
3914 goto done;
3915 }
3916
3917 data = cfil_data_start(data);
3918 if (data == NULL) {
3919 CFIL_LOG(LOG_ERR, "No data start");
3920 goto done;
3921 }
3922
3923 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
3924 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
3925
3926 socket_lock_assert_owned(so);
3927
3928 /* Would be wasteful to try */
3929 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
3930 error = ENOBUFS;
3931 goto done;
3932 }
3933
3934 /* Make a copy of the data to pass to kernel control socket */
3935 copy = m_copym_mode(data, copyoffset, copylen, M_DONTWAIT, NULL, NULL,
3936 M_COPYM_NOOP_HDR);
3937 if (copy == NULL) {
3938 CFIL_LOG(LOG_ERR, "m_copym_mode() failed");
3939 error = ENOMEM;
3940 goto done;
3941 }
3942
3943 /* We need an mbuf packet for the message header */
3944 const size_t hdrsize = sizeof(struct cfil_msg_data_event);
3945 error = mbuf_allocpacket(MBUF_DONTWAIT, hdrsize, &one, &msg);
3946 if (error != 0) {
3947 CFIL_LOG(LOG_ERR, "mbuf_allocpacket() failed");
3948 m_freem(copy);
3949 /*
3950 * ENOBUFS is to indicate flow control
3951 */
3952 error = ENOMEM;
3953 goto done;
3954 }
3955 mbuf_setlen(msg, hdrsize);
3956 mbuf_pkthdr_setlen(msg, hdrsize + copylen);
3957 msg->m_next = copy;
3958 data_req = (struct cfil_msg_data_event *)mbuf_data(msg);
3959 bzero(data_req, hdrsize);
3960 data_req->cfd_msghdr.cfm_len = (uint32_t)hdrsize + copylen;
3961 data_req->cfd_msghdr.cfm_version = 1;
3962 data_req->cfd_msghdr.cfm_type = CFM_TYPE_EVENT;
3963 data_req->cfd_msghdr.cfm_op =
3964 outgoing ? CFM_OP_DATA_OUT : CFM_OP_DATA_IN;
3965 data_req->cfd_msghdr.cfm_sock_id =
3966 entry->cfe_cfil_info->cfi_sock_id;
3967 data_req->cfd_start_offset = entrybuf->cfe_peeked;
3968 data_req->cfd_end_offset = entrybuf->cfe_peeked + copylen;
3969 // The last_pid or e_pid is set here because a socket could have been
3970 // accepted by launchd and a new process spawned (with a new pid).
3971 // So the last pid associated with the socket is appended to the data event.
3972 // for a provider that is peeking bytes.
3973 if (so->so_flags & SOF_DELEGATED) {
3974 data_req->cfd_delegated_pid = so->e_pid;
3975 } else {
3976 data_req->cfd_delegated_pid = so->last_pid;
3977 }
3978 if (data_req->cfd_delegated_pid != 0) {
3979 if (!cfil_copy_audit_token(data_req->cfd_delegated_pid, (audit_token_t *)&data_req->cfd_delegated_audit_token)) {
3980 CFIL_LOG(LOG_ERR, "CFIL: Failed to get audit token for <sockID %llu <%llx>> ",
3981 entry->cfe_cfil_info->cfi_sock_id, entry->cfe_cfil_info->cfi_sock_id);
3982 }
3983 }
3984
3985 data_req->cfd_flags = 0;
3986 if (OPTIONAL_IP_HEADER(so)) {
3987 /*
3988 * For non-UDP/TCP traffic, indicate to filters if optional
3989 * IP header is present:
3990 * outgoing - indicate according to INP_HDRINCL flag
3991 * incoming - For IPv4 only, stripping of IP header is
3992 * optional. But for CFIL, we delay stripping
3993 * at rip_input. So CFIL always expects IP
3994 * frames. IP header will be stripped according
3995 * to INP_STRIPHDR flag later at reinjection.
3996 */
3997 if ((!outgoing && !IS_INP_V6(inp)) ||
3998 (outgoing && cfil_dgram_peek_socket_state(data, &inp_flags) && (inp_flags & INP_HDRINCL))) {
3999 data_req->cfd_flags |= CFD_DATA_FLAG_IP_HEADER;
4000 }
4001 }
4002
4003 /*
4004 * Copy address/port into event msg.
4005 * For non connected sockets need to copy addresses from passed
4006 * parameters
4007 */
4008 cfil_fill_event_msg_addresses(cfil_info->cfi_hash_entry, inp,
4009 &data_req->cfc_src, &data_req->cfc_dst,
4010 !IS_INP_V6(inp), outgoing);
4011
4012 if (cfil_info->cfi_debug && cfil_log_data) {
4013 cfil_info_log(LOG_ERR, cfil_info, "CFIL: SENDING DATA UP");
4014 }
4015
4016 if (cfil_info->cfi_isSignatureLatest == false) {
4017 cfil_dispatch_data_event_sign(entry->cfe_filter->cf_crypto_state, so, cfil_info, data_req);
4018 }
4019
4020 microuptime(&tv);
4021 CFI_ADD_TIME_LOG(cfil_info, &tv, &cfil_info->cfi_first_event, data_req->cfd_msghdr.cfm_op);
4022
4023 /* Pass the message to the content filter */
4024 error = ctl_enqueuembuf(entry->cfe_filter->cf_kcref,
4025 entry->cfe_filter->cf_kcunit,
4026 msg, CTL_DATA_EOR);
4027 if (error != 0) {
4028 CFIL_LOG(LOG_ERR, "ctl_enqueuembuf() failed: %d", error);
4029 mbuf_freem(msg);
4030 goto done;
4031 }
4032 entry->cfe_flags &= ~CFEF_FLOW_CONTROLLED;
4033 OSIncrementAtomic(&cfil_stats.cfs_data_event_ok);
4034
4035 if (cfil_info->cfi_debug && cfil_log_data) {
4036 CFIL_LOG(LOG_ERR, "CFIL: VERDICT ACTION: so %llx sockID %llu <%llx> outgoing %d: mbuf %llx copyoffset %u copylen %u (%s)",
4037 (uint64_t)VM_KERNEL_ADDRPERM(so), cfil_info->cfi_sock_id, cfil_info->cfi_sock_id, outgoing, (uint64_t)VM_KERNEL_ADDRPERM(data), copyoffset, copylen,
4038 data_req->cfd_flags & CFD_DATA_FLAG_IP_HEADER ? "IP HDR" : "NO IP HDR");
4039 }
4040
4041 done:
4042 if (error == ENOBUFS) {
4043 entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
4044 OSIncrementAtomic(
4045 &cfil_stats.cfs_data_event_flow_control);
4046
4047 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
4048 cfil_rw_lock_exclusive(&cfil_lck_rw);
4049 }
4050
4051 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
4052
4053 cfil_rw_unlock_exclusive(&cfil_lck_rw);
4054 } else {
4055 if (error != 0) {
4056 OSIncrementAtomic(&cfil_stats.cfs_data_event_fail);
4057 }
4058
4059 cfil_rw_unlock_shared(&cfil_lck_rw);
4060 }
4061 return error;
4062 }
4063
4064 /*
4065 * Process the queue of data waiting to be delivered to content filter
4066 */
4067 static int
cfil_data_service_ctl_q(struct socket * so,struct cfil_info * cfil_info,uint32_t kcunit,int outgoing)4068 cfil_data_service_ctl_q(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing)
4069 {
4070 errno_t error = 0;
4071 struct mbuf *data, *tmp = NULL;
4072 unsigned int datalen = 0, copylen = 0, copyoffset = 0;
4073 struct cfil_entry *entry;
4074 struct cfe_buf *entrybuf;
4075 uint64_t currentoffset = 0;
4076
4077 if (cfil_info == NULL) {
4078 return 0;
4079 }
4080
4081 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
4082 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
4083
4084 socket_lock_assert_owned(so);
4085
4086 entry = &cfil_info->cfi_entries[kcunit - 1];
4087 if (outgoing) {
4088 entrybuf = &entry->cfe_snd;
4089 } else {
4090 entrybuf = &entry->cfe_rcv;
4091 }
4092
4093 /* Send attached message if not yet done */
4094 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) {
4095 error = cfil_dispatch_attach_event(so, cfil_info, CFI_ENTRY_KCUNIT(cfil_info, entry),
4096 cfil_info->cfi_dir);
4097 if (error != 0) {
4098 /* We can recover from flow control */
4099 if (error == ENOBUFS || error == ENOMEM) {
4100 error = 0;
4101 }
4102 goto done;
4103 }
4104 } else if ((entry->cfe_flags & CFEF_DATA_START) == 0) {
4105 OSIncrementAtomic(&cfil_stats.cfs_ctl_q_not_started);
4106 goto done;
4107 }
4108
4109 if (cfil_info->cfi_debug && cfil_log_data) {
4110 CFIL_LOG(LOG_ERR, "CFIL: SERVICE CTL-Q: pass_offset %llu peeked %llu peek_offset %llu",
4111 entrybuf->cfe_pass_offset,
4112 entrybuf->cfe_peeked,
4113 entrybuf->cfe_peek_offset);
4114 }
4115
4116 /* Move all data that can pass */
4117 while ((data = cfil_queue_first(&entrybuf->cfe_ctl_q)) != NULL &&
4118 entrybuf->cfe_ctl_q.q_start < entrybuf->cfe_pass_offset) {
4119 datalen = cfil_data_length(data, NULL, NULL);
4120 tmp = data;
4121
4122 if (entrybuf->cfe_ctl_q.q_start + datalen <=
4123 entrybuf->cfe_pass_offset) {
4124 /*
4125 * The first mbuf can fully pass
4126 */
4127 copylen = datalen;
4128 } else {
4129 /*
4130 * The first mbuf can partially pass
4131 */
4132 copylen = (unsigned int)(entrybuf->cfe_pass_offset - entrybuf->cfe_ctl_q.q_start);
4133 }
4134 VERIFY(copylen <= datalen);
4135
4136 if (cfil_info->cfi_debug && cfil_log_data) {
4137 CFIL_LOG(LOG_ERR,
4138 "CFIL: SERVICE CTL-Q PASSING: %llx first %llu peeked %llu pass %llu peek %llu"
4139 "datalen %u copylen %u",
4140 (uint64_t)VM_KERNEL_ADDRPERM(tmp),
4141 entrybuf->cfe_ctl_q.q_start,
4142 entrybuf->cfe_peeked,
4143 entrybuf->cfe_pass_offset,
4144 entrybuf->cfe_peek_offset,
4145 datalen, copylen);
4146 }
4147
4148 /*
4149 * Data that passes has been peeked at explicitly or
4150 * implicitly
4151 */
4152 if (entrybuf->cfe_ctl_q.q_start + copylen >
4153 entrybuf->cfe_peeked) {
4154 entrybuf->cfe_peeked =
4155 entrybuf->cfe_ctl_q.q_start + copylen;
4156 }
4157 /*
4158 * Stop on partial pass
4159 */
4160 if (copylen < datalen) {
4161 break;
4162 }
4163
4164 /* All good, move full data from ctl queue to pending queue */
4165 cfil_queue_remove(&entrybuf->cfe_ctl_q, data, datalen);
4166
4167 cfil_queue_enqueue(&entrybuf->cfe_pending_q, data, datalen);
4168 if (outgoing) {
4169 OSAddAtomic64(datalen,
4170 &cfil_stats.cfs_pending_q_out_enqueued);
4171 } else {
4172 OSAddAtomic64(datalen,
4173 &cfil_stats.cfs_pending_q_in_enqueued);
4174 }
4175 }
4176 CFIL_INFO_VERIFY(cfil_info);
4177 if (tmp != NULL) {
4178 CFIL_LOG(LOG_DEBUG,
4179 "%llx first %llu peeked %llu pass %llu peek %llu"
4180 "datalen %u copylen %u",
4181 (uint64_t)VM_KERNEL_ADDRPERM(tmp),
4182 entrybuf->cfe_ctl_q.q_start,
4183 entrybuf->cfe_peeked,
4184 entrybuf->cfe_pass_offset,
4185 entrybuf->cfe_peek_offset,
4186 datalen, copylen);
4187 }
4188 tmp = NULL;
4189
4190 /* Now deal with remaining data the filter wants to peek at */
4191 for (data = cfil_queue_first(&entrybuf->cfe_ctl_q),
4192 currentoffset = entrybuf->cfe_ctl_q.q_start;
4193 data != NULL && currentoffset < entrybuf->cfe_peek_offset;
4194 data = cfil_queue_next(&entrybuf->cfe_ctl_q, data),
4195 currentoffset += datalen) {
4196 datalen = cfil_data_length(data, NULL, NULL);
4197 tmp = data;
4198
4199 /* We've already peeked at this mbuf */
4200 if (currentoffset + datalen <= entrybuf->cfe_peeked) {
4201 continue;
4202 }
4203 /*
4204 * The data in the first mbuf may have been
4205 * partially peeked at
4206 */
4207 copyoffset = (unsigned int)(entrybuf->cfe_peeked - currentoffset);
4208 VERIFY(copyoffset < datalen);
4209 copylen = datalen - copyoffset;
4210 VERIFY(copylen <= datalen);
4211 /*
4212 * Do not copy more than needed
4213 */
4214 if (currentoffset + copyoffset + copylen >
4215 entrybuf->cfe_peek_offset) {
4216 copylen = (unsigned int)(entrybuf->cfe_peek_offset -
4217 (currentoffset + copyoffset));
4218 }
4219
4220 if (cfil_info->cfi_debug && cfil_log_data) {
4221 CFIL_LOG(LOG_ERR,
4222 "CFIL: SERVICE CTL-Q PEEKING: %llx current %llu peeked %llu pass %llu peek %llu "
4223 "datalen %u copylen %u copyoffset %u",
4224 (uint64_t)VM_KERNEL_ADDRPERM(tmp),
4225 currentoffset,
4226 entrybuf->cfe_peeked,
4227 entrybuf->cfe_pass_offset,
4228 entrybuf->cfe_peek_offset,
4229 datalen, copylen, copyoffset);
4230 }
4231
4232 /*
4233 * Stop if there is nothing more to peek at
4234 */
4235 if (copylen == 0) {
4236 break;
4237 }
4238 /*
4239 * Let the filter get a peek at this span of data
4240 */
4241 error = cfil_dispatch_data_event(so, cfil_info, kcunit,
4242 outgoing, data, copyoffset, copylen);
4243 if (error != 0) {
4244 /* On error, leave data in ctl_q */
4245 break;
4246 }
4247 entrybuf->cfe_peeked += copylen;
4248 if (outgoing) {
4249 OSAddAtomic64(copylen,
4250 &cfil_stats.cfs_ctl_q_out_peeked);
4251 } else {
4252 OSAddAtomic64(copylen,
4253 &cfil_stats.cfs_ctl_q_in_peeked);
4254 }
4255
4256 /* Stop when data could not be fully peeked at */
4257 if (copylen + copyoffset < datalen) {
4258 break;
4259 }
4260 }
4261 CFIL_INFO_VERIFY(cfil_info);
4262 if (tmp != NULL) {
4263 CFIL_LOG(LOG_DEBUG,
4264 "%llx first %llu peeked %llu pass %llu peek %llu"
4265 "datalen %u copylen %u copyoffset %u",
4266 (uint64_t)VM_KERNEL_ADDRPERM(tmp),
4267 currentoffset,
4268 entrybuf->cfe_peeked,
4269 entrybuf->cfe_pass_offset,
4270 entrybuf->cfe_peek_offset,
4271 datalen, copylen, copyoffset);
4272 }
4273
4274 /*
4275 * Process data that has passed the filter
4276 */
4277 error = cfil_service_pending_queue(so, cfil_info, kcunit, outgoing);
4278 if (error != 0) {
4279 CFIL_LOG(LOG_ERR, "cfil_service_pending_queue() error %d",
4280 error);
4281 goto done;
4282 }
4283
4284 /*
4285 * Dispatch disconnect events that could not be sent
4286 */
4287 if (cfil_info == NULL) {
4288 goto done;
4289 } else if (outgoing) {
4290 if ((cfil_info->cfi_flags & CFIF_SHUT_WR) &&
4291 !(entry->cfe_flags & CFEF_SENT_DISCONNECT_OUT)) {
4292 cfil_dispatch_disconnect_event(so, cfil_info, kcunit, 1);
4293 }
4294 } else {
4295 if ((cfil_info->cfi_flags & CFIF_SHUT_RD) &&
4296 !(entry->cfe_flags & CFEF_SENT_DISCONNECT_IN)) {
4297 cfil_dispatch_disconnect_event(so, cfil_info, kcunit, 0);
4298 }
4299 }
4300
4301 done:
4302 CFIL_LOG(LOG_DEBUG,
4303 "first %llu peeked %llu pass %llu peek %llu",
4304 entrybuf->cfe_ctl_q.q_start,
4305 entrybuf->cfe_peeked,
4306 entrybuf->cfe_pass_offset,
4307 entrybuf->cfe_peek_offset);
4308
4309 CFIL_INFO_VERIFY(cfil_info);
4310 return error;
4311 }
4312
4313 /*
4314 * cfil_data_filter()
4315 *
4316 * Process data for a content filter installed on a socket
4317 */
4318 int
cfil_data_filter(struct socket * so,struct cfil_info * cfil_info,uint32_t kcunit,int outgoing,struct mbuf * data,uint32_t datalen)4319 cfil_data_filter(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing,
4320 struct mbuf *data, uint32_t datalen)
4321 {
4322 errno_t error = 0;
4323 struct cfil_entry *entry;
4324 struct cfe_buf *entrybuf;
4325
4326 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
4327 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
4328
4329 socket_lock_assert_owned(so);
4330
4331 entry = &cfil_info->cfi_entries[kcunit - 1];
4332 if (outgoing) {
4333 entrybuf = &entry->cfe_snd;
4334 } else {
4335 entrybuf = &entry->cfe_rcv;
4336 }
4337
4338 /* Are we attached to the filter? */
4339 if (entry->cfe_filter == NULL) {
4340 error = 0;
4341 goto done;
4342 }
4343
4344 /* Dispatch to filters */
4345 cfil_queue_enqueue(&entrybuf->cfe_ctl_q, data, datalen);
4346 if (outgoing) {
4347 OSAddAtomic64(datalen,
4348 &cfil_stats.cfs_ctl_q_out_enqueued);
4349 } else {
4350 OSAddAtomic64(datalen,
4351 &cfil_stats.cfs_ctl_q_in_enqueued);
4352 }
4353
4354 error = cfil_data_service_ctl_q(so, cfil_info, kcunit, outgoing);
4355 if (error != 0) {
4356 CFIL_LOG(LOG_ERR, "cfil_data_service_ctl_q() error %d",
4357 error);
4358 }
4359 /*
4360 * We have to return EJUSTRETURN in all cases to avoid double free
4361 * by socket layer
4362 */
4363 error = EJUSTRETURN;
4364 done:
4365 CFIL_INFO_VERIFY(cfil_info);
4366
4367 CFIL_LOG(LOG_INFO, "return %d", error);
4368 return error;
4369 }
4370
4371 static void
cfil_strip_ip_header(struct cfil_info * cfil_info,mbuf_t data,struct socket * so)4372 cfil_strip_ip_header(struct cfil_info *cfil_info, mbuf_t data, struct socket *so)
4373 {
4374 struct ip *ip = NULL;
4375 unsigned int hlen = 0;
4376 mbuf_t data_start = NULL;
4377 struct inpcb *inp = so ? sotoinpcb(so) : NULL;
4378
4379 if (inp && (inp->inp_flags & INP_STRIPHDR)) {
4380 data_start = cfil_data_start(data);
4381 if (data_start != NULL && (data_start->m_flags & M_PKTHDR)) {
4382 ip = mtod(data_start, struct ip *);
4383 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
4384
4385 if (cfil_info->cfi_debug && cfil_log_data) {
4386 CFIL_LOG(LOG_ERR, "CFIL: IPHDR STRIPPING: <so %llx>: <hlen %d m_len %d>",
4387 (uint64_t)VM_KERNEL_ADDRPERM(so),
4388 hlen, data_start->m_len);
4389 }
4390 VERIFY(hlen <= data_start->m_len);
4391 data_start->m_len -= hlen;
4392 data_start->m_pkthdr.len -= hlen;
4393 data_start->m_data += hlen;
4394 }
4395 }
4396 }
4397
4398 /*
4399 * cfil_service_inject_queue() re-inject data that passed the
4400 * content filters
4401 */
4402 static int
cfil_service_inject_queue(struct socket * so,struct cfil_info * cfil_info,int outgoing)4403 cfil_service_inject_queue(struct socket *so, struct cfil_info *cfil_info, int outgoing)
4404 {
4405 mbuf_t data;
4406 unsigned int datalen;
4407 int mbcnt = 0;
4408 int mbnum = 0;
4409 errno_t error = 0;
4410 struct cfi_buf *cfi_buf;
4411 struct cfil_queue *inject_q;
4412 int need_rwakeup = 0;
4413 int count = 0;
4414
4415 if (cfil_info == NULL) {
4416 return 0;
4417 }
4418
4419 socket_lock_assert_owned(so);
4420
4421 if (so->so_state & SS_DEFUNCT) {
4422 return 0;
4423 }
4424
4425 if (outgoing) {
4426 cfi_buf = &cfil_info->cfi_snd;
4427 cfil_info->cfi_flags &= ~CFIF_RETRY_INJECT_OUT;
4428 } else {
4429 cfi_buf = &cfil_info->cfi_rcv;
4430 cfil_info->cfi_flags &= ~CFIF_RETRY_INJECT_IN;
4431 }
4432 inject_q = &cfi_buf->cfi_inject_q;
4433
4434 if (cfil_queue_empty(inject_q)) {
4435 return 0;
4436 }
4437
4438 if (cfil_info->cfi_debug && cfil_log_data) {
4439 CFIL_LOG(LOG_ERR, "CFIL: SERVICE INJECT-Q: <so %llx> outgoing %d queue len %llu",
4440 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing, cfil_queue_len(inject_q));
4441 }
4442
4443 while ((data = cfil_queue_first(inject_q)) != NULL) {
4444 datalen = cfil_data_length(data, &mbcnt, &mbnum);
4445
4446 if (cfil_info->cfi_debug && cfil_log_data) {
4447 CFIL_LOG(LOG_ERR, "CFIL: SERVICE INJECT-Q: <so %llx> data %llx datalen %u (mbcnt %u)",
4448 (uint64_t)VM_KERNEL_ADDRPERM(so), (uint64_t)VM_KERNEL_ADDRPERM(data), datalen, mbcnt);
4449 }
4450
4451 /* Remove data from queue and adjust stats */
4452 cfil_queue_remove(inject_q, data, datalen);
4453 cfi_buf->cfi_pending_first += datalen;
4454 cfi_buf->cfi_pending_mbcnt -= mbcnt;
4455 cfi_buf->cfi_pending_mbnum -= mbnum;
4456 cfil_info_buf_verify(cfi_buf);
4457
4458 if (outgoing) {
4459 error = sosend_reinject(so, NULL, data, NULL, 0);
4460 if (error != 0) {
4461 cfil_info_log(LOG_ERR, cfil_info, "CFIL: Error: sosend_reinject() failed");
4462 CFIL_LOG(LOG_ERR, "CFIL: sosend() failed %d", error);
4463 break;
4464 }
4465 // At least one injection succeeded, need to wake up pending threads.
4466 need_rwakeup = 1;
4467 } else {
4468 data->m_flags |= M_SKIPCFIL;
4469
4470 /*
4471 * NOTE: We currently only support TCP, UDP, ICMP,
4472 * ICMPv6 and RAWIP. For MPTCP and message TCP we'll
4473 * need to call the appropriate sbappendxxx()
4474 * of fix sock_inject_data_in()
4475 */
4476 if (NEED_DGRAM_FLOW_TRACKING(so)) {
4477 if (OPTIONAL_IP_HEADER(so)) {
4478 cfil_strip_ip_header(cfil_info, data, so);
4479 }
4480
4481 if (sbappendchain(&so->so_rcv, data)) {
4482 need_rwakeup = 1;
4483 }
4484 } else {
4485 if (sbappendstream(&so->so_rcv, data)) {
4486 need_rwakeup = 1;
4487 }
4488 }
4489 }
4490
4491 if (outgoing) {
4492 OSAddAtomic64(datalen,
4493 &cfil_stats.cfs_inject_q_out_passed);
4494 } else {
4495 OSAddAtomic64(datalen,
4496 &cfil_stats.cfs_inject_q_in_passed);
4497 }
4498
4499 count++;
4500 }
4501
4502 if (cfil_info->cfi_debug && cfil_log_data) {
4503 CFIL_LOG(LOG_ERR, "CFIL: SERVICE INJECT-Q: <so %llx> injected %d",
4504 (uint64_t)VM_KERNEL_ADDRPERM(so), count);
4505 }
4506
4507 /* A single wakeup is for several packets is more efficient */
4508 if (need_rwakeup) {
4509 if (outgoing == TRUE) {
4510 sowwakeup(so);
4511 } else {
4512 sorwakeup(so);
4513 }
4514 }
4515
4516 if (error != 0 && cfil_info) {
4517 if (error == ENOBUFS) {
4518 OSIncrementAtomic(&cfil_stats.cfs_inject_q_nobufs);
4519 }
4520 if (error == ENOMEM) {
4521 OSIncrementAtomic(&cfil_stats.cfs_inject_q_nomem);
4522 }
4523
4524 if (outgoing) {
4525 cfil_info->cfi_flags |= CFIF_RETRY_INJECT_OUT;
4526 OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_fail);
4527 } else {
4528 cfil_info->cfi_flags |= CFIF_RETRY_INJECT_IN;
4529 OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_fail);
4530 }
4531 }
4532
4533 /*
4534 * Notify
4535 */
4536 if (cfil_info && (cfil_info->cfi_flags & CFIF_SHUT_WR)) {
4537 cfil_sock_notify_shutdown(so, SHUT_WR);
4538 if (cfil_sock_data_pending(&so->so_snd) == 0) {
4539 soshutdownlock_final(so, SHUT_WR);
4540 }
4541 }
4542 if (cfil_info && (cfil_info->cfi_flags & CFIF_CLOSE_WAIT)) {
4543 if (cfil_filters_attached(so) == 0) {
4544 CFIL_LOG(LOG_INFO, "so %llx waking",
4545 (uint64_t)VM_KERNEL_ADDRPERM(so));
4546 wakeup((caddr_t)cfil_info);
4547 }
4548 }
4549
4550 if (SO_DELAYED_DEAD_GET(so)) {
4551 // Check to see if all data processed for this socket, if so mark it DEAD now.
4552 const bool is_dead = cfil_sock_is_dead(so);
4553 if (is_dead && cfil_info->cfi_debug) {
4554 cfil_info_log(LOG_ERR, cfil_info, "CFIL: Marked previoulsy delayed socket as DEAD");
4555 }
4556 }
4557 if (SO_DELAYED_TCP_TIME_WAIT_GET(so)) {
4558 // Check to see if all data processed for this socket, if so handle the TCP time wait now
4559 const bool is_added = cfil_sock_tcp_add_time_wait(so);
4560 if (is_added && cfil_info->cfi_debug) {
4561 cfil_info_log(LOG_ERR, cfil_info, "CFIL: Handled previously delayed socket for TCP time wait");
4562 }
4563 }
4564
4565 CFIL_INFO_VERIFY(cfil_info);
4566
4567 return error;
4568 }
4569
4570 static int
cfil_service_pending_queue(struct socket * so,struct cfil_info * cfil_info,uint32_t kcunit,int outgoing)4571 cfil_service_pending_queue(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing)
4572 {
4573 uint64_t passlen, curlen;
4574 mbuf_t data;
4575 unsigned int datalen;
4576 errno_t error = 0;
4577 struct cfil_entry *entry;
4578 struct cfe_buf *entrybuf;
4579 struct cfil_queue *pending_q;
4580 struct cfil_entry *iter_entry = NULL;
4581
4582 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
4583 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
4584
4585 socket_lock_assert_owned(so);
4586
4587 entry = &cfil_info->cfi_entries[kcunit - 1];
4588 if (outgoing) {
4589 entrybuf = &entry->cfe_snd;
4590 } else {
4591 entrybuf = &entry->cfe_rcv;
4592 }
4593
4594 pending_q = &entrybuf->cfe_pending_q;
4595
4596 passlen = entrybuf->cfe_pass_offset - pending_q->q_start;
4597
4598 if (cfil_queue_empty(pending_q)) {
4599 for (iter_entry = SLIST_NEXT(entry, cfe_order_link);
4600 iter_entry != NULL;
4601 iter_entry = SLIST_NEXT(iter_entry, cfe_order_link)) {
4602 error = cfil_data_service_ctl_q(so, cfil_info, CFI_ENTRY_KCUNIT(cfil_info, iter_entry), outgoing);
4603 /* 0 means passed so we can continue */
4604 if (error != 0) {
4605 break;
4606 }
4607 }
4608 goto done;
4609 }
4610
4611 /*
4612 * Locate the chunks of data that we can pass to the next filter
4613 * A data chunk must be on mbuf boundaries
4614 */
4615 curlen = 0;
4616 while ((data = cfil_queue_first(pending_q)) != NULL) {
4617 datalen = cfil_data_length(data, NULL, NULL);
4618
4619 if (cfil_info->cfi_debug && cfil_log_data) {
4620 CFIL_LOG(LOG_ERR,
4621 "CFIL: SERVICE PENDING-Q: data %llx datalen %u passlen %llu curlen %llu",
4622 (uint64_t)VM_KERNEL_ADDRPERM(data), datalen,
4623 passlen, curlen);
4624 }
4625
4626 if (curlen + datalen > passlen) {
4627 break;
4628 }
4629
4630 cfil_queue_remove(pending_q, data, datalen);
4631
4632 curlen += datalen;
4633
4634 for (iter_entry = SLIST_NEXT(entry, cfe_order_link);
4635 iter_entry != NULL;
4636 iter_entry = SLIST_NEXT(iter_entry, cfe_order_link)) {
4637 error = cfil_data_filter(so, cfil_info, CFI_ENTRY_KCUNIT(cfil_info, iter_entry), outgoing,
4638 data, datalen);
4639 /* 0 means passed so we can continue */
4640 if (error != 0) {
4641 break;
4642 }
4643 }
4644 /* When data has passed all filters, re-inject */
4645 if (error == 0) {
4646 if (outgoing) {
4647 cfil_queue_enqueue(
4648 &cfil_info->cfi_snd.cfi_inject_q,
4649 data, datalen);
4650 OSAddAtomic64(datalen,
4651 &cfil_stats.cfs_inject_q_out_enqueued);
4652 } else {
4653 cfil_queue_enqueue(
4654 &cfil_info->cfi_rcv.cfi_inject_q,
4655 data, datalen);
4656 OSAddAtomic64(datalen,
4657 &cfil_stats.cfs_inject_q_in_enqueued);
4658 }
4659 }
4660 }
4661
4662 done:
4663 CFIL_INFO_VERIFY(cfil_info);
4664
4665 return error;
4666 }
4667
4668 int
cfil_update_data_offsets(struct socket * so,struct cfil_info * cfil_info,uint32_t kcunit,int outgoing,uint64_t pass_offset,uint64_t peek_offset)4669 cfil_update_data_offsets(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing,
4670 uint64_t pass_offset, uint64_t peek_offset)
4671 {
4672 errno_t error = 0;
4673 struct cfil_entry *entry = NULL;
4674 struct cfe_buf *entrybuf;
4675 int updated = 0;
4676
4677 CFIL_LOG(LOG_INFO, "pass %llu peek %llu", pass_offset, peek_offset);
4678
4679 socket_lock_assert_owned(so);
4680
4681 if (cfil_info == NULL) {
4682 CFIL_LOG(LOG_ERR, "so %llx cfil detached",
4683 (uint64_t)VM_KERNEL_ADDRPERM(so));
4684 error = 0;
4685 goto done;
4686 } else if (cfil_info->cfi_flags & CFIF_DROP) {
4687 CFIL_LOG(LOG_ERR, "so %llx drop set",
4688 (uint64_t)VM_KERNEL_ADDRPERM(so));
4689 error = EPIPE;
4690 goto done;
4691 }
4692
4693 entry = &cfil_info->cfi_entries[kcunit - 1];
4694 if (outgoing) {
4695 entrybuf = &entry->cfe_snd;
4696 } else {
4697 entrybuf = &entry->cfe_rcv;
4698 }
4699
4700 /* Record updated offsets for this content filter */
4701 if (pass_offset > entrybuf->cfe_pass_offset) {
4702 entrybuf->cfe_pass_offset = pass_offset;
4703
4704 if (entrybuf->cfe_peek_offset < entrybuf->cfe_pass_offset) {
4705 entrybuf->cfe_peek_offset = entrybuf->cfe_pass_offset;
4706 }
4707 updated = 1;
4708 } else {
4709 CFIL_LOG(LOG_INFO, "pass_offset %llu <= cfe_pass_offset %llu",
4710 pass_offset, entrybuf->cfe_pass_offset);
4711 }
4712 /* Filter does not want or need to see data that's allowed to pass */
4713 if (peek_offset > entrybuf->cfe_pass_offset &&
4714 peek_offset > entrybuf->cfe_peek_offset) {
4715 entrybuf->cfe_peek_offset = peek_offset;
4716 updated = 1;
4717 }
4718 /* Nothing to do */
4719 if (updated == 0) {
4720 goto done;
4721 }
4722
4723 /* Move data held in control queue to pending queue if needed */
4724 error = cfil_data_service_ctl_q(so, cfil_info, kcunit, outgoing);
4725 if (error != 0) {
4726 CFIL_LOG(LOG_ERR, "cfil_data_service_ctl_q() error %d",
4727 error);
4728 goto done;
4729 }
4730 error = EJUSTRETURN;
4731
4732 done:
4733 /*
4734 * The filter is effectively detached when pass all from both sides
4735 * or when the socket is closed and no more data is waiting
4736 * to be delivered to the filter
4737 */
4738 if (entry != NULL &&
4739 ((entry->cfe_snd.cfe_pass_offset == CFM_MAX_OFFSET &&
4740 entry->cfe_rcv.cfe_pass_offset == CFM_MAX_OFFSET) ||
4741 ((cfil_info->cfi_flags & CFIF_CLOSE_WAIT) &&
4742 cfil_queue_empty(&entry->cfe_snd.cfe_ctl_q) &&
4743 cfil_queue_empty(&entry->cfe_rcv.cfe_ctl_q)))) {
4744 entry->cfe_flags |= CFEF_CFIL_DETACHED;
4745
4746 if (cfil_info->cfi_debug) {
4747 const char * __null_terminated out = "CFIL: OUT - PASSED ALL - DETACH";
4748 const char * __null_terminated in = "CFIL: IN - PASSED ALL - DETACH";
4749 cfil_info_log(LOG_ERR, cfil_info, outgoing ? out : in);
4750 }
4751
4752 CFIL_LOG(LOG_INFO, "so %llx detached %u",
4753 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
4754 if ((cfil_info->cfi_flags & CFIF_CLOSE_WAIT) &&
4755 cfil_filters_attached(so) == 0) {
4756 if (cfil_info->cfi_debug) {
4757 cfil_info_log(LOG_ERR, cfil_info, "CFIL: WAKING");
4758 }
4759 CFIL_LOG(LOG_INFO, "so %llx waking",
4760 (uint64_t)VM_KERNEL_ADDRPERM(so));
4761 wakeup((caddr_t)cfil_info);
4762 }
4763 }
4764 CFIL_INFO_VERIFY(cfil_info);
4765 CFIL_LOG(LOG_INFO, "return %d", error);
4766 return error;
4767 }
4768
4769 /*
4770 * Update pass offset for socket when no data is pending
4771 */
4772 static int
cfil_set_socket_pass_offset(struct socket * so,struct cfil_info * cfil_info,int outgoing)4773 cfil_set_socket_pass_offset(struct socket *so, struct cfil_info *cfil_info, int outgoing)
4774 {
4775 struct cfi_buf *cfi_buf;
4776 struct cfil_entry *entry;
4777 struct cfe_buf *entrybuf;
4778 uint32_t kcunit;
4779 uint64_t pass_offset = 0;
4780 boolean_t first = true;
4781
4782 if (cfil_info == NULL) {
4783 return 0;
4784 }
4785
4786 if (cfil_info->cfi_debug && cfil_log_data) {
4787 CFIL_LOG(LOG_ERR, "so %llx outgoing %d",
4788 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);
4789 }
4790
4791 socket_lock_assert_owned(so);
4792
4793 if (outgoing) {
4794 cfi_buf = &cfil_info->cfi_snd;
4795 } else {
4796 cfi_buf = &cfil_info->cfi_rcv;
4797 }
4798
4799 if (cfil_info->cfi_debug && cfil_log_data) {
4800 CFIL_LOG(LOG_ERR, "CFIL: <so %llx, sockID %llu <%llx>> outgoing %d cfi_pending_first %llu cfi_pending_last %llu",
4801 (uint64_t)VM_KERNEL_ADDRPERM(so), cfil_info->cfi_sock_id, cfil_info->cfi_sock_id, outgoing,
4802 cfi_buf->cfi_pending_first, cfi_buf->cfi_pending_last);
4803 }
4804
4805 if (cfi_buf->cfi_pending_last - cfi_buf->cfi_pending_first == 0) {
4806 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
4807 entry = &cfil_info->cfi_entries[kcunit - 1];
4808
4809 /* Are we attached to a filter? */
4810 if (entry->cfe_filter == NULL) {
4811 continue;
4812 }
4813
4814 if (outgoing) {
4815 entrybuf = &entry->cfe_snd;
4816 } else {
4817 entrybuf = &entry->cfe_rcv;
4818 }
4819
4820 // Keep track of the smallest pass_offset among filters.
4821 if (first == true ||
4822 entrybuf->cfe_pass_offset < pass_offset) {
4823 pass_offset = entrybuf->cfe_pass_offset;
4824 first = false;
4825 }
4826 }
4827 cfi_buf->cfi_pass_offset = pass_offset;
4828 }
4829
4830 if (cfil_info->cfi_debug && cfil_log_data) {
4831 CFIL_LOG(LOG_ERR, "CFIL: <so %llx, sockID %llu <%llx>>, cfi_pass_offset %llu",
4832 (uint64_t)VM_KERNEL_ADDRPERM(so), cfil_info->cfi_sock_id, cfil_info->cfi_sock_id, cfi_buf->cfi_pass_offset);
4833 }
4834
4835 return 0;
4836 }
4837
4838 int
cfil_action_data_pass(struct socket * so,struct cfil_info * cfil_info,uint32_t kcunit,int outgoing,uint64_t pass_offset,uint64_t peek_offset)4839 cfil_action_data_pass(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing,
4840 uint64_t pass_offset, uint64_t peek_offset)
4841 {
4842 errno_t error = 0;
4843
4844 CFIL_LOG(LOG_INFO, "");
4845
4846 socket_lock_assert_owned(so);
4847
4848 error = cfil_acquire_sockbuf(so, cfil_info, outgoing);
4849 if (error != 0) {
4850 CFIL_LOG(LOG_INFO, "so %llx %s dropped",
4851 (uint64_t)VM_KERNEL_ADDRPERM(so),
4852 outgoing ? "out" : "in");
4853 goto release;
4854 }
4855
4856 error = cfil_update_data_offsets(so, cfil_info, kcunit, outgoing,
4857 pass_offset, peek_offset);
4858
4859 cfil_service_inject_queue(so, cfil_info, outgoing);
4860
4861 cfil_set_socket_pass_offset(so, cfil_info, outgoing);
4862 release:
4863 CFIL_INFO_VERIFY(cfil_info);
4864 cfil_release_sockbuf(so, outgoing);
4865
4866 return error;
4867 }
4868
4869
4870 static void
cfil_flush_queues(struct socket * so,struct cfil_info * cfil_info)4871 cfil_flush_queues(struct socket *so, struct cfil_info *cfil_info)
4872 {
4873 struct cfil_entry *entry;
4874 int kcunit;
4875 uint64_t drained;
4876
4877 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || cfil_info == NULL) {
4878 goto done;
4879 }
4880
4881 socket_lock_assert_owned(so);
4882
4883 /*
4884 * Flush the output queues and ignore errors as long as
4885 * we are attached
4886 */
4887 (void) cfil_acquire_sockbuf(so, cfil_info, 1);
4888 if (cfil_info != NULL) {
4889 drained = 0;
4890 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
4891 entry = &cfil_info->cfi_entries[kcunit - 1];
4892
4893 drained += cfil_queue_drain(&entry->cfe_snd.cfe_ctl_q);
4894 drained += cfil_queue_drain(&entry->cfe_snd.cfe_pending_q);
4895 }
4896 drained += cfil_queue_drain(&cfil_info->cfi_snd.cfi_inject_q);
4897
4898 if (drained) {
4899 if (cfil_info->cfi_flags & CFIF_DROP) {
4900 OSIncrementAtomic(
4901 &cfil_stats.cfs_flush_out_drop);
4902 } else {
4903 OSIncrementAtomic(
4904 &cfil_stats.cfs_flush_out_close);
4905 }
4906 }
4907 }
4908 cfil_release_sockbuf(so, 1);
4909
4910 /*
4911 * Flush the input queues
4912 */
4913 (void) cfil_acquire_sockbuf(so, cfil_info, 0);
4914 if (cfil_info != NULL) {
4915 drained = 0;
4916 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
4917 entry = &cfil_info->cfi_entries[kcunit - 1];
4918
4919 drained += cfil_queue_drain(
4920 &entry->cfe_rcv.cfe_ctl_q);
4921 drained += cfil_queue_drain(
4922 &entry->cfe_rcv.cfe_pending_q);
4923 }
4924 drained += cfil_queue_drain(&cfil_info->cfi_rcv.cfi_inject_q);
4925
4926 if (drained) {
4927 if (cfil_info->cfi_flags & CFIF_DROP) {
4928 OSIncrementAtomic(
4929 &cfil_stats.cfs_flush_in_drop);
4930 } else {
4931 OSIncrementAtomic(
4932 &cfil_stats.cfs_flush_in_close);
4933 }
4934 }
4935 }
4936 cfil_release_sockbuf(so, 0);
4937 done:
4938 CFIL_INFO_VERIFY(cfil_info);
4939 }
4940
4941 int
cfil_action_drop(struct socket * so,struct cfil_info * cfil_info,uint32_t kcunit)4942 cfil_action_drop(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit)
4943 {
4944 errno_t error = 0;
4945 struct cfil_entry *entry;
4946 struct proc *p;
4947
4948 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || cfil_info == NULL) {
4949 goto done;
4950 }
4951
4952 socket_lock_assert_owned(so);
4953
4954 entry = &cfil_info->cfi_entries[kcunit - 1];
4955
4956 /* Are we attached to the filter? */
4957 if (entry->cfe_filter == NULL) {
4958 goto done;
4959 }
4960
4961 cfil_info->cfi_flags |= CFIF_DROP;
4962
4963 p = current_proc();
4964
4965 /*
4966 * Force the socket to be marked defunct
4967 * (forcing fixed along with rdar://19391339)
4968 */
4969 if (so->so_flow_db == NULL) {
4970 error = sosetdefunct(p, so,
4971 SHUTDOWN_SOCKET_LEVEL_CONTENT_FILTER | SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL,
4972 FALSE);
4973
4974 /* Flush the socket buffer and disconnect */
4975 if (error == 0) {
4976 error = sodefunct(p, so,
4977 SHUTDOWN_SOCKET_LEVEL_CONTENT_FILTER | SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL);
4978 }
4979 }
4980
4981 /* The filter is done, mark as detached */
4982 entry->cfe_flags |= CFEF_CFIL_DETACHED;
4983
4984 if (cfil_info->cfi_debug) {
4985 cfil_info_log(LOG_ERR, cfil_info, "CFIL: DROP - DETACH");
4986 }
4987
4988 CFIL_LOG(LOG_INFO, "so %llx detached %u",
4989 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
4990
4991 /* Pending data needs to go */
4992 cfil_flush_queues(so, cfil_info);
4993
4994 if (cfil_info && (cfil_info->cfi_flags & CFIF_CLOSE_WAIT)) {
4995 if (cfil_filters_attached(so) == 0) {
4996 CFIL_LOG(LOG_INFO, "so %llx waking",
4997 (uint64_t)VM_KERNEL_ADDRPERM(so));
4998 wakeup((caddr_t)cfil_info);
4999 }
5000 }
5001 done:
5002 return error;
5003 }
5004
5005 int
cfil_action_bless_client(uint32_t kcunit,struct cfil_msg_hdr * msghdr)5006 cfil_action_bless_client(uint32_t kcunit, struct cfil_msg_hdr *msghdr)
5007 {
5008 errno_t error = 0;
5009 struct cfil_info * __single cfil_info = NULL;
5010
5011 bool cfil_attached = false;
5012 struct cfil_msg_bless_client *blessmsg = (struct cfil_msg_bless_client *)msghdr;
5013
5014 // Search and lock socket
5015 struct socket *so = cfil_socket_from_client_uuid(blessmsg->cfb_client_uuid, &cfil_attached);
5016 if (so == NULL) {
5017 error = ENOENT;
5018 } else {
5019 // The client gets a pass automatically
5020 cfil_info = (so->so_flow_db != NULL) ?
5021 soflow_db_get_feature_context(so->so_flow_db, msghdr->cfm_sock_id) : so->so_cfil;
5022
5023 if (cfil_attached) {
5024 if (cfil_info != NULL && cfil_info->cfi_debug) {
5025 cfil_info_log(LOG_ERR, cfil_info, "CFIL: VERDICT RECEIVED: BLESS");
5026 }
5027 cfil_sock_received_verdict(so);
5028 (void)cfil_action_data_pass(so, cfil_info, kcunit, 1, CFM_MAX_OFFSET, CFM_MAX_OFFSET);
5029 (void)cfil_action_data_pass(so, cfil_info, kcunit, 0, CFM_MAX_OFFSET, CFM_MAX_OFFSET);
5030 } else {
5031 so->so_flags1 |= SOF1_CONTENT_FILTER_SKIP;
5032 }
5033 socket_unlock(so, 1);
5034 }
5035
5036 return error;
5037 }
5038
5039 int
cfil_action_set_crypto_key(uint32_t kcunit,struct cfil_msg_hdr * msghdr)5040 cfil_action_set_crypto_key(uint32_t kcunit, struct cfil_msg_hdr *msghdr)
5041 {
5042 struct content_filter *cfc = NULL;
5043 cfil_crypto_state_t crypto_state = NULL;
5044 struct cfil_msg_set_crypto_key *keymsg = (struct cfil_msg_set_crypto_key *)msghdr;
5045
5046 CFIL_LOG(LOG_NOTICE, "");
5047
5048 if (kcunit > MAX_CONTENT_FILTER) {
5049 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
5050 kcunit, MAX_CONTENT_FILTER);
5051 return EINVAL;
5052 }
5053 crypto_state = cfil_crypto_init_client((uint8_t *)keymsg->crypto_key);
5054 if (crypto_state == NULL) {
5055 CFIL_LOG(LOG_ERR, "failed to initialize crypto state for unit %u)",
5056 kcunit);
5057 return EINVAL;
5058 }
5059
5060 cfil_rw_lock_exclusive(&cfil_lck_rw);
5061
5062 cfc = content_filters[kcunit - 1];
5063 if (cfc->cf_kcunit != kcunit) {
5064 CFIL_LOG(LOG_ERR, "bad unit info %u)",
5065 kcunit);
5066 cfil_rw_unlock_exclusive(&cfil_lck_rw);
5067 cfil_crypto_cleanup_state(crypto_state);
5068 return EINVAL;
5069 }
5070 if (cfc->cf_crypto_state != NULL) {
5071 cfil_crypto_cleanup_state(cfc->cf_crypto_state);
5072 cfc->cf_crypto_state = NULL;
5073 }
5074 cfc->cf_crypto_state = crypto_state;
5075
5076 cfil_rw_unlock_exclusive(&cfil_lck_rw);
5077 return 0;
5078 }
5079
5080 static int
cfil_update_entry_offsets(struct socket * so,struct cfil_info * cfil_info,int outgoing,unsigned int datalen)5081 cfil_update_entry_offsets(struct socket *so, struct cfil_info *cfil_info, int outgoing, unsigned int datalen)
5082 {
5083 struct cfil_entry *entry;
5084 struct cfe_buf *entrybuf;
5085 uint32_t kcunit;
5086
5087 CFIL_LOG(LOG_INFO, "so %llx outgoing %d datalen %u",
5088 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing, datalen);
5089
5090 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
5091 entry = &cfil_info->cfi_entries[kcunit - 1];
5092
5093 /* Are we attached to the filter? */
5094 if (entry->cfe_filter == NULL) {
5095 continue;
5096 }
5097
5098 if (outgoing) {
5099 entrybuf = &entry->cfe_snd;
5100 } else {
5101 entrybuf = &entry->cfe_rcv;
5102 }
5103
5104 entrybuf->cfe_ctl_q.q_start += datalen;
5105 if (entrybuf->cfe_pass_offset < entrybuf->cfe_ctl_q.q_start) {
5106 entrybuf->cfe_pass_offset = entrybuf->cfe_ctl_q.q_start;
5107 }
5108 entrybuf->cfe_peeked = entrybuf->cfe_ctl_q.q_start;
5109 if (entrybuf->cfe_peek_offset < entrybuf->cfe_pass_offset) {
5110 entrybuf->cfe_peek_offset = entrybuf->cfe_pass_offset;
5111 }
5112
5113 entrybuf->cfe_ctl_q.q_end += datalen;
5114
5115 entrybuf->cfe_pending_q.q_start += datalen;
5116 entrybuf->cfe_pending_q.q_end += datalen;
5117 }
5118 CFIL_INFO_VERIFY(cfil_info);
5119 return 0;
5120 }
5121
5122 int
cfil_data_common(struct socket * so,struct cfil_info * cfil_info,int outgoing,struct sockaddr * to,struct mbuf * data,struct mbuf * control,uint32_t flags)5123 cfil_data_common(struct socket *so, struct cfil_info *cfil_info, int outgoing, struct sockaddr *to,
5124 struct mbuf *data, struct mbuf *control, uint32_t flags)
5125 {
5126 #pragma unused(to, control, flags)
5127 errno_t error = 0;
5128 unsigned int datalen;
5129 int mbcnt = 0;
5130 int mbnum = 0;
5131 int kcunit;
5132 struct cfi_buf *cfi_buf;
5133 struct mbuf *chain = NULL;
5134
5135 if (cfil_info == NULL) {
5136 CFIL_LOG(LOG_ERR, "so %llx cfil detached",
5137 (uint64_t)VM_KERNEL_ADDRPERM(so));
5138 error = 0;
5139 goto done;
5140 } else if (cfil_info->cfi_flags & CFIF_DROP) {
5141 CFIL_LOG(LOG_ERR, "so %llx drop set",
5142 (uint64_t)VM_KERNEL_ADDRPERM(so));
5143 error = EPIPE;
5144 goto done;
5145 }
5146
5147 datalen = cfil_data_length(data, &mbcnt, &mbnum);
5148
5149 if (datalen == 0) {
5150 error = 0;
5151 goto done;
5152 }
5153
5154 if (outgoing) {
5155 cfi_buf = &cfil_info->cfi_snd;
5156 cfil_info->cfi_byte_outbound_count += datalen;
5157 } else {
5158 cfi_buf = &cfil_info->cfi_rcv;
5159 cfil_info->cfi_byte_inbound_count += datalen;
5160 }
5161
5162 cfi_buf->cfi_pending_last += datalen;
5163 cfi_buf->cfi_pending_mbcnt += mbcnt;
5164 cfi_buf->cfi_pending_mbnum += mbnum;
5165
5166 if (NEED_DGRAM_FLOW_TRACKING(so)) {
5167 if (cfi_buf->cfi_pending_mbnum > cfil_udp_gc_mbuf_num_max ||
5168 cfi_buf->cfi_pending_mbcnt > cfil_udp_gc_mbuf_cnt_max) {
5169 cfi_buf->cfi_tail_drop_cnt++;
5170 cfi_buf->cfi_pending_mbcnt -= mbcnt;
5171 cfi_buf->cfi_pending_mbnum -= mbnum;
5172 return EPIPE;
5173 }
5174 }
5175
5176 cfil_info_buf_verify(cfi_buf);
5177
5178 if (cfil_info->cfi_debug && cfil_log_data) {
5179 CFIL_LOG(LOG_ERR, "CFIL: QUEUEING DATA: <so %llx> %s: data %llx len %u flags 0x%x nextpkt %llx - cfi_pending_last %llu cfi_pending_mbcnt %u cfi_pass_offset %llu",
5180 (uint64_t)VM_KERNEL_ADDRPERM(so),
5181 outgoing ? "OUT" : "IN",
5182 (uint64_t)VM_KERNEL_ADDRPERM(data), datalen, data->m_flags,
5183 (uint64_t)VM_KERNEL_ADDRPERM(data->m_nextpkt),
5184 cfi_buf->cfi_pending_last,
5185 cfi_buf->cfi_pending_mbcnt,
5186 cfi_buf->cfi_pass_offset);
5187 }
5188
5189 /* Fast path when below pass offset */
5190 if (cfi_buf->cfi_pending_last <= cfi_buf->cfi_pass_offset) {
5191 cfil_update_entry_offsets(so, cfil_info, outgoing, datalen);
5192 if (cfil_info->cfi_debug && cfil_log_data) {
5193 CFIL_LOG(LOG_ERR, "CFIL: QUEUEING DATA: <so %llx> %s: FAST PATH",
5194 (uint64_t)VM_KERNEL_ADDRPERM(so),
5195 outgoing ? "OUT" : "IN");
5196 }
5197 // For incoming packets, see if we need to strip off ip header
5198 if (!outgoing && NEED_DGRAM_FLOW_TRACKING(so) && OPTIONAL_IP_HEADER(so)) {
5199 cfil_strip_ip_header(cfil_info, data, so);
5200 }
5201 } else {
5202 struct cfil_entry *iter_entry;
5203 SLIST_FOREACH(iter_entry, &cfil_info->cfi_ordered_entries, cfe_order_link) {
5204 // Is cfil attached to this filter?
5205 kcunit = CFI_ENTRY_KCUNIT(cfil_info, iter_entry);
5206 if (IS_ENTRY_ATTACHED(cfil_info, kcunit)) {
5207 if (NEED_DGRAM_FLOW_TRACKING(so) && chain == NULL) {
5208 /* Datagrams only:
5209 * Chain addr (incoming only TDB), control (optional) and data into one chain.
5210 * This full chain will be reinjected into socket after recieving verdict.
5211 */
5212 (void) cfil_dgram_save_socket_state(cfil_info, data);
5213 chain = sbconcat_mbufs(NULL, outgoing ? NULL : to, data, control);
5214 if (chain == NULL) {
5215 return ENOBUFS;
5216 }
5217 data = chain;
5218 }
5219 error = cfil_data_filter(so, cfil_info, kcunit, outgoing, data,
5220 datalen);
5221 }
5222 /* 0 means passed so continue with next filter */
5223 if (error != 0) {
5224 break;
5225 }
5226 }
5227 }
5228
5229 /* Move cursor if no filter claimed the data */
5230 if (error == 0) {
5231 cfi_buf->cfi_pending_first += datalen;
5232 cfi_buf->cfi_pending_mbcnt -= mbcnt;
5233 cfi_buf->cfi_pending_mbnum -= mbnum;
5234 cfil_info_buf_verify(cfi_buf);
5235 }
5236 done:
5237 CFIL_INFO_VERIFY(cfil_info);
5238
5239 return error;
5240 }
5241
5242 /*
5243 * Callback from socket layer sosendxxx()
5244 */
5245 int
cfil_sock_data_out(struct socket * so,struct sockaddr * to,struct mbuf * data,struct mbuf * control,uint32_t flags,struct soflow_hash_entry * flow_entry)5246 cfil_sock_data_out(struct socket *so, struct sockaddr *to,
5247 struct mbuf *data, struct mbuf *control, uint32_t flags, struct soflow_hash_entry *flow_entry)
5248 {
5249 int error = 0;
5250 int new_filter_control_unit = 0;
5251
5252 if (NEED_DGRAM_FLOW_TRACKING(so)) {
5253 return cfil_sock_udp_handle_data(TRUE, so, NULL, to, data, control, flags, flow_entry);
5254 }
5255
5256 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
5257 /* Drop pre-existing TCP sockets if filter is enabled now */
5258 if (!DO_PRESERVE_CONNECTIONS && cfil_active_count > 0 && !SKIP_FILTER_FOR_TCP_SOCKET(so)) {
5259 new_filter_control_unit = necp_socket_get_content_filter_control_unit(so);
5260 if (new_filter_control_unit > 0) {
5261 CFIL_LOG(LOG_NOTICE, "CFIL: TCP(OUT) <so %llx> - filter state changed - dropped pre-existing flow", (uint64_t)VM_KERNEL_ADDRPERM(so));
5262 return EPIPE;
5263 }
5264 }
5265 return 0;
5266 }
5267
5268 /* Drop pre-existing TCP sockets when filter state changed */
5269 new_filter_control_unit = necp_socket_get_content_filter_control_unit(so);
5270 if (new_filter_control_unit > 0 && new_filter_control_unit != so->so_cfil->cfi_filter_control_unit && !SKIP_FILTER_FOR_TCP_SOCKET(so)) {
5271 if (DO_PRESERVE_CONNECTIONS) {
5272 so->so_cfil->cfi_filter_control_unit = new_filter_control_unit;
5273 } else {
5274 CFIL_LOG(LOG_NOTICE, "CFIL: TCP(OUT) <so %llx> - filter state changed - dropped pre-existing flow (old state 0x%x new state 0x%x)",
5275 (uint64_t)VM_KERNEL_ADDRPERM(so),
5276 so->so_cfil->cfi_filter_control_unit, new_filter_control_unit);
5277 return EPIPE;
5278 }
5279 }
5280
5281 /*
5282 * Pass initial data for TFO.
5283 */
5284 if (IS_INITIAL_TFO_DATA(so)) {
5285 return 0;
5286 }
5287
5288 socket_lock_assert_owned(so);
5289
5290 if (so->so_cfil->cfi_flags & CFIF_DROP) {
5291 CFIL_LOG(LOG_ERR, "so %llx drop set",
5292 (uint64_t)VM_KERNEL_ADDRPERM(so));
5293 return EPIPE;
5294 }
5295 if (control != NULL) {
5296 CFIL_LOG(LOG_ERR, "so %llx control",
5297 (uint64_t)VM_KERNEL_ADDRPERM(so));
5298 OSIncrementAtomic(&cfil_stats.cfs_data_out_control);
5299 }
5300 if ((flags & MSG_OOB)) {
5301 CFIL_LOG(LOG_ERR, "so %llx MSG_OOB",
5302 (uint64_t)VM_KERNEL_ADDRPERM(so));
5303 OSIncrementAtomic(&cfil_stats.cfs_data_out_oob);
5304 }
5305 /*
5306 * Abort if socket is defunct.
5307 */
5308 if (so->so_flags & SOF_DEFUNCT) {
5309 return EPIPE;
5310 }
5311 if ((so->so_snd.sb_flags & SB_LOCK) == 0) {
5312 panic("so %p SB_LOCK not set", so);
5313 }
5314
5315 if (so->so_snd.sb_cfil_thread != NULL) {
5316 panic("%s sb_cfil_thread %p not NULL", __func__,
5317 so->so_snd.sb_cfil_thread);
5318 }
5319
5320 error = cfil_data_common(so, so->so_cfil, 1, to, data, control, flags);
5321
5322 return error;
5323 }
5324
5325 /*
5326 * Callback from socket layer sbappendxxx()
5327 */
5328 int
cfil_sock_data_in(struct socket * so,struct sockaddr * from,struct mbuf * data,struct mbuf * control,uint32_t flags,struct soflow_hash_entry * flow_entry)5329 cfil_sock_data_in(struct socket *so, struct sockaddr *from,
5330 struct mbuf *data, struct mbuf *control, uint32_t flags, struct soflow_hash_entry *flow_entry)
5331 {
5332 int error = 0;
5333 int new_filter_control_unit = 0;
5334
5335 if (NEED_DGRAM_FLOW_TRACKING(so)) {
5336 return cfil_sock_udp_handle_data(FALSE, so, NULL, from, data, control, flags, flow_entry);
5337 }
5338
5339 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
5340 /* Drop pre-existing TCP sockets if filter is enabled now */
5341 if (!DO_PRESERVE_CONNECTIONS && cfil_active_count > 0 && !SKIP_FILTER_FOR_TCP_SOCKET(so)) {
5342 new_filter_control_unit = necp_socket_get_content_filter_control_unit(so);
5343 if (new_filter_control_unit > 0) {
5344 CFIL_LOG(LOG_NOTICE, "CFIL: TCP(IN) <so %llx> - filter state changed - dropped pre-existing flow", (uint64_t)VM_KERNEL_ADDRPERM(so));
5345 return EPIPE;
5346 }
5347 }
5348 return 0;
5349 }
5350
5351 /* Drop pre-existing TCP sockets when filter state changed */
5352 new_filter_control_unit = necp_socket_get_content_filter_control_unit(so);
5353 if (new_filter_control_unit > 0 && new_filter_control_unit != so->so_cfil->cfi_filter_control_unit && !SKIP_FILTER_FOR_TCP_SOCKET(so)) {
5354 if (DO_PRESERVE_CONNECTIONS) {
5355 so->so_cfil->cfi_filter_control_unit = new_filter_control_unit;
5356 } else {
5357 CFIL_LOG(LOG_NOTICE, "CFIL: TCP(IN) <so %llx> - filter state changed - dropped pre-existing flow (old state 0x%x new state 0x%x)",
5358 (uint64_t)VM_KERNEL_ADDRPERM(so),
5359 so->so_cfil->cfi_filter_control_unit, new_filter_control_unit);
5360 return EPIPE;
5361 }
5362 }
5363
5364 /*
5365 * Pass initial data for TFO.
5366 */
5367 if (IS_INITIAL_TFO_DATA(so)) {
5368 return 0;
5369 }
5370
5371 socket_lock_assert_owned(so);
5372
5373 if (so->so_cfil->cfi_flags & CFIF_DROP) {
5374 CFIL_LOG(LOG_ERR, "so %llx drop set",
5375 (uint64_t)VM_KERNEL_ADDRPERM(so));
5376 return EPIPE;
5377 }
5378 if (control != NULL) {
5379 CFIL_LOG(LOG_ERR, "so %llx control",
5380 (uint64_t)VM_KERNEL_ADDRPERM(so));
5381 OSIncrementAtomic(&cfil_stats.cfs_data_in_control);
5382 }
5383 if (data->m_type == MT_OOBDATA) {
5384 CFIL_LOG(LOG_ERR, "so %llx MSG_OOB",
5385 (uint64_t)VM_KERNEL_ADDRPERM(so));
5386 OSIncrementAtomic(&cfil_stats.cfs_data_in_oob);
5387 }
5388 error = cfil_data_common(so, so->so_cfil, 0, from, data, control, flags);
5389
5390 return error;
5391 }
5392
5393 /*
5394 * Callback from socket layer soshutdownxxx()
5395 *
5396 * We may delay the shutdown write if there's outgoing data in process.
5397 *
5398 * There is no point in delaying the shutdown read because the process
5399 * indicated that it does not want to read anymore data.
5400 */
5401 int
cfil_sock_shutdown(struct socket * so,int * how)5402 cfil_sock_shutdown(struct socket *so, int *how)
5403 {
5404 int error = 0;
5405
5406 if (NEED_DGRAM_FLOW_TRACKING(so)) {
5407 return cfil_sock_udp_shutdown(so, how);
5408 }
5409
5410 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
5411 goto done;
5412 }
5413
5414 socket_lock_assert_owned(so);
5415
5416 CFIL_LOG(LOG_INFO, "so %llx how %d",
5417 (uint64_t)VM_KERNEL_ADDRPERM(so), *how);
5418
5419 /*
5420 * Check the state of the socket before the content filter
5421 */
5422 if (*how != SHUT_WR && (so->so_state & SS_CANTRCVMORE) != 0) {
5423 /* read already shut down */
5424 error = ENOTCONN;
5425 goto done;
5426 }
5427 if (*how != SHUT_RD && (so->so_state & SS_CANTSENDMORE) != 0) {
5428 /* write already shut down */
5429 error = ENOTCONN;
5430 goto done;
5431 }
5432
5433 if ((so->so_cfil->cfi_flags & CFIF_DROP) != 0) {
5434 CFIL_LOG(LOG_ERR, "so %llx drop set",
5435 (uint64_t)VM_KERNEL_ADDRPERM(so));
5436 goto done;
5437 }
5438
5439 /*
5440 * shutdown read: SHUT_RD or SHUT_RDWR
5441 */
5442 if (*how != SHUT_WR) {
5443 if (so->so_cfil->cfi_flags & CFIF_SHUT_RD) {
5444 error = ENOTCONN;
5445 goto done;
5446 }
5447 so->so_cfil->cfi_flags |= CFIF_SHUT_RD;
5448 cfil_sock_notify_shutdown(so, SHUT_RD);
5449 }
5450 /*
5451 * shutdown write: SHUT_WR or SHUT_RDWR
5452 */
5453 if (*how != SHUT_RD) {
5454 if (so->so_cfil->cfi_flags & CFIF_SHUT_WR) {
5455 error = ENOTCONN;
5456 goto done;
5457 }
5458 so->so_cfil->cfi_flags |= CFIF_SHUT_WR;
5459 cfil_sock_notify_shutdown(so, SHUT_WR);
5460 /*
5461 * When outgoing data is pending, we delay the shutdown at the
5462 * protocol level until the content filters give the final
5463 * verdict on the pending data.
5464 */
5465 if (cfil_sock_data_pending(&so->so_snd) != 0) {
5466 /*
5467 * When shutting down the read and write sides at once
5468 * we can proceed to the final shutdown of the read
5469 * side. Otherwise, we just return.
5470 */
5471 if (*how == SHUT_WR) {
5472 error = EJUSTRETURN;
5473 } else if (*how == SHUT_RDWR) {
5474 *how = SHUT_RD;
5475 }
5476 }
5477 }
5478 done:
5479 return error;
5480 }
5481
5482 /*
5483 * This is called when the socket is closed and there is no more
5484 * opportunity for filtering
5485 */
5486 void
cfil_sock_is_closed(struct socket * so)5487 cfil_sock_is_closed(struct socket *so)
5488 {
5489 errno_t error = 0;
5490 int kcunit;
5491
5492 if (NEED_DGRAM_FLOW_TRACKING(so)) {
5493 cfil_sock_udp_is_closed(so);
5494 return;
5495 }
5496
5497 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
5498 return;
5499 }
5500
5501 CFIL_LOG(LOG_INFO, "so %llx", (uint64_t)VM_KERNEL_ADDRPERM(so));
5502
5503 socket_lock_assert_owned(so);
5504
5505 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
5506 /* Let the filters know of the closing */
5507 error = cfil_dispatch_closed_event(so, so->so_cfil, kcunit);
5508 }
5509
5510 /* Last chance to push passed data out */
5511 error = cfil_acquire_sockbuf(so, so->so_cfil, 1);
5512 if (error == 0) {
5513 cfil_service_inject_queue(so, so->so_cfil, 1);
5514 }
5515 cfil_release_sockbuf(so, 1);
5516
5517 if (so->so_cfil != NULL) {
5518 so->so_cfil->cfi_flags |= CFIF_SOCK_CLOSED;
5519 }
5520
5521 /* Pending data needs to go */
5522 cfil_flush_queues(so, so->so_cfil);
5523
5524 CFIL_INFO_VERIFY(so->so_cfil);
5525 }
5526
5527 /*
5528 * This is called when the socket is disconnected so let the filters
5529 * know about the disconnection and that no more data will come
5530 *
5531 * The how parameter has the same values as soshutown()
5532 */
5533 void
cfil_sock_notify_shutdown(struct socket * so,int how)5534 cfil_sock_notify_shutdown(struct socket *so, int how)
5535 {
5536 errno_t error = 0;
5537 int kcunit;
5538
5539 if (NEED_DGRAM_FLOW_TRACKING(so)) {
5540 cfil_sock_udp_notify_shutdown(so, how, 0, 0);
5541 return;
5542 }
5543
5544 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
5545 return;
5546 }
5547
5548 CFIL_LOG(LOG_INFO, "so %llx how %d",
5549 (uint64_t)VM_KERNEL_ADDRPERM(so), how);
5550
5551 socket_lock_assert_owned(so);
5552
5553 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
5554 /* Disconnect incoming side */
5555 if (how != SHUT_WR) {
5556 error = cfil_dispatch_disconnect_event(so, so->so_cfil, kcunit, 0);
5557 }
5558 /* Disconnect outgoing side */
5559 if (how != SHUT_RD) {
5560 error = cfil_dispatch_disconnect_event(so, so->so_cfil, kcunit, 1);
5561 }
5562 }
5563 }
5564
5565 static int
cfil_filters_attached(struct socket * so)5566 cfil_filters_attached(struct socket *so)
5567 {
5568 struct cfil_entry *entry;
5569 uint32_t kcunit;
5570 int attached = 0;
5571
5572 if (NEED_DGRAM_FLOW_TRACKING(so)) {
5573 return cfil_filters_udp_attached(so, FALSE);
5574 }
5575
5576 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
5577 return 0;
5578 }
5579
5580 socket_lock_assert_owned(so);
5581
5582 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
5583 entry = &so->so_cfil->cfi_entries[kcunit - 1];
5584
5585 /* Are we attached to the filter? */
5586 if (entry->cfe_filter == NULL) {
5587 continue;
5588 }
5589 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) {
5590 continue;
5591 }
5592 if ((entry->cfe_flags & CFEF_CFIL_DETACHED) != 0) {
5593 continue;
5594 }
5595 attached = 1;
5596 break;
5597 }
5598
5599 return attached;
5600 }
5601
5602 /*
5603 * This is called when the socket is closed and we are waiting for
5604 * the filters to gives the final pass or drop
5605 */
5606 void
cfil_sock_close_wait(struct socket * so)5607 cfil_sock_close_wait(struct socket *so)
5608 {
5609 lck_mtx_t *mutex_held;
5610 struct timespec ts;
5611 int error;
5612
5613 if (NEED_DGRAM_FLOW_TRACKING(so)) {
5614 cfil_sock_udp_close_wait(so);
5615 return;
5616 }
5617
5618 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
5619 return;
5620 }
5621
5622 // This flow does not need to wait for close ack from user-space
5623 if (IS_NO_CLOSE_WAIT(so->so_cfil)) {
5624 if (so->so_cfil->cfi_debug) {
5625 cfil_info_log(LOG_ERR, so->so_cfil, "CFIL: SKIP CLOSE WAIT");
5626 }
5627 return;
5628 }
5629
5630 CFIL_LOG(LOG_INFO, "so %llx", (uint64_t)VM_KERNEL_ADDRPERM(so));
5631
5632 if (so->so_proto->pr_getlock != NULL) {
5633 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
5634 } else {
5635 mutex_held = so->so_proto->pr_domain->dom_mtx;
5636 }
5637 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
5638
5639 while (cfil_filters_attached(so)) {
5640 /*
5641 * Notify the filters we are going away so they can detach
5642 */
5643 cfil_sock_notify_shutdown(so, SHUT_RDWR);
5644
5645 /*
5646 * Make sure we need to wait after the filter are notified
5647 * of the disconnection
5648 */
5649 if (cfil_filters_attached(so) == 0) {
5650 break;
5651 }
5652
5653 CFIL_LOG(LOG_INFO, "so %llx waiting",
5654 (uint64_t)VM_KERNEL_ADDRPERM(so));
5655
5656 ts.tv_sec = cfil_close_wait_timeout / 1000;
5657 ts.tv_nsec = (cfil_close_wait_timeout % 1000) *
5658 NSEC_PER_USEC * 1000;
5659
5660 OSIncrementAtomic(&cfil_stats.cfs_close_wait);
5661 so->so_cfil->cfi_flags |= CFIF_CLOSE_WAIT;
5662 error = msleep((caddr_t)so->so_cfil, mutex_held,
5663 PSOCK | PCATCH, "cfil_sock_close_wait", &ts);
5664
5665 // Woke up from sleep, validate if cfil_info is still valid
5666 if (so->so_cfil == NULL) {
5667 // cfil_info is not valid, do not continue
5668 return;
5669 }
5670
5671 so->so_cfil->cfi_flags &= ~CFIF_CLOSE_WAIT;
5672
5673 CFIL_LOG(LOG_NOTICE, "so %llx timed out %d",
5674 (uint64_t)VM_KERNEL_ADDRPERM(so), (error != 0));
5675
5676 /*
5677 * Force close in case of timeout
5678 */
5679 if (error != 0) {
5680 OSIncrementAtomic(&cfil_stats.cfs_close_wait_timeout);
5681 break;
5682 }
5683 }
5684 }
5685
5686 /*
5687 * Returns the size of the data held by the content filter by using
5688 */
5689 int32_t
cfil_sock_data_pending(struct sockbuf * sb)5690 cfil_sock_data_pending(struct sockbuf *sb)
5691 {
5692 struct socket *so = sb->sb_so;
5693 uint64_t pending = 0;
5694
5695 if (NEED_DGRAM_FLOW_TRACKING(so)) {
5696 return cfil_sock_udp_data_pending(sb, FALSE);
5697 }
5698
5699 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil != NULL) {
5700 struct cfi_buf *cfi_buf;
5701
5702 socket_lock_assert_owned(so);
5703
5704 if ((sb->sb_flags & SB_RECV) == 0) {
5705 cfi_buf = &so->so_cfil->cfi_snd;
5706 } else {
5707 cfi_buf = &so->so_cfil->cfi_rcv;
5708 }
5709
5710 pending = cfi_buf->cfi_pending_last -
5711 cfi_buf->cfi_pending_first;
5712
5713 /*
5714 * If we are limited by the "chars of mbufs used" roughly
5715 * adjust so we won't overcommit
5716 */
5717 if (pending > (uint64_t)cfi_buf->cfi_pending_mbcnt) {
5718 pending = cfi_buf->cfi_pending_mbcnt;
5719 }
5720 }
5721
5722 VERIFY(pending < INT32_MAX);
5723
5724 return (int32_t)(pending);
5725 }
5726
5727 /*
5728 * Return the socket buffer space used by data being held by content filters
5729 * so processes won't clog the socket buffer
5730 */
5731 int32_t
cfil_sock_data_space(struct sockbuf * sb)5732 cfil_sock_data_space(struct sockbuf *sb)
5733 {
5734 struct socket *so = sb->sb_so;
5735 uint64_t pending = 0;
5736
5737 if (NEED_DGRAM_FLOW_TRACKING(so)) {
5738 return cfil_sock_udp_data_pending(sb, TRUE);
5739 }
5740
5741 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil != NULL &&
5742 so->so_snd.sb_cfil_thread != current_thread()) {
5743 struct cfi_buf *cfi_buf;
5744
5745 socket_lock_assert_owned(so);
5746
5747 if ((sb->sb_flags & SB_RECV) == 0) {
5748 cfi_buf = &so->so_cfil->cfi_snd;
5749 } else {
5750 cfi_buf = &so->so_cfil->cfi_rcv;
5751 }
5752
5753 pending = cfi_buf->cfi_pending_last -
5754 cfi_buf->cfi_pending_first;
5755
5756 /*
5757 * If we are limited by the "chars of mbufs used" roughly
5758 * adjust so we won't overcommit
5759 */
5760 if ((uint64_t)cfi_buf->cfi_pending_mbcnt > pending) {
5761 pending = cfi_buf->cfi_pending_mbcnt;
5762 }
5763
5764 VERIFY(pending < INT32_MAX);
5765 }
5766
5767 return (int32_t)(pending);
5768 }
5769
5770 /*
5771 * A callback from the socket and protocol layer when data becomes
5772 * available in the socket buffer to give a chance for the content filter
5773 * to re-inject data that was held back
5774 */
5775 void
cfil_sock_buf_update(struct sockbuf * sb)5776 cfil_sock_buf_update(struct sockbuf *sb)
5777 {
5778 int outgoing;
5779 int error;
5780 struct socket *so = sb->sb_so;
5781
5782 if (NEED_DGRAM_FLOW_TRACKING(so)) {
5783 cfil_sock_udp_buf_update(sb);
5784 return;
5785 }
5786
5787 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
5788 return;
5789 }
5790
5791 if (!cfil_sbtrim) {
5792 return;
5793 }
5794
5795 socket_lock_assert_owned(so);
5796
5797 if ((sb->sb_flags & SB_RECV) == 0) {
5798 if ((so->so_cfil->cfi_flags & CFIF_RETRY_INJECT_OUT) == 0) {
5799 return;
5800 }
5801 outgoing = 1;
5802 OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_retry);
5803 } else {
5804 if ((so->so_cfil->cfi_flags & CFIF_RETRY_INJECT_IN) == 0) {
5805 return;
5806 }
5807 outgoing = 0;
5808 OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_retry);
5809 }
5810
5811 CFIL_LOG(LOG_NOTICE, "so %llx outgoing %d",
5812 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);
5813
5814 error = cfil_acquire_sockbuf(so, so->so_cfil, outgoing);
5815 if (error == 0) {
5816 cfil_service_inject_queue(so, so->so_cfil, outgoing);
5817 }
5818 cfil_release_sockbuf(so, outgoing);
5819 }
5820
5821 int
sysctl_cfil_filter_list(struct sysctl_oid * oidp,void * arg1,int arg2,struct sysctl_req * req)5822 sysctl_cfil_filter_list(struct sysctl_oid *oidp, void *arg1, int arg2,
5823 struct sysctl_req *req)
5824 {
5825 #pragma unused(oidp, arg1, arg2)
5826 int error = 0;
5827 size_t len = 0;
5828 u_int32_t i;
5829
5830 /* Read only */
5831 if (req->newptr != USER_ADDR_NULL) {
5832 return EPERM;
5833 }
5834
5835 cfil_rw_lock_shared(&cfil_lck_rw);
5836
5837 for (i = 0; i < MAX_CONTENT_FILTER; i++) {
5838 struct cfil_filter_stat filter_stat;
5839 struct content_filter *cfc = content_filters[i];
5840
5841 if (cfc == NULL) {
5842 continue;
5843 }
5844
5845 /* If just asking for the size */
5846 if (req->oldptr == USER_ADDR_NULL) {
5847 len += sizeof(struct cfil_filter_stat);
5848 continue;
5849 }
5850
5851 bzero(&filter_stat, sizeof(struct cfil_filter_stat));
5852 filter_stat.cfs_len = sizeof(struct cfil_filter_stat);
5853 filter_stat.cfs_filter_id = cfc->cf_kcunit;
5854 filter_stat.cfs_flags = cfc->cf_flags;
5855 filter_stat.cfs_sock_count = cfc->cf_sock_count;
5856 filter_stat.cfs_necp_control_unit = cfc->cf_necp_control_unit;
5857
5858 error = SYSCTL_OUT(req, &filter_stat,
5859 sizeof(struct cfil_filter_stat));
5860 if (error != 0) {
5861 break;
5862 }
5863 }
5864 /* If just asking for the size */
5865 if (req->oldptr == USER_ADDR_NULL) {
5866 req->oldidx = len;
5867 }
5868
5869 cfil_rw_unlock_shared(&cfil_lck_rw);
5870
5871 if (cfil_log_level >= LOG_DEBUG) {
5872 if (req->oldptr != USER_ADDR_NULL) {
5873 for (i = 1; i <= MAX_CONTENT_FILTER; i++) {
5874 cfil_filter_show(i);
5875 }
5876 }
5877 }
5878
5879 return error;
5880 }
5881
5882 static int
sysctl_cfil_sock_list(struct sysctl_oid * oidp,void * arg1,int arg2,struct sysctl_req * req)5883 sysctl_cfil_sock_list(struct sysctl_oid *oidp, void *arg1, int arg2,
5884 struct sysctl_req *req)
5885 {
5886 #pragma unused(oidp, arg1, arg2)
5887 int error = 0;
5888 u_int32_t i;
5889 struct cfil_info *cfi;
5890
5891 /* Read only */
5892 if (req->newptr != USER_ADDR_NULL) {
5893 return EPERM;
5894 }
5895
5896 cfil_rw_lock_shared(&cfil_lck_rw);
5897
5898 /*
5899 * If just asking for the size,
5900 */
5901 if (req->oldptr == USER_ADDR_NULL) {
5902 req->oldidx = cfil_sock_attached_count *
5903 sizeof(struct cfil_sock_stat);
5904 /* Bump the length in case new sockets gets attached */
5905 req->oldidx += req->oldidx >> 3;
5906 goto done;
5907 }
5908
5909 TAILQ_FOREACH(cfi, &cfil_sock_head, cfi_link) {
5910 struct cfil_entry *entry;
5911 struct cfil_sock_stat stat;
5912 struct socket *so = cfi->cfi_so;
5913
5914 bzero(&stat, sizeof(struct cfil_sock_stat));
5915 stat.cfs_len = sizeof(struct cfil_sock_stat);
5916 stat.cfs_sock_id = cfi->cfi_sock_id;
5917 stat.cfs_flags = cfi->cfi_flags;
5918
5919 if (so != NULL && so->so_proto != NULL && so->so_proto->pr_domain != NULL) {
5920 stat.cfs_pid = so->last_pid;
5921 memcpy(stat.cfs_uuid, so->last_uuid,
5922 sizeof(uuid_t));
5923 if (so->so_flags & SOF_DELEGATED) {
5924 stat.cfs_e_pid = so->e_pid;
5925 memcpy(stat.cfs_e_uuid, so->e_uuid,
5926 sizeof(uuid_t));
5927 }
5928 #if defined(XNU_TARGET_OS_OSX)
5929 else if (!uuid_is_null(so->so_ruuid)) {
5930 stat.cfs_e_pid = so->so_rpid;
5931 memcpy(stat.cfs_e_uuid, so->so_ruuid,
5932 sizeof(uuid_t));
5933 }
5934 #endif
5935 else {
5936 stat.cfs_e_pid = so->last_pid;
5937 memcpy(stat.cfs_e_uuid, so->last_uuid,
5938 sizeof(uuid_t));
5939 }
5940
5941 stat.cfs_sock_family = SOCK_DOM(so);
5942 stat.cfs_sock_type = SOCK_TYPE(so);
5943 stat.cfs_sock_protocol = GET_SO_PROTO(so);
5944 }
5945
5946 stat.cfs_snd.cbs_pending_first =
5947 cfi->cfi_snd.cfi_pending_first;
5948 stat.cfs_snd.cbs_pending_last =
5949 cfi->cfi_snd.cfi_pending_last;
5950 stat.cfs_snd.cbs_inject_q_len =
5951 cfil_queue_len(&cfi->cfi_snd.cfi_inject_q);
5952 stat.cfs_snd.cbs_pass_offset =
5953 cfi->cfi_snd.cfi_pass_offset;
5954
5955 stat.cfs_rcv.cbs_pending_first =
5956 cfi->cfi_rcv.cfi_pending_first;
5957 stat.cfs_rcv.cbs_pending_last =
5958 cfi->cfi_rcv.cfi_pending_last;
5959 stat.cfs_rcv.cbs_inject_q_len =
5960 cfil_queue_len(&cfi->cfi_rcv.cfi_inject_q);
5961 stat.cfs_rcv.cbs_pass_offset =
5962 cfi->cfi_rcv.cfi_pass_offset;
5963
5964 for (i = 0; i < MAX_CONTENT_FILTER; i++) {
5965 struct cfil_entry_stat *estat;
5966 struct cfe_buf *ebuf;
5967 struct cfe_buf_stat *sbuf;
5968
5969 entry = &cfi->cfi_entries[i];
5970
5971 estat = &stat.ces_entries[i];
5972
5973 estat->ces_len = sizeof(struct cfil_entry_stat);
5974 estat->ces_filter_id = entry->cfe_filter ?
5975 entry->cfe_filter->cf_kcunit : 0;
5976 estat->ces_flags = entry->cfe_flags;
5977 estat->ces_necp_control_unit =
5978 entry->cfe_necp_control_unit;
5979
5980 estat->ces_last_event.tv_sec =
5981 (int64_t)entry->cfe_last_event.tv_sec;
5982 estat->ces_last_event.tv_usec =
5983 (int64_t)entry->cfe_last_event.tv_usec;
5984
5985 estat->ces_last_action.tv_sec =
5986 (int64_t)entry->cfe_last_action.tv_sec;
5987 estat->ces_last_action.tv_usec =
5988 (int64_t)entry->cfe_last_action.tv_usec;
5989
5990 ebuf = &entry->cfe_snd;
5991 sbuf = &estat->ces_snd;
5992 sbuf->cbs_pending_first =
5993 cfil_queue_offset_first(&ebuf->cfe_pending_q);
5994 sbuf->cbs_pending_last =
5995 cfil_queue_offset_last(&ebuf->cfe_pending_q);
5996 sbuf->cbs_ctl_first =
5997 cfil_queue_offset_first(&ebuf->cfe_ctl_q);
5998 sbuf->cbs_ctl_last =
5999 cfil_queue_offset_last(&ebuf->cfe_ctl_q);
6000 sbuf->cbs_pass_offset = ebuf->cfe_pass_offset;
6001 sbuf->cbs_peek_offset = ebuf->cfe_peek_offset;
6002 sbuf->cbs_peeked = ebuf->cfe_peeked;
6003
6004 ebuf = &entry->cfe_rcv;
6005 sbuf = &estat->ces_rcv;
6006 sbuf->cbs_pending_first =
6007 cfil_queue_offset_first(&ebuf->cfe_pending_q);
6008 sbuf->cbs_pending_last =
6009 cfil_queue_offset_last(&ebuf->cfe_pending_q);
6010 sbuf->cbs_ctl_first =
6011 cfil_queue_offset_first(&ebuf->cfe_ctl_q);
6012 sbuf->cbs_ctl_last =
6013 cfil_queue_offset_last(&ebuf->cfe_ctl_q);
6014 sbuf->cbs_pass_offset = ebuf->cfe_pass_offset;
6015 sbuf->cbs_peek_offset = ebuf->cfe_peek_offset;
6016 sbuf->cbs_peeked = ebuf->cfe_peeked;
6017 }
6018 error = SYSCTL_OUT(req, &stat,
6019 sizeof(struct cfil_sock_stat));
6020 if (error != 0) {
6021 break;
6022 }
6023 }
6024 done:
6025 cfil_rw_unlock_shared(&cfil_lck_rw);
6026
6027 if (cfil_log_level >= LOG_DEBUG) {
6028 if (req->oldptr != USER_ADDR_NULL) {
6029 cfil_info_show();
6030 }
6031 }
6032
6033 return error;
6034 }
6035
6036 /*
6037 * UDP Socket Support
6038 */
6039 static void
cfil_hash_entry_log(int level,struct socket * so,struct soflow_hash_entry * entry,uint64_t sockId,const char * msg)6040 cfil_hash_entry_log(int level, struct socket *so, struct soflow_hash_entry *entry, uint64_t sockId, const char* msg)
6041 {
6042 char local[MAX_IPv6_STR_LEN + 6];
6043 char remote[MAX_IPv6_STR_LEN + 6];
6044 const void *addr;
6045
6046 // No sock or not UDP, no-op
6047 if (so == NULL || entry == NULL) {
6048 return;
6049 }
6050
6051 local[0] = remote[0] = 0x0;
6052
6053 switch (entry->soflow_family) {
6054 case AF_INET6:
6055 addr = &entry->soflow_laddr.addr6;
6056 inet_ntop(AF_INET6, addr, local, sizeof(local));
6057 addr = &entry->soflow_faddr.addr6;
6058 inet_ntop(AF_INET6, addr, remote, sizeof(local));
6059 break;
6060 case AF_INET:
6061 addr = &entry->soflow_laddr.addr46.ia46_addr4.s_addr;
6062 inet_ntop(AF_INET, addr, local, sizeof(local));
6063 addr = &entry->soflow_faddr.addr46.ia46_addr4.s_addr;
6064 inet_ntop(AF_INET, addr, remote, sizeof(local));
6065 break;
6066 default:
6067 return;
6068 }
6069
6070 CFIL_LOG(level, "<%s>: <%s(%d) so %llx cfil %p, entry %p, sockID %llu <%llx> feat_ctxt_id <%llu> lport %d fport %d laddr %s faddr %s hash %X",
6071 msg,
6072 IS_UDP(so) ? "UDP" : "proto", GET_SO_PROTO(so),
6073 (uint64_t)VM_KERNEL_ADDRPERM(so), entry->soflow_feat_ctxt, entry, sockId, sockId, entry->soflow_feat_ctxt_id,
6074 ntohs(entry->soflow_lport), ntohs(entry->soflow_fport), local, remote,
6075 entry->soflow_flowhash);
6076 }
6077
6078 static void
cfil_inp_log(int level,struct socket * so,const char * msg)6079 cfil_inp_log(int level, struct socket *so, const char* msg)
6080 {
6081 struct inpcb *inp = NULL;
6082 struct sockaddr_in *sin = NULL;
6083 struct sockaddr_in6 *sin6 = NULL;
6084 char local[MAX_IPv6_STR_LEN + 6];
6085 char remote[MAX_IPv6_STR_LEN + 6];
6086 ushort lport = 0;
6087 ushort fport = 0;
6088 const void *addr;
6089
6090 if (so == NULL) {
6091 return;
6092 }
6093
6094 inp = sotoinpcb(so);
6095 if (inp == NULL) {
6096 return;
6097 }
6098
6099 local[0] = remote[0] = 0x0;
6100
6101 if (inp->inp_vflag & INP_IPV6) {
6102 addr = &inp->in6p_laddr.s6_addr32;
6103 inet_ntop(AF_INET6, addr, local, sizeof(local));
6104 addr = &inp->in6p_faddr.s6_addr32;
6105 inet_ntop(AF_INET6, addr, remote, sizeof(remote));
6106 } else {
6107 addr = &inp->inp_laddr.s_addr;
6108 inet_ntop(AF_INET, addr, local, sizeof(local));
6109 addr = &inp->inp_faddr.s_addr;
6110 inet_ntop(AF_INET, addr, remote, sizeof(remote));
6111 }
6112 lport = inp->inp_lport;
6113 fport = inp->inp_fport;
6114
6115 if (so->so_cfil && so->so_cfil->cfi_so_attach_faddr.sa.sa_len > 0) {
6116 if (so->so_cfil->cfi_so_attach_faddr.sa.sa_family == AF_INET6) {
6117 sin6 = SIN6(&so->so_cfil->cfi_so_attach_faddr.sa);
6118 addr = &sin6->sin6_addr;
6119 inet_ntop(AF_INET6, addr, remote, sizeof(remote));
6120 fport = sin6->sin6_port;
6121 } else if (so->so_cfil->cfi_so_attach_faddr.sa.sa_family == AF_INET) {
6122 sin = SIN(&so->so_cfil->cfi_so_attach_faddr.sa);
6123 addr = &sin->sin_addr.s_addr;
6124 inet_ntop(AF_INET, addr, remote, sizeof(remote));
6125 fport = sin->sin_port;
6126 }
6127 }
6128 if (so->so_cfil && so->so_cfil->cfi_so_attach_laddr.sa.sa_len > 0) {
6129 if (so->so_cfil->cfi_so_attach_laddr.sa.sa_family == AF_INET6) {
6130 sin6 = SIN6(&so->so_cfil->cfi_so_attach_laddr.sa);
6131 addr = &sin6->sin6_addr;
6132 inet_ntop(AF_INET6, addr, local, sizeof(remote));
6133 fport = sin6->sin6_port;
6134 } else if (so->so_cfil->cfi_so_attach_laddr.sa.sa_family == AF_INET) {
6135 sin = SIN(&so->so_cfil->cfi_so_attach_laddr.sa);
6136 addr = &sin->sin_addr.s_addr;
6137 inet_ntop(AF_INET, addr, local, sizeof(remote));
6138 fport = sin->sin_port;
6139 }
6140 }
6141
6142 if (so->so_cfil != NULL) {
6143 CFIL_LOG(level, "<%s>: <%s so %llx cfil %p - flags 0x%x 0x%x, sockID %llu <%llx>> lport %d fport %d laddr %s faddr %s",
6144 msg, IS_UDP(so) ? "UDP" : "TCP",
6145 (uint64_t)VM_KERNEL_ADDRPERM(so), so->so_cfil, inp->inp_flags, inp->inp_socket->so_flags, so->so_cfil->cfi_sock_id, so->so_cfil->cfi_sock_id,
6146 ntohs(lport), ntohs(fport), local, remote);
6147 } else {
6148 CFIL_LOG(level, "<%s>: <%s so %llx - flags 0x%x 0x%x> lport %d fport %d laddr %s faddr %s",
6149 msg, IS_UDP(so) ? "UDP" : "TCP",
6150 (uint64_t)VM_KERNEL_ADDRPERM(so), inp->inp_flags, inp->inp_socket->so_flags,
6151 ntohs(lport), ntohs(fport), local, remote);
6152 }
6153 }
6154
6155 static void
cfil_info_log(int level,struct cfil_info * cfil_info,const char * msg)6156 cfil_info_log(int level, struct cfil_info *cfil_info, const char* msg)
6157 {
6158 if (cfil_info == NULL) {
6159 return;
6160 }
6161
6162 if (cfil_info->cfi_hash_entry != NULL) {
6163 cfil_hash_entry_log(level, cfil_info->cfi_so, cfil_info->cfi_hash_entry, cfil_info->cfi_sock_id, msg);
6164 } else {
6165 cfil_inp_log(level, cfil_info->cfi_so, msg);
6166 }
6167 }
6168
6169 static void
cfil_sock_udp_unlink_flow(struct socket * so,struct soflow_hash_entry * hash_entry,struct cfil_info * cfil_info)6170 cfil_sock_udp_unlink_flow(struct socket *so, struct soflow_hash_entry *hash_entry, struct cfil_info *cfil_info)
6171 {
6172 if (so == NULL || hash_entry == NULL || cfil_info == NULL) {
6173 return;
6174 }
6175
6176 if (so->so_flags & SOF_CONTENT_FILTER) {
6177 VERIFY(so->so_usecount > 0);
6178 so->so_usecount--;
6179 }
6180
6181 // Hold exclusive lock before clearing cfil_info hash entry link
6182 cfil_rw_lock_exclusive(&cfil_lck_rw);
6183
6184 cfil_info->cfi_hash_entry = NULL;
6185
6186 if (cfil_info->cfi_debug) {
6187 CFIL_LOG(LOG_ERR, "CFIL <%s>: <so %llx> - use count %d",
6188 IS_UDP(so) ? "UDP" : "TCP", (uint64_t)VM_KERNEL_ADDRPERM(so), so->so_usecount);
6189 }
6190
6191 cfil_rw_unlock_exclusive(&cfil_lck_rw);
6192 }
6193
6194 bool
check_port(struct sockaddr * addr,u_short port)6195 check_port(struct sockaddr *addr, u_short port)
6196 {
6197 struct sockaddr_in *sin = NULL;
6198 struct sockaddr_in6 *sin6 = NULL;
6199
6200 if (addr == NULL || port == 0) {
6201 return FALSE;
6202 }
6203
6204 switch (addr->sa_family) {
6205 case AF_INET:
6206 sin = SIN(addr);
6207 if (sin->sin_len < sizeof(*sin)) {
6208 return FALSE;
6209 }
6210 if (port == ntohs(sin->sin_port)) {
6211 return TRUE;
6212 }
6213 break;
6214 case AF_INET6:
6215 sin6 = SIN6(addr);
6216 if (sin6->sin6_len < sizeof(*sin6)) {
6217 return FALSE;
6218 }
6219 if (port == ntohs(sin6->sin6_port)) {
6220 return TRUE;
6221 }
6222 break;
6223 default:
6224 break;
6225 }
6226 return FALSE;
6227 }
6228
6229 cfil_sock_id_t
cfil_sock_id_from_datagram_socket(struct socket * so,struct sockaddr * local,struct sockaddr * remote)6230 cfil_sock_id_from_datagram_socket(struct socket *so, struct sockaddr *local, struct sockaddr *remote)
6231 {
6232 socket_lock_assert_owned(so);
6233
6234 if (so->so_flow_db == NULL) {
6235 return CFIL_SOCK_ID_NONE;
6236 }
6237 return (cfil_sock_id_t)soflow_db_get_feature_context_id(so->so_flow_db, local, remote);
6238 }
6239
6240 static struct cfil_info *
cfil_sock_udp_get_info(struct socket * so,uint32_t filter_control_unit,bool outgoing,struct soflow_hash_entry * hash_entry,struct sockaddr * local,struct sockaddr * remote)6241 cfil_sock_udp_get_info(struct socket *so, uint32_t filter_control_unit, bool outgoing, struct soflow_hash_entry *hash_entry,
6242 struct sockaddr *local, struct sockaddr *remote)
6243 {
6244 int new_filter_control_unit = 0;
6245 struct cfil_info *cfil_info = NULL;
6246
6247 errno_t error = 0;
6248 socket_lock_assert_owned(so);
6249
6250 if (hash_entry == NULL || hash_entry->soflow_db == NULL) {
6251 return NULL;
6252 }
6253
6254 if (hash_entry->soflow_feat_ctxt != NULL && hash_entry->soflow_feat_ctxt_id != 0) {
6255 /* Drop pre-existing UDP flow if filter state changed */
6256 cfil_info = (struct cfil_info *) hash_entry->soflow_feat_ctxt;
6257 new_filter_control_unit = necp_socket_get_content_filter_control_unit(so);
6258 if (new_filter_control_unit > 0 &&
6259 new_filter_control_unit != cfil_info->cfi_filter_control_unit) {
6260 if (DO_PRESERVE_CONNECTIONS) {
6261 cfil_info->cfi_filter_control_unit = new_filter_control_unit;
6262 } else {
6263 CFIL_LOG(LOG_NOTICE, "CFIL: UDP(%s) <so %llx> - filter state changed - dropped pre-existing flow (old state 0x%x new state 0x%x)",
6264 outgoing ? "OUT" : "IN", (uint64_t)VM_KERNEL_ADDRPERM(so),
6265 cfil_info->cfi_filter_control_unit, new_filter_control_unit);
6266 return NULL;
6267 }
6268 }
6269 return cfil_info;
6270 }
6271
6272 cfil_info = cfil_info_alloc(so, hash_entry);
6273 if (cfil_info == NULL) {
6274 CFIL_LOG(LOG_ERR, "CFIL: <so %llx> UDP failed to alloc cfil_info", (uint64_t)VM_KERNEL_ADDRPERM(so));
6275 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_no_mem);
6276 return NULL;
6277 }
6278 cfil_info->cfi_filter_control_unit = filter_control_unit;
6279 cfil_info->cfi_dir = outgoing ? CFS_CONNECTION_DIR_OUT : CFS_CONNECTION_DIR_IN;
6280 cfil_info->cfi_debug = DEBUG_FLOW(sotoinpcb(so), so, local, remote);
6281 if (cfil_info->cfi_debug) {
6282 CFIL_LOG(LOG_ERR, "CFIL: <so %llx> UDP (outgoing %d) - debug flow with port %d", (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing, cfil_log_port);
6283 CFIL_LOG(LOG_ERR, "CFIL: <so %llx> UDP so_gencnt %llx entry flowhash %x cfil %p sockID %llu <%llx>",
6284 (uint64_t)VM_KERNEL_ADDRPERM(so), so->so_gencnt, hash_entry->soflow_flowhash, cfil_info, cfil_info->cfi_sock_id, cfil_info->cfi_sock_id);
6285 }
6286
6287 if (cfil_info_attach_unit(so, filter_control_unit, cfil_info) == 0) {
6288 CFIL_INFO_FREE(cfil_info);
6289 CFIL_LOG(LOG_ERR, "CFIL: <so %llx> UDP cfil_info_attach_unit(%u) failed",
6290 (uint64_t)VM_KERNEL_ADDRPERM(so), filter_control_unit);
6291 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_failed);
6292 return NULL;
6293 }
6294
6295 if (cfil_info->cfi_debug) {
6296 CFIL_LOG(LOG_ERR, "CFIL: UDP <so %llx> filter_control_unit %u sockID %llu <%llx> attached",
6297 (uint64_t)VM_KERNEL_ADDRPERM(so),
6298 filter_control_unit, cfil_info->cfi_sock_id, cfil_info->cfi_sock_id);
6299 }
6300
6301 so->so_flags |= SOF_CONTENT_FILTER;
6302 OSIncrementAtomic(&cfil_stats.cfs_sock_attached);
6303
6304 /* Hold a reference on the socket for each flow */
6305 so->so_usecount++;
6306
6307 /* link cfil_info to flow */
6308 hash_entry->soflow_feat_ctxt = cfil_info;
6309 hash_entry->soflow_feat_ctxt_id = cfil_info->cfi_sock_id;
6310
6311 if (cfil_info->cfi_debug) {
6312 cfil_info_log(LOG_ERR, cfil_info, "CFIL: ADDED");
6313 }
6314
6315 error = cfil_dispatch_attach_event(so, cfil_info, 0,
6316 outgoing ? CFS_CONNECTION_DIR_OUT : CFS_CONNECTION_DIR_IN);
6317 /* We can recover from flow control or out of memory errors */
6318 if (error != 0 && error != ENOBUFS && error != ENOMEM) {
6319 CFIL_LOG(LOG_ERR, "CFIL: UDP <so %llx> cfil_dispatch_attach_event failed <error %d>",
6320 (uint64_t)VM_KERNEL_ADDRPERM(so), error);
6321 return NULL;
6322 }
6323
6324 CFIL_INFO_VERIFY(cfil_info);
6325 return cfil_info;
6326 }
6327
6328 errno_t
cfil_sock_udp_handle_data(bool outgoing,struct socket * so,struct sockaddr * local,struct sockaddr * remote,struct mbuf * data,struct mbuf * control,uint32_t flags,struct soflow_hash_entry * hash_entry)6329 cfil_sock_udp_handle_data(bool outgoing, struct socket *so,
6330 struct sockaddr *local, struct sockaddr *remote,
6331 struct mbuf *data, struct mbuf *control, uint32_t flags,
6332 struct soflow_hash_entry *hash_entry)
6333 {
6334 #pragma unused(outgoing, so, local, remote, data, control, flags)
6335 errno_t error = 0;
6336 uint32_t filter_control_unit;
6337 struct cfil_info *cfil_info = NULL;
6338
6339 socket_lock_assert_owned(so);
6340
6341 if (cfil_active_count == 0) {
6342 CFIL_LOG(LOG_DEBUG, "CFIL: UDP no active filter");
6343 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_in_vain);
6344 return error;
6345 }
6346
6347 // Socket has been blessed
6348 if ((so->so_flags1 & SOF1_CONTENT_FILTER_SKIP) != 0) {
6349 return error;
6350 }
6351
6352 filter_control_unit = necp_socket_get_content_filter_control_unit(so);
6353 if (filter_control_unit == 0) {
6354 CFIL_LOG(LOG_DEBUG, "CFIL: UDP failed to get control unit");
6355 return error;
6356 }
6357
6358 if (filter_control_unit == NECP_FILTER_UNIT_NO_FILTER) {
6359 return error;
6360 }
6361
6362 if ((filter_control_unit & NECP_MASK_USERSPACE_ONLY) != 0) {
6363 CFIL_LOG(LOG_DEBUG, "CFIL: UDP user space only");
6364 OSIncrementAtomic(&cfil_stats.cfs_sock_userspace_only);
6365 return error;
6366 }
6367
6368 if (hash_entry == NULL) {
6369 CFIL_LOG(LOG_ERR, "CFIL: <so %llx> NULL soflow_hash_entry", (uint64_t)VM_KERNEL_ADDRPERM(so));
6370 return EPIPE;
6371 }
6372
6373 if (hash_entry->soflow_db == NULL) {
6374 CFIL_LOG(LOG_ERR, "CFIL: <so %llx> NULL soflow_hash_entry db", (uint64_t)VM_KERNEL_ADDRPERM(so));
6375 return EPIPE;
6376 }
6377
6378 cfil_info = cfil_sock_udp_get_info(so, filter_control_unit, outgoing, hash_entry, local, remote);
6379 if (cfil_info == NULL) {
6380 return EPIPE;
6381 }
6382 // Update last used timestamp, this is for flow Idle TO
6383
6384 if (cfil_info->cfi_debug) {
6385 cfil_info_log(LOG_ERR, cfil_info, "CFIL: Got flow");
6386 }
6387
6388 if (cfil_info->cfi_flags & CFIF_DROP) {
6389 if (cfil_info->cfi_debug) {
6390 cfil_info_log(LOG_ERR, cfil_info, "CFIL: UDP DROP");
6391 }
6392 return EPIPE;
6393 }
6394 if (control != NULL) {
6395 OSIncrementAtomic(&cfil_stats.cfs_data_in_control);
6396 }
6397 if (data->m_type == MT_OOBDATA) {
6398 CFIL_LOG(LOG_ERR, "so %llx MSG_OOB",
6399 (uint64_t)VM_KERNEL_ADDRPERM(so));
6400 OSIncrementAtomic(&cfil_stats.cfs_data_in_oob);
6401 }
6402
6403 error = cfil_data_common(so, cfil_info, outgoing, remote, data, control, flags);
6404
6405 return error;
6406 }
6407
6408 struct cfil_udp_attached_context {
6409 bool need_wait;
6410 lck_mtx_t *mutex_held;
6411 int attached;
6412 };
6413
6414 static bool
cfil_filters_udp_attached_per_flow(struct socket * so,struct soflow_hash_entry * hash_entry,void * context)6415 cfil_filters_udp_attached_per_flow(struct socket *so,
6416 struct soflow_hash_entry *hash_entry,
6417 void *context)
6418 {
6419 struct cfil_udp_attached_context *apply_context = NULL;
6420 struct cfil_info * __single cfil_info = NULL;
6421 struct cfil_entry *entry = NULL;
6422 uint64_t sock_flow_id = 0;
6423 struct timespec ts;
6424 errno_t error = 0;
6425 int kcunit;
6426
6427 if (hash_entry->soflow_feat_ctxt == NULL || context == NULL) {
6428 return true;
6429 }
6430
6431 cfil_info = hash_entry->soflow_feat_ctxt;
6432 apply_context = (struct cfil_udp_attached_context *)context;
6433
6434 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
6435 entry = &cfil_info->cfi_entries[kcunit - 1];
6436
6437 /* Are we attached to the filter? */
6438 if (entry->cfe_filter == NULL) {
6439 continue;
6440 }
6441
6442 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) {
6443 continue;
6444 }
6445 if ((entry->cfe_flags & CFEF_CFIL_DETACHED) != 0) {
6446 continue;
6447 }
6448
6449 if (apply_context->need_wait == TRUE) {
6450 if (cfil_info->cfi_debug) {
6451 cfil_info_log(LOG_ERR, cfil_info, "CFIL: UDP PER-FLOW WAIT FOR FLOW TO FINISH");
6452 }
6453
6454 ts.tv_sec = cfil_close_wait_timeout / 1000;
6455 ts.tv_nsec = (cfil_close_wait_timeout % 1000) * NSEC_PER_USEC * 1000;
6456
6457 OSIncrementAtomic(&cfil_stats.cfs_close_wait);
6458 cfil_info->cfi_flags |= CFIF_CLOSE_WAIT;
6459 sock_flow_id = cfil_info->cfi_sock_id;
6460
6461 error = msleep((caddr_t)cfil_info, apply_context->mutex_held,
6462 PSOCK | PCATCH, "cfil_filters_udp_attached_per_flow", &ts);
6463
6464 // Woke up from sleep, validate if cfil_info is still valid
6465 if (so->so_flow_db == NULL ||
6466 (cfil_info != soflow_db_get_feature_context(so->so_flow_db, sock_flow_id))) {
6467 // cfil_info is not valid, do not continue
6468 return false;
6469 }
6470
6471 cfil_info->cfi_flags &= ~CFIF_CLOSE_WAIT;
6472
6473 if (cfil_info->cfi_debug) {
6474 cfil_info_log(LOG_ERR, cfil_info, "CFIL: UDP PER-FLOW WAIT FOR FLOW DONE");
6475 }
6476
6477 /*
6478 * Force close in case of timeout
6479 */
6480 if (error != 0) {
6481 OSIncrementAtomic(&cfil_stats.cfs_close_wait_timeout);
6482
6483 if (cfil_info->cfi_debug) {
6484 cfil_info_log(LOG_ERR, cfil_info, "CFIL: UDP PER-FLOW WAIT FOR FLOW TIMED OUT, FORCE DETACH");
6485 }
6486
6487 entry->cfe_flags |= CFEF_CFIL_DETACHED;
6488 return false;
6489 }
6490 }
6491 apply_context->attached = 1;
6492 return false;
6493 }
6494 return true;
6495 }
6496
6497 /*
6498 * Go through all UDP flows for specified socket and returns TRUE if
6499 * any flow is still attached. If need_wait is TRUE, wait on first
6500 * attached flow.
6501 */
6502 static int
cfil_filters_udp_attached(struct socket * so,bool need_wait)6503 cfil_filters_udp_attached(struct socket *so, bool need_wait)
6504 {
6505 struct cfil_udp_attached_context apply_context = { 0 };
6506 lck_mtx_t *mutex_held;
6507
6508 socket_lock_assert_owned(so);
6509
6510 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_flow_db != NULL) {
6511 if (so->so_proto->pr_getlock != NULL) {
6512 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
6513 } else {
6514 mutex_held = so->so_proto->pr_domain->dom_mtx;
6515 }
6516 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
6517
6518 apply_context.need_wait = need_wait;
6519 apply_context.mutex_held = mutex_held;
6520 soflow_db_apply(so->so_flow_db, cfil_filters_udp_attached_per_flow, (void *)&apply_context);
6521 }
6522
6523 return apply_context.attached;
6524 }
6525
6526 struct cfil_udp_data_pending_context {
6527 struct sockbuf *sb;
6528 uint64_t total_pending;
6529 };
6530
6531 static bool
cfil_sock_udp_data_pending_per_flow(struct socket * so,struct soflow_hash_entry * hash_entry,void * context)6532 cfil_sock_udp_data_pending_per_flow(struct socket *so,
6533 struct soflow_hash_entry *hash_entry,
6534 void *context)
6535 {
6536 #pragma unused(so)
6537 struct cfil_udp_data_pending_context *apply_context = NULL;
6538 struct cfil_info * __single cfil_info = NULL;
6539 struct cfi_buf *cfi_buf;
6540
6541 uint64_t pending = 0;
6542
6543 if (hash_entry->soflow_feat_ctxt == NULL || context == NULL) {
6544 return true;
6545 }
6546
6547 cfil_info = hash_entry->soflow_feat_ctxt;
6548 apply_context = (struct cfil_udp_data_pending_context *)context;
6549
6550 if (apply_context->sb == NULL) {
6551 return true;
6552 }
6553
6554 if ((apply_context->sb->sb_flags & SB_RECV) == 0) {
6555 cfi_buf = &cfil_info->cfi_snd;
6556 } else {
6557 cfi_buf = &cfil_info->cfi_rcv;
6558 }
6559
6560 pending = cfi_buf->cfi_pending_last - cfi_buf->cfi_pending_first;
6561 /*
6562 * If we are limited by the "chars of mbufs used" roughly
6563 * adjust so we won't overcommit
6564 */
6565 if ((uint64_t)cfi_buf->cfi_pending_mbcnt > pending) {
6566 pending = cfi_buf->cfi_pending_mbcnt;
6567 }
6568
6569 apply_context->total_pending += pending;
6570 return true;
6571 }
6572
6573 int32_t
cfil_sock_udp_data_pending(struct sockbuf * sb,bool check_thread)6574 cfil_sock_udp_data_pending(struct sockbuf *sb, bool check_thread)
6575 {
6576 struct cfil_udp_data_pending_context apply_context = { 0 };
6577 struct socket *so = sb->sb_so;
6578
6579 socket_lock_assert_owned(so);
6580
6581 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_flow_db != NULL &&
6582 (check_thread == FALSE || so->so_snd.sb_cfil_thread != current_thread())) {
6583 apply_context.sb = sb;
6584 soflow_db_apply(so->so_flow_db, cfil_sock_udp_data_pending_per_flow, (void *)&apply_context);
6585
6586 VERIFY(apply_context.total_pending < INT32_MAX);
6587 }
6588
6589 return (int32_t)(apply_context.total_pending);
6590 }
6591
6592 struct cfil_udp_notify_shutdown_context {
6593 int how;
6594 int drop_flag;
6595 int shut_flag;
6596 int done_count;
6597 };
6598
6599 static bool
cfil_sock_udp_notify_shutdown_per_flow(struct socket * so,struct soflow_hash_entry * hash_entry,void * context)6600 cfil_sock_udp_notify_shutdown_per_flow(struct socket *so,
6601 struct soflow_hash_entry *hash_entry,
6602 void *context)
6603 {
6604 struct cfil_udp_notify_shutdown_context *apply_context = NULL;
6605 struct cfil_info * __single cfil_info = NULL;
6606 errno_t error = 0;
6607 int kcunit;
6608
6609 if (hash_entry->soflow_feat_ctxt == NULL || context == NULL) {
6610 return true;
6611 }
6612
6613 cfil_info = hash_entry->soflow_feat_ctxt;
6614 apply_context = (struct cfil_udp_notify_shutdown_context *)context;
6615
6616 // This flow is marked as DROP
6617 if (cfil_info->cfi_flags & apply_context->drop_flag) {
6618 apply_context->done_count++;
6619 return true;
6620 }
6621
6622 // This flow has been shut already, skip
6623 if (cfil_info->cfi_flags & apply_context->shut_flag) {
6624 return true;
6625 }
6626 // Mark flow as shut
6627 cfil_info->cfi_flags |= apply_context->shut_flag;
6628 apply_context->done_count++;
6629
6630 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
6631 /* Disconnect incoming side */
6632 if (apply_context->how != SHUT_WR) {
6633 error = cfil_dispatch_disconnect_event(so, cfil_info, kcunit, 0);
6634 }
6635 /* Disconnect outgoing side */
6636 if (apply_context->how != SHUT_RD) {
6637 error = cfil_dispatch_disconnect_event(so, cfil_info, kcunit, 1);
6638 }
6639 }
6640
6641 if (cfil_info->cfi_debug) {
6642 cfil_info_log(LOG_ERR, cfil_info, "CFIL: UDP PER-FLOW NOTIFY_SHUTDOWN");
6643 }
6644
6645 return true;
6646 }
6647
6648 int
cfil_sock_udp_notify_shutdown(struct socket * so,int how,int drop_flag,int shut_flag)6649 cfil_sock_udp_notify_shutdown(struct socket *so, int how, int drop_flag, int shut_flag)
6650 {
6651 struct cfil_udp_notify_shutdown_context apply_context = { 0 };
6652 errno_t error = 0;
6653
6654 socket_lock_assert_owned(so);
6655
6656 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_flow_db != NULL) {
6657 apply_context.how = how;
6658 apply_context.drop_flag = drop_flag;
6659 apply_context.shut_flag = shut_flag;
6660
6661 soflow_db_apply(so->so_flow_db, cfil_sock_udp_notify_shutdown_per_flow, (void *)&apply_context);
6662 }
6663
6664 if (apply_context.done_count == 0) {
6665 error = ENOTCONN;
6666 }
6667 return error;
6668 }
6669
6670 int
cfil_sock_udp_shutdown(struct socket * so,int * how)6671 cfil_sock_udp_shutdown(struct socket *so, int *how)
6672 {
6673 int error = 0;
6674
6675 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || (so->so_flow_db == NULL)) {
6676 goto done;
6677 }
6678
6679 socket_lock_assert_owned(so);
6680
6681 CFIL_LOG(LOG_INFO, "so %llx how %d",
6682 (uint64_t)VM_KERNEL_ADDRPERM(so), *how);
6683
6684 /*
6685 * Check the state of the socket before the content filter
6686 */
6687 if (*how != SHUT_WR && (so->so_state & SS_CANTRCVMORE) != 0) {
6688 /* read already shut down */
6689 error = ENOTCONN;
6690 goto done;
6691 }
6692 if (*how != SHUT_RD && (so->so_state & SS_CANTSENDMORE) != 0) {
6693 /* write already shut down */
6694 error = ENOTCONN;
6695 goto done;
6696 }
6697
6698 /*
6699 * shutdown read: SHUT_RD or SHUT_RDWR
6700 */
6701 if (*how != SHUT_WR) {
6702 error = cfil_sock_udp_notify_shutdown(so, SHUT_RD, CFIF_DROP, CFIF_SHUT_RD);
6703 if (error != 0) {
6704 goto done;
6705 }
6706 }
6707 /*
6708 * shutdown write: SHUT_WR or SHUT_RDWR
6709 */
6710 if (*how != SHUT_RD) {
6711 error = cfil_sock_udp_notify_shutdown(so, SHUT_WR, CFIF_DROP, CFIF_SHUT_WR);
6712 if (error != 0) {
6713 goto done;
6714 }
6715
6716 /*
6717 * When outgoing data is pending, we delay the shutdown at the
6718 * protocol level until the content filters give the final
6719 * verdict on the pending data.
6720 */
6721 if (cfil_sock_data_pending(&so->so_snd) != 0) {
6722 /*
6723 * When shutting down the read and write sides at once
6724 * we can proceed to the final shutdown of the read
6725 * side. Otherwise, we just return.
6726 */
6727 if (*how == SHUT_WR) {
6728 error = EJUSTRETURN;
6729 } else if (*how == SHUT_RDWR) {
6730 *how = SHUT_RD;
6731 }
6732 }
6733 }
6734 done:
6735 return error;
6736 }
6737
6738 void
cfil_sock_udp_close_wait(struct socket * so)6739 cfil_sock_udp_close_wait(struct socket *so)
6740 {
6741 socket_lock_assert_owned(so);
6742
6743 while (cfil_filters_udp_attached(so, FALSE)) {
6744 /*
6745 * Notify the filters we are going away so they can detach
6746 */
6747 cfil_sock_udp_notify_shutdown(so, SHUT_RDWR, 0, 0);
6748
6749 /*
6750 * Make sure we need to wait after the filter are notified
6751 * of the disconnection
6752 */
6753 if (cfil_filters_udp_attached(so, TRUE) == 0) {
6754 break;
6755 }
6756 }
6757 }
6758
6759 static bool
cfil_sock_udp_is_closed_per_flow(struct socket * so,struct soflow_hash_entry * hash_entry,void * context)6760 cfil_sock_udp_is_closed_per_flow(struct socket *so,
6761 struct soflow_hash_entry *hash_entry,
6762 void *context)
6763 {
6764 #pragma unused(context)
6765 struct cfil_info * __single cfil_info = NULL;
6766 errno_t error = 0;
6767 int kcunit;
6768
6769 if (hash_entry->soflow_feat_ctxt == NULL) {
6770 return true;
6771 }
6772
6773 cfil_info = hash_entry->soflow_feat_ctxt;
6774
6775 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
6776 /* Let the filters know of the closing */
6777 error = cfil_dispatch_closed_event(so, cfil_info, kcunit);
6778 }
6779
6780 /* Last chance to push passed data out */
6781 error = cfil_acquire_sockbuf(so, cfil_info, 1);
6782 if (error == 0) {
6783 cfil_service_inject_queue(so, cfil_info, 1);
6784 }
6785 cfil_release_sockbuf(so, 1);
6786
6787 cfil_info->cfi_flags |= CFIF_SOCK_CLOSED;
6788
6789 /* Pending data needs to go */
6790 cfil_flush_queues(so, cfil_info);
6791
6792 CFIL_INFO_VERIFY(cfil_info);
6793
6794 if (cfil_info->cfi_debug) {
6795 cfil_info_log(LOG_ERR, cfil_info, "CFIL: UDP PER-FLOW IS_CLOSED");
6796 }
6797
6798 return true;
6799 }
6800
6801 void
cfil_sock_udp_is_closed(struct socket * so)6802 cfil_sock_udp_is_closed(struct socket *so)
6803 {
6804 socket_lock_assert_owned(so);
6805
6806 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_flow_db != NULL) {
6807 soflow_db_apply(so->so_flow_db, cfil_sock_udp_is_closed_per_flow, NULL);
6808 }
6809 }
6810
6811 static bool
cfil_sock_udp_buf_update_per_flow(struct socket * so,struct soflow_hash_entry * hash_entry,void * context)6812 cfil_sock_udp_buf_update_per_flow(struct socket *so,
6813 struct soflow_hash_entry *hash_entry,
6814 void *context)
6815 {
6816 struct cfil_info * __single cfil_info = NULL;
6817 struct sockbuf *sb = NULL;
6818 errno_t error = 0;
6819 int outgoing;
6820
6821 if (hash_entry->soflow_feat_ctxt == NULL || context == NULL) {
6822 return true;
6823 }
6824
6825 cfil_info = hash_entry->soflow_feat_ctxt;
6826 sb = (struct sockbuf *) context;
6827
6828 if ((sb->sb_flags & SB_RECV) == 0) {
6829 if ((cfil_info->cfi_flags & CFIF_RETRY_INJECT_OUT) == 0) {
6830 return true;
6831 }
6832 outgoing = 1;
6833 OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_retry);
6834 } else {
6835 if ((cfil_info->cfi_flags & CFIF_RETRY_INJECT_IN) == 0) {
6836 return true;
6837 }
6838 outgoing = 0;
6839 OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_retry);
6840 }
6841
6842 CFIL_LOG(LOG_NOTICE, "so %llx outgoing %d",
6843 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);
6844
6845 error = cfil_acquire_sockbuf(so, cfil_info, outgoing);
6846 if (error == 0) {
6847 cfil_service_inject_queue(so, cfil_info, outgoing);
6848 }
6849 cfil_release_sockbuf(so, outgoing);
6850 return true;
6851 }
6852
6853 void
cfil_sock_udp_buf_update(struct sockbuf * sb)6854 cfil_sock_udp_buf_update(struct sockbuf *sb)
6855 {
6856 struct socket *so = sb->sb_so;
6857
6858 socket_lock_assert_owned(so);
6859
6860 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_flow_db != NULL) {
6861 if (!cfil_sbtrim) {
6862 return;
6863 }
6864 soflow_db_apply(so->so_flow_db, cfil_sock_udp_buf_update_per_flow, (void *)sb);
6865 }
6866 }
6867
6868 void
cfil_filter_show(u_int32_t kcunit)6869 cfil_filter_show(u_int32_t kcunit)
6870 {
6871 struct content_filter *cfc = NULL;
6872 struct cfil_entry *entry;
6873 int count = 0;
6874
6875 if (kcunit > MAX_CONTENT_FILTER) {
6876 return;
6877 }
6878
6879 cfil_rw_lock_shared(&cfil_lck_rw);
6880
6881 if (content_filters[kcunit - 1] == NULL) {
6882 cfil_rw_unlock_shared(&cfil_lck_rw);
6883 return;
6884 }
6885 cfc = content_filters[kcunit - 1];
6886
6887 CFIL_LOG(LOG_DEBUG, "CFIL: FILTER SHOW: Filter <unit %d, entry count %d> flags <%lx>:",
6888 kcunit, cfc->cf_sock_count, (unsigned long)cfc->cf_flags);
6889 if (cfc->cf_flags & CFF_DETACHING) {
6890 CFIL_LOG(LOG_DEBUG, "CFIL: FILTER SHOW:-DETACHING");
6891 }
6892 if (cfc->cf_flags & CFF_ACTIVE) {
6893 CFIL_LOG(LOG_DEBUG, "CFIL: FILTER SHOW:-ACTIVE");
6894 }
6895 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
6896 CFIL_LOG(LOG_DEBUG, "CFIL: FILTER SHOW:-FLOW CONTROLLED");
6897 }
6898
6899 TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) {
6900 if (entry->cfe_cfil_info && entry->cfe_cfil_info->cfi_so) {
6901 struct cfil_info *cfil_info = entry->cfe_cfil_info;
6902
6903 count++;
6904
6905 if (entry->cfe_flags & CFEF_CFIL_DETACHED) {
6906 cfil_info_log(LOG_DEBUG, cfil_info, "CFIL: FILTER SHOW:-DETACHED");
6907 } else {
6908 cfil_info_log(LOG_DEBUG, cfil_info, "CFIL: FILTER SHOW:-ATTACHED");
6909 }
6910 }
6911 }
6912
6913 CFIL_LOG(LOG_DEBUG, "CFIL: FILTER SHOW:Filter - total entries shown: %d", count);
6914
6915 cfil_rw_unlock_shared(&cfil_lck_rw);
6916 }
6917
6918 void
cfil_info_show(void)6919 cfil_info_show(void)
6920 {
6921 struct cfil_info *cfil_info;
6922 int count = 0;
6923
6924 cfil_rw_lock_shared(&cfil_lck_rw);
6925
6926 CFIL_LOG(LOG_DEBUG, "CFIL: INFO SHOW:count %d", cfil_sock_attached_count);
6927
6928 TAILQ_FOREACH(cfil_info, &cfil_sock_head, cfi_link) {
6929 count++;
6930
6931 cfil_info_log(LOG_DEBUG, cfil_info, "CFIL: INFO SHOW");
6932
6933 if (cfil_info->cfi_flags & CFIF_DROP) {
6934 CFIL_LOG(LOG_DEBUG, "CFIL: INFO FLAG - DROP");
6935 }
6936 if (cfil_info->cfi_flags & CFIF_CLOSE_WAIT) {
6937 CFIL_LOG(LOG_DEBUG, "CFIL: INFO FLAG - CLOSE_WAIT");
6938 }
6939 if (cfil_info->cfi_flags & CFIF_SOCK_CLOSED) {
6940 CFIL_LOG(LOG_DEBUG, "CFIL: INFO FLAG - SOCK_CLOSED");
6941 }
6942 if (cfil_info->cfi_flags & CFIF_RETRY_INJECT_IN) {
6943 CFIL_LOG(LOG_DEBUG, "CFIL: INFO FLAG - RETRY_INJECT_IN");
6944 }
6945 if (cfil_info->cfi_flags & CFIF_RETRY_INJECT_OUT) {
6946 CFIL_LOG(LOG_DEBUG, "CFIL: INFO FLAG - RETRY_INJECT_OUT");
6947 }
6948 if (cfil_info->cfi_flags & CFIF_SHUT_WR) {
6949 CFIL_LOG(LOG_DEBUG, "CFIL: INFO FLAG - SHUT_WR");
6950 }
6951 if (cfil_info->cfi_flags & CFIF_SHUT_RD) {
6952 CFIL_LOG(LOG_DEBUG, "CFIL: INFO FLAG - SHUT_RD");
6953 }
6954 }
6955
6956 CFIL_LOG(LOG_DEBUG, "CFIL: INFO SHOW:total cfil_info shown: %d", count);
6957
6958 cfil_rw_unlock_shared(&cfil_lck_rw);
6959 }
6960
6961 bool
cfil_info_action_timed_out(struct cfil_info * cfil_info,int timeout)6962 cfil_info_action_timed_out(struct cfil_info *cfil_info, int timeout)
6963 {
6964 struct cfil_entry *entry;
6965 struct timeval current_tv;
6966 struct timeval diff_time;
6967
6968 if (cfil_info == NULL) {
6969 return false;
6970 }
6971
6972 /*
6973 * If we have queued up more data than passed offset and we haven't received
6974 * an action from user space for a while (the user space filter might have crashed),
6975 * return action timed out.
6976 */
6977 if (cfil_info->cfi_snd.cfi_pending_last > cfil_info->cfi_snd.cfi_pass_offset ||
6978 cfil_info->cfi_rcv.cfi_pending_last > cfil_info->cfi_rcv.cfi_pass_offset) {
6979 microuptime(¤t_tv);
6980
6981 for (int kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
6982 entry = &cfil_info->cfi_entries[kcunit - 1];
6983
6984 if (entry->cfe_filter == NULL) {
6985 continue;
6986 }
6987
6988 if (cfil_info->cfi_snd.cfi_pending_last > entry->cfe_snd.cfe_pass_offset ||
6989 cfil_info->cfi_rcv.cfi_pending_last > entry->cfe_rcv.cfe_pass_offset) {
6990 // haven't gotten an action from this filter, check timeout
6991 timersub(¤t_tv, &entry->cfe_last_action, &diff_time);
6992 if (diff_time.tv_sec >= timeout) {
6993 if (cfil_info->cfi_debug) {
6994 cfil_info_log(LOG_ERR, cfil_info, "CFIL: flow ACTION timeout expired");
6995 }
6996 return true;
6997 }
6998 }
6999 }
7000 }
7001 return false;
7002 }
7003
7004 bool
cfil_info_buffer_threshold_exceeded(struct cfil_info * cfil_info)7005 cfil_info_buffer_threshold_exceeded(struct cfil_info *cfil_info)
7006 {
7007 if (cfil_info == NULL) {
7008 return false;
7009 }
7010
7011 /*
7012 * Clean up flow if it exceeded queue thresholds
7013 */
7014 if (cfil_info->cfi_snd.cfi_tail_drop_cnt ||
7015 cfil_info->cfi_rcv.cfi_tail_drop_cnt) {
7016 if (cfil_info->cfi_debug) {
7017 CFIL_LOG(LOG_ERR, "CFIL: queue threshold exceeded:mbuf max < count: %d bytes: %d > tail drop count < OUT: %d IN: %d > ",
7018 cfil_udp_gc_mbuf_num_max,
7019 cfil_udp_gc_mbuf_cnt_max,
7020 cfil_info->cfi_snd.cfi_tail_drop_cnt,
7021 cfil_info->cfi_rcv.cfi_tail_drop_cnt);
7022 cfil_info_log(LOG_ERR, cfil_info, "CFIL: queue threshold exceeded");
7023 }
7024 return true;
7025 }
7026
7027 return false;
7028 }
7029
7030 static bool
cfil_dgram_gc_needed(struct socket * so,struct soflow_hash_entry * hash_entry,u_int64_t current_time)7031 cfil_dgram_gc_needed(struct socket *so, struct soflow_hash_entry *hash_entry, u_int64_t current_time)
7032 {
7033 #pragma unused(current_time)
7034 struct cfil_info *cfil_info = NULL;
7035
7036 if (so == NULL || hash_entry == NULL || hash_entry->soflow_feat_ctxt == NULL) {
7037 return false;
7038 }
7039 cfil_info = (struct cfil_info *) hash_entry->soflow_feat_ctxt;
7040
7041 cfil_rw_lock_shared(&cfil_lck_rw);
7042
7043 if (cfil_info_action_timed_out(cfil_info, UDP_FLOW_GC_ACTION_TO) ||
7044 cfil_info_buffer_threshold_exceeded(cfil_info)) {
7045 if (cfil_info->cfi_debug) {
7046 cfil_info_log(LOG_ERR, cfil_info, "CFIL: UDP PER-FLOW GC NEEDED");
7047 }
7048 cfil_rw_unlock_shared(&cfil_lck_rw);
7049 return true;
7050 }
7051
7052 cfil_rw_unlock_shared(&cfil_lck_rw);
7053 return false;
7054 }
7055
7056 static bool
cfil_dgram_gc_perform(struct socket * so,struct soflow_hash_entry * hash_entry)7057 cfil_dgram_gc_perform(struct socket *so, struct soflow_hash_entry *hash_entry)
7058 {
7059 struct cfil_info *cfil_info = NULL;
7060
7061 if (so == NULL || hash_entry == NULL || hash_entry->soflow_feat_ctxt == NULL) {
7062 return false;
7063 }
7064 cfil_info = (struct cfil_info *) hash_entry->soflow_feat_ctxt;
7065
7066 if (cfil_info->cfi_debug) {
7067 cfil_info_log(LOG_ERR, cfil_info, "CFIL: UDP PER-FLOW GC PERFORM");
7068 }
7069
7070 for (int kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
7071 /* Let the filters know of the closing */
7072 cfil_dispatch_closed_event(so, cfil_info, kcunit);
7073 }
7074 cfil_sock_udp_unlink_flow(so, hash_entry, cfil_info);
7075 CFIL_INFO_FREE(cfil_info);
7076 OSIncrementAtomic(&cfil_stats.cfs_sock_detached);
7077 return true;
7078 }
7079
7080 static bool
cfil_dgram_detach_entry(struct socket * so,struct soflow_hash_entry * hash_entry)7081 cfil_dgram_detach_entry(struct socket *so, struct soflow_hash_entry *hash_entry)
7082 {
7083 struct cfil_info *cfil_info = NULL;
7084
7085 if (hash_entry == NULL || hash_entry->soflow_feat_ctxt == NULL) {
7086 return true;
7087 }
7088 cfil_info = (struct cfil_info *) hash_entry->soflow_feat_ctxt;
7089
7090 if (cfil_info->cfi_debug) {
7091 cfil_info_log(LOG_ERR, cfil_info, "CFIL: DGRAM DETACH ENTRY");
7092 }
7093
7094 cfil_sock_udp_unlink_flow(so, hash_entry, cfil_info);
7095 CFIL_INFO_FREE(cfil_info);
7096 OSIncrementAtomic(&cfil_stats.cfs_sock_detached);
7097
7098 return true;
7099 }
7100
7101 static bool
cfil_dgram_detach_db(struct socket * so,struct soflow_db * db)7102 cfil_dgram_detach_db(struct socket *so, struct soflow_db *db)
7103 {
7104 #pragma unused(db)
7105 if (so && so->so_flags & SOF_CONTENT_FILTER) {
7106 so->so_flags &= ~SOF_CONTENT_FILTER;
7107 CFIL_LOG(LOG_DEBUG, "CFIL: DGRAM DETACH DB <so %llx>", (uint64_t)VM_KERNEL_ADDRPERM(so));
7108 }
7109 return true;
7110 }
7111
7112 struct m_tag *
cfil_dgram_save_socket_state(struct cfil_info * cfil_info,struct mbuf * m)7113 cfil_dgram_save_socket_state(struct cfil_info *cfil_info, struct mbuf *m)
7114 {
7115 struct m_tag *tag = NULL;
7116 struct cfil_tag *ctag = NULL;
7117 struct soflow_hash_entry *hash_entry = NULL;
7118 struct inpcb *inp = NULL;
7119
7120 if (cfil_info == NULL || cfil_info->cfi_so == NULL ||
7121 cfil_info->cfi_hash_entry == NULL || m == NULL || !(m->m_flags & M_PKTHDR)) {
7122 return NULL;
7123 }
7124
7125 inp = sotoinpcb(cfil_info->cfi_so);
7126
7127 /* Allocate a tag */
7128 tag = m_tag_create(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_CFIL_UDP,
7129 sizeof(struct cfil_tag), M_DONTWAIT, m);
7130
7131 if (tag) {
7132 ctag = (struct cfil_tag *)(tag->m_tag_data);
7133 ctag->cfil_so_state_change_cnt = cfil_info->cfi_so->so_state_change_cnt;
7134 ctag->cfil_so_options = cfil_info->cfi_so->so_options;
7135 ctag->cfil_inp_flags = inp ? inp->inp_flags : 0;
7136
7137 hash_entry = cfil_info->cfi_hash_entry;
7138 if (hash_entry->soflow_family == AF_INET6) {
7139 fill_ip6_sockaddr_4_6(&ctag->cfil_faddr,
7140 &hash_entry->soflow_faddr.addr6,
7141 hash_entry->soflow_fport, hash_entry->soflow_faddr6_ifscope);
7142 } else if (hash_entry->soflow_family == AF_INET) {
7143 fill_ip_sockaddr_4_6(&ctag->cfil_faddr,
7144 hash_entry->soflow_faddr.addr46.ia46_addr4,
7145 hash_entry->soflow_fport);
7146 }
7147 m_tag_prepend(m, tag);
7148 return tag;
7149 }
7150 return NULL;
7151 }
7152
7153 struct m_tag *
cfil_dgram_get_socket_state(struct mbuf * m,uint32_t * state_change_cnt,uint32_t * options,struct sockaddr ** faddr,int * inp_flags)7154 cfil_dgram_get_socket_state(struct mbuf *m, uint32_t *state_change_cnt, uint32_t *options,
7155 struct sockaddr **faddr, int *inp_flags)
7156 {
7157 struct m_tag *tag = NULL;
7158 struct cfil_tag *ctag = NULL;
7159
7160 tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_CFIL_UDP);
7161 if (tag) {
7162 ctag = (struct cfil_tag *)(tag->m_tag_data);
7163 if (state_change_cnt) {
7164 *state_change_cnt = ctag->cfil_so_state_change_cnt;
7165 }
7166 if (options) {
7167 *options = ctag->cfil_so_options;
7168 }
7169 if (faddr) {
7170 *faddr = SA(&ctag->cfil_faddr);
7171 }
7172 if (inp_flags) {
7173 *inp_flags = ctag->cfil_inp_flags;
7174 }
7175
7176 /*
7177 * Unlink tag and hand it over to caller.
7178 * Note that caller will be responsible to free it.
7179 */
7180 m_tag_unlink(m, tag);
7181 return tag;
7182 }
7183 return NULL;
7184 }
7185
7186 boolean_t
cfil_dgram_peek_socket_state(struct mbuf * m,int * inp_flags)7187 cfil_dgram_peek_socket_state(struct mbuf *m, int *inp_flags)
7188 {
7189 struct m_tag *tag = NULL;
7190 struct cfil_tag *ctag = NULL;
7191
7192 tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_CFIL_UDP);
7193 if (tag) {
7194 ctag = (struct cfil_tag *)(tag->m_tag_data);
7195 if (inp_flags) {
7196 *inp_flags = ctag->cfil_inp_flags;
7197 }
7198 return true;
7199 }
7200 return false;
7201 }
7202
7203 static int
cfil_dispatch_stats_event_locked(int kcunit,struct cfil_stats_report_buffer * buffer,uint32_t stats_count)7204 cfil_dispatch_stats_event_locked(int kcunit, struct cfil_stats_report_buffer *buffer, uint32_t stats_count)
7205 {
7206 struct content_filter *cfc = NULL;
7207 errno_t error = 0;
7208 size_t msgsize = 0;
7209
7210 if (buffer == NULL || stats_count == 0) {
7211 return error;
7212 }
7213
7214 if (kcunit > MAX_CONTENT_FILTER) {
7215 return error;
7216 }
7217
7218 cfc = content_filters[kcunit - 1];
7219 if (cfc == NULL) {
7220 return error;
7221 }
7222
7223 /* Would be wasteful to try */
7224 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
7225 error = ENOBUFS;
7226 goto done;
7227 }
7228
7229 msgsize = sizeof(struct cfil_msg_stats_report) + (sizeof(struct cfil_msg_sock_stats) * stats_count);
7230 buffer->msghdr.cfm_len = (uint32_t)msgsize;
7231 buffer->msghdr.cfm_version = 1;
7232 buffer->msghdr.cfm_type = CFM_TYPE_EVENT;
7233 buffer->msghdr.cfm_op = CFM_OP_STATS;
7234 buffer->msghdr.cfm_sock_id = 0;
7235 buffer->count = stats_count;
7236
7237 if (cfil_log_stats) {
7238 CFIL_LOG(LOG_DEBUG, "STATS (kcunit %d): msg size %lu - %lu %lu %lu",
7239 kcunit,
7240 (unsigned long)msgsize,
7241 (unsigned long)sizeof(struct cfil_msg_stats_report),
7242 (unsigned long)sizeof(struct cfil_msg_sock_stats),
7243 (unsigned long)stats_count);
7244 }
7245
7246 error = ctl_enqueuedata(cfc->cf_kcref, cfc->cf_kcunit,
7247 buffer,
7248 sizeof(struct cfil_stats_report_buffer),
7249 CTL_DATA_EOR);
7250 if (error != 0) {
7251 CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed:%d", error);
7252 goto done;
7253 }
7254 OSIncrementAtomic(&cfil_stats.cfs_stats_event_ok);
7255
7256 if (cfil_log_stats) {
7257 CFIL_LOG(LOG_DEBUG, "CFIL: STATS REPORT:send msg to %d", kcunit);
7258 }
7259 done:
7260
7261 if (error == ENOBUFS) {
7262 OSIncrementAtomic(
7263 &cfil_stats.cfs_stats_event_flow_control);
7264
7265 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
7266 cfil_rw_lock_exclusive(&cfil_lck_rw);
7267 }
7268
7269 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
7270
7271 cfil_rw_lock_exclusive_to_shared(&cfil_lck_rw);
7272 } else if (error != 0) {
7273 OSIncrementAtomic(&cfil_stats.cfs_stats_event_fail);
7274 }
7275
7276 return error;
7277 }
7278
7279 static void
cfil_stats_report_thread_sleep(bool forever)7280 cfil_stats_report_thread_sleep(bool forever)
7281 {
7282 if (cfil_log_stats) {
7283 CFIL_LOG(LOG_DEBUG, "CFIL: STATS COLLECTION SLEEP");
7284 }
7285
7286 if (forever) {
7287 (void) assert_wait((event_t) &cfil_sock_attached_stats_count,
7288 THREAD_INTERRUPTIBLE);
7289 } else {
7290 uint64_t deadline = 0;
7291 nanoseconds_to_absolutetime(CFIL_STATS_REPORT_RUN_INTERVAL_NSEC, &deadline);
7292 clock_absolutetime_interval_to_deadline(deadline, &deadline);
7293
7294 (void) assert_wait_deadline(&cfil_sock_attached_stats_count,
7295 THREAD_INTERRUPTIBLE, deadline);
7296 }
7297 }
7298
7299 static void
cfil_stats_report_thread_func(void * v,wait_result_t w)7300 cfil_stats_report_thread_func(void *v, wait_result_t w)
7301 {
7302 #pragma unused(v, w)
7303
7304 ASSERT(cfil_stats_report_thread == current_thread());
7305 thread_set_thread_name(current_thread(), "CFIL_STATS_REPORT");
7306
7307 // Kick off gc shortly
7308 cfil_stats_report_thread_sleep(false);
7309 thread_block_parameter((thread_continue_t) cfil_stats_report, NULL);
7310 /* NOTREACHED */
7311 }
7312
7313 static bool
cfil_stats_collect_flow_stats_for_filter(int kcunit,struct cfil_info * cfil_info,struct cfil_entry * entry,struct timeval current_tv)7314 cfil_stats_collect_flow_stats_for_filter(int kcunit,
7315 struct cfil_info *cfil_info,
7316 struct cfil_entry *entry,
7317 struct timeval current_tv)
7318 {
7319 struct cfil_stats_report_buffer *buffer = NULL;
7320 struct cfil_msg_sock_stats *flow_array = NULL;
7321 struct cfil_msg_sock_stats *stats = NULL;
7322 struct inpcb *inp = NULL;
7323 struct timeval diff_time;
7324 uint64_t diff_time_usecs;
7325 int index = 0;
7326
7327 if (entry->cfe_stats_report_frequency == 0) {
7328 return false;
7329 }
7330
7331 buffer = global_cfil_stats_report_buffers[kcunit - 1];
7332 if (buffer == NULL) {
7333 CFIL_LOG(LOG_ERR, "CFIL: STATS: no buffer");
7334 return false;
7335 }
7336
7337 timersub(¤t_tv, &entry->cfe_stats_report_ts, &diff_time);
7338 diff_time_usecs = (diff_time.tv_sec * USEC_PER_SEC) + diff_time.tv_usec;
7339
7340 if (cfil_info->cfi_debug && cfil_log_stats) {
7341 CFIL_LOG(LOG_ERR, "CFIL: STATS REPORT - elapsed time - ts %llu %llu cur ts %llu %llu diff %llu %llu(usecs %llu) @freq %llu usecs sockID %llu <%llx>",
7342 (unsigned long long)entry->cfe_stats_report_ts.tv_sec,
7343 (unsigned long long)entry->cfe_stats_report_ts.tv_usec,
7344 (unsigned long long)current_tv.tv_sec,
7345 (unsigned long long)current_tv.tv_usec,
7346 (unsigned long long)diff_time.tv_sec,
7347 (unsigned long long)diff_time.tv_usec,
7348 (unsigned long long)diff_time_usecs,
7349 (unsigned long long)((entry->cfe_stats_report_frequency * NSEC_PER_MSEC) / NSEC_PER_USEC),
7350 cfil_info->cfi_sock_id, cfil_info->cfi_sock_id);
7351 }
7352
7353 // Compare elapsed time in usecs
7354 if (diff_time_usecs >= (entry->cfe_stats_report_frequency * NSEC_PER_MSEC) / NSEC_PER_USEC) {
7355 if (cfil_info->cfi_debug && cfil_log_stats) {
7356 CFIL_LOG(LOG_ERR, "CFIL: STATS REPORT - in %llu reported %llu",
7357 cfil_info->cfi_byte_inbound_count,
7358 entry->cfe_byte_inbound_count_reported);
7359 CFIL_LOG(LOG_ERR, "CFIL: STATS REPORT - out %llu reported %llu",
7360 cfil_info->cfi_byte_outbound_count,
7361 entry->cfe_byte_outbound_count_reported);
7362 }
7363 // Check if flow has new bytes that have not been reported
7364 if (entry->cfe_byte_inbound_count_reported < cfil_info->cfi_byte_inbound_count ||
7365 entry->cfe_byte_outbound_count_reported < cfil_info->cfi_byte_outbound_count) {
7366 flow_array = (struct cfil_msg_sock_stats *)&buffer->stats;
7367 index = global_cfil_stats_counts[kcunit - 1];
7368
7369 stats = &flow_array[index];
7370 stats->cfs_sock_id = cfil_info->cfi_sock_id;
7371 stats->cfs_byte_inbound_count = cfil_info->cfi_byte_inbound_count;
7372 stats->cfs_byte_outbound_count = cfil_info->cfi_byte_outbound_count;
7373
7374 if (entry->cfe_laddr_sent == false) {
7375 /* cache it if necessary */
7376 if (cfil_info->cfi_so_attach_laddr.sa.sa_len == 0) {
7377 inp = cfil_info->cfi_so ? sotoinpcb(cfil_info->cfi_so) : NULL;
7378 if (inp != NULL) {
7379 boolean_t outgoing = (cfil_info->cfi_dir == CFS_CONNECTION_DIR_OUT);
7380 union sockaddr_in_4_6 *src = outgoing ? &cfil_info->cfi_so_attach_laddr : NULL;
7381 union sockaddr_in_4_6 *dst = outgoing ? NULL : &cfil_info->cfi_so_attach_laddr;
7382 cfil_fill_event_msg_addresses(cfil_info->cfi_hash_entry, inp,
7383 src, dst, !IS_INP_V6(inp), outgoing);
7384 }
7385 }
7386
7387 if (cfil_info->cfi_so_attach_laddr.sa.sa_len != 0) {
7388 stats->cfs_laddr.sin6 = cfil_info->cfi_so_attach_laddr.sin6;
7389 entry->cfe_laddr_sent = true;
7390 }
7391 }
7392
7393 global_cfil_stats_counts[kcunit - 1]++;
7394
7395 entry->cfe_stats_report_ts = current_tv;
7396 entry->cfe_byte_inbound_count_reported = cfil_info->cfi_byte_inbound_count;
7397 entry->cfe_byte_outbound_count_reported = cfil_info->cfi_byte_outbound_count;
7398 if (cfil_info->cfi_debug && cfil_log_stats) {
7399 cfil_info_log(LOG_ERR, cfil_info, "CFIL: STATS COLLECTED");
7400 }
7401 CFI_ADD_TIME_LOG(cfil_info, ¤t_tv, &cfil_info->cfi_first_event, CFM_OP_STATS);
7402 return true;
7403 }
7404 }
7405 return false;
7406 }
7407
7408 static void
cfil_stats_report(void * v,wait_result_t w)7409 cfil_stats_report(void *v, wait_result_t w)
7410 {
7411 #pragma unused(v, w)
7412
7413 struct cfil_info *cfil_info = NULL;
7414 struct cfil_entry *entry = NULL;
7415 struct timeval current_tv;
7416 uint32_t flow_count = 0;
7417 uint64_t saved_next_sock_id = 0; // Next sock id to be reported for next loop
7418 bool flow_reported = false;
7419
7420 if (cfil_log_stats) {
7421 CFIL_LOG(LOG_DEBUG, "CFIL: STATS COLLECTION RUNNING");
7422 }
7423
7424 do {
7425 // Collect all sock ids of flows that has new stats
7426 cfil_rw_lock_shared(&cfil_lck_rw);
7427
7428 if (cfil_sock_attached_stats_count == 0) {
7429 if (cfil_log_stats) {
7430 CFIL_LOG(LOG_DEBUG, "CFIL: STATS: no flow");
7431 }
7432 cfil_rw_unlock_shared(&cfil_lck_rw);
7433 goto go_sleep;
7434 }
7435
7436 for (int kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
7437 if (global_cfil_stats_report_buffers[kcunit - 1] != NULL) {
7438 memset(global_cfil_stats_report_buffers[kcunit - 1], 0, sizeof(struct cfil_stats_report_buffer));
7439 }
7440 global_cfil_stats_counts[kcunit - 1] = 0;
7441 }
7442
7443 microuptime(¤t_tv);
7444 flow_count = 0;
7445
7446 TAILQ_FOREACH(cfil_info, &cfil_sock_head_stats, cfi_link_stats) {
7447 if (saved_next_sock_id != 0 &&
7448 saved_next_sock_id == cfil_info->cfi_sock_id) {
7449 // Here is where we left off previously, start accumulating
7450 saved_next_sock_id = 0;
7451 }
7452
7453 if (saved_next_sock_id == 0) {
7454 if (flow_count >= CFIL_STATS_REPORT_MAX_COUNT) {
7455 // Examine a fixed number of flows each round. Remember the current flow
7456 // so we can start from here for next loop
7457 saved_next_sock_id = cfil_info->cfi_sock_id;
7458 break;
7459 }
7460
7461 flow_reported = false;
7462 for (int kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
7463 entry = &cfil_info->cfi_entries[kcunit - 1];
7464 if (entry->cfe_filter == NULL) {
7465 if (cfil_info->cfi_debug && cfil_log_stats) {
7466 CFIL_LOG(LOG_ERR, "CFIL: STATS REPORT - so %llx no filter",
7467 cfil_info->cfi_so ? (uint64_t)VM_KERNEL_ADDRPERM(cfil_info->cfi_so) : 0);
7468 }
7469 continue;
7470 }
7471
7472 if ((entry->cfe_stats_report_frequency > 0) &&
7473 cfil_stats_collect_flow_stats_for_filter(kcunit, cfil_info, entry, current_tv) == true) {
7474 flow_reported = true;
7475 }
7476 }
7477 if (flow_reported == true) {
7478 flow_count++;
7479 }
7480 }
7481 }
7482
7483 if (flow_count > 0) {
7484 if (cfil_log_stats) {
7485 CFIL_LOG(LOG_DEBUG, "CFIL: STATS reporting for %d flows", flow_count);
7486 }
7487 for (int kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
7488 if (global_cfil_stats_report_buffers[kcunit - 1] != NULL &&
7489 global_cfil_stats_counts[kcunit - 1] > 0) {
7490 cfil_dispatch_stats_event_locked(kcunit,
7491 global_cfil_stats_report_buffers[kcunit - 1],
7492 global_cfil_stats_counts[kcunit - 1]);
7493 }
7494 }
7495 } else {
7496 cfil_rw_unlock_shared(&cfil_lck_rw);
7497 goto go_sleep;
7498 }
7499
7500 cfil_rw_unlock_shared(&cfil_lck_rw);
7501
7502 // Loop again if we haven't finished the whole cfil_info list
7503 } while (saved_next_sock_id != 0);
7504
7505 go_sleep:
7506
7507 // Sleep forever (until waken up) if no more flow to report
7508 cfil_rw_lock_shared(&cfil_lck_rw);
7509 cfil_stats_report_thread_sleep(cfil_sock_attached_stats_count == 0 ? true : false);
7510 cfil_rw_unlock_shared(&cfil_lck_rw);
7511 thread_block_parameter((thread_continue_t) cfil_stats_report, NULL);
7512 /* NOTREACHED */
7513 }
7514