1 /*
2 * Copyright (c) 2000-2022 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1990, 1991, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * This code is derived from the Stanford/CMU enet packet filter,
33 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
34 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
35 * Berkeley Laboratory.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * @(#)bpf.c 8.2 (Berkeley) 3/28/94
66 *
67 * $FreeBSD: src/sys/net/bpf.c,v 1.59.2.5 2001/01/05 04:49:09 jdp Exp $
68 */
69 /*
70 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
71 * support for mandatory and extensible security protections. This notice
72 * is included in support of clause 2.2 (b) of the Apple Public License,
73 * Version 2.0.
74 */
75
76 #include "bpf.h"
77
78 #ifndef __GNUC__
79 #define inline
80 #else
81 #define inline __inline
82 #endif
83
84 #include <sys/param.h>
85 #include <sys/systm.h>
86 #include <sys/conf.h>
87 #include <sys/malloc.h>
88 #include <sys/mbuf.h>
89 #include <sys/time.h>
90 #include <sys/proc.h>
91 #include <sys/signalvar.h>
92 #include <sys/filio.h>
93 #include <sys/sockio.h>
94 #include <sys/ttycom.h>
95 #include <sys/filedesc.h>
96 #include <sys/uio_internal.h>
97 #include <sys/file_internal.h>
98 #include <sys/event.h>
99
100 #include <sys/poll.h>
101
102 #include <sys/socket.h>
103 #include <sys/socketvar.h>
104 #include <sys/vnode.h>
105
106 #include <net/if.h>
107 #include <net/bpf.h>
108 #include <net/bpfdesc.h>
109
110 #include <netinet/in.h>
111 #include <netinet/ip.h>
112 #include <netinet/ip6.h>
113 #include <netinet/in_pcb.h>
114 #include <netinet/in_var.h>
115 #include <netinet/ip_var.h>
116 #include <netinet/tcp.h>
117 #include <netinet/tcp_var.h>
118 #include <netinet/udp.h>
119 #include <netinet/udp_var.h>
120 #include <netinet/if_ether.h>
121 #include <netinet/isakmp.h>
122 #include <netinet6/esp.h>
123 #include <sys/kernel.h>
124 #include <sys/sysctl.h>
125 #include <net/firewire.h>
126
127 #include <miscfs/devfs/devfs.h>
128 #include <net/dlil.h>
129 #include <net/pktap.h>
130
131 #include <kern/assert.h>
132 #include <kern/locks.h>
133 #include <kern/thread_call.h>
134 #include <libkern/section_keywords.h>
135
136 #include <os/log.h>
137
138 #include <IOKit/IOBSD.h>
139
140 #define BPF_WRITE_MAX 65535
141
142 extern int tvtohz(struct timeval *);
143 extern char *proc_name_address(void *p);
144
145 #define BPF_BUFSIZE 4096
146 #define UIOMOVE(cp, len, code, uio) uiomove(cp, len, uio)
147
148 #define PRINET 26 /* interruptible */
149
150 #define ISAKMP_HDR_SIZE (sizeof(struct isakmp) + sizeof(struct isakmp_gen))
151 #define ESP_HDR_SIZE sizeof(struct newesp)
152
153 #define BPF_WRITE_LEEWAY 18
154
155 typedef void (*pktcopyfunc_t)(const void *, void *, size_t);
156
157 /*
158 * The default read buffer size is patchable.
159 */
160 static unsigned int bpf_bufsize = BPF_BUFSIZE;
161 SYSCTL_INT(_debug, OID_AUTO, bpf_bufsize, CTLFLAG_RW | CTLFLAG_LOCKED,
162 &bpf_bufsize, 0, "");
163
164 __private_extern__ unsigned int bpf_maxbufsize = BPF_MAXBUFSIZE;
165 static int sysctl_bpf_maxbufsize SYSCTL_HANDLER_ARGS;
166 SYSCTL_PROC(_debug, OID_AUTO, bpf_maxbufsize, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
167 &bpf_maxbufsize, 0,
168 sysctl_bpf_maxbufsize, "I", "Default BPF max buffer size");
169
170 extern const int copysize_limit_panic;
171 #define BPF_BUFSIZE_CAP (copysize_limit_panic >> 1)
172 static int sysctl_bpf_bufsize_cap SYSCTL_HANDLER_ARGS;
173 SYSCTL_PROC(_debug, OID_AUTO, bpf_bufsize_cap, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
174 0, 0,
175 sysctl_bpf_bufsize_cap, "I", "Upper limit on BPF max buffer size");
176
177 #define BPF_MAX_DEVICES 256
178 static unsigned int bpf_maxdevices = BPF_MAX_DEVICES;
179 SYSCTL_UINT(_debug, OID_AUTO, bpf_maxdevices, CTLFLAG_RD | CTLFLAG_LOCKED,
180 &bpf_maxdevices, 0, "");
181
/*
 * bpf_wantpktap controls the default visibility of DLT_PKTAP.
 * On OS X it is off by default, so a process needs to use the ioctl
 * BPF_WANT_PKTAP explicitly to be able to use DLT_PKTAP.
 */
187 #if !XNU_TARGET_OS_OSX
188 static unsigned int bpf_wantpktap = 1;
189 #else /* XNU_TARGET_OS_OSX */
190 static unsigned int bpf_wantpktap = 0;
191 #endif /* XNU_TARGET_OS_OSX */
192 SYSCTL_UINT(_debug, OID_AUTO, bpf_wantpktap, CTLFLAG_RW | CTLFLAG_LOCKED,
193 &bpf_wantpktap, 0, "");
194
195 static int bpf_debug = 0;
196 SYSCTL_INT(_debug, OID_AUTO, bpf_debug, CTLFLAG_RW | CTLFLAG_LOCKED,
197 &bpf_debug, 0, "");
198
199 static unsigned long bpf_trunc_overflow = 0;
200 SYSCTL_ULONG(_debug, OID_AUTO, bpf_trunc_overflow, CTLFLAG_RD | CTLFLAG_LOCKED,
201 &bpf_trunc_overflow, "");
202
203 static int bpf_hdr_comp_enable = 1;
204 SYSCTL_INT(_debug, OID_AUTO, bpf_hdr_comp_enable, CTLFLAG_RW | CTLFLAG_LOCKED,
205 &bpf_hdr_comp_enable, 1, "");
206
207 static int sysctl_bpf_stats SYSCTL_HANDLER_ARGS;
208 SYSCTL_PROC(_debug, OID_AUTO, bpf_stats, CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
209 0, 0,
210 sysctl_bpf_stats, "S", "BPF statistics");
211
212 /*
213 * bpf_iflist is the list of interfaces; each corresponds to an ifnet
214 * bpf_dtab holds pointer to the descriptors, indexed by minor device #
215 */
216 static struct bpf_if *bpf_iflist;
217 /*
218 * BSD now stores the bpf_d in the dev_t which is a struct
219 * on their system. Our dev_t is an int, so we still store
220 * the bpf_d in a separate table indexed by minor device #.
221 *
222 * The value stored in bpf_dtab[n] represent three states:
223 * NULL: device not opened
224 * BPF_DEV_RESERVED: device opening or closing
225 * other: device <n> opened with pointer to storage
226 */
227 #define BPF_DEV_RESERVED ((struct bpf_d *)(uintptr_t)1)
228 static struct bpf_d **bpf_dtab = NULL;
229 static unsigned int bpf_dtab_size = 0;
230 static unsigned int nbpfilter = 0;
231 static unsigned bpf_bpfd_cnt = 0;
232
233 static LCK_GRP_DECLARE(bpf_mlock_grp, "bpf");
234 static LCK_MTX_DECLARE(bpf_mlock_data, &bpf_mlock_grp);
235 static lck_mtx_t *const bpf_mlock = &bpf_mlock_data;
236
237 static int bpf_allocbufs(struct bpf_d *);
238 static errno_t bpf_attachd(struct bpf_d *d, struct bpf_if *bp);
239 static int bpf_detachd(struct bpf_d *d);
240 static void bpf_freed(struct bpf_d *);
241 static int bpf_setif(struct bpf_d *, ifnet_t ifp, bool, bool, bool);
242 static void bpf_timed_out(void *, void *);
243 static void bpf_wakeup(struct bpf_d *);
244 static uint32_t get_pkt_trunc_len(struct bpf_packet *);
245 static void catchpacket(struct bpf_d *, struct bpf_packet *, u_int, int);
246 static void reset_d(struct bpf_d *);
247 static int bpf_setf(struct bpf_d *, u_int, user_addr_t, u_long);
248 static int bpf_getdltlist(struct bpf_d *, caddr_t, struct proc *);
249 static int bpf_setdlt(struct bpf_d *, u_int);
250 static int bpf_set_traffic_class(struct bpf_d *, int);
251 static void bpf_set_packet_service_class(struct mbuf *, int);
252
253 static void bpf_acquire_d(struct bpf_d *);
254 static void bpf_release_d(struct bpf_d *);
255
256 static int bpf_devsw_installed;
257
258 void bpf_init(void *unused);
259 static int bpf_tap_callback(struct ifnet *ifp, struct mbuf *m);
260
261 /*
262 * Darwin differs from BSD here, the following are static
263 * on BSD and not static on Darwin.
264 */
265 d_open_t bpfopen;
266 d_close_t bpfclose;
267 d_read_t bpfread;
268 d_write_t bpfwrite;
269 ioctl_fcn_t bpfioctl;
270 select_fcn_t bpfselect;
271
272 /* Darwin's cdevsw struct differs slightly from BSDs */
273 #define CDEV_MAJOR 23
274 static const struct cdevsw bpf_cdevsw = {
275 .d_open = bpfopen,
276 .d_close = bpfclose,
277 .d_read = bpfread,
278 .d_write = bpfwrite,
279 .d_ioctl = bpfioctl,
280 .d_stop = eno_stop,
281 .d_reset = eno_reset,
282 .d_ttys = NULL,
283 .d_select = bpfselect,
284 .d_mmap = eno_mmap,
285 .d_strategy = eno_strat,
286 .d_reserved_1 = eno_getc,
287 .d_reserved_2 = eno_putc,
288 .d_type = 0
289 };
290
291 #define SOCKADDR_HDR_LEN offsetof(struct sockaddr, sa_data)
292
293 static int
copy_uio_to_mbuf_packet(struct uio * auio,struct mbuf * top)294 copy_uio_to_mbuf_packet(struct uio *auio, struct mbuf *top)
295 {
296 int error = 0;
297
298 for (struct mbuf *m = top; m != NULL; m = m->m_next) {
299 int bytes_to_copy = (int)uio_resid(auio);
300 int mlen;
301
302 if (m->m_flags & M_EXT) {
303 mlen = m->m_ext.ext_size - (int)M_LEADINGSPACE(m);
304 } else if (m->m_flags & M_PKTHDR) {
305 mlen = MHLEN - (int)M_LEADINGSPACE(m);
306 } else {
307 mlen = MLEN - (int)M_LEADINGSPACE(m);
308 }
309 int copy_len = imin((int)mlen, bytes_to_copy);
310
311 error = uiomove(mtod(m, caddr_t), (int)copy_len, auio);
312 if (error != 0) {
313 os_log(OS_LOG_DEFAULT, "mbuf_packet_from_uio: len %d error %d",
314 copy_len, error);
315 goto done;
316 }
317 m->m_len = copy_len;
318 top->m_pkthdr.len += copy_len;
319 }
320 done:
321 return error;
322 }
323
/*
 * bpf_movein - build an mbuf chain from the data supplied by a write()
 * on a BPF device.
 *
 * uio      - user data to copy in
 * ifp      - interface the packet will be sent on (used for validation
 *            and logging only)
 * linktype - DLT_* of the attached interface; selects the link-header
 *            length and the sockaddr family
 * mp       - out: the allocated mbuf chain on success
 * sockp    - if non-NULL, the link-level header is copied into
 *            sockp->sa_data instead of the mbuf, and sa_family is set
 * datlen   - out: length of the packet data excluding the link header
 *
 * Returns 0 on success, EIO for unsupported link types or an undersized
 * sockaddr, EMSGSIZE for bad lengths, or an allocation/copy error.
 */
static int
bpf_movein(struct uio *uio, struct ifnet *ifp, int linktype, struct mbuf **mp,
    struct sockaddr *sockp, int *datlen)
{
	struct mbuf *m;
	int error;
	int len;
	uint8_t sa_family;
	int hlen = 0;

	/*
	 * Map the data-link type to the address family and the length of
	 * the link-level header that precedes the payload.
	 */
	switch (linktype) {
#if SLIP
	case DLT_SLIP:
		sa_family = AF_INET;
		hlen = 0;
		break;
#endif /* SLIP */

	case DLT_EN10MB:
		sa_family = AF_UNSPEC;
		/* XXX Would MAXLINKHDR be better? */
		hlen = sizeof(struct ether_header);
		break;

#if FDDI
	case DLT_FDDI:
#if defined(__FreeBSD__) || defined(__bsdi__)
		sa_family = AF_IMPLINK;
		hlen = 0;
#else
		sa_family = AF_UNSPEC;
		/* XXX 4(FORMAC)+6(dst)+6(src)+3(LLC)+5(SNAP) */
		hlen = 24;
#endif
		break;
#endif /* FDDI */

	case DLT_RAW:
	case DLT_NULL:
		sa_family = AF_UNSPEC;
		hlen = 0;
		break;

#ifdef __FreeBSD__
	case DLT_ATM_RFC1483:
		/*
		 * en atm driver requires 4-byte atm pseudo header.
		 * though it isn't standard, vpi:vci needs to be
		 * specified anyway.
		 */
		sa_family = AF_UNSPEC;
		hlen = 12;      /* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
		break;
#endif

	case DLT_PPP:
		sa_family = AF_UNSPEC;
		hlen = 4;       /* This should match PPP_HDRLEN */
		break;

	case DLT_APPLE_IP_OVER_IEEE1394:
		sa_family = AF_UNSPEC;
		hlen = sizeof(struct firewire_header);
		break;

	case DLT_IEEE802_11:            /* IEEE 802.11 wireless */
		sa_family = AF_IEEE80211;
		hlen = 0;
		break;

	case DLT_IEEE802_11_RADIO:
		sa_family = AF_IEEE80211;
		hlen = 0;
		break;

	default:
		return EIO;
	}

	if (sockp) {
		/*
		 * Build a sockaddr based on the data link layer type.
		 * We do this at this level because the ethernet header
		 * is copied directly into the data field of the sockaddr.
		 * In the case of SLIP, there is no header and the packet
		 * is forwarded as is.
		 * Also, we are careful to leave room at the front of the mbuf
		 * for the link level header.
		 */
		if ((hlen + SOCKADDR_HDR_LEN) > sockp->sa_len) {
			return EIO;
		}
		sockp->sa_family = sa_family;
	} else {
		/*
		 * We're directly sending the packet data supplied by
		 * the user; we don't need to make room for the link
		 * header, and don't need the header length value any
		 * more, so set it to 0.
		 */
		hlen = 0;
	}

	/* Validate the total write size before allocating anything */
	len = (int)uio_resid(uio);
	if (len < hlen || (unsigned)len > BPF_WRITE_MAX) {
		os_log(OS_LOG_DEFAULT, "bpfwrite: bad len %d if %s",
		    (unsigned)len, ifp->if_xname);
		return EMSGSIZE;
	}
	/* Allow a small leeway over the MTU for encapsulation headers */
	if ((len - hlen) > (ifp->if_mtu + BPF_WRITE_LEEWAY)) {
		os_log(OS_LOG_DEFAULT, "bpfwrite: len %u - hlen %u too big if %s mtu %u",
		    (unsigned)len, (unsigned)hlen, ifp->if_xname, ifp->if_mtu);
		return EMSGSIZE;
	}

	*datlen = len - hlen;

	error = mbuf_allocpacket(MBUF_WAITOK, len, NULL, &m);
	if (error != 0) {
		os_log(OS_LOG_DEFAULT,
		    "bpfwrite mbuf_allocpacket len %d error %d", len, error);
		return error;
	}
	/*
	 * Make room for link header -- the packet length is 0 at this stage
	 */
	if (hlen != 0) {
		m->m_data += hlen;      /* leading space */
		/* Link header goes into the sockaddr, not into the mbuf data */
		error = UIOMOVE((caddr_t)sockp->sa_data, hlen, UIO_WRITE, uio);
		if (error) {
			os_log(OS_LOG_DEFAULT,
			    "bpfwrite UIOMOVE hlen %d error %d", hlen, error);
			goto bad;
		}
	}
	/*
	 * copy_uio_to_mbuf_packet() does set the length of each mbuf and adds it to
	 * the total packet length
	 */
	error = copy_uio_to_mbuf_packet(uio, m);
	if (error != 0) {
		os_log(OS_LOG_DEFAULT,
		    "bpfwrite copy_uio_to_mbuf_packet error %d", error);
		goto bad;
	}

	/* Check for multicast destination */
	switch (linktype) {
	case DLT_EN10MB: {
		struct ether_header *eh;

		eh = mtod(m, struct ether_header *);
		if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
			/* broadcast is a special case of multicast */
			if (_ether_cmp(etherbroadcastaddr,
			    eh->ether_dhost) == 0) {
				m->m_flags |= M_BCAST;
			} else {
				m->m_flags |= M_MCAST;
			}
		}
		break;
	}
	}
	*mp = m;

	return 0;
bad:
	m_freem(m);
	return error;
}
494
495 /*
496 * The dynamic addition of a new device node must block all processes that
497 * are opening the last device so that no process will get an unexpected
498 * ENOENT
499 */
/*
 * bpf_make_dev_t - create the next /dev/bpfN node on demand, growing the
 * descriptor table (bpf_dtab) if necessary.
 *
 * Only one thread grows the table at a time; concurrent callers block on
 * the bpf_growing flag so none of them can observe the transient state
 * and return ENOENT to userland (see the comment above).
 */
static void
bpf_make_dev_t(int maj)
{
	static int bpf_growing = 0;
	unsigned int cur_size = nbpfilter, i;

	/* Hard cap on the number of BPF devices */
	if (nbpfilter >= BPF_MAX_DEVICES) {
		return;
	}

	while (bpf_growing) {
		/* Wait until new device has been created */
		(void) tsleep((caddr_t)&bpf_growing, PZERO, "bpf_growing", 0);
	}
	if (nbpfilter > cur_size) {
		/* other thread grew it already */
		return;
	}
	bpf_growing = 1;

	/* need to grow bpf_dtab first */
	if (nbpfilter == bpf_dtab_size) {
		unsigned int new_dtab_size;
		struct bpf_d **new_dtab = NULL;

		/* Grow in chunks of NBPFILTER entries */
		new_dtab_size = bpf_dtab_size + NBPFILTER;
		new_dtab = krealloc_type(struct bpf_d *,
		    bpf_dtab_size, new_dtab_size, bpf_dtab, Z_WAITOK | Z_ZERO);
		if (new_dtab == 0) {
			os_log_error(OS_LOG_DEFAULT, "bpf_make_dev_t: malloc bpf_dtab failed");
			goto done;
		}
		bpf_dtab = new_dtab;
		bpf_dtab_size = new_dtab_size;
	}
	i = nbpfilter++;
	(void) devfs_make_node(makedev(maj, i),
	    DEVFS_CHAR, UID_ROOT, GID_WHEEL, 0600,
	    "bpf%d", i);
done:
	/* Release waiters whether or not the node was created */
	bpf_growing = 0;
	wakeup((caddr_t)&bpf_growing);
}
543
544 /*
545 * Attach file to the bpf interface, i.e. make d listen on bp.
546 */
/*
 * Attach file to the bpf interface, i.e. make d listen on bp.
 *
 * Called with bpf_mlock held.  Links d onto bp's listener list, takes a
 * reference on d, and — if d is the first listener for this DLT — wires
 * up the interface's bpf tap callbacks.
 *
 * Returns 0 or the error from the interface's bif_tap enable call.
 */
static errno_t
bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
{
	int first = bp->bif_dlist == NULL;
	int error = 0;

	/*
	 * Point d at bp, and add d to the interface's list of listeners.
	 * Finally, point the driver's bpf cookie at the interface so
	 * it will divert packets to bpf.
	 */
	d->bd_bif = bp;
	d->bd_next = bp->bif_dlist;
	bp->bif_dlist = d;
	bpf_bpfd_cnt++;

	/*
	 * Take a reference on the device even if an error is returned
	 * because we keep the device in the interface's list of listeners
	 */
	bpf_acquire_d(d);

	if (first) {
		/* Find the default bpf entry for this ifp */
		if (bp->bif_ifp->if_bpf == NULL) {
			struct bpf_if *tmp, *primary = NULL;

			/* First bpf_if in the list for this ifp is primary */
			for (tmp = bpf_iflist; tmp; tmp = tmp->bif_next) {
				if (tmp->bif_ifp == bp->bif_ifp) {
					primary = tmp;
					break;
				}
			}
			bp->bif_ifp->if_bpf = primary;
		}
		/* Only call dlil_set_bpf_tap for primary dlt */
		if (bp->bif_ifp->if_bpf == bp) {
			dlil_set_bpf_tap(bp->bif_ifp, BPF_TAP_INPUT_OUTPUT,
			    bpf_tap_callback);
		}

		/* Let the interface-specific tap know it has listeners now */
		if (bp->bif_tap != NULL) {
			error = bp->bif_tap(bp->bif_ifp, bp->bif_dlt,
			    BPF_TAP_INPUT_OUTPUT);
		}
	}

	/*
	 * Reset the detach flags in case we previously detached an interface
	 */
	d->bd_flags &= ~(BPF_DETACHING | BPF_DETACHED);

	/* PKTAP headers need per-read finalization (see bpf_finalize_pktap) */
	if (bp->bif_dlt == DLT_PKTAP) {
		d->bd_flags |= BPF_FINALIZE_PKTAP;
	} else {
		d->bd_flags &= ~BPF_FINALIZE_PKTAP;
	}
	return error;
}
606
607 /*
608 * Detach a file from its interface.
609 *
610 * Return 1 if was closed by some thread, 0 otherwise
611 */
/*
 * Detach a file from its interface.
 *
 * Called with bpf_mlock held; the lock is DROPPED and re-taken around the
 * ifnet_set_promiscuous() / tap-disable calls, so d's flags may change
 * while unlocked (hence bpf_closed is re-read afterwards).
 *
 * Return 1 if was closed by some thread, 0 otherwise
 */
static int
bpf_detachd(struct bpf_d *d)
{
	struct bpf_d **p;
	struct bpf_if *bp;
	struct ifnet *ifp;
	uint32_t dlt;
	bpf_tap_func disable_tap;
	uint8_t bd_promisc;

	int bpf_closed = d->bd_flags & BPF_CLOSING;
	/*
	 * Some other thread already detached
	 */
	if ((d->bd_flags & (BPF_DETACHED | BPF_DETACHING)) != 0) {
		goto done;
	}
	/*
	 * This thread is doing the detach
	 */
	d->bd_flags |= BPF_DETACHING;

	ifp = d->bd_bif->bif_ifp;
	bp = d->bd_bif;

	/* Remove d from the interface's descriptor list. */
	p = &bp->bif_dlist;
	while (*p != d) {
		p = &(*p)->bd_next;
		if (*p == 0) {
			panic("bpf_detachd: descriptor not in list");
		}
	}
	*p = (*p)->bd_next;
	bpf_bpfd_cnt--;
	disable_tap = NULL;
	if (bp->bif_dlist == 0) {
		/*
		 * Let the driver know that there are no more listeners.
		 */
		/* Only call dlil_set_bpf_tap for primary dlt */
		if (bp->bif_ifp->if_bpf == bp) {
			dlil_set_bpf_tap(ifp, BPF_TAP_DISABLE, NULL);
		}

		/* Defer calling the tap-disable callback until unlocked */
		disable_tap = bp->bif_tap;
		if (disable_tap) {
			dlt = bp->bif_dlt;
		}

		/* Keep if_bpf set if another DLT on this ifp still has listeners */
		for (bp = bpf_iflist; bp; bp = bp->bif_next) {
			if (bp->bif_ifp == ifp && bp->bif_dlist != 0) {
				break;
			}
		}
		if (bp == NULL) {
			ifp->if_bpf = NULL;
		}
	}
	d->bd_bif = NULL;
	/*
	 * Check if this descriptor had requested promiscuous mode.
	 * If so, turn it off.
	 */
	bd_promisc = d->bd_promisc;
	d->bd_promisc = 0;

	/* Must not hold bpf_mlock across the ifnet/tap calls below */
	lck_mtx_unlock(bpf_mlock);
	if (bd_promisc) {
		if (ifnet_set_promiscuous(ifp, 0)) {
			/*
			 * Something is really wrong if we were able to put
			 * the driver into promiscuous mode, but can't
			 * take it out.
			 * Most likely the network interface is gone.
			 */
			os_log_error(OS_LOG_DEFAULT,
			    "%s: bpf%d ifnet_set_promiscuous %s failed",
			    __func__, d->bd_dev_minor, if_name(ifp));
		}
	}

	if (disable_tap) {
		disable_tap(ifp, dlt, BPF_TAP_DISABLE);
	}
	lck_mtx_lock(bpf_mlock);

	/*
	 * Wake up other threads that are waiting for this thread to finish
	 * detaching
	 */
	d->bd_flags &= ~BPF_DETACHING;
	d->bd_flags |= BPF_DETACHED;

	/* Refresh the local variable as d could have been modified */
	bpf_closed = d->bd_flags & BPF_CLOSING;

	os_log(OS_LOG_DEFAULT, "bpf%d%s detached from %s fcount %llu dcount %llu",
	    d->bd_dev_minor, bpf_closed ? " closed and" : "", if_name(ifp),
	    d->bd_fcount, d->bd_dcount);

	/*
	 * Note that we've kept the reference because we may have dropped
	 * the lock when turning off promiscuous mode
	 */
	bpf_release_d(d);
done:
	/*
	 * Let the caller know the bpf_d is closed
	 */
	if (bpf_closed) {
		return 1;
	} else {
		return 0;
	}
}
728
729 /*
730 * Start asynchronous timer, if necessary.
731 * Must be called with bpf_mlock held.
732 */
/*
 * Start asynchronous timer, if necessary.
 * Must be called with bpf_mlock held.
 *
 * Arms d->bd_thread_call to fire after bd_rtout ticks and moves the
 * descriptor from BPF_IDLE to BPF_WAITING.  No-op when there is no
 * read timeout configured or a timer is already pending/expired.
 */
static void
bpf_start_timer(struct bpf_d *d)
{
	uint64_t deadline;
	struct timeval tv;

	if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
		/* Convert the tick count bd_rtout into seconds/microseconds */
		tv.tv_sec = d->bd_rtout / hz;
		tv.tv_usec = (d->bd_rtout % hz) * tick;

		clock_interval_to_deadline(
			(uint32_t)tv.tv_sec * USEC_PER_SEC + tv.tv_usec,
			NSEC_PER_USEC, &deadline);
		/*
		 * The state is BPF_IDLE, so the timer hasn't
		 * been started yet, and hasn't gone off yet;
		 * there is no thread call scheduled, so this
		 * won't change the schedule.
		 *
		 * XXX - what if, by the time it gets entered,
		 * the deadline has already passed?
		 */
		thread_call_enter_delayed(d->bd_thread_call, deadline);
		d->bd_state = BPF_WAITING;
	}
}
759
760 /*
761 * Cancel asynchronous timer.
762 * Must be called with bpf_mlock held.
763 */
/*
 * Cancel asynchronous timer.
 * Must be called with bpf_mlock held.
 *
 * Returns TRUE if a pending call was cancelled, FALSE if the timer had
 * already fired (in which case bpf_timed_out() is running or queued).
 */
static boolean_t
bpf_stop_timer(struct bpf_d *d)
{
	/*
	 * If the timer has already gone off, this does nothing.
	 * Our caller is expected to set d->bd_state to BPF_IDLE,
	 * with the bpf_mlock, after we are called. bpf_timed_out()
	 * also grabs bpf_mlock, so, if the timer has gone off and
	 * bpf_timed_out() hasn't finished, it's waiting for the
	 * lock; when this thread releases the lock, it will
	 * find the state is BPF_IDLE, and just release the
	 * lock and return.
	 */
	return thread_call_cancel(d->bd_thread_call);
}
779
780 void
bpf_acquire_d(struct bpf_d * d)781 bpf_acquire_d(struct bpf_d *d)
782 {
783 void *lr_saved = __builtin_return_address(0);
784
785 LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);
786
787 d->bd_refcnt += 1;
788
789 d->bd_ref_lr[d->bd_next_ref_lr] = lr_saved;
790 d->bd_next_ref_lr = (d->bd_next_ref_lr + 1) % BPF_REF_HIST;
791 }
792
793 void
bpf_release_d(struct bpf_d * d)794 bpf_release_d(struct bpf_d *d)
795 {
796 void *lr_saved = __builtin_return_address(0);
797
798 LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);
799
800 if (d->bd_refcnt <= 0) {
801 panic("%s: %p refcnt <= 0", __func__, d);
802 }
803
804 d->bd_refcnt -= 1;
805
806 d->bd_unref_lr[d->bd_next_unref_lr] = lr_saved;
807 d->bd_next_unref_lr = (d->bd_next_unref_lr + 1) % BPF_REF_HIST;
808
809 if (d->bd_refcnt == 0) {
810 /* Assert the device is detached */
811 if ((d->bd_flags & BPF_DETACHED) == 0) {
812 panic("%s: %p BPF_DETACHED not set", __func__, d);
813 }
814
815 kfree_type(struct bpf_d, d);
816 }
817 }
818
819 /*
820 * Open ethernet device. Returns ENXIO for illegal minor device number,
821 * EBUSY if file is open by another process.
822 */
823 /* ARGSUSED */
int
bpfopen(dev_t dev, int flags, __unused int fmt,
    struct proc *p)
{
	struct bpf_d *d;

	lck_mtx_lock(bpf_mlock);
	/* Reject minors beyond the device nodes created so far */
	if ((unsigned int) minor(dev) >= nbpfilter) {
		lck_mtx_unlock(bpf_mlock);
		return ENXIO;
	}
	/*
	 * New device nodes are created on demand when opening the last one.
	 * The programming model is for processes to loop on the minor starting
	 * at 0 as long as EBUSY is returned. The loop stops when either the
	 * open succeeds or an error other than EBUSY is returned. That means
	 * that bpf_make_dev_t() must block all processes that are opening the
	 * last node. If not all processes are blocked, they could unexpectedly
	 * get ENOENT and abort their opening loop.
	 */
	if ((unsigned int) minor(dev) == (nbpfilter - 1)) {
		bpf_make_dev_t(major(dev));
	}

	/*
	 * Each minor can be opened by only one process. If the requested
	 * minor is in use, return EBUSY.
	 *
	 * Important: bpfopen() and bpfclose() have to check and set the status
	 * of a device in the same locking context otherwise the device may be
	 * leaked because the vnode use count will be unexpectedly greater than 1
	 * when close() is called.
	 */
	if (bpf_dtab[minor(dev)] == NULL) {
		/* Reserve while opening */
		bpf_dtab[minor(dev)] = BPF_DEV_RESERVED;
	} else {
		lck_mtx_unlock(bpf_mlock);
		return EBUSY;
	}
	d = kalloc_type(struct bpf_d, Z_WAITOK | Z_ZERO);
	if (d == NULL) {
		/* this really is a catastrophic failure */
		os_log_error(OS_LOG_DEFAULT,
		    "bpfopen: bpf%d kalloc_type bpf_d failed", minor(dev));
		/* Undo the reservation so the minor can be opened again */
		bpf_dtab[minor(dev)] = NULL;
		lck_mtx_unlock(bpf_mlock);
		return ENOMEM;
	}

	/* Mark "in use" and do most initialization. */
	bpf_acquire_d(d);
	d->bd_bufsize = bpf_bufsize;
	d->bd_sig = SIGIO;
	d->bd_direction = BPF_D_INOUT;
	d->bd_oflags = flags;
	d->bd_state = BPF_IDLE;
	d->bd_traffic_class = SO_TC_BE;
	/* Not attached to any interface yet */
	d->bd_flags |= BPF_DETACHED;
	if (bpf_wantpktap) {
		d->bd_flags |= BPF_WANT_PKTAP;
	} else {
		d->bd_flags &= ~BPF_WANT_PKTAP;
	}

	/* Per-descriptor thread call used for the read timeout */
	d->bd_thread_call = thread_call_allocate(bpf_timed_out, d);
	if (d->bd_thread_call == NULL) {
		os_log_error(OS_LOG_DEFAULT, "bpfopen: bpf%d malloc thread call failed",
		    minor(dev));
		bpf_dtab[minor(dev)] = NULL;
		bpf_release_d(d);
		lck_mtx_unlock(bpf_mlock);

		return ENOMEM;
	}
	d->bd_opened_by = p;
	uuid_generate(d->bd_uuid);
	d->bd_pid = proc_pid(p);

	d->bd_dev_minor = minor(dev);
	bpf_dtab[minor(dev)] = d;               /* Mark opened */
	lck_mtx_unlock(bpf_mlock);

	if (bpf_debug) {
		os_log(OS_LOG_DEFAULT, "bpf%u opened by %s.%u",
		    d->bd_dev_minor, proc_name_address(p), d->bd_pid);
	}
	return 0;
}
913
914 /*
915 * Close the descriptor by detaching it from its interface,
916 * deallocating its buffers, and marking it free.
917 */
918 /* ARGSUSED */
int
bpfclose(dev_t dev, __unused int flags, __unused int fmt,
    __unused struct proc *p)
{
	struct bpf_d *d;

	/* Take BPF lock to ensure no other thread is using the device */
	lck_mtx_lock(bpf_mlock);

	d = bpf_dtab[minor(dev)];
	if (d == NULL || d == BPF_DEV_RESERVED) {
		lck_mtx_unlock(bpf_mlock);
		return ENXIO;
	}

	/*
	 * Other threads may call bpd_detachd() if we drop the bpf_mlock
	 */
	d->bd_flags |= BPF_CLOSING;

	if (bpf_debug != 0) {
		os_log(OS_LOG_DEFAULT, "%s: bpf%d",
		    __func__, d->bd_dev_minor);
	}

	bpf_dtab[minor(dev)] = BPF_DEV_RESERVED; /* Reserve while closing */

	/*
	 * Deal with any in-progress timeouts.
	 */
	switch (d->bd_state) {
	case BPF_IDLE:
		/*
		 * Not waiting for a timeout, and no timeout happened.
		 */
		break;

	case BPF_WAITING:
		/*
		 * Waiting for a timeout.
		 * Cancel any timer that has yet to go off,
		 * and mark the state as "closing".
		 * Then drop the lock to allow any timers that
		 * *have* gone off to run to completion, and wait
		 * for them to finish.
		 */
		if (!bpf_stop_timer(d)) {
			/*
			 * There was no pending call, so the call must
			 * have been in progress. Wait for the call to
			 * complete; we have to drop the lock while
			 * waiting. to let the in-progress call complete
			 */
			d->bd_state = BPF_DRAINING;
			/* bpf_timed_out() wakes us and leaves BPF_DRAINING */
			while (d->bd_state == BPF_DRAINING) {
				msleep((caddr_t)d, bpf_mlock, PRINET,
				    "bpfdraining", NULL);
			}
		}
		d->bd_state = BPF_IDLE;
		break;

	case BPF_TIMED_OUT:
		/*
		 * Timer went off, and the timeout routine finished.
		 */
		d->bd_state = BPF_IDLE;
		break;

	case BPF_DRAINING:
		/*
		 * Another thread is blocked on a close waiting for
		 * a timeout to finish.
		 * This "shouldn't happen", as the first thread to enter
		 * bpfclose() will set bpf_dtab[minor(dev)] to 1, and
		 * all subsequent threads should see that and fail with
		 * ENXIO.
		 */
		panic("Two threads blocked in a BPF close");
		break;
	}

	/* Detach from the interface, if any */
	if (d->bd_bif) {
		bpf_detachd(d);
	}
	selthreadclear(&d->bd_sel);
	thread_call_free(d->bd_thread_call);

	/* Wait for any in-flight read or write on this descriptor to drain */
	while (d->bd_hbuf_read || d->bd_hbuf_write) {
		msleep((caddr_t)d, bpf_mlock, PRINET, "bpfclose", NULL);
	}

	if (bpf_debug) {
		os_log(OS_LOG_DEFAULT,
		    "bpf%u closed by %s.%u dcount %llu fcount %llu ccount %llu",
		    d->bd_dev_minor, proc_name_address(p), d->bd_pid,
		    d->bd_dcount, d->bd_fcount, d->bd_bcs.bcs_count_compressed_prefix);
	}

	bpf_freed(d);

	/* Mark free in same context as bpfopen comes to check */
	bpf_dtab[minor(dev)] = NULL;                    /* Mark closed */

	/* Drop the reference taken by bpfopen(); may free d */
	bpf_release_d(d);

	lck_mtx_unlock(bpf_mlock);

	return 0;
}
1029
1030 #define BPF_SLEEP bpf_sleep
1031
1032 static int
bpf_sleep(struct bpf_d * d,int pri,const char * wmesg,int timo)1033 bpf_sleep(struct bpf_d *d, int pri, const char *wmesg, int timo)
1034 {
1035 u_int64_t abstime = 0;
1036
1037 if (timo != 0) {
1038 clock_interval_to_deadline(timo, NSEC_PER_SEC / hz, &abstime);
1039 }
1040
1041 return msleep1((caddr_t)d, bpf_mlock, pri, wmesg, abstime);
1042 }
1043
1044 static void
bpf_finalize_pktap(struct bpf_hdr * hp,struct pktap_header * pktaphdr)1045 bpf_finalize_pktap(struct bpf_hdr *hp, struct pktap_header *pktaphdr)
1046 {
1047 if (pktaphdr->pth_flags & PTH_FLAG_V2_HDR) {
1048 struct pktap_v2_hdr *pktap_v2_hdr;
1049
1050 pktap_v2_hdr = (struct pktap_v2_hdr *)pktaphdr;
1051
1052 if (pktap_v2_hdr->pth_flags & PTH_FLAG_DELAY_PKTAP) {
1053 pktap_v2_finalize_proc_info(pktap_v2_hdr);
1054 }
1055 } else {
1056 if (pktaphdr->pth_flags & PTH_FLAG_DELAY_PKTAP) {
1057 pktap_finalize_proc_info(pktaphdr);
1058 }
1059
1060 if (pktaphdr->pth_flags & PTH_FLAG_TSTAMP) {
1061 hp->bh_tstamp.tv_sec = pktaphdr->pth_tstamp.tv_sec;
1062 hp->bh_tstamp.tv_usec = pktaphdr->pth_tstamp.tv_usec;
1063 }
1064 }
1065 }
1066
1067 /*
1068 * Rotate the packet buffers in descriptor d. Move the store buffer
1069 * into the hold slot, and the free buffer into the store slot.
1070 * Zero the length of the new store buffer.
1071 *
1072 * Note: in head drop mode, the hold buffer can be dropped so the fist packet of the
1073 * store buffer cannot be compressed as it otherwise would refer to deleted data
1074 * in a dropped hold buffer that the reader process does know about
1075 */
1076 #define ROTATE_BUFFERS(d) do { \
1077 if (d->bd_hbuf_read) \
1078 panic("rotating bpf buffers during read"); \
1079 (d)->bd_hbuf = (d)->bd_sbuf; \
1080 (d)->bd_hlen = (d)->bd_slen; \
1081 (d)->bd_hcnt = (d)->bd_scnt; \
1082 (d)->bd_sbuf = (d)->bd_fbuf; \
1083 (d)->bd_slen = 0; \
1084 (d)->bd_scnt = 0; \
1085 (d)->bd_fbuf = NULL; \
1086 if ((d)->bd_headdrop != 0) \
1087 (d)->bd_prev_slen = 0; \
1088 } while(false)
1089
1090 /*
1091 * bpfread - read next chunk of packets from buffers
1092 */
1093 int
bpfread(dev_t dev,struct uio * uio,int ioflag)1094 bpfread(dev_t dev, struct uio *uio, int ioflag)
1095 {
1096 struct bpf_d *d;
1097 caddr_t hbuf;
1098 int timed_out, hbuf_len;
1099 int error;
1100 int flags;
1101
1102 lck_mtx_lock(bpf_mlock);
1103
1104 d = bpf_dtab[minor(dev)];
1105 if (d == NULL || d == BPF_DEV_RESERVED ||
1106 (d->bd_flags & BPF_CLOSING) != 0) {
1107 lck_mtx_unlock(bpf_mlock);
1108 return ENXIO;
1109 }
1110
1111 bpf_acquire_d(d);
1112
1113 /*
1114 * Restrict application to use a buffer the same size as
1115 * as kernel buffers.
1116 */
1117 if (uio_resid(uio) != d->bd_bufsize) {
1118 bpf_release_d(d);
1119 lck_mtx_unlock(bpf_mlock);
1120 return EINVAL;
1121 }
1122
1123 if (d->bd_state == BPF_WAITING) {
1124 bpf_stop_timer(d);
1125 }
1126
1127 timed_out = (d->bd_state == BPF_TIMED_OUT);
1128 d->bd_state = BPF_IDLE;
1129
1130 while (d->bd_hbuf_read) {
1131 msleep((caddr_t)d, bpf_mlock, PRINET, "bpfread", NULL);
1132 }
1133
1134 if ((d->bd_flags & BPF_CLOSING) != 0) {
1135 bpf_release_d(d);
1136 lck_mtx_unlock(bpf_mlock);
1137 return ENXIO;
1138 }
1139 /*
1140 * If the hold buffer is empty, then do a timed sleep, which
1141 * ends when the timeout expires or when enough packets
1142 * have arrived to fill the store buffer.
1143 */
1144 while (d->bd_hbuf == 0) {
1145 if ((d->bd_immediate || timed_out || (ioflag & IO_NDELAY)) &&
1146 d->bd_slen != 0) {
1147 /*
1148 * We're in immediate mode, or are reading
1149 * in non-blocking mode, or a timer was
1150 * started before the read (e.g., by select()
1151 * or poll()) and has expired and a packet(s)
1152 * either arrived since the previous
1153 * read or arrived while we were asleep.
1154 * Rotate the buffers and return what's here.
1155 */
1156 ROTATE_BUFFERS(d);
1157 break;
1158 }
1159
1160 /*
1161 * No data is available, check to see if the bpf device
1162 * is still pointed at a real interface. If not, return
1163 * ENXIO so that the userland process knows to rebind
1164 * it before using it again.
1165 */
1166 if (d->bd_bif == NULL) {
1167 bpf_release_d(d);
1168 lck_mtx_unlock(bpf_mlock);
1169 return ENXIO;
1170 }
1171 if (ioflag & IO_NDELAY) {
1172 bpf_release_d(d);
1173 lck_mtx_unlock(bpf_mlock);
1174 return EWOULDBLOCK;
1175 }
1176 error = BPF_SLEEP(d, PRINET | PCATCH, "bpf", d->bd_rtout);
1177 /*
1178 * Make sure device is still opened
1179 */
1180 if ((d->bd_flags & BPF_CLOSING) != 0) {
1181 bpf_release_d(d);
1182 lck_mtx_unlock(bpf_mlock);
1183 return ENXIO;
1184 }
1185
1186 while (d->bd_hbuf_read) {
1187 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_read",
1188 NULL);
1189 }
1190
1191 if ((d->bd_flags & BPF_CLOSING) != 0) {
1192 bpf_release_d(d);
1193 lck_mtx_unlock(bpf_mlock);
1194 return ENXIO;
1195 }
1196
1197 if (error == EINTR || error == ERESTART) {
1198 if (d->bd_hbuf != NULL) {
1199 /*
1200 * Because we msleep, the hold buffer might
1201 * be filled when we wake up. Avoid rotating
1202 * in this case.
1203 */
1204 break;
1205 }
1206 if (d->bd_slen != 0) {
1207 /*
1208 * Sometimes we may be interrupted often and
1209 * the sleep above will not timeout.
1210 * Regardless, we should rotate the buffers
1211 * if there's any new data pending and
1212 * return it.
1213 */
1214 ROTATE_BUFFERS(d);
1215 break;
1216 }
1217 bpf_release_d(d);
1218 lck_mtx_unlock(bpf_mlock);
1219 if (error == ERESTART) {
1220 os_log(OS_LOG_DEFAULT, "%s: bpf%d ERESTART to EINTR",
1221 __func__, d->bd_dev_minor);
1222 error = EINTR;
1223 }
1224 return error;
1225 }
1226 if (error == EWOULDBLOCK) {
1227 /*
1228 * On a timeout, return what's in the buffer,
1229 * which may be nothing. If there is something
1230 * in the store buffer, we can rotate the buffers.
1231 */
1232 if (d->bd_hbuf) {
1233 /*
1234 * We filled up the buffer in between
1235 * getting the timeout and arriving
1236 * here, so we don't need to rotate.
1237 */
1238 break;
1239 }
1240
1241 if (d->bd_slen == 0) {
1242 bpf_release_d(d);
1243 lck_mtx_unlock(bpf_mlock);
1244 return 0;
1245 }
1246 ROTATE_BUFFERS(d);
1247 break;
1248 }
1249 }
1250 /*
1251 * At this point, we know we have something in the hold slot.
1252 */
1253
1254 /*
1255 * Set the hold buffer read. So we do not
1256 * rotate the buffers until the hold buffer
1257 * read is complete. Also to avoid issues resulting
1258 * from page faults during disk sleep (<rdar://problem/13436396>).
1259 */
1260 d->bd_hbuf_read = true;
1261 hbuf = d->bd_hbuf;
1262 hbuf_len = d->bd_hlen;
1263 flags = d->bd_flags;
1264 d->bd_bcs.bcs_total_read += d->bd_hcnt;
1265 lck_mtx_unlock(bpf_mlock);
1266
1267 /*
1268 * Before we move data to userland, we fill out the extended
1269 * header fields.
1270 */
1271 if (flags & BPF_EXTENDED_HDR) {
1272 char *p;
1273
1274 p = hbuf;
1275 while (p < hbuf + hbuf_len) {
1276 struct bpf_hdr_ext *ehp;
1277 uint32_t flowid;
1278 struct so_procinfo soprocinfo;
1279 int found = 0;
1280
1281 ehp = (struct bpf_hdr_ext *)(void *)p;
1282 if ((flowid = ehp->bh_flowid) != 0) {
1283 if (ehp->bh_flags & BPF_HDR_EXT_FLAGS_TCP) {
1284 ehp->bh_flags &= ~BPF_HDR_EXT_FLAGS_TCP;
1285 found = inp_findinpcb_procinfo(&tcbinfo,
1286 flowid, &soprocinfo);
1287 } else if (ehp->bh_flags == BPF_HDR_EXT_FLAGS_UDP) {
1288 ehp->bh_flags &= ~BPF_HDR_EXT_FLAGS_UDP;
1289 found = inp_findinpcb_procinfo(&udbinfo,
1290 flowid, &soprocinfo);
1291 }
1292 if (found == 1) {
1293 ehp->bh_pid = soprocinfo.spi_pid;
1294 strlcpy(&ehp->bh_comm[0], &soprocinfo.spi_proc_name[0], sizeof(ehp->bh_comm));
1295 }
1296 ehp->bh_flowid = 0;
1297 }
1298
1299 if ((flags & BPF_FINALIZE_PKTAP) != 0 && ehp->bh_complen == 0) {
1300 struct pktap_header *pktaphdr;
1301
1302 pktaphdr = (struct pktap_header *)(void *)
1303 (p + BPF_WORDALIGN(ehp->bh_hdrlen));
1304
1305 bpf_finalize_pktap((struct bpf_hdr *) ehp,
1306 pktaphdr);
1307 }
1308 p += BPF_WORDALIGN(ehp->bh_hdrlen + ehp->bh_caplen);
1309 }
1310 } else if (flags & BPF_FINALIZE_PKTAP) {
1311 char *p;
1312
1313 p = hbuf;
1314
1315 while (p < hbuf + hbuf_len) {
1316 struct bpf_hdr *hp;
1317 struct pktap_header *pktaphdr;
1318
1319 hp = (struct bpf_hdr *)(void *)p;
1320
1321 /*
1322 * Cannot finalize a compressed pktap header as we may not have
1323 * all the fields present
1324 */
1325 if (d->bd_flags & BPF_COMP_ENABLED) {
1326 struct bpf_comp_hdr *hcp;
1327
1328 hcp = (struct bpf_comp_hdr *)(void *)p;
1329
1330 if (hcp->bh_complen != 0) {
1331 p += BPF_WORDALIGN(hcp->bh_hdrlen + hcp->bh_caplen);
1332 continue;
1333 }
1334 }
1335
1336 pktaphdr = (struct pktap_header *)(void *)
1337 (p + BPF_WORDALIGN(hp->bh_hdrlen));
1338
1339 bpf_finalize_pktap(hp, pktaphdr);
1340
1341 p += BPF_WORDALIGN(hp->bh_hdrlen + hp->bh_caplen);
1342 }
1343 }
1344
1345 /*
1346 * Move data from hold buffer into user space.
1347 * We know the entire buffer is transferred since
1348 * we checked above that the read buffer is bpf_bufsize bytes.
1349 */
1350 error = UIOMOVE(hbuf, hbuf_len, UIO_READ, uio);
1351
1352 lck_mtx_lock(bpf_mlock);
1353 /*
1354 * Make sure device is still opened
1355 */
1356 if ((d->bd_flags & BPF_CLOSING) != 0) {
1357 bpf_release_d(d);
1358 lck_mtx_unlock(bpf_mlock);
1359 return ENXIO;
1360 }
1361
1362 d->bd_hbuf_read = false;
1363 d->bd_fbuf = d->bd_hbuf;
1364 d->bd_hbuf = NULL;
1365 d->bd_hlen = 0;
1366 d->bd_hcnt = 0;
1367 wakeup((caddr_t)d);
1368
1369 bpf_release_d(d);
1370 lck_mtx_unlock(bpf_mlock);
1371 return error;
1372 }
1373
1374 /*
1375 * If there are processes sleeping on this descriptor, wake them up.
1376 */
1377 static void
bpf_wakeup(struct bpf_d * d)1378 bpf_wakeup(struct bpf_d *d)
1379 {
1380 if (d->bd_state == BPF_WAITING) {
1381 bpf_stop_timer(d);
1382 d->bd_state = BPF_IDLE;
1383 }
1384 wakeup((caddr_t)d);
1385 if (d->bd_async && d->bd_sig && d->bd_sigio) {
1386 pgsigio(d->bd_sigio, d->bd_sig);
1387 }
1388
1389 selwakeup(&d->bd_sel);
1390 if ((d->bd_flags & BPF_KNOTE)) {
1391 KNOTE(&d->bd_sel.si_note, 1);
1392 }
1393 }
1394
1395 static void
bpf_timed_out(void * arg,__unused void * dummy)1396 bpf_timed_out(void *arg, __unused void *dummy)
1397 {
1398 struct bpf_d *d = (struct bpf_d *)arg;
1399
1400 lck_mtx_lock(bpf_mlock);
1401 if (d->bd_state == BPF_WAITING) {
1402 /*
1403 * There's a select or kqueue waiting for this; if there's
1404 * now stuff to read, wake it up.
1405 */
1406 d->bd_state = BPF_TIMED_OUT;
1407 if (d->bd_slen != 0) {
1408 bpf_wakeup(d);
1409 }
1410 } else if (d->bd_state == BPF_DRAINING) {
1411 /*
1412 * A close is waiting for this to finish.
1413 * Mark it as finished, and wake the close up.
1414 */
1415 d->bd_state = BPF_IDLE;
1416 bpf_wakeup(d);
1417 }
1418 lck_mtx_unlock(bpf_mlock);
1419 }
1420
/*
 * Room reserved after the sockaddr header for the largest data-link
 * header bpf_movein may store; keep in sync with bpf_movein above:
 */
#define MAX_DATALINK_HDR_LEN (sizeof(struct firewire_header))
1423
1424 int
bpfwrite(dev_t dev,struct uio * uio,__unused int ioflag)1425 bpfwrite(dev_t dev, struct uio *uio, __unused int ioflag)
1426 {
1427 struct bpf_d *d;
1428 struct ifnet *ifp;
1429 struct mbuf *m = NULL;
1430 int error = 0;
1431 char dst_buf[SOCKADDR_HDR_LEN + MAX_DATALINK_HDR_LEN];
1432 int datlen = 0;
1433 int bif_dlt;
1434 int bd_hdrcmplt;
1435
1436 lck_mtx_lock(bpf_mlock);
1437
1438 while (true) {
1439 d = bpf_dtab[minor(dev)];
1440 if (d == NULL || d == BPF_DEV_RESERVED ||
1441 (d->bd_flags & BPF_CLOSING) != 0) {
1442 lck_mtx_unlock(bpf_mlock);
1443 return ENXIO;
1444 }
1445
1446 if (d->bd_hbuf_write) {
1447 msleep((caddr_t)d, bpf_mlock, PRINET, "bpfwrite",
1448 NULL);
1449 } else {
1450 break;
1451 }
1452 }
1453 d->bd_hbuf_write = true;
1454
1455 bpf_acquire_d(d);
1456
1457 ++d->bd_wcount;
1458
1459 if (d->bd_bif == NULL) {
1460 error = ENXIO;
1461 goto done;
1462 }
1463
1464 ifp = d->bd_bif->bif_ifp;
1465
1466 if (IFNET_IS_MANAGEMENT(ifp) &&
1467 IOCurrentTaskHasEntitlement(MANAGEMENT_DATA_ENTITLEMENT) == false) {
1468 ++d->bd_wdcount;
1469 bpf_release_d(d);
1470 lck_mtx_unlock(bpf_mlock);
1471 return ENETDOWN;
1472 }
1473
1474 if ((ifp->if_flags & IFF_UP) == 0) {
1475 error = ENETDOWN;
1476 goto done;
1477 }
1478 if (uio_resid(uio) == 0) {
1479 error = 0;
1480 goto done;
1481 }
1482 ((struct sockaddr *)dst_buf)->sa_len = sizeof(dst_buf);
1483
1484 /*
1485 * fix for PR-6849527
1486 * geting variables onto stack before dropping lock for bpf_movein()
1487 */
1488 bif_dlt = (int)d->bd_bif->bif_dlt;
1489 bd_hdrcmplt = d->bd_hdrcmplt;
1490
1491 /* bpf_movein allocating mbufs; drop lock */
1492 lck_mtx_unlock(bpf_mlock);
1493
1494 error = bpf_movein(uio, ifp, bif_dlt, &m,
1495 bd_hdrcmplt ? NULL : (struct sockaddr *)dst_buf,
1496 &datlen);
1497
1498 /* take the lock again */
1499 lck_mtx_lock(bpf_mlock);
1500 if (error != 0) {
1501 goto done;
1502 }
1503
1504 /* verify the device is still open */
1505 if ((d->bd_flags & BPF_CLOSING) != 0) {
1506 error = ENXIO;
1507 goto done;
1508 }
1509
1510 if (d->bd_bif == NULL || d->bd_bif->bif_ifp != ifp) {
1511 error = ENXIO;
1512 goto done;
1513 }
1514
1515 bpf_set_packet_service_class(m, d->bd_traffic_class);
1516
1517 lck_mtx_unlock(bpf_mlock);
1518
1519 /*
1520 * The driver frees the mbuf.
1521 */
1522 if (d->bd_hdrcmplt) {
1523 if (d->bd_bif->bif_send) {
1524 error = d->bd_bif->bif_send(ifp, d->bd_bif->bif_dlt, m);
1525 } else {
1526 error = dlil_output(ifp, 0, m, NULL, NULL, 1, NULL);
1527 }
1528 } else {
1529 error = dlil_output(ifp, PF_INET, m, NULL,
1530 (struct sockaddr *)dst_buf, 0, NULL);
1531 }
1532 /* Make sure we do not double free */
1533 m = NULL;
1534
1535 lck_mtx_lock(bpf_mlock);
1536 done:
1537 if (error != 0) {
1538 ++d->bd_wdcount;
1539 }
1540 if (m != NULL) {
1541 m_freem(m);
1542 }
1543 d->bd_hbuf_write = false;
1544 wakeup((caddr_t)d);
1545 bpf_release_d(d);
1546 lck_mtx_unlock(bpf_mlock);
1547
1548 return error;
1549 }
1550
1551 /*
1552 * Reset a descriptor by flushing its packet buffer and clearing the
1553 * receive and drop counts.
1554 */
1555 static void
reset_d(struct bpf_d * d)1556 reset_d(struct bpf_d *d)
1557 {
1558 if (d->bd_hbuf_read) {
1559 panic("resetting buffers during read");
1560 }
1561
1562 if (d->bd_hbuf) {
1563 /* Free the hold buffer. */
1564 d->bd_fbuf = d->bd_hbuf;
1565 d->bd_hbuf = NULL;
1566 }
1567 d->bd_slen = 0;
1568 d->bd_hlen = 0;
1569 d->bd_scnt = 0;
1570 d->bd_hcnt = 0;
1571 d->bd_rcount = 0;
1572 d->bd_dcount = 0;
1573 d->bd_fcount = 0;
1574 d->bd_wcount = 0;
1575 d->bd_wdcount = 0;
1576
1577 d->bd_prev_slen = 0;
1578 }
1579
1580 static struct bpf_d *
bpf_get_device_from_uuid(uuid_t uuid)1581 bpf_get_device_from_uuid(uuid_t uuid)
1582 {
1583 unsigned int i;
1584
1585 for (i = 0; i < nbpfilter; i++) {
1586 struct bpf_d *d = bpf_dtab[i];
1587
1588 if (d == NULL || d == BPF_DEV_RESERVED ||
1589 (d->bd_flags & BPF_CLOSING) != 0) {
1590 continue;
1591 }
1592 if (uuid_compare(uuid, d->bd_uuid) == 0) {
1593 return d;
1594 }
1595 }
1596
1597 return NULL;
1598 }
1599
1600 /*
1601 * The BIOCSETUP command "atomically" attach to the interface and
1602 * copy the buffer from another interface. This minimizes the risk
1603 * of missing packet because this is done while holding
1604 * the BPF global lock
1605 */
1606 static int
bpf_setup(struct bpf_d * d_to,uuid_t uuid_from,ifnet_t ifp)1607 bpf_setup(struct bpf_d *d_to, uuid_t uuid_from, ifnet_t ifp)
1608 {
1609 struct bpf_d *d_from;
1610 int error = 0;
1611
1612 LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);
1613
1614 /*
1615 * Sanity checks
1616 */
1617 d_from = bpf_get_device_from_uuid(uuid_from);
1618 if (d_from == NULL) {
1619 error = ENOENT;
1620 os_log_error(OS_LOG_DEFAULT,
1621 "%s: uuids not found error %d",
1622 __func__, error);
1623 return error;
1624 }
1625 if (d_from->bd_opened_by != d_to->bd_opened_by) {
1626 error = EACCES;
1627 os_log_error(OS_LOG_DEFAULT,
1628 "%s: processes not matching error %d",
1629 __func__, error);
1630 return error;
1631 }
1632
1633 /*
1634 * Prevent any read or write while copying
1635 */
1636 while (d_to->bd_hbuf_read || d_to->bd_hbuf_write) {
1637 msleep((caddr_t)d_to, bpf_mlock, PRINET, __func__, NULL);
1638 }
1639 d_to->bd_hbuf_read = true;
1640 d_to->bd_hbuf_write = true;
1641
1642 while (d_from->bd_hbuf_read || d_from->bd_hbuf_write) {
1643 msleep((caddr_t)d_from, bpf_mlock, PRINET, __func__, NULL);
1644 }
1645 d_from->bd_hbuf_read = true;
1646 d_from->bd_hbuf_write = true;
1647
1648 /*
1649 * Verify the devices have not been closed
1650 */
1651 if (d_to->bd_flags & BPF_CLOSING) {
1652 error = ENXIO;
1653 os_log_error(OS_LOG_DEFAULT,
1654 "%s: d_to is closing error %d",
1655 __func__, error);
1656 goto done;
1657 }
1658 if (d_from->bd_flags & BPF_CLOSING) {
1659 error = ENXIO;
1660 os_log_error(OS_LOG_DEFAULT,
1661 "%s: d_from is closing error %d",
1662 __func__, error);
1663 goto done;
1664 }
1665
1666 /*
1667 * For now require the same buffer size
1668 */
1669 if (d_from->bd_bufsize != d_to->bd_bufsize) {
1670 error = EINVAL;
1671 os_log_error(OS_LOG_DEFAULT,
1672 "%s: bufsizes not matching error %d",
1673 __func__, error);
1674 goto done;
1675 }
1676
1677 /*
1678 * Copy relevant options and flags
1679 */
1680 d_to->bd_flags = d_from->bd_flags & (BPF_EXTENDED_HDR | BPF_WANT_PKTAP |
1681 BPF_FINALIZE_PKTAP | BPF_TRUNCATE | BPF_PKTHDRV2 |
1682 BPF_COMP_REQ | BPF_COMP_ENABLED);
1683
1684 d_to->bd_headdrop = d_from->bd_headdrop;
1685
1686 /*
1687 * Allocate and copy the buffers
1688 */
1689 error = bpf_allocbufs(d_to);
1690 if (error != 0) {
1691 goto done;
1692 }
1693
1694 /*
1695 * Make sure the buffers are setup as expected by bpf_setif()
1696 */
1697 ASSERT(d_to->bd_hbuf == NULL);
1698 ASSERT(d_to->bd_sbuf != NULL);
1699 ASSERT(d_to->bd_fbuf != NULL);
1700
1701 /*
1702 * Copy the buffers and update the pointers and counts
1703 */
1704 memcpy(d_to->bd_sbuf, d_from->bd_sbuf, d_from->bd_slen);
1705 d_to->bd_slen = d_from->bd_slen;
1706 d_to->bd_scnt = d_from->bd_scnt;
1707
1708 if (d_from->bd_hbuf != NULL) {
1709 d_to->bd_hbuf = d_to->bd_fbuf;
1710 d_to->bd_fbuf = NULL;
1711 memcpy(d_to->bd_hbuf, d_from->bd_hbuf, d_from->bd_hlen);
1712 }
1713 d_to->bd_hlen = d_from->bd_hlen;
1714 d_to->bd_hcnt = d_from->bd_hcnt;
1715
1716 if (d_to->bd_flags & BPF_COMP_REQ) {
1717 ASSERT(d_to->bd_prev_sbuf != NULL);
1718 ASSERT(d_to->bd_prev_fbuf != NULL);
1719
1720 d_to->bd_prev_slen = d_from->bd_prev_slen;
1721 ASSERT(d_to->bd_prev_slen <= BPF_HDR_COMP_LEN_MAX);
1722 memcpy(d_to->bd_prev_sbuf, d_from->bd_prev_sbuf, BPF_HDR_COMP_LEN_MAX);
1723 }
1724
1725 d_to->bd_bcs = d_from->bd_bcs;
1726
1727 /*
1728 * Attach to the interface:
1729 * - don't reset the buffers
1730 * - we already prevent reads and writes
1731 * - the buffers are already allocated
1732 */
1733 error = bpf_setif(d_to, ifp, false, true, true);
1734 if (error != 0) {
1735 os_log_error(OS_LOG_DEFAULT,
1736 "%s: bpf_setif() failed error %d",
1737 __func__, error);
1738 goto done;
1739 }
1740 done:
1741 d_from->bd_hbuf_read = false;
1742 d_from->bd_hbuf_write = false;
1743 wakeup((caddr_t)d_from);
1744
1745 d_to->bd_hbuf_read = false;
1746 d_to->bd_hbuf_write = false;
1747 wakeup((caddr_t)d_to);
1748
1749 return error;
1750 }
1751
#if DEVELOPMENT || DEBUG
/*
 * X-macro list of BPF ioctl commands known to the debug logger below;
 * each entry expands (via the local X() definition) into one switch
 * case in log_bpf_ioctl_str().
 */
#define BPF_IOC_LIST \
	X(FIONREAD) \
	X(SIOCGIFADDR) \
	X(BIOCGBLEN) \
	X(BIOCSBLEN) \
	X(BIOCSETF32) \
	X(BIOCSETFNR32) \
	X(BIOCSETF64) \
	X(BIOCSETFNR64) \
	X(BIOCFLUSH) \
	X(BIOCPROMISC) \
	X(BIOCGDLT) \
	X(BIOCGDLTLIST) \
	X(BIOCSDLT) \
	X(BIOCGETIF) \
	X(BIOCSETIF) \
	X(BIOCSRTIMEOUT32) \
	X(BIOCSRTIMEOUT64) \
	X(BIOCGRTIMEOUT32) \
	X(BIOCGRTIMEOUT64) \
	X(BIOCGSTATS) \
	X(BIOCIMMEDIATE) \
	X(BIOCVERSION) \
	X(BIOCGHDRCMPLT) \
	X(BIOCSHDRCMPLT) \
	X(BIOCGSEESENT) \
	X(BIOCSSEESENT) \
	X(BIOCSETTC) \
	X(BIOCGETTC) \
	X(FIONBIO) \
	X(FIOASYNC) \
	X(BIOCSRSIG) \
	X(BIOCGRSIG) \
	X(BIOCSEXTHDR) \
	X(BIOCGIFATTACHCOUNT) \
	X(BIOCGWANTPKTAP) \
	X(BIOCSWANTPKTAP) \
	X(BIOCSHEADDROP) \
	X(BIOCGHEADDROP) \
	X(BIOCSTRUNCATE) \
	X(BIOCGETUUID) \
	X(BIOCSETUP) \
	X(BIOCSPKTHDRV2) \
	X(BIOCGHDRCOMP) \
	X(BIOCSHDRCOMP) \
	X(BIOCGHDRCOMPSTATS) \
	X(BIOCGHDRCOMPON)

/*
 * Log the symbolic name of a BPF ioctl command (or its hex value when
 * the command is not in BPF_IOC_LIST). DEVELOPMENT/DEBUG kernels only.
 */
static void
log_bpf_ioctl_str(struct bpf_d *d, u_long cmd)
{
	const char *p = NULL;
	char str[32];

/* Expand each listed command into a case that captures its name */
#define X(x) case x: { p = #x ; printf("%s\n", p); break; }
	switch (cmd) {
		BPF_IOC_LIST
	}
#undef X
	if (p == NULL) {
		/* Unknown command: fall back to the numeric value */
		snprintf(str, sizeof(str), "0x%08x", (unsigned int)cmd);
		p = str;
	}
	os_log(OS_LOG_DEFAULT, "bpfioctl bpf%u %s",
	    d->bd_dev_minor, p);
}
#endif /* DEVELOPMENT || DEBUG */
1820
1821 /*
1822 * FIONREAD Check for read packet available.
1823 * SIOCGIFADDR Get interface address - convenient hook to driver.
1824 * BIOCGBLEN Get buffer len [for read()].
1825 * BIOCSETF Set ethernet read filter.
1826 * BIOCFLUSH Flush read packet buffer.
1827 * BIOCPROMISC Put interface into promiscuous mode.
1828 * BIOCGDLT Get link layer type.
1829 * BIOCGETIF Get interface name.
1830 * BIOCSETIF Set interface.
1831 * BIOCSRTIMEOUT Set read timeout.
1832 * BIOCGRTIMEOUT Get read timeout.
1833 * BIOCGSTATS Get packet stats.
1834 * BIOCIMMEDIATE Set immediate mode.
1835 * BIOCVERSION Get filter language version.
1836 * BIOCGHDRCMPLT Get "header already complete" flag
1837 * BIOCSHDRCMPLT Set "header already complete" flag
1838 * BIOCGSEESENT Get "see packets sent" flag
1839 * BIOCSSEESENT Set "see packets sent" flag
1840 * BIOCSETTC Set traffic class.
1841 * BIOCGETTC Get traffic class.
1842 * BIOCSEXTHDR Set "extended header" flag
1843 * BIOCSHEADDROP Drop head of the buffer if user is not reading
1844 * BIOCGHEADDROP Get "head-drop" flag
1845 */
1846 /* ARGSUSED */
1847 int
bpfioctl(dev_t dev,u_long cmd,caddr_t addr,__unused int flags,struct proc * p)1848 bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags,
1849 struct proc *p)
1850 {
1851 struct bpf_d *d;
1852 int error = 0;
1853 u_int int_arg;
1854 struct ifreq ifr = {};
1855
1856 lck_mtx_lock(bpf_mlock);
1857
1858 d = bpf_dtab[minor(dev)];
1859 if (d == NULL || d == BPF_DEV_RESERVED ||
1860 (d->bd_flags & BPF_CLOSING) != 0) {
1861 lck_mtx_unlock(bpf_mlock);
1862 return ENXIO;
1863 }
1864
1865 bpf_acquire_d(d);
1866
1867 if (d->bd_state == BPF_WAITING) {
1868 bpf_stop_timer(d);
1869 }
1870 d->bd_state = BPF_IDLE;
1871
1872 #if DEVELOPMENT || DEBUG
1873 if (bpf_debug > 0) {
1874 log_bpf_ioctl_str(d, cmd);
1875 }
1876 #endif /* DEVELOPMENT || DEBUG */
1877
1878 switch (cmd) {
1879 default:
1880 error = EINVAL;
1881 break;
1882
1883 /*
1884 * Check for read packet available.
1885 */
1886 case FIONREAD: /* int */
1887 {
1888 int n;
1889
1890 n = d->bd_slen;
1891 if (d->bd_hbuf && d->bd_hbuf_read) {
1892 n += d->bd_hlen;
1893 }
1894
1895 bcopy(&n, addr, sizeof(n));
1896 break;
1897 }
1898
1899 case SIOCGIFADDR: /* struct ifreq */
1900 {
1901 struct ifnet *ifp;
1902
1903 if (d->bd_bif == 0) {
1904 error = EINVAL;
1905 } else {
1906 ifp = d->bd_bif->bif_ifp;
1907 error = ifnet_ioctl(ifp, 0, cmd, addr);
1908 }
1909 break;
1910 }
1911
1912 /*
1913 * Get buffer len [for read()].
1914 */
1915 case BIOCGBLEN: /* u_int */
1916 bcopy(&d->bd_bufsize, addr, sizeof(u_int));
1917 break;
1918
1919 /*
1920 * Set buffer length.
1921 */
1922 case BIOCSBLEN: { /* u_int */
1923 u_int size;
1924
1925 if (d->bd_bif != 0 || (d->bd_flags & BPF_DETACHING)) {
1926 /*
1927 * Interface already attached, unable to change buffers
1928 */
1929 error = EINVAL;
1930 break;
1931 }
1932 bcopy(addr, &size, sizeof(size));
1933
1934 if (size > BPF_BUFSIZE_CAP) {
1935 d->bd_bufsize = BPF_BUFSIZE_CAP;
1936
1937 os_log_info(OS_LOG_DEFAULT,
1938 "bpf%d BIOCSBLEN capped to %u from %u",
1939 minor(dev), d->bd_bufsize, size);
1940 } else if (size < BPF_MINBUFSIZE) {
1941 d->bd_bufsize = BPF_MINBUFSIZE;
1942
1943 os_log_info(OS_LOG_DEFAULT,
1944 "bpf%d BIOCSBLEN bumped to %u from %u",
1945 minor(dev), d->bd_bufsize, size);
1946 } else {
1947 d->bd_bufsize = size;
1948
1949 os_log_info(OS_LOG_DEFAULT,
1950 "bpf%d BIOCSBLEN %u",
1951 minor(dev), d->bd_bufsize);
1952 }
1953
1954 /* It's a read/write ioctl */
1955 bcopy(&d->bd_bufsize, addr, sizeof(u_int));
1956 break;
1957 }
1958 /*
1959 * Set link layer read filter.
1960 */
1961 case BIOCSETF32:
1962 case BIOCSETFNR32: { /* struct bpf_program32 */
1963 struct bpf_program32 prg32;
1964
1965 bcopy(addr, &prg32, sizeof(prg32));
1966 error = bpf_setf(d, prg32.bf_len,
1967 CAST_USER_ADDR_T(prg32.bf_insns), cmd);
1968 break;
1969 }
1970
1971 case BIOCSETF64:
1972 case BIOCSETFNR64: { /* struct bpf_program64 */
1973 struct bpf_program64 prg64;
1974
1975 bcopy(addr, &prg64, sizeof(prg64));
1976 error = bpf_setf(d, prg64.bf_len, CAST_USER_ADDR_T(prg64.bf_insns), cmd);
1977 break;
1978 }
1979
1980 /*
1981 * Flush read packet buffer.
1982 */
1983 case BIOCFLUSH:
1984 while (d->bd_hbuf_read) {
1985 msleep((caddr_t)d, bpf_mlock, PRINET, "BIOCFLUSH",
1986 NULL);
1987 }
1988 if ((d->bd_flags & BPF_CLOSING) != 0) {
1989 error = ENXIO;
1990 break;
1991 }
1992 reset_d(d);
1993 break;
1994
1995 /*
1996 * Put interface into promiscuous mode.
1997 */
1998 case BIOCPROMISC:
1999 if (d->bd_bif == 0) {
2000 /*
2001 * No interface attached yet.
2002 */
2003 error = EINVAL;
2004 break;
2005 }
2006 if (d->bd_promisc == 0) {
2007 lck_mtx_unlock(bpf_mlock);
2008 error = ifnet_set_promiscuous(d->bd_bif->bif_ifp, 1);
2009 lck_mtx_lock(bpf_mlock);
2010 if (error == 0) {
2011 d->bd_promisc = 1;
2012 }
2013 }
2014 break;
2015
2016 /*
2017 * Get device parameters.
2018 */
2019 case BIOCGDLT: /* u_int */
2020 if (d->bd_bif == 0) {
2021 error = EINVAL;
2022 } else {
2023 bcopy(&d->bd_bif->bif_dlt, addr, sizeof(u_int));
2024 }
2025 break;
2026
2027 /*
2028 * Get a list of supported data link types.
2029 */
2030 case BIOCGDLTLIST: /* struct bpf_dltlist */
2031 if (d->bd_bif == NULL) {
2032 error = EINVAL;
2033 } else {
2034 error = bpf_getdltlist(d, addr, p);
2035 }
2036 break;
2037
2038 /*
2039 * Set data link type.
2040 */
2041 case BIOCSDLT: /* u_int */
2042 if (d->bd_bif == NULL) {
2043 error = EINVAL;
2044 } else {
2045 u_int dlt;
2046
2047 bcopy(addr, &dlt, sizeof(dlt));
2048
2049 if (dlt == DLT_PKTAP &&
2050 !(d->bd_flags & BPF_WANT_PKTAP)) {
2051 dlt = DLT_RAW;
2052 }
2053 error = bpf_setdlt(d, dlt);
2054 }
2055 break;
2056
2057 /*
2058 * Get interface name.
2059 */
2060 case BIOCGETIF: /* struct ifreq */
2061 if (d->bd_bif == 0) {
2062 error = EINVAL;
2063 } else {
2064 struct ifnet *const ifp = d->bd_bif->bif_ifp;
2065
2066 snprintf(((struct ifreq *)(void *)addr)->ifr_name,
2067 sizeof(ifr.ifr_name), "%s", if_name(ifp));
2068 }
2069 break;
2070
2071 /*
2072 * Set interface.
2073 */
2074 case BIOCSETIF: { /* struct ifreq */
2075 ifnet_t ifp;
2076
2077 bcopy(addr, &ifr, sizeof(ifr));
2078 ifr.ifr_name[IFNAMSIZ - 1] = '\0';
2079 ifp = ifunit(ifr.ifr_name);
2080 if (ifp == NULL) {
2081 error = ENXIO;
2082 } else {
2083 error = bpf_setif(d, ifp, true, false, false);
2084 }
2085 break;
2086 }
2087
2088 /*
2089 * Set read timeout.
2090 */
2091 case BIOCSRTIMEOUT32: { /* struct user32_timeval */
2092 struct user32_timeval _tv;
2093 struct timeval tv;
2094
2095 bcopy(addr, &_tv, sizeof(_tv));
2096 tv.tv_sec = _tv.tv_sec;
2097 tv.tv_usec = _tv.tv_usec;
2098
2099 /*
2100 * Subtract 1 tick from tvtohz() since this isn't
2101 * a one-shot timer.
2102 */
2103 if ((error = itimerfix(&tv)) == 0) {
2104 d->bd_rtout = tvtohz(&tv) - 1;
2105 }
2106 break;
2107 }
2108
2109 case BIOCSRTIMEOUT64: { /* struct user64_timeval */
2110 struct user64_timeval _tv;
2111 struct timeval tv;
2112
2113 bcopy(addr, &_tv, sizeof(_tv));
2114 tv.tv_sec = (__darwin_time_t)_tv.tv_sec;
2115 tv.tv_usec = _tv.tv_usec;
2116
2117 /*
2118 * Subtract 1 tick from tvtohz() since this isn't
2119 * a one-shot timer.
2120 */
2121 if ((error = itimerfix(&tv)) == 0) {
2122 d->bd_rtout = tvtohz(&tv) - 1;
2123 }
2124 break;
2125 }
2126
2127 /*
2128 * Get read timeout.
2129 */
2130 case BIOCGRTIMEOUT32: { /* struct user32_timeval */
2131 struct user32_timeval tv;
2132
2133 bzero(&tv, sizeof(tv));
2134 tv.tv_sec = d->bd_rtout / hz;
2135 tv.tv_usec = (d->bd_rtout % hz) * tick;
2136 bcopy(&tv, addr, sizeof(tv));
2137 break;
2138 }
2139
2140 case BIOCGRTIMEOUT64: { /* struct user64_timeval */
2141 struct user64_timeval tv;
2142
2143 bzero(&tv, sizeof(tv));
2144 tv.tv_sec = d->bd_rtout / hz;
2145 tv.tv_usec = (d->bd_rtout % hz) * tick;
2146 bcopy(&tv, addr, sizeof(tv));
2147 break;
2148 }
2149
2150 /*
2151 * Get packet stats.
2152 */
2153 case BIOCGSTATS: { /* struct bpf_stat */
2154 struct bpf_stat bs;
2155
2156 bzero(&bs, sizeof(bs));
2157 bs.bs_recv = (u_int)d->bd_rcount;
2158 bs.bs_drop = (u_int)d->bd_dcount;
2159 bcopy(&bs, addr, sizeof(bs));
2160 break;
2161 }
2162
2163 /*
2164 * Set immediate mode.
2165 */
2166 case BIOCIMMEDIATE: /* u_int */
2167 d->bd_immediate = *(u_char *)(void *)addr;
2168 break;
2169
2170 case BIOCVERSION: { /* struct bpf_version */
2171 struct bpf_version bv;
2172
2173 bzero(&bv, sizeof(bv));
2174 bv.bv_major = BPF_MAJOR_VERSION;
2175 bv.bv_minor = BPF_MINOR_VERSION;
2176 bcopy(&bv, addr, sizeof(bv));
2177 break;
2178 }
2179
2180 /*
2181 * Get "header already complete" flag
2182 */
2183 case BIOCGHDRCMPLT: /* u_int */
2184 bcopy(&d->bd_hdrcmplt, addr, sizeof(u_int));
2185 break;
2186
2187 /*
2188 * Set "header already complete" flag
2189 */
2190 case BIOCSHDRCMPLT: /* u_int */
2191 bcopy(addr, &int_arg, sizeof(int_arg));
2192 d->bd_hdrcmplt = int_arg ? 1 : 0;
2193 break;
2194
2195 /*
2196 * Get "see sent packets" flag
2197 */
2198 case BIOCGSEESENT: { /* u_int */
2199 int_arg = 0;
2200
2201 if (d->bd_direction & BPF_D_OUT) {
2202 int_arg = 1;
2203 }
2204 bcopy(&int_arg, addr, sizeof(u_int));
2205 break;
2206 }
2207 /*
2208 * Set "see sent packets" flag
2209 */
2210 case BIOCSSEESENT: { /* u_int */
2211 bcopy(addr, &int_arg, sizeof(u_int));
2212
2213 if (int_arg == 0) {
2214 d->bd_direction = BPF_D_IN;
2215 } else {
2216 d->bd_direction = BPF_D_INOUT;
2217 }
2218 break;
2219 }
2220 /*
2221 * Get direction of tapped packets that can be seen for reading
2222 */
2223 case BIOCGDIRECTION: { /* u_int */
2224 int_arg = d->bd_direction;
2225
2226 bcopy(&int_arg, addr, sizeof(u_int));
2227 break;
2228 }
2229 /*
2230 * Set direction of tapped packets that can be seen for reading
2231 */
2232 case BIOCSDIRECTION: { /* u_int */
2233 bcopy(addr, &int_arg, sizeof(u_int));
2234
2235 switch (int_arg) {
2236 case BPF_D_NONE:
2237 case BPF_D_IN:
2238 case BPF_D_OUT:
2239 case BPF_D_INOUT:
2240 d->bd_direction = int_arg;
2241 break;
2242 default:
2243 error = EINVAL;
2244 break;
2245 }
2246 break;
2247 }
2248 /*
2249 * Set traffic service class
2250 */
2251 case BIOCSETTC: { /* int */
2252 int tc;
2253
2254 bcopy(addr, &tc, sizeof(int));
2255 error = bpf_set_traffic_class(d, tc);
2256 break;
2257 }
2258
2259 /*
2260 * Get traffic service class
2261 */
2262 case BIOCGETTC: /* int */
2263 bcopy(&d->bd_traffic_class, addr, sizeof(int));
2264 break;
2265
2266 case FIONBIO: /* Non-blocking I/O; int */
2267 break;
2268
2269 case FIOASYNC: /* Send signal on receive packets; int */
2270 bcopy(addr, &d->bd_async, sizeof(int));
2271 break;
2272
2273 case BIOCSRSIG: { /* Set receive signal; u_int */
2274 u_int sig;
2275
2276 bcopy(addr, &sig, sizeof(u_int));
2277
2278 if (sig >= NSIG) {
2279 error = EINVAL;
2280 } else {
2281 d->bd_sig = sig;
2282 }
2283 break;
2284 }
2285 case BIOCGRSIG: /* u_int */
2286 bcopy(&d->bd_sig, addr, sizeof(u_int));
2287 break;
2288
2289 case BIOCSEXTHDR: /* u_int */
2290 bcopy(addr, &int_arg, sizeof(int_arg));
2291 if (int_arg) {
2292 d->bd_flags |= BPF_EXTENDED_HDR;
2293 } else {
2294 d->bd_flags &= ~BPF_EXTENDED_HDR;
2295 }
2296 break;
2297
2298 case BIOCGIFATTACHCOUNT: { /* struct ifreq */
2299 ifnet_t ifp;
2300 struct bpf_if *bp;
2301
2302 bcopy(addr, &ifr, sizeof(ifr));
2303 ifr.ifr_name[IFNAMSIZ - 1] = '\0';
2304 ifp = ifunit(ifr.ifr_name);
2305 if (ifp == NULL) {
2306 error = ENXIO;
2307 break;
2308 }
2309 ifr.ifr_intval = 0;
2310 for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
2311 struct bpf_d *bpf_d;
2312
2313 if (bp->bif_ifp == NULL || bp->bif_ifp != ifp) {
2314 continue;
2315 }
2316 for (bpf_d = bp->bif_dlist; bpf_d;
2317 bpf_d = bpf_d->bd_next) {
2318 ifr.ifr_intval += 1;
2319 }
2320 }
2321 bcopy(&ifr, addr, sizeof(ifr));
2322 break;
2323 }
2324 case BIOCGWANTPKTAP: /* u_int */
2325 int_arg = d->bd_flags & BPF_WANT_PKTAP ? 1 : 0;
2326 bcopy(&int_arg, addr, sizeof(int_arg));
2327 break;
2328
2329 case BIOCSWANTPKTAP: /* u_int */
2330 bcopy(addr, &int_arg, sizeof(int_arg));
2331 if (int_arg) {
2332 d->bd_flags |= BPF_WANT_PKTAP;
2333 } else {
2334 d->bd_flags &= ~BPF_WANT_PKTAP;
2335 }
2336 break;
2337
2338 case BIOCSHEADDROP:
2339 bcopy(addr, &int_arg, sizeof(int_arg));
2340 d->bd_headdrop = int_arg ? 1 : 0;
2341 break;
2342
2343 case BIOCGHEADDROP:
2344 bcopy(&d->bd_headdrop, addr, sizeof(int));
2345 break;
2346
2347 case BIOCSTRUNCATE:
2348 bcopy(addr, &int_arg, sizeof(int_arg));
2349 if (int_arg) {
2350 d->bd_flags |= BPF_TRUNCATE;
2351 } else {
2352 d->bd_flags &= ~BPF_TRUNCATE;
2353 }
2354 break;
2355
2356 case BIOCGETUUID:
2357 bcopy(&d->bd_uuid, addr, sizeof(uuid_t));
2358 break;
2359
2360 case BIOCSETUP: {
2361 struct bpf_setup_args bsa;
2362 ifnet_t ifp;
2363
2364 bcopy(addr, &bsa, sizeof(struct bpf_setup_args));
2365 bsa.bsa_ifname[IFNAMSIZ - 1] = 0;
2366 ifp = ifunit(bsa.bsa_ifname);
2367 if (ifp == NULL) {
2368 error = ENXIO;
2369 os_log_error(OS_LOG_DEFAULT,
2370 "%s: ifnet not found for %s error %d",
2371 __func__, bsa.bsa_ifname, error);
2372 break;
2373 }
2374
2375 error = bpf_setup(d, bsa.bsa_uuid, ifp);
2376 break;
2377 }
2378 case BIOCSPKTHDRV2:
2379 bcopy(addr, &int_arg, sizeof(int_arg));
2380 if (int_arg != 0) {
2381 d->bd_flags |= BPF_PKTHDRV2;
2382 } else {
2383 d->bd_flags &= ~BPF_PKTHDRV2;
2384 }
2385 break;
2386
2387 case BIOCGPKTHDRV2:
2388 int_arg = d->bd_flags & BPF_PKTHDRV2 ? 1 : 0;
2389 bcopy(&int_arg, addr, sizeof(int_arg));
2390 break;
2391
2392 case BIOCGHDRCOMP:
2393 int_arg = d->bd_flags & BPF_COMP_REQ ? 1 : 0;
2394 bcopy(&int_arg, addr, sizeof(int_arg));
2395 break;
2396
2397 case BIOCSHDRCOMP:
2398 bcopy(addr, &int_arg, sizeof(int_arg));
2399 if (int_arg != 0 && int_arg != 1) {
2400 return EINVAL;
2401 }
2402 if (d->bd_bif != 0 || (d->bd_flags & BPF_DETACHING)) {
2403 /*
2404 * Interface already attached, unable to change buffers
2405 */
2406 error = EINVAL;
2407 break;
2408 }
2409 if (int_arg != 0) {
2410 d->bd_flags |= BPF_COMP_REQ;
2411 if (bpf_hdr_comp_enable != 0) {
2412 d->bd_flags |= BPF_COMP_ENABLED;
2413 }
2414 } else {
2415 d->bd_flags &= ~(BPF_COMP_REQ | BPF_COMP_ENABLED);
2416 }
2417 break;
2418
2419 case BIOCGHDRCOMPON:
2420 int_arg = d->bd_flags & BPF_COMP_ENABLED ? 1 : 0;
2421 bcopy(&int_arg, addr, sizeof(int_arg));
2422 break;
2423
2424 case BIOCGHDRCOMPSTATS: {
2425 struct bpf_comp_stats bcs = {};
2426
2427 bcs = d->bd_bcs;
2428
2429 bcopy(&bcs, addr, sizeof(bcs));
2430 break;
2431 }
2432 }
2433
2434 bpf_release_d(d);
2435 lck_mtx_unlock(bpf_mlock);
2436
2437 return error;
2438 }
2439
/*
 * Set d's packet filter program to fp. If this file already has a filter,
 * free it and replace it. Returns EINVAL for bogus requests.
 */
static int
bpf_setf(struct bpf_d *d, u_int bf_len, user_addr_t bf_insns,
    u_long cmd)
{
	struct bpf_insn *fcode, *old;
	u_int flen, size;

	/*
	 * Wait for any in-progress read that is draining the hold buffer.
	 * msleep drops and re-acquires bpf_mlock, so descriptor state must
	 * be revalidated after waking up.
	 */
	while (d->bd_hbuf_read) {
		msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_setf", NULL);
	}

	/* The device may have started closing while we slept */
	if ((d->bd_flags & BPF_CLOSING) != 0) {
		return ENXIO;
	}

	old = d->bd_filter;
	if (bf_insns == USER_ADDR_NULL) {
		/* A NULL program with a non-zero length is a bogus request */
		if (bf_len != 0) {
			return EINVAL;
		}
		/* Clear the filter: every packet will match again */
		d->bd_filter = NULL;
		reset_d(d);
		if (old != 0) {
			kfree_data_addr(old);
		}
		return 0;
	}
	flen = bf_len;
	if (flen > BPF_MAXINSNS) {
		return EINVAL;
	}

	size = flen * sizeof(struct bpf_insn);
	fcode = (struct bpf_insn *) kalloc_data(size, Z_WAITOK | Z_ZERO);
	if (fcode == NULL) {
		return ENOMEM;
	}
	/* Install the program only if it copies in and validates cleanly */
	if (copyin(bf_insns, (caddr_t)fcode, size) == 0 &&
	    bpf_validate(fcode, (int)flen)) {
		d->bd_filter = fcode;

		/* A full filter set (not an adjustment) flushes buffered packets */
		if (cmd == BIOCSETF32 || cmd == BIOCSETF64) {
			reset_d(d);
		}

		/* Free the previous program only after the new one is in place */
		if (old != 0) {
			kfree_data_addr(old);
		}

		return 0;
	}
	kfree_data(fcode, size);
	return EINVAL;
}
2498
/*
 * Detach a file from its current interface (if attached at all) and attach
 * to the interface indicated by the name stored in ifr.
 * Return an errno or 0.
 */
static int
bpf_setif(struct bpf_d *d, ifnet_t theywant, bool do_reset, bool has_hbuf_read_write,
    bool has_bufs_allocated)
{
	struct bpf_if *bp;
	int error;

	/*
	 * Unless the caller already owns the hold buffer, wait for
	 * concurrent buffer users. msleep drops bpf_mlock, so the closing
	 * flag must be rechecked after waking up.
	 */
	while (!has_hbuf_read_write && (d->bd_hbuf_read || d->bd_hbuf_write)) {
		msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_setif", NULL);
	}

	if ((d->bd_flags & BPF_CLOSING) != 0) {
		return ENXIO;
	}

	/*
	 * Look through attached interfaces for the named one.
	 */
	for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
		struct ifnet *ifp = bp->bif_ifp;

		if (ifp == 0 || ifp != theywant) {
			continue;
		}
		/*
		 * Do not use DLT_PKTAP, unless requested explicitly
		 */
		if (bp->bif_dlt == DLT_PKTAP && !(d->bd_flags & BPF_WANT_PKTAP)) {
			continue;
		}
		/*
		 * Skip the coprocessor interface
		 */
		if (!intcoproc_unrestricted && IFNET_IS_INTCOPROC(ifp)) {
			continue;
		}
		/*
		 * We found the requested interface.
		 * Allocate the packet buffers.
		 */
		if (has_bufs_allocated == false) {
			error = bpf_allocbufs(d);
			if (error != 0) {
				return error;
			}
		}
		/*
		 * Detach if attached to something else.
		 */
		if (bp != d->bd_bif) {
			if (d->bd_bif != NULL) {
				if (bpf_detachd(d) != 0) {
					return ENXIO;
				}
			}
			if (bpf_attachd(d, bp) != 0) {
				return ENXIO;
			}
		}
		if (do_reset) {
			reset_d(d);
		}
		os_log(OS_LOG_DEFAULT, "bpf%u attached to %s",
		    d->bd_dev_minor, if_name(theywant));
		return 0;
	}
	/* Not found. */
	return ENXIO;
}
2573
/*
 * Get a list of available data link type of the interface.
 */
static int
bpf_getdltlist(struct bpf_d *d, caddr_t addr, struct proc *p)
{
	u_int n;
	int error;
	struct ifnet *ifp;
	struct bpf_if *bp;
	user_addr_t dlist;
	struct bpf_dltlist bfl;

	bcopy(addr, &bfl, sizeof(bfl));
	/* The user list pointer is laid out differently for 32/64-bit callers */
	if (proc_is64bit(p)) {
		dlist = (user_addr_t)bfl.bfl_u.bflu_pad;
	} else {
		dlist = CAST_USER_ADDR_T(bfl.bfl_u.bflu_list);
	}

	/* NOTE(review): assumes d->bd_bif != NULL — presumably enforced by the ioctl caller; confirm */
	ifp = d->bd_bif->bif_ifp;
	n = 0;
	error = 0;

	for (bp = bpf_iflist; bp; bp = bp->bif_next) {
		if (bp->bif_ifp != ifp) {
			continue;
		}
		/*
		 * Do not use DLT_PKTAP, unless requested explicitly
		 */
		if (bp->bif_dlt == DLT_PKTAP && !(d->bd_flags & BPF_WANT_PKTAP)) {
			continue;
		}
		/* With a NULL list, only count; otherwise also copy out */
		if (dlist != USER_ADDR_NULL) {
			/* More DLTs than the caller's buffer can hold */
			if (n >= bfl.bfl_len) {
				return ENOMEM;
			}
			error = copyout(&bp->bif_dlt, dlist,
			    sizeof(bp->bif_dlt));
			if (error != 0) {
				break;
			}
			dlist += sizeof(bp->bif_dlt);
		}
		n++;
	}
	/* Report back how many DLTs were found/copied */
	bfl.bfl_len = n;
	bcopy(&bfl, addr, sizeof(bfl));

	return error;
}
2626
/*
 * Set the data link type of a BPF instance.
 */
static int
bpf_setdlt(struct bpf_d *d, uint32_t dlt)
{
	int error, opromisc;
	struct ifnet *ifp;
	struct bpf_if *bp;

	/* Nothing to do when the requested DLT is already in effect */
	if (d->bd_bif->bif_dlt == dlt) {
		return 0;
	}

	/*
	 * Wait for in-progress reads; msleep drops bpf_mlock so the
	 * closing state must be rechecked after waking up.
	 */
	while (d->bd_hbuf_read) {
		msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_setdlt", NULL);
	}

	if ((d->bd_flags & BPF_CLOSING) != 0) {
		return ENXIO;
	}

	/* Find an attachment point with the new DLT on the same interface */
	ifp = d->bd_bif->bif_ifp;
	for (bp = bpf_iflist; bp; bp = bp->bif_next) {
		if (bp->bif_ifp == ifp && bp->bif_dlt == dlt) {
			/*
			 * Do not use DLT_PKTAP, unless requested explicitly
			 */
			if (bp->bif_dlt == DLT_PKTAP &&
			    !(d->bd_flags & BPF_WANT_PKTAP)) {
				continue;
			}
			break;
		}
	}
	if (bp != NULL) {
		/* Remember promiscuity so it can be restored after re-attach */
		opromisc = d->bd_promisc;
		if (bpf_detachd(d) != 0) {
			return ENXIO;
		}
		error = bpf_attachd(d, bp);
		if (error != 0) {
			os_log_error(OS_LOG_DEFAULT,
			    "bpf_setdlt: bpf%d bpf_attachd %s error %d",
			    d->bd_dev_minor, if_name(bp->bif_ifp),
			    error);
			return error;
		}
		reset_d(d);
		if (opromisc) {
			/*
			 * Drop bpf_mlock around ifnet_set_promiscuous to
			 * avoid deadlocking against driver locks (see the
			 * lock-ordering note in bpf_tap_imp).
			 */
			lck_mtx_unlock(bpf_mlock);
			error = ifnet_set_promiscuous(bp->bif_ifp, 1);
			lck_mtx_lock(bpf_mlock);
			if (error != 0) {
				os_log_error(OS_LOG_DEFAULT,
				    "bpf_setdlt: bpf%d ifpromisc %s error %d",
				    d->bd_dev_minor, if_name(bp->bif_ifp), error);
			} else {
				d->bd_promisc = 1;
			}
		}
	}
	return bp == NULL ? EINVAL : 0;
}
2691
2692 static int
bpf_set_traffic_class(struct bpf_d * d,int tc)2693 bpf_set_traffic_class(struct bpf_d *d, int tc)
2694 {
2695 int error = 0;
2696
2697 if (!SO_VALID_TC(tc)) {
2698 error = EINVAL;
2699 } else {
2700 d->bd_traffic_class = tc;
2701 }
2702
2703 return error;
2704 }
2705
2706 static void
bpf_set_packet_service_class(struct mbuf * m,int tc)2707 bpf_set_packet_service_class(struct mbuf *m, int tc)
2708 {
2709 if (!(m->m_flags & M_PKTHDR)) {
2710 return;
2711 }
2712
2713 VERIFY(SO_VALID_TC(tc));
2714 (void) m_set_service_class(m, so_tc2msc(tc));
2715 }
2716
/*
 * Support for select()
 *
 * Return true iff the specific operation will not block indefinitely.
 * Otherwise, return false but make a note that a selwakeup() must be done.
 */
int
bpfselect(dev_t dev, int which, void * wql, struct proc *p)
{
	struct bpf_d *d;
	int ret = 0;

	lck_mtx_lock(bpf_mlock);

	d = bpf_dtab[minor(dev)];
	if (d == NULL || d == BPF_DEV_RESERVED ||
	    (d->bd_flags & BPF_CLOSING) != 0) {
		lck_mtx_unlock(bpf_mlock);
		return ENXIO;
	}

	/* Hold a reference so the descriptor cannot go away while we sleep */
	bpf_acquire_d(d);

	/* Not attached to an interface: nothing to select on */
	if (d->bd_bif == NULL) {
		bpf_release_d(d);
		lck_mtx_unlock(bpf_mlock);
		return ENXIO;
	}

	/*
	 * Wait for any in-progress read; msleep drops bpf_mlock, so the
	 * closing state must be rechecked after waking up.
	 */
	while (d->bd_hbuf_read) {
		msleep((caddr_t)d, bpf_mlock, PRINET, "bpfselect", NULL);
	}

	if ((d->bd_flags & BPF_CLOSING) != 0) {
		bpf_release_d(d);
		lck_mtx_unlock(bpf_mlock);
		return ENXIO;
	}

	switch (which) {
	case FREAD:
		/*
		 * Readable if the hold buffer has data, or if the store
		 * buffer has data and a read would rotate it (immediate
		 * mode or an expired read timeout).
		 */
		if (d->bd_hlen != 0 ||
		    ((d->bd_immediate ||
		    d->bd_state == BPF_TIMED_OUT) && d->bd_slen != 0)) {
			ret = 1; /* read has data to return */
		} else {
			/*
			 * Read has no data to return.
			 * Make the select wait, and start a timer if
			 * necessary.
			 */
			selrecord(p, &d->bd_sel, wql);
			bpf_start_timer(d);
		}
		break;

	case FWRITE:
		/* can't determine whether a write would block */
		ret = 1;
		break;
	}

	bpf_release_d(d);
	lck_mtx_unlock(bpf_mlock);

	return ret;
}
2784
/*
 * Support for kevent() system call. Register EVFILT_READ filters and
 * reject all others.
 */
int bpfkqfilter(dev_t dev, struct knote *kn);
static void filt_bpfdetach(struct knote *);
static int filt_bpfread(struct knote *, long);
static int filt_bpftouch(struct knote *kn, struct kevent_qos_s *kev);
static int filt_bpfprocess(struct knote *kn, struct kevent_qos_s *kev);

/* Filter operations vector for EVFILT_READ knotes on bpf devices */
SECURITY_READ_ONLY_EARLY(struct filterops) bpfread_filtops = {
	.f_isfd = 1, /* ident is a file descriptor */
	.f_detach = filt_bpfdetach,
	.f_event = filt_bpfread,
	.f_touch = filt_bpftouch,
	.f_process = filt_bpfprocess,
};
2802
2803 static int
filt_bpfread_common(struct knote * kn,struct kevent_qos_s * kev,struct bpf_d * d)2804 filt_bpfread_common(struct knote *kn, struct kevent_qos_s *kev, struct bpf_d *d)
2805 {
2806 int ready = 0;
2807 int64_t data = 0;
2808
2809 if (d->bd_immediate) {
2810 /*
2811 * If there's data in the hold buffer, it's the
2812 * amount of data a read will return.
2813 *
2814 * If there's no data in the hold buffer, but
2815 * there's data in the store buffer, a read will
2816 * immediately rotate the store buffer to the
2817 * hold buffer, the amount of data in the store
2818 * buffer is the amount of data a read will
2819 * return.
2820 *
2821 * If there's no data in either buffer, we're not
2822 * ready to read.
2823 */
2824 data = (d->bd_hlen == 0 || d->bd_hbuf_read ?
2825 d->bd_slen : d->bd_hlen);
2826 int64_t lowwat = knote_low_watermark(kn);
2827 if (lowwat > d->bd_bufsize) {
2828 lowwat = d->bd_bufsize;
2829 }
2830 ready = (data >= lowwat);
2831 } else {
2832 /*
2833 * If there's data in the hold buffer, it's the
2834 * amount of data a read will return.
2835 *
2836 * If there's no data in the hold buffer, but
2837 * there's data in the store buffer, if the
2838 * timer has expired a read will immediately
2839 * rotate the store buffer to the hold buffer,
2840 * so the amount of data in the store buffer is
2841 * the amount of data a read will return.
2842 *
2843 * If there's no data in either buffer, or there's
2844 * no data in the hold buffer and the timer hasn't
2845 * expired, we're not ready to read.
2846 */
2847 data = ((d->bd_hlen == 0 || d->bd_hbuf_read) &&
2848 d->bd_state == BPF_TIMED_OUT ? d->bd_slen : d->bd_hlen);
2849 ready = (data > 0);
2850 }
2851 if (!ready) {
2852 bpf_start_timer(d);
2853 } else if (kev) {
2854 knote_fill_kevent(kn, kev, data);
2855 }
2856
2857 return ready;
2858 }
2859
int
bpfkqfilter(dev_t dev, struct knote *kn)
{
	struct bpf_d *d;
	int res;

	/*
	 * Is this device a bpf?
	 */
	if (major(dev) != CDEV_MAJOR || kn->kn_filter != EVFILT_READ) {
		knote_set_error(kn, EINVAL);
		return 0;
	}

	lck_mtx_lock(bpf_mlock);

	d = bpf_dtab[minor(dev)];

	/* Reject unopened, reserved, closing, or unattached descriptors */
	if (d == NULL || d == BPF_DEV_RESERVED ||
	    (d->bd_flags & BPF_CLOSING) != 0 ||
	    d->bd_bif == NULL) {
		lck_mtx_unlock(bpf_mlock);
		knote_set_error(kn, ENXIO);
		return 0;
	}

	/* Link the knote to the descriptor and note that one is attached */
	kn->kn_filtid = EVFILTID_BPFREAD;
	knote_kn_hook_set_raw(kn, d);
	KNOTE_ATTACH(&d->bd_sel.si_note, kn);
	d->bd_flags |= BPF_KNOTE;

	/* capture the current state */
	res = filt_bpfread_common(kn, NULL, d);

	lck_mtx_unlock(bpf_mlock);

	return res;
}
2898
2899 static void
filt_bpfdetach(struct knote * kn)2900 filt_bpfdetach(struct knote *kn)
2901 {
2902 struct bpf_d *d = (struct bpf_d *)knote_kn_hook_get_raw(kn);
2903
2904 lck_mtx_lock(bpf_mlock);
2905 if (d->bd_flags & BPF_KNOTE) {
2906 KNOTE_DETACH(&d->bd_sel.si_note, kn);
2907 d->bd_flags &= ~BPF_KNOTE;
2908 }
2909 lck_mtx_unlock(bpf_mlock);
2910 }
2911
2912 static int
filt_bpfread(struct knote * kn,long hint)2913 filt_bpfread(struct knote *kn, long hint)
2914 {
2915 #pragma unused(hint)
2916 struct bpf_d *d = (struct bpf_d *)knote_kn_hook_get_raw(kn);
2917
2918 return filt_bpfread_common(kn, NULL, d);
2919 }
2920
2921 static int
filt_bpftouch(struct knote * kn,struct kevent_qos_s * kev)2922 filt_bpftouch(struct knote *kn, struct kevent_qos_s *kev)
2923 {
2924 struct bpf_d *d = (struct bpf_d *)knote_kn_hook_get_raw(kn);
2925 int res;
2926
2927 lck_mtx_lock(bpf_mlock);
2928
2929 /* save off the lowat threshold and flag */
2930 kn->kn_sdata = kev->data;
2931 kn->kn_sfflags = kev->fflags;
2932
2933 /* output data will be re-generated here */
2934 res = filt_bpfread_common(kn, NULL, d);
2935
2936 lck_mtx_unlock(bpf_mlock);
2937
2938 return res;
2939 }
2940
2941 static int
filt_bpfprocess(struct knote * kn,struct kevent_qos_s * kev)2942 filt_bpfprocess(struct knote *kn, struct kevent_qos_s *kev)
2943 {
2944 struct bpf_d *d = (struct bpf_d *)knote_kn_hook_get_raw(kn);
2945 int res;
2946
2947 lck_mtx_lock(bpf_mlock);
2948 res = filt_bpfread_common(kn, kev, d);
2949 lck_mtx_unlock(bpf_mlock);
2950
2951 return res;
2952 }
2953
2954 /*
2955 * Copy data from an mbuf chain into a buffer. This code is derived
2956 * from m_copydata in kern/uipc_mbuf.c.
2957 */
2958 static void
bpf_mcopy(struct mbuf * m,void * dst_arg,size_t len,size_t offset)2959 bpf_mcopy(struct mbuf *m, void *dst_arg, size_t len, size_t offset)
2960 {
2961 u_int count;
2962 u_char *dst;
2963
2964 dst = dst_arg;
2965
2966 while (offset >= m->m_len) {
2967 offset -= m->m_len;
2968 m = m->m_next;
2969 if (m == NULL) {
2970 panic("bpf_mcopy");
2971 }
2972 continue;
2973 }
2974
2975 while (len > 0) {
2976 if (m == NULL) {
2977 panic("bpf_mcopy");
2978 }
2979 count = MIN(m->m_len - (u_int)offset, (u_int)len);
2980 bcopy((u_char *)mbuf_data(m) + offset, dst, count);
2981 m = m->m_next;
2982 dst += count;
2983 len -= count;
2984 offset = 0;
2985 }
2986 }
2987
/*
 * Common tap entry point: walk every descriptor attached to the matching
 * (interface, DLT) pair and capture the packet for those whose direction
 * and filter accept it.
 */
static inline void
bpf_tap_imp(
	ifnet_t ifp,
	u_int32_t dlt,
	struct bpf_packet *bpf_pkt,
	int outbound)
{
	struct bpf_d *d;
	u_int slen;
	struct bpf_if *bp;

	/*
	 * It's possible that we get here after the bpf descriptor has been
	 * detached from the interface; in such a case we simply return.
	 * Lock ordering is important since we can be called asynchronously
	 * (from IOKit) to process an inbound packet; when that happens
	 * we would have been holding its "gateLock" and will be acquiring
	 * "bpf_mlock" upon entering this routine.  Due to that, we release
	 * "bpf_mlock" prior to calling ifnet_set_promiscuous (which will
	 * acquire "gateLock" in the IOKit), in order to avoid a deadlock
	 * when a ifnet_set_promiscuous request simultaneously collides with
	 * an inbound packet being passed into the tap callback.
	 */
	lck_mtx_lock(bpf_mlock);
	if (ifp->if_bpf == NULL) {
		lck_mtx_unlock(bpf_mlock);
		return;
	}
	/* Locate the attachment matching the requested DLT (0 = default) */
	for (bp = ifp->if_bpf; bp != NULL; bp = bp->bif_next) {
		if (bp->bif_ifp != ifp) {
			/* wrong interface */
			bp = NULL;
			break;
		}
		if (dlt == 0 || bp->bif_dlt == dlt) {
			/* tapping default DLT or DLT matches */
			break;
		}
	}
	if (bp == NULL) {
		goto done;
	}
	for (d = bp->bif_dlist; d != NULL; d = d->bd_next) {
		struct bpf_packet *bpf_pkt_saved = bpf_pkt;
		struct bpf_packet bpf_pkt_tmp = {};
		struct pktap_header_buffer bpfp_header_tmp = {};

		/* Skip descriptors not interested in this direction */
		if (outbound && (d->bd_direction & BPF_D_OUT) == 0) {
			continue;
		}
		if (!outbound && (d->bd_direction & BPF_D_IN) == 0) {
			continue;
		}

		++d->bd_rcount;
		slen = bpf_filter(d->bd_filter, (u_char *)bpf_pkt,
		    (u_int)bpf_pkt->bpfp_total_length, 0);

		/* slen is the snapshot length accepted by the filter */
		if (slen != 0) {
			if (bp->bif_ifp->if_type == IFT_PKTAP &&
			    bp->bif_dlt == DLT_PKTAP) {
				if (d->bd_flags & BPF_TRUNCATE) {
					slen = min(slen, get_pkt_trunc_len(bpf_pkt));
				}
				/*
				 * Need to copy the bpf_pkt because the conversion
				 * to v2 pktap header modifies the content of the
				 * bpfp_header
				 */
				if ((d->bd_flags & BPF_PKTHDRV2) &&
				    bpf_pkt->bpfp_header_length <= sizeof(bpfp_header_tmp)) {
					bpf_pkt_tmp = *bpf_pkt;

					bpf_pkt = &bpf_pkt_tmp;

					memcpy(&bpfp_header_tmp, bpf_pkt->bpfp_header,
					    bpf_pkt->bpfp_header_length);

					bpf_pkt->bpfp_header = &bpfp_header_tmp;

					convert_to_pktap_header_to_v2(bpf_pkt,
					    !!(d->bd_flags & BPF_TRUNCATE));
				}
			}
			++d->bd_fcount;
			catchpacket(d, bpf_pkt, slen, outbound);
		}
		/* Restore the original packet for the next descriptor */
		bpf_pkt = bpf_pkt_saved;
	}

done:
	lck_mtx_unlock(bpf_mlock);
}
3081
3082 static inline void
bpf_tap_mbuf(ifnet_t ifp,u_int32_t dlt,mbuf_t m,void * hdr,size_t hlen,int outbound)3083 bpf_tap_mbuf(
3084 ifnet_t ifp,
3085 u_int32_t dlt,
3086 mbuf_t m,
3087 void* hdr,
3088 size_t hlen,
3089 int outbound)
3090 {
3091 struct bpf_packet bpf_pkt;
3092 struct mbuf *m0;
3093
3094 if (ifp->if_bpf == NULL) {
3095 /* quickly check without taking lock */
3096 return;
3097 }
3098 bpf_pkt.bpfp_type = BPF_PACKET_TYPE_MBUF;
3099 bpf_pkt.bpfp_mbuf = m;
3100 bpf_pkt.bpfp_total_length = 0;
3101 for (m0 = m; m0 != NULL; m0 = m0->m_next) {
3102 bpf_pkt.bpfp_total_length += m0->m_len;
3103 }
3104 bpf_pkt.bpfp_header = hdr;
3105 if (hdr != NULL) {
3106 bpf_pkt.bpfp_total_length += hlen;
3107 bpf_pkt.bpfp_header_length = hlen;
3108 } else {
3109 bpf_pkt.bpfp_header_length = 0;
3110 }
3111 bpf_tap_imp(ifp, dlt, &bpf_pkt, outbound);
3112 }
3113
3114 void
bpf_tap_out(ifnet_t ifp,u_int32_t dlt,mbuf_t m,void * hdr,size_t hlen)3115 bpf_tap_out(
3116 ifnet_t ifp,
3117 u_int32_t dlt,
3118 mbuf_t m,
3119 void* hdr,
3120 size_t hlen)
3121 {
3122 bpf_tap_mbuf(ifp, dlt, m, hdr, hlen, 1);
3123 }
3124
3125 void
bpf_tap_in(ifnet_t ifp,u_int32_t dlt,mbuf_t m,void * hdr,size_t hlen)3126 bpf_tap_in(
3127 ifnet_t ifp,
3128 u_int32_t dlt,
3129 mbuf_t m,
3130 void* hdr,
3131 size_t hlen)
3132 {
3133 bpf_tap_mbuf(ifp, dlt, m, hdr, hlen, 0);
3134 }
3135
3136 /* Callback registered with Ethernet driver. */
3137 static int
bpf_tap_callback(struct ifnet * ifp,struct mbuf * m)3138 bpf_tap_callback(struct ifnet *ifp, struct mbuf *m)
3139 {
3140 bpf_tap_mbuf(ifp, 0, m, NULL, 0, mbuf_pkthdr_rcvif(m) == NULL);
3141
3142 return 0;
3143 }
3144
3145 #if SKYWALK
3146 #include <skywalk/os_skywalk_private.h>
3147
3148 static void
bpf_pktcopy(kern_packet_t pkt,void * dst_arg,size_t len,size_t offset)3149 bpf_pktcopy(kern_packet_t pkt, void *dst_arg, size_t len, size_t offset)
3150 {
3151 kern_buflet_t buflet = NULL;
3152 size_t count;
3153 u_char *dst;
3154
3155 dst = dst_arg;
3156 while (len > 0) {
3157 uint8_t *addr;
3158
3159 u_int32_t buflet_length;
3160
3161 buflet = kern_packet_get_next_buflet(pkt, buflet);
3162 VERIFY(buflet != NULL);
3163 addr = kern_buflet_get_data_address(buflet);
3164 VERIFY(addr != NULL);
3165 addr += kern_buflet_get_data_offset(buflet);
3166 buflet_length = kern_buflet_get_data_length(buflet);
3167 if (offset >= buflet_length) {
3168 offset -= buflet_length;
3169 continue;
3170 }
3171 count = MIN(buflet_length - offset, len);
3172 bcopy((void *)(addr + offset), (void *)dst, count);
3173 dst += count;
3174 len -= count;
3175 offset = 0;
3176 }
3177 }
3178
3179 static inline void
bpf_tap_packet(ifnet_t ifp,u_int32_t dlt,kern_packet_t pkt,void * hdr,size_t hlen,int outbound)3180 bpf_tap_packet(
3181 ifnet_t ifp,
3182 u_int32_t dlt,
3183 kern_packet_t pkt,
3184 void* hdr,
3185 size_t hlen,
3186 int outbound)
3187 {
3188 struct bpf_packet bpf_pkt;
3189 struct mbuf * m;
3190
3191 if (ifp->if_bpf == NULL) {
3192 /* quickly check without taking lock */
3193 return;
3194 }
3195 m = kern_packet_get_mbuf(pkt);
3196 if (m != NULL) {
3197 bpf_pkt.bpfp_type = BPF_PACKET_TYPE_MBUF;
3198 bpf_pkt.bpfp_mbuf = m;
3199 bpf_pkt.bpfp_total_length = m_length(m);
3200 } else {
3201 bpf_pkt.bpfp_type = BPF_PACKET_TYPE_PKT;
3202 bpf_pkt.bpfp_pkt = pkt;
3203 bpf_pkt.bpfp_total_length = kern_packet_get_data_length(pkt);
3204 }
3205 bpf_pkt.bpfp_header = hdr;
3206 bpf_pkt.bpfp_header_length = hlen;
3207 if (hlen != 0) {
3208 bpf_pkt.bpfp_total_length += hlen;
3209 }
3210 bpf_tap_imp(ifp, dlt, &bpf_pkt, outbound);
3211 }
3212
3213 void
bpf_tap_packet_out(ifnet_t ifp,u_int32_t dlt,kern_packet_t pkt,void * hdr,size_t hlen)3214 bpf_tap_packet_out(
3215 ifnet_t ifp,
3216 u_int32_t dlt,
3217 kern_packet_t pkt,
3218 void* hdr,
3219 size_t hlen)
3220 {
3221 bpf_tap_packet(ifp, dlt, pkt, hdr, hlen, 1);
3222 }
3223
3224 void
bpf_tap_packet_in(ifnet_t ifp,u_int32_t dlt,kern_packet_t pkt,void * hdr,size_t hlen)3225 bpf_tap_packet_in(
3226 ifnet_t ifp,
3227 u_int32_t dlt,
3228 kern_packet_t pkt,
3229 void* hdr,
3230 size_t hlen)
3231 {
3232 bpf_tap_packet(ifp, dlt, pkt, hdr, hlen, 0);
3233 }
3234
3235 #endif /* SKYWALK */
3236
3237 static errno_t
bpf_copydata(struct bpf_packet * pkt,size_t off,size_t len,void * out_data)3238 bpf_copydata(struct bpf_packet *pkt, size_t off, size_t len, void* out_data)
3239 {
3240 errno_t err = 0;
3241 if (pkt->bpfp_type == BPF_PACKET_TYPE_MBUF) {
3242 err = mbuf_copydata(pkt->bpfp_mbuf, off, len, out_data);
3243 #if SKYWALK
3244 } else if (pkt->bpfp_type == BPF_PACKET_TYPE_PKT) {
3245 err = kern_packet_copy_bytes(pkt->bpfp_pkt, off, len, out_data);
3246 #endif /* SKYWALK */
3247 } else {
3248 err = EINVAL;
3249 }
3250
3251 return err;
3252 }
3253
3254 static void
copy_bpf_packet_offset(struct bpf_packet * pkt,void * dst,size_t len,size_t offset)3255 copy_bpf_packet_offset(struct bpf_packet * pkt, void * dst, size_t len, size_t offset)
3256 {
3257 /* copy the optional header */
3258 if (offset < pkt->bpfp_header_length) {
3259 size_t count = MIN(len, pkt->bpfp_header_length - offset);
3260 caddr_t src = (caddr_t)pkt->bpfp_header;
3261 bcopy(src + offset, dst, count);
3262 len -= count;
3263 dst = (void *)((uintptr_t)dst + count);
3264 offset = 0;
3265 } else {
3266 offset -= pkt->bpfp_header_length;
3267 }
3268
3269 if (len == 0) {
3270 /* nothing past the header */
3271 return;
3272 }
3273 /* copy the packet */
3274 switch (pkt->bpfp_type) {
3275 case BPF_PACKET_TYPE_MBUF:
3276 bpf_mcopy(pkt->bpfp_mbuf, dst, len, offset);
3277 break;
3278 #if SKYWALK
3279 case BPF_PACKET_TYPE_PKT:
3280 bpf_pktcopy(pkt->bpfp_pkt, dst, len, offset);
3281 break;
3282 #endif /* SKYWALK */
3283 default:
3284 break;
3285 }
3286 }
3287
3288 static void
copy_bpf_packet(struct bpf_packet * pkt,void * dst,size_t len)3289 copy_bpf_packet(struct bpf_packet * pkt, void * dst, size_t len)
3290 {
3291 copy_bpf_packet_offset(pkt, dst, len, 0);
3292 }
3293
3294 static uint32_t
get_esp_trunc_len(__unused struct bpf_packet * pkt,__unused uint32_t off,const uint32_t remaining_caplen)3295 get_esp_trunc_len(__unused struct bpf_packet *pkt, __unused uint32_t off,
3296 const uint32_t remaining_caplen)
3297 {
3298 /*
3299 * For some reason tcpdump expects to have one byte beyond the ESP header
3300 */
3301 uint32_t trunc_len = ESP_HDR_SIZE + 1;
3302
3303 if (trunc_len > remaining_caplen) {
3304 return remaining_caplen;
3305 }
3306
3307 return trunc_len;
3308 }
3309
3310 static uint32_t
get_isakmp_trunc_len(__unused struct bpf_packet * pkt,__unused uint32_t off,const uint32_t remaining_caplen)3311 get_isakmp_trunc_len(__unused struct bpf_packet *pkt, __unused uint32_t off,
3312 const uint32_t remaining_caplen)
3313 {
3314 /*
3315 * Include the payload generic header
3316 */
3317 uint32_t trunc_len = ISAKMP_HDR_SIZE;
3318
3319 if (trunc_len > remaining_caplen) {
3320 return remaining_caplen;
3321 }
3322
3323 return trunc_len;
3324 }
3325
3326 static uint32_t
get_isakmp_natt_trunc_len(struct bpf_packet * pkt,uint32_t off,const uint32_t remaining_caplen)3327 get_isakmp_natt_trunc_len(struct bpf_packet *pkt, uint32_t off,
3328 const uint32_t remaining_caplen)
3329 {
3330 int err = 0;
3331 uint32_t trunc_len = 0;
3332 char payload[remaining_caplen];
3333
3334 err = bpf_copydata(pkt, off, remaining_caplen, payload);
3335 if (err != 0) {
3336 return remaining_caplen;
3337 }
3338 /*
3339 * They are three cases:
3340 * - IKE: payload start with 4 bytes header set to zero before ISAKMP header
3341 * - keep alive: 1 byte payload
3342 * - otherwise it's ESP
3343 */
3344 if (remaining_caplen >= 4 &&
3345 payload[0] == 0 && payload[1] == 0 &&
3346 payload[2] == 0 && payload[3] == 0) {
3347 trunc_len = 4 + get_isakmp_trunc_len(pkt, off + 4, remaining_caplen - 4);
3348 } else if (remaining_caplen == 1) {
3349 trunc_len = 1;
3350 } else {
3351 trunc_len = get_esp_trunc_len(pkt, off, remaining_caplen);
3352 }
3353
3354 if (trunc_len > remaining_caplen) {
3355 return remaining_caplen;
3356 }
3357
3358 return trunc_len;
3359 }
3360
3361 static uint32_t
get_udp_trunc_len(struct bpf_packet * pkt,uint32_t off,const uint32_t remaining_caplen)3362 get_udp_trunc_len(struct bpf_packet *pkt, uint32_t off, const uint32_t remaining_caplen)
3363 {
3364 int err = 0;
3365 uint32_t trunc_len = sizeof(struct udphdr); /* By default no UDP payload */
3366
3367 if (trunc_len >= remaining_caplen) {
3368 return remaining_caplen;
3369 }
3370
3371 struct udphdr udphdr;
3372 err = bpf_copydata(pkt, off, sizeof(struct udphdr), &udphdr);
3373 if (err != 0) {
3374 return remaining_caplen;
3375 }
3376
3377 u_short sport, dport;
3378
3379 sport = EXTRACT_SHORT(&udphdr.uh_sport);
3380 dport = EXTRACT_SHORT(&udphdr.uh_dport);
3381
3382 if (dport == PORT_DNS || sport == PORT_DNS) {
3383 /*
3384 * Full UDP payload for DNS
3385 */
3386 trunc_len = remaining_caplen;
3387 } else if ((sport == PORT_BOOTPS && dport == PORT_BOOTPC) ||
3388 (sport == PORT_BOOTPC && dport == PORT_BOOTPS)) {
3389 /*
3390 * Full UDP payload for BOOTP and DHCP
3391 */
3392 trunc_len = remaining_caplen;
3393 } else if (dport == PORT_ISAKMP && sport == PORT_ISAKMP) {
3394 /*
3395 * Return the ISAKMP header
3396 */
3397 trunc_len += get_isakmp_trunc_len(pkt, off + sizeof(struct udphdr),
3398 remaining_caplen - sizeof(struct udphdr));
3399 } else if (dport == PORT_ISAKMP_NATT && sport == PORT_ISAKMP_NATT) {
3400 trunc_len += get_isakmp_natt_trunc_len(pkt, off + sizeof(struct udphdr),
3401 remaining_caplen - sizeof(struct udphdr));
3402 }
3403 if (trunc_len >= remaining_caplen) {
3404 return remaining_caplen;
3405 }
3406
3407 return trunc_len;
3408 }
3409
3410 static uint32_t
get_tcp_trunc_len(struct bpf_packet * pkt,uint32_t off,const uint32_t remaining_caplen)3411 get_tcp_trunc_len(struct bpf_packet *pkt, uint32_t off, const uint32_t remaining_caplen)
3412 {
3413 int err = 0;
3414 uint32_t trunc_len = sizeof(struct tcphdr); /* By default no TCP payload */
3415 if (trunc_len >= remaining_caplen) {
3416 return remaining_caplen;
3417 }
3418
3419 struct tcphdr tcphdr;
3420 err = bpf_copydata(pkt, off, sizeof(struct tcphdr), &tcphdr);
3421 if (err != 0) {
3422 return remaining_caplen;
3423 }
3424
3425 u_short sport, dport;
3426 sport = EXTRACT_SHORT(&tcphdr.th_sport);
3427 dport = EXTRACT_SHORT(&tcphdr.th_dport);
3428
3429 if (dport == PORT_DNS || sport == PORT_DNS) {
3430 /*
3431 * Full TCP payload for DNS
3432 */
3433 trunc_len = remaining_caplen;
3434 } else {
3435 trunc_len = (uint16_t)(tcphdr.th_off << 2);
3436 }
3437 if (trunc_len >= remaining_caplen) {
3438 return remaining_caplen;
3439 }
3440
3441 return trunc_len;
3442 }
3443
3444 static uint32_t
get_proto_trunc_len(uint8_t proto,struct bpf_packet * pkt,uint32_t off,const uint32_t remaining_caplen)3445 get_proto_trunc_len(uint8_t proto, struct bpf_packet *pkt, uint32_t off, const uint32_t remaining_caplen)
3446 {
3447 uint32_t trunc_len;
3448
3449 switch (proto) {
3450 case IPPROTO_ICMP: {
3451 /*
3452 * Full IMCP payload
3453 */
3454 trunc_len = remaining_caplen;
3455 break;
3456 }
3457 case IPPROTO_ICMPV6: {
3458 /*
3459 * Full IMCPV6 payload
3460 */
3461 trunc_len = remaining_caplen;
3462 break;
3463 }
3464 case IPPROTO_IGMP: {
3465 /*
3466 * Full IGMP payload
3467 */
3468 trunc_len = remaining_caplen;
3469 break;
3470 }
3471 case IPPROTO_UDP: {
3472 trunc_len = get_udp_trunc_len(pkt, off, remaining_caplen);
3473 break;
3474 }
3475 case IPPROTO_TCP: {
3476 trunc_len = get_tcp_trunc_len(pkt, off, remaining_caplen);
3477 break;
3478 }
3479 case IPPROTO_ESP: {
3480 trunc_len = get_esp_trunc_len(pkt, off, remaining_caplen);
3481 break;
3482 }
3483 default: {
3484 /*
3485 * By default we only include the IP header
3486 */
3487 trunc_len = 0;
3488 break;
3489 }
3490 }
3491 if (trunc_len >= remaining_caplen) {
3492 return remaining_caplen;
3493 }
3494
3495 return trunc_len;
3496 }
3497
3498 static uint32_t
get_ip_trunc_len(struct bpf_packet * pkt,uint32_t off,const uint32_t remaining_caplen)3499 get_ip_trunc_len(struct bpf_packet *pkt, uint32_t off, const uint32_t remaining_caplen)
3500 {
3501 int err = 0;
3502 uint32_t iplen = sizeof(struct ip);
3503 if (iplen >= remaining_caplen) {
3504 return remaining_caplen;
3505 }
3506
3507 struct ip iphdr;
3508 err = bpf_copydata(pkt, off, sizeof(struct ip), &iphdr);
3509 if (err != 0) {
3510 return remaining_caplen;
3511 }
3512
3513 uint8_t proto = 0;
3514
3515 iplen = (uint16_t)(iphdr.ip_hl << 2);
3516 if (iplen >= remaining_caplen) {
3517 return remaining_caplen;
3518 }
3519
3520 proto = iphdr.ip_p;
3521 iplen += get_proto_trunc_len(proto, pkt, off + iplen, remaining_caplen - iplen);
3522
3523 if (iplen >= remaining_caplen) {
3524 return remaining_caplen;
3525 }
3526
3527 return iplen;
3528 }
3529
/*
 * Decide how many bytes of an IPv6 packet to keep when truncating:
 * the fixed IPv6 header plus whatever the next-header protocol
 * handler wants kept, capped at remaining_caplen.
 */
static uint32_t
get_ip6_trunc_len(struct bpf_packet *pkt, uint32_t off, const uint32_t remaining_caplen)
{
	struct ip6_hdr ip6hdr;
	uint32_t len = sizeof(struct ip6_hdr);

	if (len >= remaining_caplen) {
		return remaining_caplen;
	}

	if (bpf_copydata(pkt, off, sizeof(struct ip6_hdr), &ip6hdr) != 0) {
		return remaining_caplen;
	}

	/*
	 * TBD: process the extension headers
	 */
	len += get_proto_trunc_len(ip6hdr.ip6_nxt, pkt, off + len, remaining_caplen - len);

	return (len >= remaining_caplen) ? remaining_caplen : len;
}
3559
3560 static uint32_t
get_ether_trunc_len(struct bpf_packet * pkt,uint32_t off,const uint32_t remaining_caplen)3561 get_ether_trunc_len(struct bpf_packet *pkt, uint32_t off, const uint32_t remaining_caplen)
3562 {
3563 int err = 0;
3564 uint32_t ethlen = sizeof(struct ether_header);
3565 if (ethlen >= remaining_caplen) {
3566 return remaining_caplen;
3567 }
3568
3569 struct ether_header eh = {};
3570 err = bpf_copydata(pkt, off, sizeof(struct ether_header), &eh);
3571 if (err != 0) {
3572 return remaining_caplen;
3573 }
3574
3575 u_short type = EXTRACT_SHORT(&eh.ether_type);
3576 /* Include full ARP */
3577 if (type == ETHERTYPE_ARP) {
3578 ethlen = remaining_caplen;
3579 } else if (type == ETHERTYPE_IP) {
3580 ethlen += get_ip_trunc_len(pkt, off + sizeof(struct ether_header),
3581 remaining_caplen - ethlen);
3582 } else if (type == ETHERTYPE_IPV6) {
3583 ethlen += get_ip6_trunc_len(pkt, off + sizeof(struct ether_header),
3584 remaining_caplen - ethlen);
3585 } else {
3586 ethlen = MIN(BPF_MIN_PKT_SIZE, remaining_caplen);
3587 }
3588 return ethlen;
3589 }
3590
/*
 * Compute the truncated capture length for a packet tapped through pktap:
 * the pktap header (plus any pseudo L2 header adjustment) followed by as
 * much of the L2/L3/L4 headers as the get_*_trunc_len() helpers decide to
 * keep.  The result never exceeds pkt->bpfp_total_length; an overflow is
 * counted, logged (or panics under DEBUG / bpf_debug) and clamped.
 */
static uint32_t
get_pkt_trunc_len(struct bpf_packet *pkt)
{
	struct pktap_header *pktap = (struct pktap_header *) (pkt->bpfp_header);
	uint32_t in_pkt_len = 0;
	uint32_t out_pkt_len = 0;
	uint32_t tlen = 0;
	uint32_t pre_adjust; // L2 header not in mbuf or kern_packet

	// bpfp_total_length must contain the BPF packet header
	assert3u(pkt->bpfp_total_length, >=, pkt->bpfp_header_length);

	// The BPF packet header must contain the pktap header
	assert3u(pkt->bpfp_header_length, >=, pktap->pth_length);

	// The pre frame length (L2 header) must be contained in the packet
	assert3u(pkt->bpfp_total_length, >=, pktap->pth_length + pktap->pth_frame_pre_length);

	/*
	 * pktap->pth_frame_pre_length is the L2 header length and accounts
	 * for both L2 header in the packet payload and pre_adjust.
	 *
	 * pre_adjust represents an adjustment for a pseudo L2 header that is not
	 * part of packet payload -- not in the mbuf or kern_packet -- and comes
	 * just after the pktap header.
	 *
	 * pktap->pth_length is the size of the pktap header (exclude pre_adjust)
	 *
	 * pkt->bpfp_header_length is (pktap->pth_length + pre_adjust)
	 */
	pre_adjust = (uint32_t)(pkt->bpfp_header_length - pktap->pth_length);

	if (pktap->pth_iftype == IFT_ETHER) {
		/*
		 * We need to parse the Ethernet header to find the network layer
		 * protocol
		 */
		in_pkt_len = (uint32_t)(pkt->bpfp_total_length - pktap->pth_length - pre_adjust);

		out_pkt_len = get_ether_trunc_len(pkt, 0, in_pkt_len);

		tlen = pktap->pth_length + pre_adjust + out_pkt_len;
	} else {
		/*
		 * For other interface types, we only know to parse IPv4 and IPv6.
		 *
		 * To get to the beginning of the IPv4 or IPv6 packet, we need to to skip
		 * over the L2 header that is the actual packet payload (mbuf or kern_packet)
		 */
		uint32_t off; // offset past the L2 header in the actual packet payload

		off = pktap->pth_frame_pre_length - pre_adjust;

		in_pkt_len = (uint32_t)(pkt->bpfp_total_length - pktap->pth_length - pktap->pth_frame_pre_length);

		if (pktap->pth_protocol_family == AF_INET) {
			out_pkt_len = get_ip_trunc_len(pkt, off, in_pkt_len);
		} else if (pktap->pth_protocol_family == AF_INET6) {
			out_pkt_len = get_ip6_trunc_len(pkt, off, in_pkt_len);
		} else {
			/* Unknown L3: keep only a minimal slice of the payload */
			out_pkt_len = MIN(BPF_MIN_PKT_SIZE, in_pkt_len);
		}
		tlen = pktap->pth_length + pktap->pth_frame_pre_length + out_pkt_len;
	}

	// Verify we do not overflow the buffer
	if (__improbable(tlen > pkt->bpfp_total_length)) {
		bool do_panic = bpf_debug != 0 ? true : false;

#if DEBUG
		do_panic = true;
#endif /* DEBUG */
		if (do_panic) {
			panic("%s:%d tlen %u > bpfp_total_length %lu bpfp_header_length %lu pth_frame_pre_length %u pre_adjust %u in_pkt_len %u out_pkt_len %u",
			    __func__, __LINE__,
			    tlen, pkt->bpfp_total_length, pkt->bpfp_header_length, pktap->pth_frame_pre_length, pre_adjust, in_pkt_len, out_pkt_len);
		} else {
			os_log(OS_LOG_DEFAULT,
			    "%s:%d tlen %u > bpfp_total_length %lu bpfp_header_length %lu pth_frame_pre_length %u pre_adjust %u in_pkt_len %u out_pkt_len %u",
			    __func__, __LINE__,
			    tlen, pkt->bpfp_total_length, pkt->bpfp_header_length, pktap->pth_frame_pre_length, pre_adjust, in_pkt_len, out_pkt_len);
		}
		bpf_trunc_overflow += 1;
		tlen = (uint32_t)pkt->bpfp_total_length;
	}

	return tlen;
}
3679
/*
 * Length in bytes of the common prefix of buffers a and b, compared in
 * 32-bit words.  Only whole words are compared, so the result is always
 * a multiple of 4 and trailing bytes of a partial word are ignored.
 */
static uint8_t
get_common_prefix_size(const void *a, const void *b, uint8_t max_bytes)
{
	const uint32_t *wa = (const uint32_t *)a;
	const uint32_t *wb = (const uint32_t *)b;
	uint8_t nwords = max_bytes >> 2;
	uint8_t matched = 0;

	while (matched < nwords && wa[matched] == wb[matched]) {
		matched++;
	}
	return (uint8_t)(matched << 2);
}
3695
/*
 * Move the packet data from interface memory (pkt) into the
 * store buffer.  Wakes up a pending listener when the buffers are
 * rotated, or when immediate mode / an expired read timeout
 * requires it.
 */
/*
 * Copy one captured packet into the descriptor's store buffer.
 *
 * d        - the BPF descriptor receiving the packet
 * pkt      - packet wrapper (mbuf or, with SKYWALK, kern_packet)
 * snaplen  - maximum number of packet bytes to capture
 * outbound - non-zero when the packet is being transmitted
 *
 * Prepends a bpf_hdr, bpf_hdr_ext or bpf_comp_hdr depending on the
 * descriptor's flags, rotates the store/hold buffers when the packet does
 * not fit, and wakes up any waiting reader when needed.
 */
static void
catchpacket(struct bpf_d *d, struct bpf_packet * pkt,
    u_int snaplen, int outbound)
{
	struct bpf_hdr *hp;
	struct bpf_hdr_ext *ehp;
	uint32_t totlen, curlen;
	uint32_t hdrlen, caplen;
	int do_wakeup = 0;
	u_char *payload;
	struct timeval tv;

	/* Header size depends on which header format the descriptor requested */
	hdrlen = (d->bd_flags & BPF_EXTENDED_HDR) ? d->bd_bif->bif_exthdrlen :
	    (d->bd_flags & BPF_COMP_REQ) ? d->bd_bif->bif_comphdrlen:
	    d->bd_bif->bif_hdrlen;
	/*
	 * Figure out how many bytes to move.  If the packet is
	 * greater or equal to the snapshot length, transfer that
	 * much.  Otherwise, transfer the whole packet (unless
	 * we hit the buffer size limit).
	 */
	totlen = hdrlen + MIN(snaplen, (int)pkt->bpfp_total_length);
	if (totlen > d->bd_bufsize) {
		totlen = d->bd_bufsize;
	}

	/* Nothing of the packet itself would fit; drop silently */
	if (hdrlen > totlen) {
		return;
	}

	/*
	 * Round up the end of the previous packet to the next longword.
	 */
	curlen = BPF_WORDALIGN(d->bd_slen);
	if (curlen + totlen > d->bd_bufsize) {
		/*
		 * This packet will overflow the storage buffer.
		 * Rotate the buffers if we can, then wakeup any
		 * pending reads.
		 *
		 * We cannot rotate buffers if a read is in progress
		 * so drop the packet
		 */
		if (d->bd_hbuf_read) {
			++d->bd_dcount;
			return;
		}

		if (d->bd_fbuf == NULL) {
			if (d->bd_headdrop == 0) {
				/*
				 * We haven't completed the previous read yet,
				 * so drop the packet.
				 */
				++d->bd_dcount;
				return;
			}
			/*
			 * Drop the hold buffer as it contains older packets
			 */
			d->bd_dcount += d->bd_hcnt;
			d->bd_fbuf = d->bd_hbuf;
			ROTATE_BUFFERS(d);
		} else {
			ROTATE_BUFFERS(d);
		}
		do_wakeup = 1;
		curlen = 0;
	} else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT) {
		/*
		 * Immediate mode is set, or the read timeout has
		 * already expired during a select call.  A packet
		 * arrived, so the reader should be woken up.
		 */
		do_wakeup = 1;
	}

	/*
	 * Append the bpf header.
	 */
	microtime(&tv);
	if (d->bd_flags & BPF_EXTENDED_HDR) {
		ehp = (struct bpf_hdr_ext *)(void *)(d->bd_sbuf + curlen);
		memset(ehp, 0, sizeof(*ehp));
		ehp->bh_tstamp.tv_sec = (int)tv.tv_sec;
		ehp->bh_tstamp.tv_usec = tv.tv_usec;

		ehp->bh_datalen = (bpf_u_int32)pkt->bpfp_total_length;
		ehp->bh_hdrlen = (u_short)hdrlen;
		caplen = ehp->bh_caplen = totlen - hdrlen;
		payload = (u_char *)ehp + hdrlen;

		if (outbound) {
			ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_OUT;
		} else {
			ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_IN;
		}

		/* Extended-header metadata comes from the mbuf or the kern_packet */
		if (pkt->bpfp_type == BPF_PACKET_TYPE_MBUF) {
			struct mbuf *m = pkt->bpfp_mbuf;

			if (outbound) {
				/* only do lookups on non-raw INPCB */
				if ((m->m_pkthdr.pkt_flags & (PKTF_FLOW_ID |
				    PKTF_FLOW_LOCALSRC | PKTF_FLOW_RAWSOCK)) ==
				    (PKTF_FLOW_ID | PKTF_FLOW_LOCALSRC) &&
				    m->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
					ehp->bh_flowid = m->m_pkthdr.pkt_flowid;
					if (m->m_pkthdr.pkt_proto == IPPROTO_TCP) {
						ehp->bh_flags |= BPF_HDR_EXT_FLAGS_TCP;
					} else if (m->m_pkthdr.pkt_proto == IPPROTO_UDP) {
						ehp->bh_flags |= BPF_HDR_EXT_FLAGS_UDP;
					}
				}
				ehp->bh_svc = so_svc2tc(m->m_pkthdr.pkt_svc);
				if (m->m_pkthdr.pkt_flags & PKTF_TCP_REXMT) {
					ehp->bh_pktflags |= BPF_PKTFLAGS_TCP_REXMT;
				}
				if (m->m_pkthdr.pkt_flags & PKTF_START_SEQ) {
					ehp->bh_pktflags |= BPF_PKTFLAGS_START_SEQ;
				}
				if (m->m_pkthdr.pkt_flags & PKTF_LAST_PKT) {
					ehp->bh_pktflags |= BPF_PKTFLAGS_LAST_PKT;
				}
				if (m->m_pkthdr.pkt_flags & PKTF_VALID_UNSENT_DATA) {
					ehp->bh_unsent_bytes =
					    m->m_pkthdr.bufstatus_if;
					ehp->bh_unsent_snd =
					    m->m_pkthdr.bufstatus_sndbuf;
				}
			} else {
				if (m->m_pkthdr.pkt_flags & PKTF_WAKE_PKT) {
					ehp->bh_pktflags |= BPF_PKTFLAGS_WAKE_PKT;
				}
			}
#if SKYWALK
		} else {
			kern_packet_t kern_pkt = pkt->bpfp_pkt;
			packet_flowid_t flowid = 0;

			if (outbound) {
				/*
				 * Note: pp_init() asserts that kern_packet_svc_class_t is equivalent
				 * to mbuf_svc_class_t
				 */
				ehp->bh_svc = so_svc2tc((mbuf_svc_class_t)kern_packet_get_service_class(kern_pkt));
				if (kern_packet_get_transport_retransmit(kern_pkt)) {
					ehp->bh_pktflags |= BPF_PKTFLAGS_TCP_REXMT;
				}
				if (kern_packet_get_transport_last_packet(kern_pkt)) {
					ehp->bh_pktflags |= BPF_PKTFLAGS_LAST_PKT;
				}
			} else {
				if (kern_packet_get_wake_flag(kern_pkt)) {
					ehp->bh_pktflags |= BPF_PKTFLAGS_WAKE_PKT;
				}
			}
			ehp->bh_trace_tag = kern_packet_get_trace_tag(kern_pkt);
			if (kern_packet_get_flowid(kern_pkt, &flowid) == 0) {
				ehp->bh_flowid = flowid;
			}
#endif /* SKYWALK */
		}
	} else {
		hp = (struct bpf_hdr *)(void *)(d->bd_sbuf + curlen);
		memset(hp, 0, BPF_WORDALIGN(sizeof(*hp)));
		hp->bh_tstamp.tv_sec = (int)tv.tv_sec;
		hp->bh_tstamp.tv_usec = tv.tv_usec;
		hp->bh_datalen = (bpf_u_int32)pkt->bpfp_total_length;
		hp->bh_hdrlen = (u_short)hdrlen;
		caplen = hp->bh_caplen = totlen - hdrlen;
		payload = (u_char *)hp + hdrlen;
	}
	if (d->bd_flags & BPF_COMP_REQ) {
		/*
		 * Header compression: copy the first bytes of this packet into
		 * bd_prev_fbuf, compare them against the previous packet's first
		 * bytes (bd_prev_sbuf), and -- when BPF_COMP_ENABLED -- elide
		 * the shared prefix from the store buffer.
		 */
		uint8_t common_prefix_size = 0;
		uint8_t copy_len = MIN((uint8_t)caplen, BPF_HDR_COMP_LEN_MAX);

		copy_bpf_packet(pkt, d->bd_prev_fbuf, copy_len);

		if (d->bd_prev_slen != 0) {
			common_prefix_size = get_common_prefix_size(d->bd_prev_fbuf,
			    d->bd_prev_sbuf, MIN(copy_len, d->bd_prev_slen));
		}

		if (d->bd_flags & BPF_COMP_ENABLED) {
			assert3u(caplen, >=, common_prefix_size);
			copy_bpf_packet_offset(pkt, payload, caplen - common_prefix_size,
			    common_prefix_size);
			d->bd_slen = curlen + totlen - common_prefix_size;
		} else {
			copy_bpf_packet(pkt, payload, caplen);
			d->bd_slen = curlen + totlen;
		}

		/*
		 * Update the caplen only if compression is enabled -- the caller
		 * must pay attention to bpf_hdr_comp_enable
		 */
		if (d->bd_flags & BPF_EXTENDED_HDR) {
			ehp->bh_complen = common_prefix_size;
			if (d->bd_flags & BPF_COMP_ENABLED) {
				ehp->bh_caplen -= common_prefix_size;
			}
		} else {
			struct bpf_comp_hdr *hcp;

			hcp = (struct bpf_comp_hdr *)(void *)(d->bd_sbuf + curlen);
			hcp->bh_complen = common_prefix_size;
			if (d->bd_flags & BPF_COMP_ENABLED) {
				hcp->bh_caplen -= common_prefix_size;
			}
		}

		/* Compression statistics */
		if (common_prefix_size > 0) {
			d->bd_bcs.bcs_total_compressed_prefix_size += common_prefix_size;
			if (common_prefix_size > d->bd_bcs.bcs_max_compressed_prefix_size) {
				d->bd_bcs.bcs_max_compressed_prefix_size = common_prefix_size;
			}
			d->bd_bcs.bcs_count_compressed_prefix += 1;
		} else {
			d->bd_bcs.bcs_count_no_common_prefix += 1;
		}

		/* The current compression buffer becomes the previous one */
		caddr_t tmp = d->bd_prev_sbuf;
		d->bd_prev_sbuf = d->bd_prev_fbuf;
		d->bd_prev_slen = copy_len;
		d->bd_prev_fbuf = tmp;
	} else {
		/*
		 * Copy the packet data into the store buffer and update its length.
		 */
		copy_bpf_packet(pkt, payload, caplen);
		d->bd_slen = curlen + totlen;
	}
	d->bd_scnt += 1;
	d->bd_bcs.bcs_total_hdr_size += pkt->bpfp_header_length;
	d->bd_bcs.bcs_total_size += caplen;

	if (do_wakeup) {
		bpf_wakeup(d);
	}
}
3944
3945 static void
bpf_freebufs(struct bpf_d * d)3946 bpf_freebufs(struct bpf_d *d)
3947 {
3948 if (d->bd_sbuf != NULL) {
3949 kfree_data_addr(d->bd_sbuf);
3950 }
3951 if (d->bd_hbuf != NULL) {
3952 kfree_data_addr(d->bd_hbuf);
3953 }
3954 if (d->bd_fbuf != NULL) {
3955 kfree_data_addr(d->bd_fbuf);
3956 }
3957
3958 if (d->bd_prev_sbuf != NULL) {
3959 kfree_data_addr(d->bd_prev_sbuf);
3960 }
3961 if (d->bd_prev_fbuf != NULL) {
3962 kfree_data_addr(d->bd_prev_fbuf);
3963 }
3964 }
/*
 * Allocate (or reallocate) the descriptor's store, free and -- when
 * compression is requested -- previous-packet buffers, resetting the
 * associated lengths and counts.
 */
3968 static int
bpf_allocbufs(struct bpf_d * d)3969 bpf_allocbufs(struct bpf_d *d)
3970 {
3971 bpf_freebufs(d);
3972
3973 d->bd_fbuf = (caddr_t) kalloc_data(d->bd_bufsize, Z_WAITOK | Z_ZERO);
3974 if (d->bd_fbuf == NULL) {
3975 goto nobufs;
3976 }
3977
3978 d->bd_sbuf = (caddr_t) kalloc_data(d->bd_bufsize, Z_WAITOK | Z_ZERO);
3979 if (d->bd_sbuf == NULL) {
3980 goto nobufs;
3981 }
3982 d->bd_slen = 0;
3983 d->bd_hlen = 0;
3984 d->bd_scnt = 0;
3985 d->bd_hcnt = 0;
3986
3987 d->bd_prev_slen = 0;
3988 if (d->bd_flags & BPF_COMP_REQ) {
3989 d->bd_prev_sbuf = (caddr_t) kalloc_data(BPF_HDR_COMP_LEN_MAX, Z_WAITOK | Z_ZERO);
3990 if (d->bd_prev_sbuf == NULL) {
3991 goto nobufs;
3992 }
3993 d->bd_prev_fbuf = (caddr_t) kalloc_data(BPF_HDR_COMP_LEN_MAX, Z_WAITOK | Z_ZERO);
3994 if (d->bd_prev_fbuf == NULL) {
3995 goto nobufs;
3996 }
3997 }
3998 return 0;
3999 nobufs:
4000 bpf_freebufs(d);
4001 return ENOMEM;
4002 }
4003
4004 /*
4005 * Free buffers currently in use by a descriptor.
4006 * Called on close.
4007 */
4008 static void
bpf_freed(struct bpf_d * d)4009 bpf_freed(struct bpf_d *d)
4010 {
4011 /*
4012 * We don't need to lock out interrupts since this descriptor has
4013 * been detached from its interface and it yet hasn't been marked
4014 * free.
4015 */
4016 if (d->bd_hbuf_read || d->bd_hbuf_write) {
4017 panic("bpf buffer freed during read/write");
4018 }
4019
4020 bpf_freebufs(d);
4021
4022 if (d->bd_filter) {
4023 kfree_data_addr(d->bd_filter);
4024 }
4025 }
4026
4027 /*
4028 * Attach an interface to bpf. driverp is a pointer to a (struct bpf_if *)
4029 * in the driver's softc; dlt is the link layer type; hdrlen is the fixed
4030 * size of the link header (variable length headers not yet supported).
4031 */
void
bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
{
	/* Legacy entry point: no send/tap callbacks; errors (EEXIST) ignored */
	bpf_attach(ifp, dlt, hdrlen, NULL, NULL);
}
4037
/*
 * Register an (interface, dlt) pair with BPF, with optional send and tap
 * callbacks.  Entries for the same interface are kept contiguous in
 * bpf_iflist, with the IFT_ETHER/DLT_EN10MB entry first so it is the
 * default match.
 *
 * Returns 0 on success, EEXIST if the pair is already attached.
 */
errno_t
bpf_attach(
	ifnet_t ifp,
	u_int32_t dlt,
	u_int32_t hdrlen,
	bpf_send_func send,
	bpf_tap_func tap)
{
	struct bpf_if *bp;
	struct bpf_if *bp_new;
	struct bpf_if *bp_before_first = NULL;
	struct bpf_if *bp_first = NULL;
	struct bpf_if *bp_last = NULL;
	boolean_t found;

	/*
	 * Z_NOFAIL will cause a panic if the allocation fails
	 */
	bp_new = kalloc_type(struct bpf_if, Z_WAITOK | Z_NOFAIL | Z_ZERO);

	lck_mtx_lock(bpf_mlock);

	/*
	 * Check if this interface/dlt is already attached. Remember the
	 * first and last attachment for this interface, as well as the
	 * element before the first attachment.
	 */
	found = FALSE;
	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		if (bp->bif_ifp != ifp) {
			if (bp_first != NULL) {
				/* no more elements for this interface */
				break;
			}
			bp_before_first = bp;
		} else {
			if (bp->bif_dlt == dlt) {
				found = TRUE;
				break;
			}
			if (bp_first == NULL) {
				bp_first = bp;
			}
			bp_last = bp;
		}
	}
	if (found) {
		/* Already attached: release the lock and the unused allocation */
		lck_mtx_unlock(bpf_mlock);
		os_log_error(OS_LOG_DEFAULT,
		    "bpfattach - %s with dlt %d is already attached",
		    if_name(ifp), dlt);
		kfree_type(struct bpf_if, bp_new);
		return EEXIST;
	}

	bp_new->bif_ifp = ifp;
	bp_new->bif_dlt = dlt;
	bp_new->bif_send = send;
	bp_new->bif_tap = tap;

	if (bp_first == NULL) {
		/* No other entries for this ifp */
		bp_new->bif_next = bpf_iflist;
		bpf_iflist = bp_new;
	} else {
		if (ifnet_type(ifp) == IFT_ETHER && dlt == DLT_EN10MB) {
			/* Make this the first entry for this interface */
			if (bp_before_first != NULL) {
				/* point the previous to us */
				bp_before_first->bif_next = bp_new;
			} else {
				/* we're the new head */
				bpf_iflist = bp_new;
			}
			bp_new->bif_next = bp_first;
		} else {
			/* Add this after the last entry for this interface */
			bp_new->bif_next = bp_last->bif_next;
			bp_last->bif_next = bp_new;
		}
	}

	/*
	 * Compute the length of the bpf header.  This is not necessarily
	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
	 * that the network layer header begins on a longword boundary (for
	 * performance reasons and to alleviate alignment restrictions).
	 */
	bp_new->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;
	bp_new->bif_exthdrlen = BPF_WORDALIGN(hdrlen +
	    sizeof(struct bpf_hdr_ext)) - hdrlen;
	bp_new->bif_comphdrlen = BPF_WORDALIGN(hdrlen +
	    sizeof(struct bpf_comp_hdr)) - hdrlen;

	/* Take a reference on the interface */
	ifnet_reference(ifp);

	lck_mtx_unlock(bpf_mlock);

	return 0;
}
4139
4140 /*
4141 * Detach bpf from an interface. This involves detaching each descriptor
4142 * associated with the interface, and leaving bd_bif NULL. Notify each
4143 * descriptor as it's detached so that any sleepers wake up and get
4144 * ENXIO.
4145 */
void
bpfdetach(struct ifnet *ifp)
{
	struct bpf_if *bp, *bp_prev, *bp_next;
	struct bpf_d *d;

	if (bpf_debug != 0) {
		os_log(OS_LOG_DEFAULT, "%s: %s", __func__, if_name(ifp));
	}

	lck_mtx_lock(bpf_mlock);

	/*
	 * Build the list of devices attached to that interface
	 * that we need to free while keeping the lock to maintain
	 * the integrity of the interface list
	 */
	bp_prev = NULL;
	for (bp = bpf_iflist; bp != NULL; bp = bp_next) {
		bp_next = bp->bif_next;

		/* Entries for other interfaces are left in place */
		if (ifp != bp->bif_ifp) {
			bp_prev = bp;
			continue;
		}
		/* Unlink from the interface list */
		if (bp_prev) {
			bp_prev->bif_next = bp->bif_next;
		} else {
			bpf_iflist = bp->bif_next;
		}

		/* Detach the devices attached to the interface */
		while ((d = bp->bif_dlist) != NULL) {
			/*
			 * Take an extra reference to prevent the device
			 * from being freed when bpf_detachd() releases
			 * the reference for the interface list
			 */
			bpf_acquire_d(d);

			/*
			 * Wait for active read and writes to complete
			 */
			while (d->bd_hbuf_read || d->bd_hbuf_write) {
				msleep((caddr_t)d, bpf_mlock, PRINET, "bpfdetach", NULL);
			}

			bpf_detachd(d);
			bpf_wakeup(d);
			bpf_release_d(d);
		}
		/* Drop the reference taken by bpf_attach() */
		ifnet_release(ifp);
	}

	lck_mtx_unlock(bpf_mlock);
}
4203
4204 void
bpf_init(__unused void * unused)4205 bpf_init(__unused void *unused)
4206 {
4207 int maj;
4208
4209 /* bpf_comp_hdr is an overlay of bpf_hdr */
4210 _CASSERT(BPF_WORDALIGN(sizeof(struct bpf_hdr)) ==
4211 BPF_WORDALIGN(sizeof(struct bpf_comp_hdr)));
4212
4213 /* compression length must fits in a byte */
4214 _CASSERT(BPF_HDR_COMP_LEN_MAX <= UCHAR_MAX );
4215
4216 (void) PE_parse_boot_argn("bpf_hdr_comp", &bpf_hdr_comp_enable,
4217 sizeof(bpf_hdr_comp_enable));
4218
4219 if (bpf_devsw_installed == 0) {
4220 bpf_devsw_installed = 1;
4221 maj = cdevsw_add(CDEV_MAJOR, &bpf_cdevsw);
4222 if (maj == -1) {
4223 bpf_devsw_installed = 0;
4224 os_log_error(OS_LOG_DEFAULT,
4225 "bpf_init: failed to allocate a major number");
4226 return;
4227 }
4228
4229 for (int i = 0; i < NBPFILTER; i++) {
4230 bpf_make_dev_t(maj);
4231 }
4232 }
4233 }
4234
4235 static int
4236 sysctl_bpf_maxbufsize SYSCTL_HANDLER_ARGS
4237 {
4238 #pragma unused(arg1, arg2)
4239 int i, err;
4240
4241 i = bpf_maxbufsize;
4242
4243 err = sysctl_handle_int(oidp, &i, 0, req);
4244 if (err != 0 || req->newptr == USER_ADDR_NULL) {
4245 return err;
4246 }
4247
4248 if (i < 0 || i > BPF_BUFSIZE_CAP) {
4249 i = BPF_BUFSIZE_CAP;
4250 }
4251
4252 bpf_maxbufsize = i;
4253 return err;
4254 }
4255
4256 static int
4257 sysctl_bpf_bufsize_cap SYSCTL_HANDLER_ARGS
4258 {
4259 #pragma unused(arg1, arg2)
4260 int i, err;
4261
4262 i = BPF_BUFSIZE_CAP;
4263
4264 err = sysctl_handle_int(oidp, &i, 0, req);
4265 if (err != 0 || req->newptr == USER_ADDR_NULL) {
4266 return err;
4267 }
4268
4269 return err;
4270 }
4271
4272 /*
4273 * Fill filter statistics
4274 */
4275 static void
bpfstats_fill_xbpf(struct xbpf_d * d,struct bpf_d * bd)4276 bpfstats_fill_xbpf(struct xbpf_d *d, struct bpf_d *bd)
4277 {
4278 LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);
4279
4280 d->bd_structsize = sizeof(struct xbpf_d);
4281 d->bd_promisc = bd->bd_promisc != 0 ? 1 : 0;
4282 d->bd_immediate = d->bd_immediate != 0 ? 1 : 0;
4283 d->bd_hdrcmplt = bd->bd_hdrcmplt != 0 ? 1 : 0;
4284 d->bd_async = bd->bd_async != 0 ? 1 : 0;
4285 d->bd_headdrop = bd->bd_headdrop != 0 ? 1 : 0;
4286 d->bd_direction = (uint8_t)bd->bd_direction;
4287 d->bh_compreq = bd->bd_flags & BPF_COMP_REQ ? 1 : 0;
4288 d->bh_compenabled = bd->bd_flags & BPF_COMP_ENABLED ? 1 : 0;
4289 d->bd_exthdr = bd->bd_flags & BPF_EXTENDED_HDR ? 1 : 0;
4290 d->bd_trunc = bd->bd_flags & BPF_TRUNCATE ? 1 : 0;
4291 d->bd_pkthdrv2 = bd->bd_flags & BPF_PKTHDRV2 ? 1 : 0;
4292
4293 d->bd_dev_minor = (uint8_t)bd->bd_dev_minor;
4294
4295 d->bd_sig = bd->bd_sig;
4296
4297 d->bd_rcount = bd->bd_rcount;
4298 d->bd_dcount = bd->bd_dcount;
4299 d->bd_fcount = bd->bd_fcount;
4300 d->bd_wcount = bd->bd_wcount;
4301 d->bd_wdcount = bd->bd_wdcount;
4302 d->bd_slen = bd->bd_slen;
4303 d->bd_hlen = bd->bd_hlen;
4304 d->bd_bufsize = bd->bd_bufsize;
4305 d->bd_pid = bd->bd_pid;
4306 if (bd->bd_bif != NULL && bd->bd_bif->bif_ifp != NULL) {
4307 strlcpy(d->bd_ifname,
4308 bd->bd_bif->bif_ifp->if_xname, IFNAMSIZ);
4309 }
4310
4311 d->bd_comp_count = bd->bd_bcs.bcs_count_compressed_prefix;
4312 d->bd_comp_size = bd->bd_bcs.bcs_total_compressed_prefix_size;
4313
4314 d->bd_scnt = bd->bd_scnt;
4315 d->bd_hcnt = bd->bd_hcnt;
4316
4317 d->bd_read_count = bd->bd_bcs.bcs_total_read;
4318 d->bd_fsize = bd->bd_bcs.bcs_total_size;
4319 }
4320
4321 /*
4322 * Handle `netstat -B' stats request
4323 */
4324 static int
4325 sysctl_bpf_stats SYSCTL_HANDLER_ARGS
4326 {
4327 int error;
4328 struct xbpf_d *xbdbuf;
4329 unsigned int x_cnt;
4330 vm_size_t buf_size;
4331
4332 if (req->oldptr == USER_ADDR_NULL) {
4333 return SYSCTL_OUT(req, 0, nbpfilter * sizeof(struct xbpf_d));
4334 }
4335 if (nbpfilter == 0) {
4336 return SYSCTL_OUT(req, 0, 0);
4337 }
4338 buf_size = req->oldlen;
4339 if (buf_size > BPF_MAX_DEVICES * sizeof(struct xbpf_d)) {
4340 buf_size = BPF_MAX_DEVICES * sizeof(struct xbpf_d);
4341 }
4342 xbdbuf = kalloc_data(buf_size, Z_WAITOK | Z_ZERO);
4343
4344 lck_mtx_lock(bpf_mlock);
4345 if (buf_size < (nbpfilter * sizeof(struct xbpf_d))) {
4346 lck_mtx_unlock(bpf_mlock);
4347 kfree_data(xbdbuf, buf_size);
4348 return ENOMEM;
4349 }
4350 x_cnt = 0;
4351 unsigned int i;
4352
4353 for (i = 0; i < nbpfilter; i++) {
4354 struct bpf_d *bd = bpf_dtab[i];
4355 struct xbpf_d *xbd;
4356
4357 if (bd == NULL || bd == BPF_DEV_RESERVED ||
4358 (bd->bd_flags & BPF_CLOSING) != 0) {
4359 continue;
4360 }
4361 VERIFY(x_cnt < nbpfilter);
4362
4363 xbd = &xbdbuf[x_cnt++];
4364 bpfstats_fill_xbpf(xbd, bd);
4365 }
4366 lck_mtx_unlock(bpf_mlock);
4367
4368 error = SYSCTL_OUT(req, xbdbuf, x_cnt * sizeof(struct xbpf_d));
4369 kfree_data(xbdbuf, buf_size);
4370 return error;
4371 }
4372