1 /*
2 * Copyright (c) 2000-2022 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1990, 1991, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * This code is derived from the Stanford/CMU enet packet filter,
33 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
34 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
35 * Berkeley Laboratory.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * @(#)bpf.c 8.2 (Berkeley) 3/28/94
66 *
67 * $FreeBSD: src/sys/net/bpf.c,v 1.59.2.5 2001/01/05 04:49:09 jdp Exp $
68 */
69 /*
70 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
71 * support for mandatory and extensible security protections. This notice
72 * is included in support of clause 2.2 (b) of the Apple Public License,
73 * Version 2.0.
74 */
75
76 #include "bpf.h"
77
78 #ifndef __GNUC__
79 #define inline
80 #else
81 #define inline __inline
82 #endif
83
84 #include <sys/param.h>
85 #include <sys/systm.h>
86 #include <sys/conf.h>
87 #include <sys/malloc.h>
88 #include <sys/mbuf.h>
89 #include <sys/time.h>
90 #include <sys/proc.h>
91 #include <sys/signalvar.h>
92 #include <sys/filio.h>
93 #include <sys/sockio.h>
94 #include <sys/ttycom.h>
95 #include <sys/filedesc.h>
96 #include <sys/uio_internal.h>
97 #include <sys/file_internal.h>
98 #include <sys/event.h>
99
100 #include <sys/poll.h>
101
102 #include <sys/socket.h>
103 #include <sys/socketvar.h>
104 #include <sys/vnode.h>
105
106 #include <net/if.h>
107 #include <net/bpf.h>
108 #include <net/bpfdesc.h>
109
110 #include <netinet/in.h>
111 #include <netinet/ip.h>
112 #include <netinet/ip6.h>
113 #include <netinet/in_pcb.h>
114 #include <netinet/in_var.h>
115 #include <netinet/ip_var.h>
116 #include <netinet/tcp.h>
117 #include <netinet/tcp_var.h>
118 #include <netinet/udp.h>
119 #include <netinet/udp_var.h>
120 #include <netinet/if_ether.h>
121 #include <netinet/isakmp.h>
122 #include <netinet6/esp.h>
123 #include <sys/kernel.h>
124 #include <sys/sysctl.h>
125 #include <net/firewire.h>
126
127 #include <miscfs/devfs/devfs.h>
128 #include <net/dlil.h>
129 #include <net/pktap.h>
130
131 #include <kern/assert.h>
132 #include <kern/locks.h>
133 #include <kern/thread_call.h>
134 #include <libkern/section_keywords.h>
135
136 #include <os/log.h>
137
138 extern int tvtohz(struct timeval *);
139 extern char *proc_name_address(void *p);
140
141 #define BPF_BUFSIZE 4096
142 #define UIOMOVE(cp, len, code, uio) uiomove(cp, len, uio)
143
144 #define PRINET 26 /* interruptible */
145
146 #define ISAKMP_HDR_SIZE (sizeof(struct isakmp) + sizeof(struct isakmp_gen))
147 #define ESP_HDR_SIZE sizeof(struct newesp)
148
149 typedef void (*pktcopyfunc_t)(const void *, void *, size_t);
150
151 /*
152 * The default read buffer size is patchable.
153 */
154 static unsigned int bpf_bufsize = BPF_BUFSIZE;
155 SYSCTL_INT(_debug, OID_AUTO, bpf_bufsize, CTLFLAG_RW | CTLFLAG_LOCKED,
156 &bpf_bufsize, 0, "");
157
158 __private_extern__ unsigned int bpf_maxbufsize = BPF_MAXBUFSIZE;
159 static int sysctl_bpf_maxbufsize SYSCTL_HANDLER_ARGS;
160 SYSCTL_PROC(_debug, OID_AUTO, bpf_maxbufsize, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
161 &bpf_maxbufsize, 0,
162 sysctl_bpf_maxbufsize, "I", "Default BPF max buffer size");
163
164 extern const int copysize_limit_panic;
165 #define BPF_BUFSIZE_CAP (copysize_limit_panic >> 1)
166 static int sysctl_bpf_bufsize_cap SYSCTL_HANDLER_ARGS;
167 SYSCTL_PROC(_debug, OID_AUTO, bpf_bufsize_cap, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
168 0, 0,
169 sysctl_bpf_bufsize_cap, "I", "Upper limit on BPF max buffer size");
170
171 static unsigned int bpf_maxdevices = 256;
172 SYSCTL_UINT(_debug, OID_AUTO, bpf_maxdevices, CTLFLAG_RD | CTLFLAG_LOCKED,
173 &bpf_maxdevices, 0, "");
/*
 * bpf_wantpktap controls the default visibility of DLT_PKTAP.
 * On OS X it is off by default, so a process needs to use the ioctl
 * BPF_WANT_PKTAP explicitly to be able to use DLT_PKTAP.
 */
179 #if !XNU_TARGET_OS_OSX
180 static unsigned int bpf_wantpktap = 1;
181 #else /* XNU_TARGET_OS_OSX */
182 static unsigned int bpf_wantpktap = 0;
183 #endif /* XNU_TARGET_OS_OSX */
184 SYSCTL_UINT(_debug, OID_AUTO, bpf_wantpktap, CTLFLAG_RW | CTLFLAG_LOCKED,
185 &bpf_wantpktap, 0, "");
186
187 static int bpf_debug = 0;
188 SYSCTL_INT(_debug, OID_AUTO, bpf_debug, CTLFLAG_RW | CTLFLAG_LOCKED,
189 &bpf_debug, 0, "");
190
191 static unsigned long bpf_trunc_overflow = 0;
192 SYSCTL_ULONG(_debug, OID_AUTO, bpf_trunc_overflow, CTLFLAG_RD | CTLFLAG_LOCKED,
193 &bpf_trunc_overflow, "");
194
195 static int bpf_hdr_comp_enable = 1;
196 SYSCTL_INT(_debug, OID_AUTO, bpf_hdr_comp_enable, CTLFLAG_RW | CTLFLAG_LOCKED,
197 &bpf_hdr_comp_enable, 1, "");
198
199 static int sysctl_bpf_stats SYSCTL_HANDLER_ARGS;
200 SYSCTL_PROC(_debug, OID_AUTO, bpf_stats, CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
201 0, 0,
202 sysctl_bpf_stats, "S", "BPF statistics");
203
204 /*
205 * bpf_iflist is the list of interfaces; each corresponds to an ifnet
206 * bpf_dtab holds pointer to the descriptors, indexed by minor device #
207 */
208 static struct bpf_if *bpf_iflist;
209 /*
210 * BSD now stores the bpf_d in the dev_t which is a struct
211 * on their system. Our dev_t is an int, so we still store
212 * the bpf_d in a separate table indexed by minor device #.
213 *
214 * The value stored in bpf_dtab[n] represent three states:
215 * NULL: device not opened
216 * BPF_DEV_RESERVED: device opening or closing
217 * other: device <n> opened with pointer to storage
218 */
219 #define BPF_DEV_RESERVED ((struct bpf_d *)(uintptr_t)1)
220 static struct bpf_d **bpf_dtab = NULL;
221 static unsigned int bpf_dtab_size = 0;
222 static unsigned int nbpfilter = 0;
223 static unsigned bpf_bpfd_cnt = 0;
224
225 static LCK_GRP_DECLARE(bpf_mlock_grp, "bpf");
226 static LCK_MTX_DECLARE(bpf_mlock_data, &bpf_mlock_grp);
227 static lck_mtx_t *const bpf_mlock = &bpf_mlock_data;
228
229 static int bpf_allocbufs(struct bpf_d *);
230 static errno_t bpf_attachd(struct bpf_d *d, struct bpf_if *bp);
231 static int bpf_detachd(struct bpf_d *d);
232 static void bpf_freed(struct bpf_d *);
233 static int bpf_movein(struct uio *, int,
234 struct mbuf **, struct sockaddr *, int *);
235 static int bpf_setif(struct bpf_d *, ifnet_t ifp, bool, bool, bool);
236 static void bpf_timed_out(void *, void *);
237 static void bpf_wakeup(struct bpf_d *);
238 static uint32_t get_pkt_trunc_len(struct bpf_packet *);
239 static void catchpacket(struct bpf_d *, struct bpf_packet *, u_int, int);
240 static void reset_d(struct bpf_d *);
241 static int bpf_setf(struct bpf_d *, u_int, user_addr_t, u_long);
242 static int bpf_getdltlist(struct bpf_d *, caddr_t, struct proc *);
243 static int bpf_setdlt(struct bpf_d *, u_int);
244 static int bpf_set_traffic_class(struct bpf_d *, int);
245 static void bpf_set_packet_service_class(struct mbuf *, int);
246
247 static void bpf_acquire_d(struct bpf_d *);
248 static void bpf_release_d(struct bpf_d *);
249
250 static int bpf_devsw_installed;
251
252 void bpf_init(void *unused);
253 static int bpf_tap_callback(struct ifnet *ifp, struct mbuf *m);
254
255 /*
256 * Darwin differs from BSD here, the following are static
257 * on BSD and not static on Darwin.
258 */
259 d_open_t bpfopen;
260 d_close_t bpfclose;
261 d_read_t bpfread;
262 d_write_t bpfwrite;
263 ioctl_fcn_t bpfioctl;
264 select_fcn_t bpfselect;
265
266 /* Darwin's cdevsw struct differs slightly from BSDs */
267 #define CDEV_MAJOR 23
268 static const struct cdevsw bpf_cdevsw = {
269 .d_open = bpfopen,
270 .d_close = bpfclose,
271 .d_read = bpfread,
272 .d_write = bpfwrite,
273 .d_ioctl = bpfioctl,
274 .d_stop = eno_stop,
275 .d_reset = eno_reset,
276 .d_ttys = NULL,
277 .d_select = bpfselect,
278 .d_mmap = eno_mmap,
279 .d_strategy = eno_strat,
280 .d_reserved_1 = eno_getc,
281 .d_reserved_2 = eno_putc,
282 .d_type = 0
283 };
284
285 #define SOCKADDR_HDR_LEN offsetof(struct sockaddr, sa_data)
286
/*
 * Copy packet data supplied by a user write() into a freshly allocated
 * mbuf, optionally splitting the link-level header off into *sockp for
 * the protocol output path.
 *
 * uio      - user data; fully consumed on success
 * linktype - DLT_* of the attached interface; selects the sockaddr
 *            address family and the link-header length
 * mp       - out: new mbuf holding the packet data
 * sockp    - if non-NULL, receives the link header bytes in sa_data and
 *            the address family in sa_family; if NULL, the header bytes
 *            stay in the mbuf untouched
 * datlen   - out: payload length (total write length minus link header)
 *
 * Returns 0 on success, EIO for an unsupported DLT or bad sizes,
 * ENOBUFS when no mbuf/cluster is available.
 */
static int
bpf_movein(struct uio *uio, int linktype, struct mbuf **mp,
    struct sockaddr *sockp, int *datlen)
{
	struct mbuf *m;
	int error;
	int len;
	uint8_t sa_family;
	int hlen;

	/* Map the data-link type to a sockaddr family and header length */
	switch (linktype) {
#if SLIP
	case DLT_SLIP:
		sa_family = AF_INET;
		hlen = 0;
		break;
#endif /* SLIP */

	case DLT_EN10MB:
		sa_family = AF_UNSPEC;
		/* XXX Would MAXLINKHDR be better? */
		hlen = sizeof(struct ether_header);
		break;

#if FDDI
	case DLT_FDDI:
#if defined(__FreeBSD__) || defined(__bsdi__)
		sa_family = AF_IMPLINK;
		hlen = 0;
#else
		sa_family = AF_UNSPEC;
		/* XXX 4(FORMAC)+6(dst)+6(src)+3(LLC)+5(SNAP) */
		hlen = 24;
#endif
		break;
#endif /* FDDI */

	case DLT_RAW:
	case DLT_NULL:
		sa_family = AF_UNSPEC;
		hlen = 0;
		break;

#ifdef __FreeBSD__
	case DLT_ATM_RFC1483:
		/*
		 * en atm driver requires 4-byte atm pseudo header.
		 * though it isn't standard, vpi:vci needs to be
		 * specified anyway.
		 */
		sa_family = AF_UNSPEC;
		hlen = 12;      /* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
		break;
#endif

	case DLT_PPP:
		sa_family = AF_UNSPEC;
		hlen = 4;       /* This should match PPP_HDRLEN */
		break;

	case DLT_APPLE_IP_OVER_IEEE1394:
		sa_family = AF_UNSPEC;
		hlen = sizeof(struct firewire_header);
		break;

	case DLT_IEEE802_11:            /* IEEE 802.11 wireless */
		sa_family = AF_IEEE80211;
		hlen = 0;
		break;

	case DLT_IEEE802_11_RADIO:
		sa_family = AF_IEEE80211;
		hlen = 0;
		break;

	default:
		return EIO;
	}

	// LP64todo - fix this!
	len = (int)uio_resid(uio);
	/* Total must cover the link header and fit in a single cluster */
	if (len < hlen || (unsigned)len > MCLBYTES || len - hlen > MCLBYTES) {
		return EIO;
	}

	*datlen = len - hlen;

	if (sockp) {
		/*
		 * Build a sockaddr based on the data link layer type.
		 * We do this at this level because the ethernet header
		 * is copied directly into the data field of the sockaddr.
		 * In the case of SLIP, there is no header and the packet
		 * is forwarded as is.
		 * Also, we are careful to leave room at the front of the mbuf
		 * for the link level header.
		 */
		if ((hlen + SOCKADDR_HDR_LEN) > sockp->sa_len) {
			return EIO;
		}
		sockp->sa_family = sa_family;
	} else {
		/*
		 * We're directly sending the packet data supplied by
		 * the user; we don't need to make room for the link
		 * header, and don't need the header length value any
		 * more, so set it to 0.
		 */
		hlen = 0;
	}

	MGETHDR(m, M_WAIT, MT_DATA);
	if (m == 0) {
		return ENOBUFS;
	}
	/* Data too large for the internal buffer: attach a cluster */
	if ((unsigned)len > MHLEN) {
		MCLGET(m, M_WAIT);
		if ((m->m_flags & M_EXT) == 0) {
			error = ENOBUFS;
			goto bad;
		}
	}
	m->m_pkthdr.len = m->m_len = len;
	m->m_pkthdr.rcvif = NULL;
	*mp = m;

	/*
	 * Make room for link header.
	 */
	if (hlen != 0) {
		/* Trim the header off the mbuf and copy it into the sockaddr */
		m->m_pkthdr.len -= hlen;
		m->m_len -= hlen;
		m->m_data += hlen; /* XXX */
		error = UIOMOVE((caddr_t)sockp->sa_data, hlen, UIO_WRITE, uio);
		if (error) {
			goto bad;
		}
	}
	/* Copy the remaining payload bytes into the mbuf */
	error = UIOMOVE(mtod(m, caddr_t), len - hlen, UIO_WRITE, uio);
	if (error) {
		goto bad;
	}

	/* Check for multicast destination */
	switch (linktype) {
	case DLT_EN10MB: {
		struct ether_header *eh;

		eh = mtod(m, struct ether_header *);
		if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
			/* Broadcast is a special case of multicast */
			if (_ether_cmp(etherbroadcastaddr,
			    eh->ether_dhost) == 0) {
				m->m_flags |= M_BCAST;
			} else {
				m->m_flags |= M_MCAST;
			}
		}
		break;
	}
	}

	return 0;
bad:
	m_freem(m);
	return error;
}
453
454 /*
455 * The dynamic addition of a new device node must block all processes that
456 * are opening the last device so that no process will get an unexpected
457 * ENOENT
458 */
/*
 * Create the devfs node for minor `nbpfilter', growing bpf_dtab first if
 * needed. Concurrent callers are serialized on the `bpf_growing' flag via
 * tsleep/wakeup so that only one thread grows the table at a time; see the
 * comment above about blocking openers of the last node.
 */
static void
bpf_make_dev_t(int maj)
{
	static int bpf_growing = 0;
	unsigned int cur_size = nbpfilter, i;

	/* Hard cap on the number of bpf devices */
	if (nbpfilter >= bpf_maxdevices) {
		return;
	}

	while (bpf_growing) {
		/* Wait until new device has been created */
		(void) tsleep((caddr_t)&bpf_growing, PZERO, "bpf_growing", 0);
	}
	if (nbpfilter > cur_size) {
		/* other thread grew it already */
		return;
	}
	bpf_growing = 1;

	/* need to grow bpf_dtab first */
	if (nbpfilter == bpf_dtab_size) {
		unsigned int new_dtab_size;
		struct bpf_d **new_dtab = NULL;

		/* Grow the descriptor table by NBPFILTER entries at a time */
		new_dtab_size = bpf_dtab_size + NBPFILTER;
		new_dtab = krealloc_type(struct bpf_d *,
		    bpf_dtab_size, new_dtab_size, bpf_dtab, Z_WAITOK | Z_ZERO);
		if (new_dtab == 0) {
			os_log_error(OS_LOG_DEFAULT, "bpf_make_dev_t: malloc bpf_dtab failed");
			goto done;
		}
		bpf_dtab = new_dtab;
		bpf_dtab_size = new_dtab_size;
	}
	/* Publish the new minor and create /dev/bpf<i> */
	i = nbpfilter++;
	(void) devfs_make_node(makedev(maj, i),
	    DEVFS_CHAR, UID_ROOT, GID_WHEEL, 0600,
	    "bpf%d", i);
done:
	bpf_growing = 0;
	wakeup((caddr_t)&bpf_growing);
}
502
503 /*
504 * Attach file to the bpf interface, i.e. make d listen on bp.
505 */
/*
 * Attach descriptor d to interface/DLT pair bp: link d onto bp's listener
 * list, and if d is the first listener, enable the driver tap.
 * Called with bpf_mlock held (d and bp lists are protected by it).
 * Returns the error from the interface's bif_tap callback, if any.
 */
static errno_t
bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
{
	int first = bp->bif_dlist == NULL;
	int error = 0;

	/*
	 * Point d at bp, and add d to the interface's list of listeners.
	 * Finally, point the driver's bpf cookie at the interface so
	 * it will divert packets to bpf.
	 */
	d->bd_bif = bp;
	d->bd_next = bp->bif_dlist;
	bp->bif_dlist = d;
	bpf_bpfd_cnt++;

	/*
	 * Take a reference on the device even if an error is returned
	 * because we keep the device in the interface's list of listeners
	 */
	bpf_acquire_d(d);

	if (first) {
		/* Find the default bpf entry for this ifp */
		if (bp->bif_ifp->if_bpf == NULL) {
			struct bpf_if *tmp, *primary = NULL;

			/* The first bpf_if on the list for this ifp is primary */
			for (tmp = bpf_iflist; tmp; tmp = tmp->bif_next) {
				if (tmp->bif_ifp == bp->bif_ifp) {
					primary = tmp;
					break;
				}
			}
			bp->bif_ifp->if_bpf = primary;
		}
		/* Only call dlil_set_bpf_tap for primary dlt */
		if (bp->bif_ifp->if_bpf == bp) {
			dlil_set_bpf_tap(bp->bif_ifp, BPF_TAP_INPUT_OUTPUT,
			    bpf_tap_callback);
		}

		/* Notify the interface's own tap callback, if it has one */
		if (bp->bif_tap != NULL) {
			error = bp->bif_tap(bp->bif_ifp, bp->bif_dlt,
			    BPF_TAP_INPUT_OUTPUT);
		}
	}

	/*
	 * Reset the detach flags in case we previously detached an interface
	 */
	d->bd_flags &= ~(BPF_DETACHING | BPF_DETACHED);

	/* PKTAP headers need per-process finalization at read time */
	if (bp->bif_dlt == DLT_PKTAP) {
		d->bd_flags |= BPF_FINALIZE_PKTAP;
	} else {
		d->bd_flags &= ~BPF_FINALIZE_PKTAP;
	}
	return error;
}
565
566 /*
567 * Detach a file from its interface.
568 *
569 * Return 1 if was closed by some thread, 0 otherwise
570 */
/*
 * Detach descriptor d from its interface: unlink it from the listener
 * list, disable the tap when the last listener goes away, and turn off
 * promiscuous mode if this descriptor had enabled it.
 *
 * Called with bpf_mlock held; the lock is temporarily DROPPED while
 * calling ifnet_set_promiscuous() and the tap-disable callback, so the
 * descriptor state must be re-read afterwards.
 *
 * Return 1 if the descriptor was closed by some thread, 0 otherwise.
 */
static int
bpf_detachd(struct bpf_d *d)
{
	struct bpf_d **p;
	struct bpf_if *bp;
	struct ifnet *ifp;
	uint32_t dlt;
	bpf_tap_func disable_tap;
	uint8_t bd_promisc;


	int bpf_closed = d->bd_flags & BPF_CLOSING;
	/*
	 * Some other thread already detached
	 */
	if ((d->bd_flags & (BPF_DETACHED | BPF_DETACHING)) != 0) {
		goto done;
	}
	/*
	 * This thread is doing the detach
	 */
	d->bd_flags |= BPF_DETACHING;

	ifp = d->bd_bif->bif_ifp;
	bp = d->bd_bif;

	/* Remove d from the interface's descriptor list. */
	p = &bp->bif_dlist;
	while (*p != d) {
		p = &(*p)->bd_next;
		if (*p == 0) {
			panic("bpf_detachd: descriptor not in list");
		}
	}
	*p = (*p)->bd_next;
	bpf_bpfd_cnt--;
	disable_tap = NULL;
	if (bp->bif_dlist == 0) {
		/*
		 * Let the driver know that there are no more listeners.
		 */
		/* Only call dlil_set_bpf_tap for primary dlt */
		if (bp->bif_ifp->if_bpf == bp) {
			dlil_set_bpf_tap(ifp, BPF_TAP_DISABLE, NULL);
		}

		/* Defer the bif_tap call until after the lock is dropped */
		disable_tap = bp->bif_tap;
		if (disable_tap) {
			dlt = bp->bif_dlt;
		}

		/*
		 * If no other DLT on this interface still has listeners,
		 * clear the interface's bpf pointer entirely.
		 */
		for (bp = bpf_iflist; bp; bp = bp->bif_next) {
			if (bp->bif_ifp == ifp && bp->bif_dlist != 0) {
				break;
			}
		}
		if (bp == NULL) {
			ifp->if_bpf = NULL;
		}
	}
	d->bd_bif = NULL;
	/*
	 * Check if this descriptor had requested promiscuous mode.
	 * If so, turn it off.
	 */
	bd_promisc = d->bd_promisc;
	d->bd_promisc = 0;

	/* Drop the lock: the calls below may block or call back into bpf */
	lck_mtx_unlock(bpf_mlock);
	if (bd_promisc) {
		if (ifnet_set_promiscuous(ifp, 0)) {
			/*
			 * Something is really wrong if we were able to put
			 * the driver into promiscuous mode, but can't
			 * take it out.
			 * Most likely the network interface is gone.
			 */
			os_log_error(OS_LOG_DEFAULT,
			    "%s: bpf%d ifnet_set_promiscuous %s failed",
			    __func__, d->bd_dev_minor, if_name(ifp));
		}
	}

	if (disable_tap) {
		disable_tap(ifp, dlt, BPF_TAP_DISABLE);
	}
	lck_mtx_lock(bpf_mlock);

	/*
	 * Wake up other thread that are waiting for this thread to finish
	 * detaching
	 */
	d->bd_flags &= ~BPF_DETACHING;
	d->bd_flags |= BPF_DETACHED;

	/* Refresh the local variable as d could have been modified */
	bpf_closed = d->bd_flags & BPF_CLOSING;

	os_log(OS_LOG_DEFAULT, "bpf%d%s detached from %s fcount %llu dcount %llu",
	    d->bd_dev_minor, bpf_closed ? " closed and" : "", if_name(ifp),
	    d->bd_fcount, d->bd_dcount);

	/*
	 * Note that We've kept the reference because we may have dropped
	 * the lock when turning off promiscuous mode
	 */
	bpf_release_d(d);
done:
	/*
	 * Let the caller know the bpf_d is closed
	 */
	if (bpf_closed) {
		return 1;
	} else {
		return 0;
	}
}
688
689 /*
690 * Start asynchronous timer, if necessary.
691 * Must be called with bpf_mlock held.
692 */
693 static void
bpf_start_timer(struct bpf_d * d)694 bpf_start_timer(struct bpf_d *d)
695 {
696 uint64_t deadline;
697 struct timeval tv;
698
699 if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
700 tv.tv_sec = d->bd_rtout / hz;
701 tv.tv_usec = (d->bd_rtout % hz) * tick;
702
703 clock_interval_to_deadline(
704 (uint32_t)tv.tv_sec * USEC_PER_SEC + tv.tv_usec,
705 NSEC_PER_USEC, &deadline);
706 /*
707 * The state is BPF_IDLE, so the timer hasn't
708 * been started yet, and hasn't gone off yet;
709 * there is no thread call scheduled, so this
710 * won't change the schedule.
711 *
712 * XXX - what if, by the time it gets entered,
713 * the deadline has already passed?
714 */
715 thread_call_enter_delayed(d->bd_thread_call, deadline);
716 d->bd_state = BPF_WAITING;
717 }
718 }
719
720 /*
721 * Cancel asynchronous timer.
722 * Must be called with bpf_mlock held.
723 */
724 static boolean_t
bpf_stop_timer(struct bpf_d * d)725 bpf_stop_timer(struct bpf_d *d)
726 {
727 /*
728 * If the timer has already gone off, this does nothing.
729 * Our caller is expected to set d->bd_state to BPF_IDLE,
730 * with the bpf_mlock, after we are called. bpf_timed_out()
731 * also grabs bpf_mlock, so, if the timer has gone off and
732 * bpf_timed_out() hasn't finished, it's waiting for the
733 * lock; when this thread releases the lock, it will
734 * find the state is BPF_IDLE, and just release the
735 * lock and return.
736 */
737 return thread_call_cancel(d->bd_thread_call);
738 }
739
740 void
bpf_acquire_d(struct bpf_d * d)741 bpf_acquire_d(struct bpf_d *d)
742 {
743 void *lr_saved = __builtin_return_address(0);
744
745 LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);
746
747 d->bd_refcnt += 1;
748
749 d->bd_ref_lr[d->bd_next_ref_lr] = lr_saved;
750 d->bd_next_ref_lr = (d->bd_next_ref_lr + 1) % BPF_REF_HIST;
751 }
752
753 void
bpf_release_d(struct bpf_d * d)754 bpf_release_d(struct bpf_d *d)
755 {
756 void *lr_saved = __builtin_return_address(0);
757
758 LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);
759
760 if (d->bd_refcnt <= 0) {
761 panic("%s: %p refcnt <= 0", __func__, d);
762 }
763
764 d->bd_refcnt -= 1;
765
766 d->bd_unref_lr[d->bd_next_unref_lr] = lr_saved;
767 d->bd_next_unref_lr = (d->bd_next_unref_lr + 1) % BPF_REF_HIST;
768
769 if (d->bd_refcnt == 0) {
770 /* Assert the device is detached */
771 if ((d->bd_flags & BPF_DETACHED) == 0) {
772 panic("%s: %p BPF_DETACHED not set", __func__, d);
773 }
774
775 kfree_type(struct bpf_d, d);
776 }
777 }
778
779 /*
780 * Open ethernet device. Returns ENXIO for illegal minor device number,
781 * EBUSY if file is open by another process.
782 */
783 /* ARGSUSED */
/*
 * Open a bpf device node. Allocates and initializes the per-descriptor
 * bpf_d and stores it in bpf_dtab[minor(dev)].
 * Returns ENXIO for an illegal minor device number, EBUSY if the minor
 * is already open by another process, ENOMEM on allocation failure.
 */
int
bpfopen(dev_t dev, int flags, __unused int fmt,
    struct proc *p)
{
	struct bpf_d *d;

	lck_mtx_lock(bpf_mlock);
	if ((unsigned int) minor(dev) >= nbpfilter) {
		lck_mtx_unlock(bpf_mlock);
		return ENXIO;
	}
	/*
	 * New device nodes are created on demand when opening the last one.
	 * The programming model is for processes to loop on the minor starting
	 * at 0 as long as EBUSY is returned. The loop stops when either the
	 * open succeeds or an error other than EBUSY is returned. That means
	 * that bpf_make_dev_t() must block all processes that are opening the
	 * last node. If not all processes are blocked, they could unexpectedly
	 * get ENOENT and abort their opening loop.
	 */
	if ((unsigned int) minor(dev) == (nbpfilter - 1)) {
		bpf_make_dev_t(major(dev));
	}

	/*
	 * Each minor can be opened by only one process. If the requested
	 * minor is in use, return EBUSY.
	 *
	 * Important: bpfopen() and bpfclose() have to check and set the status
	 * of a device in the same locking context otherwise the device may be
	 * leaked because the vnode use count will be unexpectedly greater than 1
	 * when close() is called.
	 */
	if (bpf_dtab[minor(dev)] == NULL) {
		/* Reserve while opening */
		bpf_dtab[minor(dev)] = BPF_DEV_RESERVED;
	} else {
		lck_mtx_unlock(bpf_mlock);
		return EBUSY;
	}
	d = kalloc_type(struct bpf_d, Z_WAITOK | Z_ZERO);
	if (d == NULL) {
		/* this really is a catastrophic failure */
		os_log_error(OS_LOG_DEFAULT,
		    "bpfopen: bpf%d kalloc_type bpf_d failed", minor(dev));
		bpf_dtab[minor(dev)] = NULL;
		lck_mtx_unlock(bpf_mlock);
		return ENOMEM;
	}

	/* Mark "in use" and do most initialization. */
	bpf_acquire_d(d);
	d->bd_bufsize = bpf_bufsize;
	d->bd_sig = SIGIO;
	d->bd_seesent = 1;
	d->bd_oflags = flags;
	d->bd_state = BPF_IDLE;
	d->bd_traffic_class = SO_TC_BE;
	/* Detached until bound to an interface with BIOCSETIF */
	d->bd_flags |= BPF_DETACHED;
	if (bpf_wantpktap) {
		d->bd_flags |= BPF_WANT_PKTAP;
	} else {
		d->bd_flags &= ~BPF_WANT_PKTAP;
	}

	/* Thread call used to implement the read timeout (BIOCSRTIMEOUT) */
	d->bd_thread_call = thread_call_allocate(bpf_timed_out, d);
	if (d->bd_thread_call == NULL) {
		os_log_error(OS_LOG_DEFAULT, "bpfopen: bpf%d malloc thread call failed",
		    minor(dev));
		bpf_dtab[minor(dev)] = NULL;
		bpf_release_d(d);
		lck_mtx_unlock(bpf_mlock);

		return ENOMEM;
	}
	/* Record the opening process for debugging/accounting */
	d->bd_opened_by = p;
	uuid_generate(d->bd_uuid);
	d->bd_pid = proc_pid(p);

	d->bd_dev_minor = minor(dev);
	bpf_dtab[minor(dev)] = d;               /* Mark opened */
	lck_mtx_unlock(bpf_mlock);

	if (bpf_debug) {
		os_log(OS_LOG_DEFAULT, "bpf%u opened by %s.%u",
		    d->bd_dev_minor, proc_name_address(p), d->bd_pid);
	}
	return 0;
}
873
874 /*
875 * Close the descriptor by detaching it from its interface,
876 * deallocating its buffers, and marking it free.
877 */
878 /* ARGSUSED */
/*
 * Close the descriptor by detaching it from its interface,
 * deallocating its buffers, and marking it free.
 * Drains any in-flight read timeout before tearing down, and keeps
 * bpf_dtab[minor(dev)] reserved until the teardown is complete so
 * bpfopen() cannot hand out the same minor concurrently.
 */
/* ARGSUSED */
int
bpfclose(dev_t dev, __unused int flags, __unused int fmt,
    __unused struct proc *p)
{
	struct bpf_d *d;

	/* Take BPF lock to ensure no other thread is using the device */
	lck_mtx_lock(bpf_mlock);

	d = bpf_dtab[minor(dev)];
	if (d == NULL || d == BPF_DEV_RESERVED) {
		lck_mtx_unlock(bpf_mlock);
		return ENXIO;
	}

	/*
	 * Other threads may call bpf_detachd() if we drop the bpf_mlock
	 */
	d->bd_flags |= BPF_CLOSING;

	if (bpf_debug != 0) {
		os_log(OS_LOG_DEFAULT, "%s: bpf%d",
		    __func__, d->bd_dev_minor);
	}

	bpf_dtab[minor(dev)] = BPF_DEV_RESERVED; /* Reserve while closing */

	/*
	 * Deal with any in-progress timeouts.
	 */
	switch (d->bd_state) {
	case BPF_IDLE:
		/*
		 * Not waiting for a timeout, and no timeout happened.
		 */
		break;

	case BPF_WAITING:
		/*
		 * Waiting for a timeout.
		 * Cancel any timer that has yet to go off,
		 * and mark the state as "closing".
		 * Then drop the lock to allow any timers that
		 * *have* gone off to run to completion, and wait
		 * for them to finish.
		 */
		if (!bpf_stop_timer(d)) {
			/*
			 * There was no pending call, so the call must
			 * have been in progress. Wait for the call to
			 * complete; we have to drop the lock while
			 * waiting, to let the in-progress call complete.
			 */
			d->bd_state = BPF_DRAINING;
			while (d->bd_state == BPF_DRAINING) {
				msleep((caddr_t)d, bpf_mlock, PRINET,
				    "bpfdraining", NULL);
			}
		}
		d->bd_state = BPF_IDLE;
		break;

	case BPF_TIMED_OUT:
		/*
		 * Timer went off, and the timeout routine finished.
		 */
		d->bd_state = BPF_IDLE;
		break;

	case BPF_DRAINING:
		/*
		 * Another thread is blocked on a close waiting for
		 * a timeout to finish.
		 * This "shouldn't happen", as the first thread to enter
		 * bpfclose() will set bpf_dtab[minor(dev)] to
		 * BPF_DEV_RESERVED, and all subsequent threads should
		 * see that and fail with ENXIO.
		 */
		panic("Two threads blocked in a BPF close");
		break;
	}

	if (d->bd_bif) {
		bpf_detachd(d);
	}
	selthreadclear(&d->bd_sel);
	thread_call_free(d->bd_thread_call);

	/* Wait for any in-progress read on the hold buffer to finish */
	while (d->bd_hbuf_read != 0) {
		msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
	}

	if (bpf_debug) {
		os_log(OS_LOG_DEFAULT,
		    "bpf%u closed by %s.%u dcount %llu fcount %llu ccount %llu",
		    d->bd_dev_minor, proc_name_address(p), d->bd_pid,
		    d->bd_dcount, d->bd_fcount, d->bd_bcs.bcs_count_compressed_prefix);
	}

	bpf_freed(d);

	/* Mark free in same context as bpfopen comes to check */
	bpf_dtab[minor(dev)] = NULL;                    /* Mark closed */

	/* Drop the reference taken in bpfopen(); frees d when it is the last */
	bpf_release_d(d);

	lck_mtx_unlock(bpf_mlock);

	return 0;
}
989
990 #define BPF_SLEEP bpf_sleep
991
992 static int
bpf_sleep(struct bpf_d * d,int pri,const char * wmesg,int timo)993 bpf_sleep(struct bpf_d *d, int pri, const char *wmesg, int timo)
994 {
995 u_int64_t abstime = 0;
996
997 if (timo != 0) {
998 clock_interval_to_deadline(timo, NSEC_PER_SEC / hz, &abstime);
999 }
1000
1001 return msleep1((caddr_t)d, bpf_mlock, pri, wmesg, abstime);
1002 }
1003
1004 static void
bpf_finalize_pktap(struct bpf_hdr * hp,struct pktap_header * pktaphdr)1005 bpf_finalize_pktap(struct bpf_hdr *hp, struct pktap_header *pktaphdr)
1006 {
1007 if (pktaphdr->pth_flags & PTH_FLAG_V2_HDR) {
1008 struct pktap_v2_hdr *pktap_v2_hdr;
1009
1010 pktap_v2_hdr = (struct pktap_v2_hdr *)pktaphdr;
1011
1012 if (pktap_v2_hdr->pth_flags & PTH_FLAG_DELAY_PKTAP) {
1013 pktap_v2_finalize_proc_info(pktap_v2_hdr);
1014 }
1015 } else {
1016 if (pktaphdr->pth_flags & PTH_FLAG_DELAY_PKTAP) {
1017 pktap_finalize_proc_info(pktaphdr);
1018 }
1019
1020 if (pktaphdr->pth_flags & PTH_FLAG_TSTAMP) {
1021 hp->bh_tstamp.tv_sec = pktaphdr->pth_tstamp.tv_sec;
1022 hp->bh_tstamp.tv_usec = pktaphdr->pth_tstamp.tv_usec;
1023 }
1024 }
1025 }
1026
/*
 * Rotate the packet buffers in descriptor d. Move the store buffer
 * into the hold slot, and the free buffer into the store slot.
 * Zero the length of the new store buffer.
 *
 * Note: in head drop mode, the hold buffer can be dropped, so the first
 * packet of the store buffer cannot be compressed, as it would otherwise
 * refer to deleted data in a dropped hold buffer that the reader process
 * does not know about.
 */
#define ROTATE_BUFFERS(d) do { \
	if (d->bd_hbuf_read != 0) \
	        panic("rotating bpf buffers during read"); \
	(d)->bd_hbuf = (d)->bd_sbuf;    /* store buffer becomes hold buffer */ \
	(d)->bd_hlen = (d)->bd_slen; \
	(d)->bd_hcnt = (d)->bd_scnt; \
	(d)->bd_sbuf = (d)->bd_fbuf;    /* free buffer becomes store buffer */ \
	(d)->bd_slen = 0; \
	(d)->bd_scnt = 0; \
	(d)->bd_fbuf = NULL; \
	if ((d)->bd_headdrop != 0)      /* head drop: don't compress against dropped data */ \
	        (d)->bd_prev_slen = 0; \
} while(false)
1049
1050 /*
1051 * bpfread - read next chunk of packets from buffers
1052 */
1053 int
bpfread(dev_t dev,struct uio * uio,int ioflag)1054 bpfread(dev_t dev, struct uio *uio, int ioflag)
1055 {
1056 struct bpf_d *d;
1057 caddr_t hbuf;
1058 int timed_out, hbuf_len;
1059 int error;
1060 int flags;
1061
1062 lck_mtx_lock(bpf_mlock);
1063
1064 d = bpf_dtab[minor(dev)];
1065 if (d == NULL || d == BPF_DEV_RESERVED ||
1066 (d->bd_flags & BPF_CLOSING) != 0) {
1067 lck_mtx_unlock(bpf_mlock);
1068 return ENXIO;
1069 }
1070
1071 bpf_acquire_d(d);
1072
1073 /*
1074 * Restrict application to use a buffer the same size as
1075 * as kernel buffers.
1076 */
1077 if (uio_resid(uio) != d->bd_bufsize) {
1078 bpf_release_d(d);
1079 lck_mtx_unlock(bpf_mlock);
1080 return EINVAL;
1081 }
1082
1083 if (d->bd_state == BPF_WAITING) {
1084 bpf_stop_timer(d);
1085 }
1086
1087 timed_out = (d->bd_state == BPF_TIMED_OUT);
1088 d->bd_state = BPF_IDLE;
1089
1090 while (d->bd_hbuf_read != 0) {
1091 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
1092 }
1093
1094 if ((d->bd_flags & BPF_CLOSING) != 0) {
1095 bpf_release_d(d);
1096 lck_mtx_unlock(bpf_mlock);
1097 return ENXIO;
1098 }
1099 /*
1100 * If the hold buffer is empty, then do a timed sleep, which
1101 * ends when the timeout expires or when enough packets
1102 * have arrived to fill the store buffer.
1103 */
1104 while (d->bd_hbuf == 0) {
1105 if ((d->bd_immediate || timed_out || (ioflag & IO_NDELAY)) &&
1106 d->bd_slen != 0) {
1107 /*
1108 * We're in immediate mode, or are reading
1109 * in non-blocking mode, or a timer was
1110 * started before the read (e.g., by select()
1111 * or poll()) and has expired and a packet(s)
1112 * either arrived since the previous
1113 * read or arrived while we were asleep.
1114 * Rotate the buffers and return what's here.
1115 */
1116 ROTATE_BUFFERS(d);
1117 break;
1118 }
1119
1120 /*
1121 * No data is available, check to see if the bpf device
1122 * is still pointed at a real interface. If not, return
1123 * ENXIO so that the userland process knows to rebind
1124 * it before using it again.
1125 */
1126 if (d->bd_bif == NULL) {
1127 bpf_release_d(d);
1128 lck_mtx_unlock(bpf_mlock);
1129 return ENXIO;
1130 }
1131 if (ioflag & IO_NDELAY) {
1132 bpf_release_d(d);
1133 lck_mtx_unlock(bpf_mlock);
1134 return EWOULDBLOCK;
1135 }
1136 error = BPF_SLEEP(d, PRINET | PCATCH, "bpf", d->bd_rtout);
1137 /*
1138 * Make sure device is still opened
1139 */
1140 if ((d->bd_flags & BPF_CLOSING) != 0) {
1141 bpf_release_d(d);
1142 lck_mtx_unlock(bpf_mlock);
1143 return ENXIO;
1144 }
1145
1146 while (d->bd_hbuf_read != 0) {
1147 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading",
1148 NULL);
1149 }
1150
1151 if ((d->bd_flags & BPF_CLOSING) != 0) {
1152 bpf_release_d(d);
1153 lck_mtx_unlock(bpf_mlock);
1154 return ENXIO;
1155 }
1156
1157 if (error == EINTR || error == ERESTART) {
1158 if (d->bd_hbuf != NULL) {
1159 /*
1160 * Because we msleep, the hold buffer might
1161 * be filled when we wake up. Avoid rotating
1162 * in this case.
1163 */
1164 break;
1165 }
1166 if (d->bd_slen != 0) {
1167 /*
1168 * Sometimes we may be interrupted often and
1169 * the sleep above will not timeout.
1170 * Regardless, we should rotate the buffers
1171 * if there's any new data pending and
1172 * return it.
1173 */
1174 ROTATE_BUFFERS(d);
1175 break;
1176 }
1177 bpf_release_d(d);
1178 lck_mtx_unlock(bpf_mlock);
1179 if (error == ERESTART) {
1180 os_log(OS_LOG_DEFAULT, "%s: bpf%d ERESTART to EINTR",
1181 __func__, d->bd_dev_minor);
1182 error = EINTR;
1183 }
1184 return error;
1185 }
1186 if (error == EWOULDBLOCK) {
1187 /*
1188 * On a timeout, return what's in the buffer,
1189 * which may be nothing. If there is something
1190 * in the store buffer, we can rotate the buffers.
1191 */
1192 if (d->bd_hbuf) {
1193 /*
1194 * We filled up the buffer in between
1195 * getting the timeout and arriving
1196 * here, so we don't need to rotate.
1197 */
1198 break;
1199 }
1200
1201 if (d->bd_slen == 0) {
1202 bpf_release_d(d);
1203 lck_mtx_unlock(bpf_mlock);
1204 return 0;
1205 }
1206 ROTATE_BUFFERS(d);
1207 break;
1208 }
1209 }
1210 /*
1211 * At this point, we know we have something in the hold slot.
1212 */
1213
1214 /*
1215 * Set the hold buffer read. So we do not
1216 * rotate the buffers until the hold buffer
1217 * read is complete. Also to avoid issues resulting
1218 * from page faults during disk sleep (<rdar://problem/13436396>).
1219 */
1220 d->bd_hbuf_read = 1;
1221 hbuf = d->bd_hbuf;
1222 hbuf_len = d->bd_hlen;
1223 flags = d->bd_flags;
1224 d->bd_bcs.bcs_total_read += d->bd_hcnt;
1225 lck_mtx_unlock(bpf_mlock);
1226
1227 /*
1228 * Before we move data to userland, we fill out the extended
1229 * header fields.
1230 */
1231 if (flags & BPF_EXTENDED_HDR) {
1232 char *p;
1233
1234 p = hbuf;
1235 while (p < hbuf + hbuf_len) {
1236 struct bpf_hdr_ext *ehp;
1237 uint32_t flowid;
1238 struct so_procinfo soprocinfo;
1239 int found = 0;
1240
1241 ehp = (struct bpf_hdr_ext *)(void *)p;
1242 if ((flowid = ehp->bh_flowid) != 0) {
1243 if (ehp->bh_flags & BPF_HDR_EXT_FLAGS_TCP) {
1244 ehp->bh_flags &= ~BPF_HDR_EXT_FLAGS_TCP;
1245 found = inp_findinpcb_procinfo(&tcbinfo,
1246 flowid, &soprocinfo);
1247 } else if (ehp->bh_flags == BPF_HDR_EXT_FLAGS_UDP) {
1248 ehp->bh_flags &= ~BPF_HDR_EXT_FLAGS_UDP;
1249 found = inp_findinpcb_procinfo(&udbinfo,
1250 flowid, &soprocinfo);
1251 }
1252 if (found == 1) {
1253 ehp->bh_pid = soprocinfo.spi_pid;
1254 strlcpy(&ehp->bh_comm[0], &soprocinfo.spi_proc_name[0], sizeof(ehp->bh_comm));
1255 }
1256 ehp->bh_flowid = 0;
1257 }
1258
1259 if ((flags & BPF_FINALIZE_PKTAP) != 0 && ehp->bh_complen == 0) {
1260 struct pktap_header *pktaphdr;
1261
1262 pktaphdr = (struct pktap_header *)(void *)
1263 (p + BPF_WORDALIGN(ehp->bh_hdrlen));
1264
1265 bpf_finalize_pktap((struct bpf_hdr *) ehp,
1266 pktaphdr);
1267 }
1268 p += BPF_WORDALIGN(ehp->bh_hdrlen + ehp->bh_caplen);
1269 }
1270 } else if (flags & BPF_FINALIZE_PKTAP) {
1271 char *p;
1272
1273 p = hbuf;
1274
1275 while (p < hbuf + hbuf_len) {
1276 struct bpf_hdr *hp;
1277 struct pktap_header *pktaphdr;
1278
1279 hp = (struct bpf_hdr *)(void *)p;
1280
1281 /*
1282 * Cannot finalize a compressed pktap header as we may not have
1283 * all the fields present
1284 */
1285 if (d->bd_flags & BPF_COMP_ENABLED) {
1286 struct bpf_comp_hdr *hcp;
1287
1288 hcp = (struct bpf_comp_hdr *)(void *)p;
1289
1290 if (hcp->bh_complen != 0) {
1291 p += BPF_WORDALIGN(hcp->bh_hdrlen + hcp->bh_caplen);
1292 continue;
1293 }
1294 }
1295
1296 pktaphdr = (struct pktap_header *)(void *)
1297 (p + BPF_WORDALIGN(hp->bh_hdrlen));
1298
1299 bpf_finalize_pktap(hp, pktaphdr);
1300
1301 p += BPF_WORDALIGN(hp->bh_hdrlen + hp->bh_caplen);
1302 }
1303 }
1304
1305 /*
1306 * Move data from hold buffer into user space.
1307 * We know the entire buffer is transferred since
1308 * we checked above that the read buffer is bpf_bufsize bytes.
1309 */
1310 error = UIOMOVE(hbuf, hbuf_len, UIO_READ, uio);
1311
1312 lck_mtx_lock(bpf_mlock);
1313 /*
1314 * Make sure device is still opened
1315 */
1316 if ((d->bd_flags & BPF_CLOSING) != 0) {
1317 bpf_release_d(d);
1318 lck_mtx_unlock(bpf_mlock);
1319 return ENXIO;
1320 }
1321
1322 d->bd_hbuf_read = 0;
1323 d->bd_fbuf = d->bd_hbuf;
1324 d->bd_hbuf = NULL;
1325 d->bd_hlen = 0;
1326 d->bd_hcnt = 0;
1327 wakeup((caddr_t)d);
1328
1329 bpf_release_d(d);
1330 lck_mtx_unlock(bpf_mlock);
1331 return error;
1332 }
1333
1334 /*
1335 * If there are processes sleeping on this descriptor, wake them up.
1336 */
/*
 * If there are processes sleeping on this descriptor, wake them up.
 *
 * Called with bpf_mlock held. Cancels a pending BIOCSRTIMEOUT timer,
 * wakes blocking readers, then notifies the async (SIGIO), select/poll
 * and kqueue channels in that order.
 */
static void
bpf_wakeup(struct bpf_d *d)
{
	/* A timer was armed by select()/poll(); data is here, stop it. */
	if (d->bd_state == BPF_WAITING) {
		bpf_stop_timer(d);
		d->bd_state = BPF_IDLE;
	}
	/* Wake threads sleeping in bpfread()/bpf_setup() on this d. */
	wakeup((caddr_t)d);
	/* Deliver the async signal only if FIOASYNC and BIOCSRSIG are set. */
	if (d->bd_async && d->bd_sig && d->bd_sigio) {
		pgsigio(d->bd_sigio, d->bd_sig);
	}

	selwakeup(&d->bd_sel);
	/* Post the kqueue event only if a knote is attached. */
	if ((d->bd_flags & BPF_KNOTE)) {
		KNOTE(&d->bd_sel.si_note, 1);
	}
}
1354
/*
 * Read-timeout callout handler (armed while bd_state == BPF_WAITING).
 *
 * On expiry, transitions WAITING -> TIMED_OUT and wakes any waiter if
 * data is pending; if a close is draining the timer (BPF_DRAINING),
 * transitions back to IDLE and wakes the closer.
 */
static void
bpf_timed_out(void *arg, __unused void *dummy)
{
	struct bpf_d *d = (struct bpf_d *)arg;

	lck_mtx_lock(bpf_mlock);
	if (d->bd_state == BPF_WAITING) {
		/*
		 * There's a select or kqueue waiting for this; if there's
		 * now stuff to read, wake it up.
		 */
		d->bd_state = BPF_TIMED_OUT;
		if (d->bd_slen != 0) {
			bpf_wakeup(d);
		}
	} else if (d->bd_state == BPF_DRAINING) {
		/*
		 * A close is waiting for this to finish.
		 * Mark it as finished, and wake the close up.
		 */
		d->bd_state = BPF_IDLE;
		bpf_wakeup(d);
	}
	lck_mtx_unlock(bpf_mlock);
}
1380
1381 /* keep in sync with bpf_movein above: */
1382 #define MAX_DATALINK_HDR_LEN (sizeof(struct firewire_header))
1383
1384 int
bpfwrite(dev_t dev,struct uio * uio,__unused int ioflag)1385 bpfwrite(dev_t dev, struct uio *uio, __unused int ioflag)
1386 {
1387 struct bpf_d *d;
1388 struct ifnet *ifp;
1389 struct mbuf *m = NULL;
1390 int error;
1391 char dst_buf[SOCKADDR_HDR_LEN + MAX_DATALINK_HDR_LEN];
1392 int datlen = 0;
1393 int bif_dlt;
1394 int bd_hdrcmplt;
1395
1396 lck_mtx_lock(bpf_mlock);
1397
1398 d = bpf_dtab[minor(dev)];
1399 if (d == NULL || d == BPF_DEV_RESERVED ||
1400 (d->bd_flags & BPF_CLOSING) != 0) {
1401 lck_mtx_unlock(bpf_mlock);
1402 return ENXIO;
1403 }
1404
1405 bpf_acquire_d(d);
1406
1407 ++d->bd_wcount;
1408
1409 if (d->bd_bif == 0) {
1410 ++d->bd_wdcount;
1411 bpf_release_d(d);
1412 lck_mtx_unlock(bpf_mlock);
1413 return ENXIO;
1414 }
1415
1416 ifp = d->bd_bif->bif_ifp;
1417
1418 if ((ifp->if_flags & IFF_UP) == 0) {
1419 ++d->bd_wdcount;
1420 bpf_release_d(d);
1421 lck_mtx_unlock(bpf_mlock);
1422 return ENETDOWN;
1423 }
1424 if (uio_resid(uio) == 0) {
1425 bpf_release_d(d);
1426 lck_mtx_unlock(bpf_mlock);
1427 return 0;
1428 }
1429 ((struct sockaddr *)dst_buf)->sa_len = sizeof(dst_buf);
1430
1431 /*
1432 * fix for PR-6849527
1433 * geting variables onto stack before dropping lock for bpf_movein()
1434 */
1435 bif_dlt = (int)d->bd_bif->bif_dlt;
1436 bd_hdrcmplt = d->bd_hdrcmplt;
1437
1438 /* bpf_movein allocating mbufs; drop lock */
1439 lck_mtx_unlock(bpf_mlock);
1440
1441 error = bpf_movein(uio, bif_dlt, &m,
1442 bd_hdrcmplt ? NULL : (struct sockaddr *)dst_buf,
1443 &datlen);
1444
1445 /* take the lock again */
1446 lck_mtx_lock(bpf_mlock);
1447 if (error != 0) {
1448 ++d->bd_wdcount;
1449 bpf_release_d(d);
1450 lck_mtx_unlock(bpf_mlock);
1451 return error;
1452 }
1453
1454 /* verify the device is still open */
1455 if ((d->bd_flags & BPF_CLOSING) != 0) {
1456 ++d->bd_wdcount;
1457 bpf_release_d(d);
1458 lck_mtx_unlock(bpf_mlock);
1459 m_freem(m);
1460 return ENXIO;
1461 }
1462
1463 if (d->bd_bif == NULL) {
1464 ++d->bd_wdcount;
1465 bpf_release_d(d);
1466 lck_mtx_unlock(bpf_mlock);
1467 m_free(m);
1468 return ENXIO;
1469 }
1470
1471 if ((unsigned)datlen > ifp->if_mtu) {
1472 ++d->bd_wdcount;
1473 bpf_release_d(d);
1474 lck_mtx_unlock(bpf_mlock);
1475 m_freem(m);
1476 return EMSGSIZE;
1477 }
1478
1479 bpf_set_packet_service_class(m, d->bd_traffic_class);
1480
1481 lck_mtx_unlock(bpf_mlock);
1482
1483 /*
1484 * The driver frees the mbuf.
1485 */
1486 if (d->bd_hdrcmplt) {
1487 if (d->bd_bif->bif_send) {
1488 error = d->bd_bif->bif_send(ifp, d->bd_bif->bif_dlt, m);
1489 } else {
1490 error = dlil_output(ifp, 0, m, NULL, NULL, 1, NULL);
1491 }
1492 } else {
1493 error = dlil_output(ifp, PF_INET, m, NULL,
1494 (struct sockaddr *)dst_buf, 0, NULL);
1495 }
1496
1497 lck_mtx_lock(bpf_mlock);
1498 if (error != 0) {
1499 ++d->bd_wdcount;
1500 }
1501 bpf_release_d(d);
1502 lck_mtx_unlock(bpf_mlock);
1503
1504 return error;
1505 }
1506
1507 /*
1508 * Reset a descriptor by flushing its packet buffer and clearing the
1509 * receive and drop counts.
1510 */
1511 static void
reset_d(struct bpf_d * d)1512 reset_d(struct bpf_d *d)
1513 {
1514 if (d->bd_hbuf_read != 0) {
1515 panic("resetting buffers during read");
1516 }
1517
1518 if (d->bd_hbuf) {
1519 /* Free the hold buffer. */
1520 d->bd_fbuf = d->bd_hbuf;
1521 d->bd_hbuf = NULL;
1522 }
1523 d->bd_slen = 0;
1524 d->bd_hlen = 0;
1525 d->bd_scnt = 0;
1526 d->bd_hcnt = 0;
1527 d->bd_rcount = 0;
1528 d->bd_dcount = 0;
1529 d->bd_fcount = 0;
1530 d->bd_wcount = 0;
1531 d->bd_wdcount = 0;
1532
1533 d->bd_prev_slen = 0;
1534 }
1535
1536 static struct bpf_d *
bpf_get_device_from_uuid(uuid_t uuid)1537 bpf_get_device_from_uuid(uuid_t uuid)
1538 {
1539 unsigned int i;
1540
1541 for (i = 0; i < nbpfilter; i++) {
1542 struct bpf_d *d = bpf_dtab[i];
1543
1544 if (d == NULL || d == BPF_DEV_RESERVED ||
1545 (d->bd_flags & BPF_CLOSING) != 0) {
1546 continue;
1547 }
1548 if (uuid_compare(uuid, d->bd_uuid) == 0) {
1549 return d;
1550 }
1551 }
1552
1553 return NULL;
1554 }
1555
1556 /*
1557 * The BIOCSETUP command "atomically" attach to the interface and
1558 * copy the buffer from another interface. This minimizes the risk
1559 * of missing packet because this is done while holding
1560 * the BPF global lock
1561 */
1562 static int
bpf_setup(struct bpf_d * d_to,uuid_t uuid_from,ifnet_t ifp)1563 bpf_setup(struct bpf_d *d_to, uuid_t uuid_from, ifnet_t ifp)
1564 {
1565 struct bpf_d *d_from;
1566 int error = 0;
1567
1568 LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);
1569
1570 /*
1571 * Sanity checks
1572 */
1573 d_from = bpf_get_device_from_uuid(uuid_from);
1574 if (d_from == NULL) {
1575 error = ENOENT;
1576 os_log_error(OS_LOG_DEFAULT,
1577 "%s: uuids not found error %d",
1578 __func__, error);
1579 return error;
1580 }
1581 if (d_from->bd_opened_by != d_to->bd_opened_by) {
1582 error = EACCES;
1583 os_log_error(OS_LOG_DEFAULT,
1584 "%s: processes not matching error %d",
1585 __func__, error);
1586 return error;
1587 }
1588
1589 /*
1590 * Prevent any read while copying
1591 */
1592 while (d_to->bd_hbuf_read != 0) {
1593 msleep((caddr_t)d_to, bpf_mlock, PRINET, __func__, NULL);
1594 }
1595 d_to->bd_hbuf_read = 1;
1596
1597 while (d_from->bd_hbuf_read != 0) {
1598 msleep((caddr_t)d_from, bpf_mlock, PRINET, __func__, NULL);
1599 }
1600 d_from->bd_hbuf_read = 1;
1601
1602 /*
1603 * Verify the devices have not been closed
1604 */
1605 if (d_to->bd_flags & BPF_CLOSING) {
1606 error = ENXIO;
1607 os_log_error(OS_LOG_DEFAULT,
1608 "%s: d_to is closing error %d",
1609 __func__, error);
1610 goto done;
1611 }
1612 if (d_from->bd_flags & BPF_CLOSING) {
1613 error = ENXIO;
1614 os_log_error(OS_LOG_DEFAULT,
1615 "%s: d_from is closing error %d",
1616 __func__, error);
1617 goto done;
1618 }
1619
1620 /*
1621 * For now require the same buffer size
1622 */
1623 if (d_from->bd_bufsize != d_to->bd_bufsize) {
1624 error = EINVAL;
1625 os_log_error(OS_LOG_DEFAULT,
1626 "%s: bufsizes not matching error %d",
1627 __func__, error);
1628 goto done;
1629 }
1630
1631 /*
1632 * Copy relevant options and flags
1633 */
1634 d_to->bd_flags = d_from->bd_flags & (BPF_EXTENDED_HDR | BPF_WANT_PKTAP |
1635 BPF_FINALIZE_PKTAP | BPF_TRUNCATE | BPF_PKTHDRV2 |
1636 BPF_COMP_REQ | BPF_COMP_ENABLED);
1637
1638 d_to->bd_headdrop = d_from->bd_headdrop;
1639
1640 /*
1641 * Allocate and copy the buffers
1642 */
1643 error = bpf_allocbufs(d_to);
1644 if (error != 0) {
1645 goto done;
1646 }
1647
1648 /*
1649 * Make sure the buffers are setup as expected by bpf_setif()
1650 */
1651 ASSERT(d_to->bd_hbuf == NULL);
1652 ASSERT(d_to->bd_sbuf != NULL);
1653 ASSERT(d_to->bd_fbuf != NULL);
1654
1655 /*
1656 * Copy the buffers and update the pointers and counts
1657 */
1658 memcpy(d_to->bd_sbuf, d_from->bd_sbuf, d_from->bd_slen);
1659 d_to->bd_slen = d_from->bd_slen;
1660 d_to->bd_scnt = d_from->bd_scnt;
1661
1662 if (d_from->bd_hbuf != NULL) {
1663 d_to->bd_hbuf = d_to->bd_fbuf;
1664 d_to->bd_fbuf = NULL;
1665 memcpy(d_to->bd_hbuf, d_from->bd_hbuf, d_from->bd_hlen);
1666 }
1667 d_to->bd_hlen = d_from->bd_hlen;
1668 d_to->bd_hcnt = d_from->bd_hcnt;
1669
1670 if (d_to->bd_flags & BPF_COMP_REQ) {
1671 ASSERT(d_to->bd_prev_sbuf != NULL);
1672 ASSERT(d_to->bd_prev_fbuf != NULL);
1673
1674 d_to->bd_prev_slen = d_from->bd_prev_slen;
1675 ASSERT(d_to->bd_prev_slen <= BPF_HDR_COMP_LEN_MAX);
1676 memcpy(d_to->bd_prev_sbuf, d_from->bd_prev_sbuf, BPF_HDR_COMP_LEN_MAX);
1677 }
1678
1679 d_to->bd_bcs = d_from->bd_bcs;
1680
1681 /*
1682 * Attach to the interface:
1683 * - don't reset the buffers
1684 * - we already prevent reads
1685 * - the buffers are already allocated
1686 */
1687 error = bpf_setif(d_to, ifp, false, true, true);
1688 if (error != 0) {
1689 os_log_error(OS_LOG_DEFAULT,
1690 "%s: bpf_setif() failed error %d",
1691 __func__, error);
1692 goto done;
1693 }
1694 done:
1695 d_from->bd_hbuf_read = 0;
1696 wakeup((caddr_t)d_from);
1697
1698 d_to->bd_hbuf_read = 0;
1699 wakeup((caddr_t)d_to);
1700
1701 return error;
1702 }
1703
#if DEVELOPMENT || DEBUG
/*
 * X-macro list of the ioctl commands recognized by log_bpf_ioctl_str()
 * below; each X(cmd) expands to one switch case that logs the command
 * name. Add new BIOC* commands here to have them logged by name.
 */
#define BPF_IOC_LIST \
	X(FIONREAD) \
	X(SIOCGIFADDR) \
	X(BIOCGBLEN) \
	X(BIOCSBLEN) \
	X(BIOCSETF32) \
	X(BIOCSETFNR32) \
	X(BIOCSETF64) \
	X(BIOCSETFNR64) \
	X(BIOCFLUSH) \
	X(BIOCPROMISC) \
	X(BIOCGDLT) \
	X(BIOCGDLTLIST) \
	X(BIOCSDLT) \
	X(BIOCGETIF) \
	X(BIOCSETIF) \
	X(BIOCSRTIMEOUT32) \
	X(BIOCSRTIMEOUT64) \
	X(BIOCGRTIMEOUT32) \
	X(BIOCGRTIMEOUT64) \
	X(BIOCGSTATS) \
	X(BIOCIMMEDIATE) \
	X(BIOCVERSION) \
	X(BIOCGHDRCMPLT) \
	X(BIOCSHDRCMPLT) \
	X(BIOCGSEESENT) \
	X(BIOCSSEESENT) \
	X(BIOCSETTC) \
	X(BIOCGETTC) \
	X(FIONBIO) \
	X(FIOASYNC) \
	X(BIOCSRSIG) \
	X(BIOCGRSIG) \
	X(BIOCSEXTHDR) \
	X(BIOCGIFATTACHCOUNT) \
	X(BIOCGWANTPKTAP) \
	X(BIOCSWANTPKTAP) \
	X(BIOCSHEADDROP) \
	X(BIOCGHEADDROP) \
	X(BIOCSTRUNCATE) \
	X(BIOCGETUUID) \
	X(BIOCSETUP) \
	X(BIOCSPKTHDRV2) \
	X(BIOCGHDRCOMP) \
	X(BIOCSHDRCOMP) \
	X(BIOCGHDRCOMPSTATS) \
	X(BIOCGHDRCOMPON)
1752
/*
 * Log the symbolic name of a bpf ioctl command for this descriptor
 * (DEVELOPMENT/DEBUG builds only; gated on bpf_debug by the caller).
 * Commands not listed in BPF_IOC_LIST are logged as a hex value.
 */
static void
log_bpf_ioctl_str(struct bpf_d *d, u_long cmd)
{
	const char *p = NULL;
	char str[32];

/* Expand BPF_IOC_LIST into "case CMD: p = "CMD"; ..." switch arms. */
#define X(x) case x: { p = #x ; printf("%s\n", p); break; }
	switch (cmd) {
		BPF_IOC_LIST
	}
#undef X
	if (p == NULL) {
		snprintf(str, sizeof(str), "0x%08x", (unsigned int)cmd);
		p = str;
	}
	os_log(OS_LOG_DEFAULT, "bpfioctl bpf%u %s",
	    d->bd_dev_minor, p);
}
1771 #endif /* DEVELOPMENT || DEBUG */
1772
1773 /*
1774 * FIONREAD Check for read packet available.
1775 * SIOCGIFADDR Get interface address - convenient hook to driver.
1776 * BIOCGBLEN Get buffer len [for read()].
1777 * BIOCSETF Set ethernet read filter.
1778 * BIOCFLUSH Flush read packet buffer.
1779 * BIOCPROMISC Put interface into promiscuous mode.
1780 * BIOCGDLT Get link layer type.
1781 * BIOCGETIF Get interface name.
1782 * BIOCSETIF Set interface.
1783 * BIOCSRTIMEOUT Set read timeout.
1784 * BIOCGRTIMEOUT Get read timeout.
1785 * BIOCGSTATS Get packet stats.
1786 * BIOCIMMEDIATE Set immediate mode.
1787 * BIOCVERSION Get filter language version.
1788 * BIOCGHDRCMPLT Get "header already complete" flag
1789 * BIOCSHDRCMPLT Set "header already complete" flag
1790 * BIOCGSEESENT Get "see packets sent" flag
1791 * BIOCSSEESENT Set "see packets sent" flag
1792 * BIOCSETTC Set traffic class.
1793 * BIOCGETTC Get traffic class.
1794 * BIOCSEXTHDR Set "extended header" flag
1795 * BIOCSHEADDROP Drop head of the buffer if user is not reading
1796 * BIOCGHEADDROP Get "head-drop" flag
1797 */
1798 /* ARGSUSED */
1799 int
bpfioctl(dev_t dev,u_long cmd,caddr_t addr,__unused int flags,struct proc * p)1800 bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags,
1801 struct proc *p)
1802 {
1803 struct bpf_d *d;
1804 int error = 0;
1805 u_int int_arg;
1806 struct ifreq ifr = {};
1807
1808 lck_mtx_lock(bpf_mlock);
1809
1810 d = bpf_dtab[minor(dev)];
1811 if (d == NULL || d == BPF_DEV_RESERVED ||
1812 (d->bd_flags & BPF_CLOSING) != 0) {
1813 lck_mtx_unlock(bpf_mlock);
1814 return ENXIO;
1815 }
1816
1817 bpf_acquire_d(d);
1818
1819 if (d->bd_state == BPF_WAITING) {
1820 bpf_stop_timer(d);
1821 }
1822 d->bd_state = BPF_IDLE;
1823
1824 #if DEVELOPMENT || DEBUG
1825 if (bpf_debug > 0) {
1826 log_bpf_ioctl_str(d, cmd);
1827 }
1828 #endif /* DEVELOPMENT || DEBUG */
1829
1830 switch (cmd) {
1831 default:
1832 error = EINVAL;
1833 break;
1834
1835 /*
1836 * Check for read packet available.
1837 */
1838 case FIONREAD: /* int */
1839 {
1840 int n;
1841
1842 n = d->bd_slen;
1843 if (d->bd_hbuf && d->bd_hbuf_read == 0) {
1844 n += d->bd_hlen;
1845 }
1846
1847 bcopy(&n, addr, sizeof(n));
1848 break;
1849 }
1850
1851 case SIOCGIFADDR: /* struct ifreq */
1852 {
1853 struct ifnet *ifp;
1854
1855 if (d->bd_bif == 0) {
1856 error = EINVAL;
1857 } else {
1858 ifp = d->bd_bif->bif_ifp;
1859 error = ifnet_ioctl(ifp, 0, cmd, addr);
1860 }
1861 break;
1862 }
1863
1864 /*
1865 * Get buffer len [for read()].
1866 */
1867 case BIOCGBLEN: /* u_int */
1868 bcopy(&d->bd_bufsize, addr, sizeof(u_int));
1869 break;
1870
1871 /*
1872 * Set buffer length.
1873 */
1874 case BIOCSBLEN: { /* u_int */
1875 u_int size;
1876
1877 if (d->bd_bif != 0 || (d->bd_flags & BPF_DETACHING)) {
1878 /*
1879 * Interface already attached, unable to change buffers
1880 */
1881 error = EINVAL;
1882 break;
1883 }
1884 bcopy(addr, &size, sizeof(size));
1885
1886 if (size > BPF_BUFSIZE_CAP) {
1887 d->bd_bufsize = BPF_BUFSIZE_CAP;
1888
1889 os_log_info(OS_LOG_DEFAULT,
1890 "bpf%d BIOCSBLEN capped to %u from %u",
1891 minor(dev), d->bd_bufsize, size);
1892 } else if (size < BPF_MINBUFSIZE) {
1893 d->bd_bufsize = BPF_MINBUFSIZE;
1894
1895 os_log_info(OS_LOG_DEFAULT,
1896 "bpf%d BIOCSBLEN bumped to %u from %u",
1897 minor(dev), d->bd_bufsize, size);
1898 } else {
1899 d->bd_bufsize = size;
1900
1901 os_log_info(OS_LOG_DEFAULT,
1902 "bpf%d BIOCSBLEN %u",
1903 minor(dev), d->bd_bufsize);
1904 }
1905
1906 /* It's a read/write ioctl */
1907 bcopy(&d->bd_bufsize, addr, sizeof(u_int));
1908 break;
1909 }
1910 /*
1911 * Set link layer read filter.
1912 */
1913 case BIOCSETF32:
1914 case BIOCSETFNR32: { /* struct bpf_program32 */
1915 struct bpf_program32 prg32;
1916
1917 bcopy(addr, &prg32, sizeof(prg32));
1918 error = bpf_setf(d, prg32.bf_len,
1919 CAST_USER_ADDR_T(prg32.bf_insns), cmd);
1920 break;
1921 }
1922
1923 case BIOCSETF64:
1924 case BIOCSETFNR64: { /* struct bpf_program64 */
1925 struct bpf_program64 prg64;
1926
1927 bcopy(addr, &prg64, sizeof(prg64));
1928 error = bpf_setf(d, prg64.bf_len, CAST_USER_ADDR_T(prg64.bf_insns), cmd);
1929 break;
1930 }
1931
1932 /*
1933 * Flush read packet buffer.
1934 */
1935 case BIOCFLUSH:
1936 while (d->bd_hbuf_read != 0) {
1937 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading",
1938 NULL);
1939 }
1940 if ((d->bd_flags & BPF_CLOSING) != 0) {
1941 error = ENXIO;
1942 break;
1943 }
1944 reset_d(d);
1945 break;
1946
1947 /*
1948 * Put interface into promiscuous mode.
1949 */
1950 case BIOCPROMISC:
1951 if (d->bd_bif == 0) {
1952 /*
1953 * No interface attached yet.
1954 */
1955 error = EINVAL;
1956 break;
1957 }
1958 if (d->bd_promisc == 0) {
1959 lck_mtx_unlock(bpf_mlock);
1960 error = ifnet_set_promiscuous(d->bd_bif->bif_ifp, 1);
1961 lck_mtx_lock(bpf_mlock);
1962 if (error == 0) {
1963 d->bd_promisc = 1;
1964 }
1965 }
1966 break;
1967
1968 /*
1969 * Get device parameters.
1970 */
1971 case BIOCGDLT: /* u_int */
1972 if (d->bd_bif == 0) {
1973 error = EINVAL;
1974 } else {
1975 bcopy(&d->bd_bif->bif_dlt, addr, sizeof(u_int));
1976 }
1977 break;
1978
1979 /*
1980 * Get a list of supported data link types.
1981 */
1982 case BIOCGDLTLIST: /* struct bpf_dltlist */
1983 if (d->bd_bif == NULL) {
1984 error = EINVAL;
1985 } else {
1986 error = bpf_getdltlist(d, addr, p);
1987 }
1988 break;
1989
1990 /*
1991 * Set data link type.
1992 */
1993 case BIOCSDLT: /* u_int */
1994 if (d->bd_bif == NULL) {
1995 error = EINVAL;
1996 } else {
1997 u_int dlt;
1998
1999 bcopy(addr, &dlt, sizeof(dlt));
2000
2001 if (dlt == DLT_PKTAP &&
2002 !(d->bd_flags & BPF_WANT_PKTAP)) {
2003 dlt = DLT_RAW;
2004 }
2005 error = bpf_setdlt(d, dlt);
2006 }
2007 break;
2008
2009 /*
2010 * Get interface name.
2011 */
2012 case BIOCGETIF: /* struct ifreq */
2013 if (d->bd_bif == 0) {
2014 error = EINVAL;
2015 } else {
2016 struct ifnet *const ifp = d->bd_bif->bif_ifp;
2017
2018 snprintf(((struct ifreq *)(void *)addr)->ifr_name,
2019 sizeof(ifr.ifr_name), "%s", if_name(ifp));
2020 }
2021 break;
2022
2023 /*
2024 * Set interface.
2025 */
2026 case BIOCSETIF: { /* struct ifreq */
2027 ifnet_t ifp;
2028
2029 bcopy(addr, &ifr, sizeof(ifr));
2030 ifr.ifr_name[IFNAMSIZ - 1] = '\0';
2031 ifp = ifunit(ifr.ifr_name);
2032 if (ifp == NULL) {
2033 error = ENXIO;
2034 } else {
2035 error = bpf_setif(d, ifp, true, false, false);
2036 }
2037 break;
2038 }
2039
2040 /*
2041 * Set read timeout.
2042 */
2043 case BIOCSRTIMEOUT32: { /* struct user32_timeval */
2044 struct user32_timeval _tv;
2045 struct timeval tv;
2046
2047 bcopy(addr, &_tv, sizeof(_tv));
2048 tv.tv_sec = _tv.tv_sec;
2049 tv.tv_usec = _tv.tv_usec;
2050
2051 /*
2052 * Subtract 1 tick from tvtohz() since this isn't
2053 * a one-shot timer.
2054 */
2055 if ((error = itimerfix(&tv)) == 0) {
2056 d->bd_rtout = tvtohz(&tv) - 1;
2057 }
2058 break;
2059 }
2060
2061 case BIOCSRTIMEOUT64: { /* struct user64_timeval */
2062 struct user64_timeval _tv;
2063 struct timeval tv;
2064
2065 bcopy(addr, &_tv, sizeof(_tv));
2066 tv.tv_sec = (__darwin_time_t)_tv.tv_sec;
2067 tv.tv_usec = _tv.tv_usec;
2068
2069 /*
2070 * Subtract 1 tick from tvtohz() since this isn't
2071 * a one-shot timer.
2072 */
2073 if ((error = itimerfix(&tv)) == 0) {
2074 d->bd_rtout = tvtohz(&tv) - 1;
2075 }
2076 break;
2077 }
2078
2079 /*
2080 * Get read timeout.
2081 */
2082 case BIOCGRTIMEOUT32: { /* struct user32_timeval */
2083 struct user32_timeval tv;
2084
2085 bzero(&tv, sizeof(tv));
2086 tv.tv_sec = d->bd_rtout / hz;
2087 tv.tv_usec = (d->bd_rtout % hz) * tick;
2088 bcopy(&tv, addr, sizeof(tv));
2089 break;
2090 }
2091
2092 case BIOCGRTIMEOUT64: { /* struct user64_timeval */
2093 struct user64_timeval tv;
2094
2095 bzero(&tv, sizeof(tv));
2096 tv.tv_sec = d->bd_rtout / hz;
2097 tv.tv_usec = (d->bd_rtout % hz) * tick;
2098 bcopy(&tv, addr, sizeof(tv));
2099 break;
2100 }
2101
2102 /*
2103 * Get packet stats.
2104 */
2105 case BIOCGSTATS: { /* struct bpf_stat */
2106 struct bpf_stat bs;
2107
2108 bzero(&bs, sizeof(bs));
2109 bs.bs_recv = (u_int)d->bd_rcount;
2110 bs.bs_drop = (u_int)d->bd_dcount;
2111 bcopy(&bs, addr, sizeof(bs));
2112 break;
2113 }
2114
2115 /*
2116 * Set immediate mode.
2117 */
2118 case BIOCIMMEDIATE: /* u_int */
2119 d->bd_immediate = *(u_char *)(void *)addr;
2120 break;
2121
2122 case BIOCVERSION: { /* struct bpf_version */
2123 struct bpf_version bv;
2124
2125 bzero(&bv, sizeof(bv));
2126 bv.bv_major = BPF_MAJOR_VERSION;
2127 bv.bv_minor = BPF_MINOR_VERSION;
2128 bcopy(&bv, addr, sizeof(bv));
2129 break;
2130 }
2131
2132 /*
2133 * Get "header already complete" flag
2134 */
2135 case BIOCGHDRCMPLT: /* u_int */
2136 bcopy(&d->bd_hdrcmplt, addr, sizeof(u_int));
2137 break;
2138
2139 /*
2140 * Set "header already complete" flag
2141 */
2142 case BIOCSHDRCMPLT: /* u_int */
2143 bcopy(addr, &int_arg, sizeof(int_arg));
2144 d->bd_hdrcmplt = int_arg ? 1 : 0;
2145 break;
2146
2147 /*
2148 * Get "see sent packets" flag
2149 */
2150 case BIOCGSEESENT: /* u_int */
2151 bcopy(&d->bd_seesent, addr, sizeof(u_int));
2152 break;
2153
2154 /*
2155 * Set "see sent packets" flag
2156 */
2157 case BIOCSSEESENT: /* u_int */
2158 bcopy(addr, &d->bd_seesent, sizeof(u_int));
2159 break;
2160
2161 /*
2162 * Set traffic service class
2163 */
2164 case BIOCSETTC: { /* int */
2165 int tc;
2166
2167 bcopy(addr, &tc, sizeof(int));
2168 error = bpf_set_traffic_class(d, tc);
2169 break;
2170 }
2171
2172 /*
2173 * Get traffic service class
2174 */
2175 case BIOCGETTC: /* int */
2176 bcopy(&d->bd_traffic_class, addr, sizeof(int));
2177 break;
2178
2179 case FIONBIO: /* Non-blocking I/O; int */
2180 break;
2181
2182 case FIOASYNC: /* Send signal on receive packets; int */
2183 bcopy(addr, &d->bd_async, sizeof(int));
2184 break;
2185
2186 case BIOCSRSIG: { /* Set receive signal; u_int */
2187 u_int sig;
2188
2189 bcopy(addr, &sig, sizeof(u_int));
2190
2191 if (sig >= NSIG) {
2192 error = EINVAL;
2193 } else {
2194 d->bd_sig = sig;
2195 }
2196 break;
2197 }
2198 case BIOCGRSIG: /* u_int */
2199 bcopy(&d->bd_sig, addr, sizeof(u_int));
2200 break;
2201
2202 case BIOCSEXTHDR: /* u_int */
2203 bcopy(addr, &int_arg, sizeof(int_arg));
2204 if (int_arg) {
2205 d->bd_flags |= BPF_EXTENDED_HDR;
2206 } else {
2207 d->bd_flags &= ~BPF_EXTENDED_HDR;
2208 }
2209 break;
2210
2211 case BIOCGIFATTACHCOUNT: { /* struct ifreq */
2212 ifnet_t ifp;
2213 struct bpf_if *bp;
2214
2215 bcopy(addr, &ifr, sizeof(ifr));
2216 ifr.ifr_name[IFNAMSIZ - 1] = '\0';
2217 ifp = ifunit(ifr.ifr_name);
2218 if (ifp == NULL) {
2219 error = ENXIO;
2220 break;
2221 }
2222 ifr.ifr_intval = 0;
2223 for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
2224 struct bpf_d *bpf_d;
2225
2226 if (bp->bif_ifp == NULL || bp->bif_ifp != ifp) {
2227 continue;
2228 }
2229 for (bpf_d = bp->bif_dlist; bpf_d;
2230 bpf_d = bpf_d->bd_next) {
2231 ifr.ifr_intval += 1;
2232 }
2233 }
2234 bcopy(&ifr, addr, sizeof(ifr));
2235 break;
2236 }
2237 case BIOCGWANTPKTAP: /* u_int */
2238 int_arg = d->bd_flags & BPF_WANT_PKTAP ? 1 : 0;
2239 bcopy(&int_arg, addr, sizeof(int_arg));
2240 break;
2241
2242 case BIOCSWANTPKTAP: /* u_int */
2243 bcopy(addr, &int_arg, sizeof(int_arg));
2244 if (int_arg) {
2245 d->bd_flags |= BPF_WANT_PKTAP;
2246 } else {
2247 d->bd_flags &= ~BPF_WANT_PKTAP;
2248 }
2249 break;
2250
2251 case BIOCSHEADDROP:
2252 bcopy(addr, &int_arg, sizeof(int_arg));
2253 d->bd_headdrop = int_arg ? 1 : 0;
2254 break;
2255
2256 case BIOCGHEADDROP:
2257 bcopy(&d->bd_headdrop, addr, sizeof(int));
2258 break;
2259
2260 case BIOCSTRUNCATE:
2261 bcopy(addr, &int_arg, sizeof(int_arg));
2262 if (int_arg) {
2263 d->bd_flags |= BPF_TRUNCATE;
2264 } else {
2265 d->bd_flags &= ~BPF_TRUNCATE;
2266 }
2267 break;
2268
2269 case BIOCGETUUID:
2270 bcopy(&d->bd_uuid, addr, sizeof(uuid_t));
2271 break;
2272
2273 case BIOCSETUP: {
2274 struct bpf_setup_args bsa;
2275 ifnet_t ifp;
2276
2277 bcopy(addr, &bsa, sizeof(struct bpf_setup_args));
2278 bsa.bsa_ifname[IFNAMSIZ - 1] = 0;
2279 ifp = ifunit(bsa.bsa_ifname);
2280 if (ifp == NULL) {
2281 error = ENXIO;
2282 os_log_error(OS_LOG_DEFAULT,
2283 "%s: ifnet not found for %s error %d",
2284 __func__, bsa.bsa_ifname, error);
2285 break;
2286 }
2287
2288 error = bpf_setup(d, bsa.bsa_uuid, ifp);
2289 break;
2290 }
2291 case BIOCSPKTHDRV2:
2292 bcopy(addr, &int_arg, sizeof(int_arg));
2293 if (int_arg != 0) {
2294 d->bd_flags |= BPF_PKTHDRV2;
2295 } else {
2296 d->bd_flags &= ~BPF_PKTHDRV2;
2297 }
2298 break;
2299
2300 case BIOCGPKTHDRV2:
2301 int_arg = d->bd_flags & BPF_PKTHDRV2 ? 1 : 0;
2302 bcopy(&int_arg, addr, sizeof(int_arg));
2303 break;
2304
2305 case BIOCGHDRCOMP:
2306 int_arg = d->bd_flags & BPF_COMP_REQ ? 1 : 0;
2307 bcopy(&int_arg, addr, sizeof(int_arg));
2308 break;
2309
2310 case BIOCSHDRCOMP:
2311 bcopy(addr, &int_arg, sizeof(int_arg));
2312 if (int_arg != 0 && int_arg != 1) {
2313 return EINVAL;
2314 }
2315 if (d->bd_bif != 0 || (d->bd_flags & BPF_DETACHING)) {
2316 /*
2317 * Interface already attached, unable to change buffers
2318 */
2319 error = EINVAL;
2320 break;
2321 }
2322 if (int_arg != 0) {
2323 d->bd_flags |= BPF_COMP_REQ;
2324 if (bpf_hdr_comp_enable != 0) {
2325 d->bd_flags |= BPF_COMP_ENABLED;
2326 }
2327 } else {
2328 d->bd_flags &= ~(BPF_COMP_REQ | BPF_COMP_ENABLED);
2329 }
2330 break;
2331
2332 case BIOCGHDRCOMPON:
2333 int_arg = d->bd_flags & BPF_COMP_ENABLED ? 1 : 0;
2334 bcopy(&int_arg, addr, sizeof(int_arg));
2335 break;
2336
2337 case BIOCGHDRCOMPSTATS: {
2338 struct bpf_comp_stats bcs = {};
2339
2340 bcs = d->bd_bcs;
2341
2342 bcopy(&bcs, addr, sizeof(bcs));
2343 break;
2344 }
2345 }
2346
2347 bpf_release_d(d);
2348 lck_mtx_unlock(bpf_mlock);
2349
2350 return error;
2351 }
2352
2353 /*
2354 * Set d's packet filter program to fp. If this file already has a filter,
2355 * free it and replace it. Returns EINVAL for bogus requests.
2356 */
2357 static int
bpf_setf(struct bpf_d * d,u_int bf_len,user_addr_t bf_insns,u_long cmd)2358 bpf_setf(struct bpf_d *d, u_int bf_len, user_addr_t bf_insns,
2359 u_long cmd)
2360 {
2361 struct bpf_insn *fcode, *old;
2362 u_int flen, size;
2363
2364 while (d->bd_hbuf_read != 0) {
2365 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
2366 }
2367
2368 if ((d->bd_flags & BPF_CLOSING) != 0) {
2369 return ENXIO;
2370 }
2371
2372 old = d->bd_filter;
2373 if (bf_insns == USER_ADDR_NULL) {
2374 if (bf_len != 0) {
2375 return EINVAL;
2376 }
2377 d->bd_filter = NULL;
2378 reset_d(d);
2379 if (old != 0) {
2380 kfree_data_addr(old);
2381 }
2382 return 0;
2383 }
2384 flen = bf_len;
2385 if (flen > BPF_MAXINSNS) {
2386 return EINVAL;
2387 }
2388
2389 size = flen * sizeof(struct bpf_insn);
2390 fcode = (struct bpf_insn *) kalloc_data(size, Z_WAITOK | Z_ZERO);
2391 if (fcode == NULL) {
2392 return ENOMEM;
2393 }
2394 if (copyin(bf_insns, (caddr_t)fcode, size) == 0 &&
2395 bpf_validate(fcode, (int)flen)) {
2396 d->bd_filter = fcode;
2397
2398 if (cmd == BIOCSETF32 || cmd == BIOCSETF64) {
2399 reset_d(d);
2400 }
2401
2402 if (old != 0) {
2403 kfree_data_addr(old);
2404 }
2405
2406 return 0;
2407 }
2408 kfree_data(fcode, size);
2409 return EINVAL;
2410 }
2411
2412 /*
2413 * Detach a file from its current interface (if attached at all) and attach
2414 * to the interface indicated by the name stored in ifr.
2415 * Return an errno or 0.
2416 */
static int
bpf_setif(struct bpf_d *d, ifnet_t theywant, bool do_reset, bool has_hbuf_read,
    bool has_bufs_allocated)
{
	struct bpf_if *bp;
	int error;

	/*
	 * Wait for in-progress reads, unless the caller already owns the
	 * hold buffer (has_hbuf_read) and would deadlock waiting on itself.
	 */
	while (d->bd_hbuf_read != 0 && !has_hbuf_read) {
		msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
	}

	/* The descriptor may have started closing while we slept */
	if ((d->bd_flags & BPF_CLOSING) != 0) {
		return ENXIO;
	}

	/*
	 * Look through attached interfaces for the named one.
	 */
	for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
		struct ifnet *ifp = bp->bif_ifp;

		if (ifp == 0 || ifp != theywant) {
			continue;
		}
		/*
		 * Do not use DLT_PKTAP, unless requested explicitly
		 */
		if (bp->bif_dlt == DLT_PKTAP && !(d->bd_flags & BPF_WANT_PKTAP)) {
			continue;
		}
		/*
		 * Skip the coprocessor interface
		 */
		if (!intcoproc_unrestricted && IFNET_IS_INTCOPROC(ifp)) {
			continue;
		}
		/*
		 * We found the requested interface.
		 * Allocate the packet buffers.
		 */
		if (has_bufs_allocated == false) {
			error = bpf_allocbufs(d);
			if (error != 0) {
				return error;
			}
		}
		/*
		 * Detach if attached to something else.
		 */
		if (bp != d->bd_bif) {
			if (d->bd_bif != NULL) {
				if (bpf_detachd(d) != 0) {
					return ENXIO;
				}
			}
			if (bpf_attachd(d, bp) != 0) {
				return ENXIO;
			}
		}
		if (do_reset) {
			reset_d(d);
		}
		os_log(OS_LOG_DEFAULT, "bpf%u attached to %s",
		    d->bd_dev_minor, if_name(theywant));
		return 0;
	}
	/* Not found. */
	return ENXIO;
}
2486
2487 /*
2488 * Get a list of available data link type of the interface.
2489 */
2490 static int
bpf_getdltlist(struct bpf_d * d,caddr_t addr,struct proc * p)2491 bpf_getdltlist(struct bpf_d *d, caddr_t addr, struct proc *p)
2492 {
2493 u_int n;
2494 int error;
2495 struct ifnet *ifp;
2496 struct bpf_if *bp;
2497 user_addr_t dlist;
2498 struct bpf_dltlist bfl;
2499
2500 bcopy(addr, &bfl, sizeof(bfl));
2501 if (proc_is64bit(p)) {
2502 dlist = (user_addr_t)bfl.bfl_u.bflu_pad;
2503 } else {
2504 dlist = CAST_USER_ADDR_T(bfl.bfl_u.bflu_list);
2505 }
2506
2507 ifp = d->bd_bif->bif_ifp;
2508 n = 0;
2509 error = 0;
2510
2511 for (bp = bpf_iflist; bp; bp = bp->bif_next) {
2512 if (bp->bif_ifp != ifp) {
2513 continue;
2514 }
2515 /*
2516 * Do not use DLT_PKTAP, unless requested explicitly
2517 */
2518 if (bp->bif_dlt == DLT_PKTAP && !(d->bd_flags & BPF_WANT_PKTAP)) {
2519 continue;
2520 }
2521 if (dlist != USER_ADDR_NULL) {
2522 if (n >= bfl.bfl_len) {
2523 return ENOMEM;
2524 }
2525 error = copyout(&bp->bif_dlt, dlist,
2526 sizeof(bp->bif_dlt));
2527 if (error != 0) {
2528 break;
2529 }
2530 dlist += sizeof(bp->bif_dlt);
2531 }
2532 n++;
2533 }
2534 bfl.bfl_len = n;
2535 bcopy(&bfl, addr, sizeof(bfl));
2536
2537 return error;
2538 }
2539
2540 /*
2541 * Set the data link type of a BPF instance.
2542 */
static int
bpf_setdlt(struct bpf_d *d, uint32_t dlt)
{
	int error, opromisc;
	struct ifnet *ifp;
	struct bpf_if *bp;

	/* Nothing to do if the descriptor already uses this DLT */
	if (d->bd_bif->bif_dlt == dlt) {
		return 0;
	}

	/* Wait for any in-progress read that is holding the hold buffer */
	while (d->bd_hbuf_read != 0) {
		msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
	}

	/* The descriptor may have started closing while we slept */
	if ((d->bd_flags & BPF_CLOSING) != 0) {
		return ENXIO;
	}

	/* Find a bpf_if on the same interface with the requested DLT */
	ifp = d->bd_bif->bif_ifp;
	for (bp = bpf_iflist; bp; bp = bp->bif_next) {
		if (bp->bif_ifp == ifp && bp->bif_dlt == dlt) {
			/*
			 * Do not use DLT_PKTAP, unless requested explicitly
			 */
			if (bp->bif_dlt == DLT_PKTAP &&
			    !(d->bd_flags & BPF_WANT_PKTAP)) {
				continue;
			}
			break;
		}
	}
	if (bp != NULL) {
		/* remember promiscuity so it can be restored after re-attach */
		opromisc = d->bd_promisc;
		if (bpf_detachd(d) != 0) {
			return ENXIO;
		}
		error = bpf_attachd(d, bp);
		if (error != 0) {
			os_log_error(OS_LOG_DEFAULT,
			    "bpf_setdlt: bpf%d bpf_attachd %s error %d",
			    d->bd_dev_minor, if_name(bp->bif_ifp),
			    error);
			return error;
		}
		reset_d(d);
		if (opromisc) {
			/*
			 * Drop bpf_mlock around ifnet_set_promiscuous(): it can
			 * take driver locks and holding bpf_mlock across it risks
			 * a lock-ordering deadlock (see comment in bpf_tap_imp).
			 */
			lck_mtx_unlock(bpf_mlock);
			error = ifnet_set_promiscuous(bp->bif_ifp, 1);
			lck_mtx_lock(bpf_mlock);
			if (error != 0) {
				os_log_error(OS_LOG_DEFAULT,
				    "bpf_setdlt: bpf%d ifpromisc %s error %d",
				    d->bd_dev_minor, if_name(bp->bif_ifp), error);
			} else {
				d->bd_promisc = 1;
			}
		}
	}
	return bp == NULL ? EINVAL : 0;
}
2604
2605 static int
bpf_set_traffic_class(struct bpf_d * d,int tc)2606 bpf_set_traffic_class(struct bpf_d *d, int tc)
2607 {
2608 int error = 0;
2609
2610 if (!SO_VALID_TC(tc)) {
2611 error = EINVAL;
2612 } else {
2613 d->bd_traffic_class = tc;
2614 }
2615
2616 return error;
2617 }
2618
2619 static void
bpf_set_packet_service_class(struct mbuf * m,int tc)2620 bpf_set_packet_service_class(struct mbuf *m, int tc)
2621 {
2622 if (!(m->m_flags & M_PKTHDR)) {
2623 return;
2624 }
2625
2626 VERIFY(SO_VALID_TC(tc));
2627 (void) m_set_service_class(m, so_tc2msc(tc));
2628 }
2629
2630 /*
2631 * Support for select()
2632 *
2633 * Return true iff the specific operation will not block indefinitely.
2634 * Otherwise, return false but make a note that a selwakeup() must be done.
2635 */
int
bpfselect(dev_t dev, int which, void * wql, struct proc *p)
{
	struct bpf_d *d;
	int ret = 0;

	lck_mtx_lock(bpf_mlock);

	/* Look up and validate the descriptor for this minor device */
	d = bpf_dtab[minor(dev)];
	if (d == NULL || d == BPF_DEV_RESERVED ||
	    (d->bd_flags & BPF_CLOSING) != 0) {
		lck_mtx_unlock(bpf_mlock);
		return ENXIO;
	}

	/* Hold a reference so the descriptor cannot go away while we sleep */
	bpf_acquire_d(d);

	/* Not attached to an interface: nothing can ever become readable */
	if (d->bd_bif == NULL) {
		bpf_release_d(d);
		lck_mtx_unlock(bpf_mlock);
		return ENXIO;
	}

	/* Wait for any in-progress read that is holding the hold buffer */
	while (d->bd_hbuf_read != 0) {
		msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
	}

	/* The descriptor may have started closing while we slept */
	if ((d->bd_flags & BPF_CLOSING) != 0) {
		bpf_release_d(d);
		lck_mtx_unlock(bpf_mlock);
		return ENXIO;
	}

	switch (which) {
	case FREAD:
		if (d->bd_hlen != 0 ||
		    ((d->bd_immediate ||
		    d->bd_state == BPF_TIMED_OUT) && d->bd_slen != 0)) {
			ret = 1; /* read has data to return */
		} else {
			/*
			 * Read has no data to return.
			 * Make the select wait, and start a timer if
			 * necessary.
			 */
			selrecord(p, &d->bd_sel, wql);
			bpf_start_timer(d);
		}
		break;

	case FWRITE:
		/* can't determine whether a write would block */
		ret = 1;
		break;
	}

	bpf_release_d(d);
	lck_mtx_unlock(bpf_mlock);

	return ret;
}
2697
2698 /*
2699 * Support for kevent() system call. Register EVFILT_READ filters and
2700 * reject all others.
2701 */
2702 int bpfkqfilter(dev_t dev, struct knote *kn);
2703 static void filt_bpfdetach(struct knote *);
2704 static int filt_bpfread(struct knote *, long);
2705 static int filt_bpftouch(struct knote *kn, struct kevent_qos_s *kev);
2706 static int filt_bpfprocess(struct knote *kn, struct kevent_qos_s *kev);
2707
/* Filter operations backing EVFILT_READ knotes on bpf devices */
SECURITY_READ_ONLY_EARLY(struct filterops) bpfread_filtops = {
	.f_isfd = 1, /* knotes attach to a file descriptor */
	.f_detach = filt_bpfdetach,
	.f_event = filt_bpfread,
	.f_touch = filt_bpftouch,
	.f_process = filt_bpfprocess,
};
2715
/*
 * Common readiness check for the EVFILT_READ filter.
 * Returns non-zero when a read would not block; when kev is non-NULL the
 * kevent is filled in with the number of bytes a read would return.
 */
static int
filt_bpfread_common(struct knote *kn, struct kevent_qos_s *kev, struct bpf_d *d)
{
	int ready = 0;
	int64_t data = 0;

	if (d->bd_immediate) {
		/*
		 * If there's data in the hold buffer, it's the
		 * amount of data a read will return.
		 *
		 * If there's no data in the hold buffer, but
		 * there's data in the store buffer, a read will
		 * immediately rotate the store buffer to the
		 * hold buffer, the amount of data in the store
		 * buffer is the amount of data a read will
		 * return.
		 *
		 * If there's no data in either buffer, we're not
		 * ready to read.
		 */
		data = (d->bd_hlen == 0 || d->bd_hbuf_read != 0 ?
		    d->bd_slen : d->bd_hlen);
		/* clamp the requested low watermark to the buffer size */
		int64_t lowwat = knote_low_watermark(kn);
		if (lowwat > d->bd_bufsize) {
			lowwat = d->bd_bufsize;
		}
		ready = (data >= lowwat);
	} else {
		/*
		 * If there's data in the hold buffer, it's the
		 * amount of data a read will return.
		 *
		 * If there's no data in the hold buffer, but
		 * there's data in the store buffer, if the
		 * timer has expired a read will immediately
		 * rotate the store buffer to the hold buffer,
		 * so the amount of data in the store buffer is
		 * the amount of data a read will return.
		 *
		 * If there's no data in either buffer, or there's
		 * no data in the hold buffer and the timer hasn't
		 * expired, we're not ready to read.
		 */
		data = ((d->bd_hlen == 0 || d->bd_hbuf_read != 0) &&
		    d->bd_state == BPF_TIMED_OUT ? d->bd_slen : d->bd_hlen);
		ready = (data > 0);
	}
	if (!ready) {
		/* arm the read timeout so a future rotation can wake us */
		bpf_start_timer(d);
	} else if (kev) {
		knote_fill_kevent(kn, kev, data);
	}

	return ready;
}
2772
2773 int
bpfkqfilter(dev_t dev,struct knote * kn)2774 bpfkqfilter(dev_t dev, struct knote *kn)
2775 {
2776 struct bpf_d *d;
2777 int res;
2778
2779 /*
2780 * Is this device a bpf?
2781 */
2782 if (major(dev) != CDEV_MAJOR || kn->kn_filter != EVFILT_READ) {
2783 knote_set_error(kn, EINVAL);
2784 return 0;
2785 }
2786
2787 lck_mtx_lock(bpf_mlock);
2788
2789 d = bpf_dtab[minor(dev)];
2790
2791 if (d == NULL || d == BPF_DEV_RESERVED ||
2792 (d->bd_flags & BPF_CLOSING) != 0 ||
2793 d->bd_bif == NULL) {
2794 lck_mtx_unlock(bpf_mlock);
2795 knote_set_error(kn, ENXIO);
2796 return 0;
2797 }
2798
2799 kn->kn_hook = d;
2800 kn->kn_filtid = EVFILTID_BPFREAD;
2801 KNOTE_ATTACH(&d->bd_sel.si_note, kn);
2802 d->bd_flags |= BPF_KNOTE;
2803
2804 /* capture the current state */
2805 res = filt_bpfread_common(kn, NULL, d);
2806
2807 lck_mtx_unlock(bpf_mlock);
2808
2809 return res;
2810 }
2811
2812 static void
filt_bpfdetach(struct knote * kn)2813 filt_bpfdetach(struct knote *kn)
2814 {
2815 struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2816
2817 lck_mtx_lock(bpf_mlock);
2818 if (d->bd_flags & BPF_KNOTE) {
2819 KNOTE_DETACH(&d->bd_sel.si_note, kn);
2820 d->bd_flags &= ~BPF_KNOTE;
2821 }
2822 lck_mtx_unlock(bpf_mlock);
2823 }
2824
2825 static int
filt_bpfread(struct knote * kn,long hint)2826 filt_bpfread(struct knote *kn, long hint)
2827 {
2828 #pragma unused(hint)
2829 struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2830
2831 return filt_bpfread_common(kn, NULL, d);
2832 }
2833
2834 static int
filt_bpftouch(struct knote * kn,struct kevent_qos_s * kev)2835 filt_bpftouch(struct knote *kn, struct kevent_qos_s *kev)
2836 {
2837 struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2838 int res;
2839
2840 lck_mtx_lock(bpf_mlock);
2841
2842 /* save off the lowat threshold and flag */
2843 kn->kn_sdata = kev->data;
2844 kn->kn_sfflags = kev->fflags;
2845
2846 /* output data will be re-generated here */
2847 res = filt_bpfread_common(kn, NULL, d);
2848
2849 lck_mtx_unlock(bpf_mlock);
2850
2851 return res;
2852 }
2853
2854 static int
filt_bpfprocess(struct knote * kn,struct kevent_qos_s * kev)2855 filt_bpfprocess(struct knote *kn, struct kevent_qos_s *kev)
2856 {
2857 struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2858 int res;
2859
2860 lck_mtx_lock(bpf_mlock);
2861 res = filt_bpfread_common(kn, kev, d);
2862 lck_mtx_unlock(bpf_mlock);
2863
2864 return res;
2865 }
2866
2867 /*
2868 * Copy data from an mbuf chain into a buffer. This code is derived
2869 * from m_copydata in kern/uipc_mbuf.c.
2870 */
2871 static void
bpf_mcopy(struct mbuf * m,void * dst_arg,size_t len,size_t offset)2872 bpf_mcopy(struct mbuf *m, void *dst_arg, size_t len, size_t offset)
2873 {
2874 u_int count;
2875 u_char *dst;
2876
2877 dst = dst_arg;
2878
2879 while (offset >= m->m_len) {
2880 offset -= m->m_len;
2881 m = m->m_next;
2882 if (m == NULL) {
2883 panic("bpf_mcopy");
2884 }
2885 continue;
2886 }
2887
2888 while (len > 0) {
2889 if (m == NULL) {
2890 panic("bpf_mcopy");
2891 }
2892 count = MIN(m->m_len - (u_int)offset, (u_int)len);
2893 bcopy((u_char *)mbuf_data(m) + offset, dst, count);
2894 m = m->m_next;
2895 dst += count;
2896 len -= count;
2897 offset = 0;
2898 }
2899 }
2900
static inline void
bpf_tap_imp(
	ifnet_t ifp,
	u_int32_t dlt,
	struct bpf_packet *bpf_pkt,
	int outbound)
{
	struct bpf_d *d;
	u_int slen;
	struct bpf_if *bp;

	/*
	 * It's possible that we get here after the bpf descriptor has been
	 * detached from the interface; in such a case we simply return.
	 * Lock ordering is important since we can be called asynchronously
	 * (from IOKit) to process an inbound packet; when that happens
	 * we would have been holding its "gateLock" and will be acquiring
	 * "bpf_mlock" upon entering this routine. Due to that, we release
	 * "bpf_mlock" prior to calling ifnet_set_promiscuous (which will
	 * acquire "gateLock" in the IOKit), in order to avoid a deadlock
	 * when a ifnet_set_promiscuous request simultaneously collides with
	 * an inbound packet being passed into the tap callback.
	 */
	lck_mtx_lock(bpf_mlock);
	if (ifp->if_bpf == NULL) {
		lck_mtx_unlock(bpf_mlock);
		return;
	}
	/* Find the bpf_if matching the requested DLT on this interface */
	for (bp = ifp->if_bpf; bp != NULL; bp = bp->bif_next) {
		if (bp->bif_ifp != ifp) {
			/* wrong interface */
			bp = NULL;
			break;
		}
		if (dlt == 0 || bp->bif_dlt == dlt) {
			/* tapping default DLT or DLT matches */
			break;
		}
	}
	if (bp == NULL) {
		goto done;
	}
	/* Run every attached descriptor's filter against the packet */
	for (d = bp->bif_dlist; d != NULL; d = d->bd_next) {
		struct bpf_packet *bpf_pkt_saved = bpf_pkt;
		struct bpf_packet bpf_pkt_tmp = {};
		struct pktap_header_buffer bpfp_header_tmp = {};

		/* descriptor doesn't want to see locally generated packets */
		if (outbound && !d->bd_seesent) {
			continue;
		}

		++d->bd_rcount;
		slen = bpf_filter(d->bd_filter, (u_char *)bpf_pkt,
		    (u_int)bpf_pkt->bpfp_total_length, 0);

		if (slen != 0) {
			if (bp->bif_ifp->if_type == IFT_PKTAP &&
			    bp->bif_dlt == DLT_PKTAP) {
				if (d->bd_flags & BPF_TRUNCATE) {
					slen = min(slen, get_pkt_trunc_len(bpf_pkt));
				}
				/*
				 * Need to copy the bpf_pkt because the conversion
				 * to v2 pktap header modifies the content of the
				 * bpfp_header
				 */
				if ((d->bd_flags & BPF_PKTHDRV2) &&
				    bpf_pkt->bpfp_header_length <= sizeof(bpfp_header_tmp)) {
					bpf_pkt_tmp = *bpf_pkt;

					bpf_pkt = &bpf_pkt_tmp;

					memcpy(&bpfp_header_tmp, bpf_pkt->bpfp_header,
					    bpf_pkt->bpfp_header_length);

					bpf_pkt->bpfp_header = &bpfp_header_tmp;

					convert_to_pktap_header_to_v2(bpf_pkt,
					    !!(d->bd_flags & BPF_TRUNCATE));
				}
			}
			++d->bd_fcount;
			catchpacket(d, bpf_pkt, slen, outbound);
		}
		/* undo any per-descriptor switch to the temporary copy */
		bpf_pkt = bpf_pkt_saved;
	}

done:
	lck_mtx_unlock(bpf_mlock);
}
2991
2992 static inline void
bpf_tap_mbuf(ifnet_t ifp,u_int32_t dlt,mbuf_t m,void * hdr,size_t hlen,int outbound)2993 bpf_tap_mbuf(
2994 ifnet_t ifp,
2995 u_int32_t dlt,
2996 mbuf_t m,
2997 void* hdr,
2998 size_t hlen,
2999 int outbound)
3000 {
3001 struct bpf_packet bpf_pkt;
3002 struct mbuf *m0;
3003
3004 if (ifp->if_bpf == NULL) {
3005 /* quickly check without taking lock */
3006 return;
3007 }
3008 bpf_pkt.bpfp_type = BPF_PACKET_TYPE_MBUF;
3009 bpf_pkt.bpfp_mbuf = m;
3010 bpf_pkt.bpfp_total_length = 0;
3011 for (m0 = m; m0 != NULL; m0 = m0->m_next) {
3012 bpf_pkt.bpfp_total_length += m0->m_len;
3013 }
3014 bpf_pkt.bpfp_header = hdr;
3015 if (hdr != NULL) {
3016 bpf_pkt.bpfp_total_length += hlen;
3017 bpf_pkt.bpfp_header_length = hlen;
3018 } else {
3019 bpf_pkt.bpfp_header_length = 0;
3020 }
3021 bpf_tap_imp(ifp, dlt, &bpf_pkt, outbound);
3022 }
3023
3024 void
bpf_tap_out(ifnet_t ifp,u_int32_t dlt,mbuf_t m,void * hdr,size_t hlen)3025 bpf_tap_out(
3026 ifnet_t ifp,
3027 u_int32_t dlt,
3028 mbuf_t m,
3029 void* hdr,
3030 size_t hlen)
3031 {
3032 bpf_tap_mbuf(ifp, dlt, m, hdr, hlen, 1);
3033 }
3034
3035 void
bpf_tap_in(ifnet_t ifp,u_int32_t dlt,mbuf_t m,void * hdr,size_t hlen)3036 bpf_tap_in(
3037 ifnet_t ifp,
3038 u_int32_t dlt,
3039 mbuf_t m,
3040 void* hdr,
3041 size_t hlen)
3042 {
3043 bpf_tap_mbuf(ifp, dlt, m, hdr, hlen, 0);
3044 }
3045
3046 /* Callback registered with Ethernet driver. */
3047 static int
bpf_tap_callback(struct ifnet * ifp,struct mbuf * m)3048 bpf_tap_callback(struct ifnet *ifp, struct mbuf *m)
3049 {
3050 bpf_tap_mbuf(ifp, 0, m, NULL, 0, mbuf_pkthdr_rcvif(m) == NULL);
3051
3052 return 0;
3053 }
3054
3055 #if SKYWALK
3056 #include <skywalk/os_skywalk_private.h>
3057
3058 static void
bpf_pktcopy(kern_packet_t pkt,void * dst_arg,size_t len,size_t offset)3059 bpf_pktcopy(kern_packet_t pkt, void *dst_arg, size_t len, size_t offset)
3060 {
3061 kern_buflet_t buflet = NULL;
3062 size_t count;
3063 u_char *dst;
3064
3065 dst = dst_arg;
3066 while (len > 0) {
3067 uint8_t *addr;
3068
3069 u_int32_t buflet_length;
3070
3071 buflet = kern_packet_get_next_buflet(pkt, buflet);
3072 VERIFY(buflet != NULL);
3073 addr = kern_buflet_get_data_address(buflet);
3074 VERIFY(addr != NULL);
3075 addr += kern_buflet_get_data_offset(buflet);
3076 buflet_length = kern_buflet_get_data_length(buflet);
3077 if (offset >= buflet_length) {
3078 offset -= buflet_length;
3079 continue;
3080 }
3081 count = MIN(buflet_length - offset, len);
3082 bcopy((void *)(addr + offset), (void *)dst, count);
3083 dst += count;
3084 len -= count;
3085 offset = 0;
3086 }
3087 }
3088
3089 static inline void
bpf_tap_packet(ifnet_t ifp,u_int32_t dlt,kern_packet_t pkt,void * hdr,size_t hlen,int outbound)3090 bpf_tap_packet(
3091 ifnet_t ifp,
3092 u_int32_t dlt,
3093 kern_packet_t pkt,
3094 void* hdr,
3095 size_t hlen,
3096 int outbound)
3097 {
3098 struct bpf_packet bpf_pkt;
3099 struct mbuf * m;
3100
3101 if (ifp->if_bpf == NULL) {
3102 /* quickly check without taking lock */
3103 return;
3104 }
3105 m = kern_packet_get_mbuf(pkt);
3106 if (m != NULL) {
3107 bpf_pkt.bpfp_type = BPF_PACKET_TYPE_MBUF;
3108 bpf_pkt.bpfp_mbuf = m;
3109 bpf_pkt.bpfp_total_length = m_length(m);
3110 } else {
3111 bpf_pkt.bpfp_type = BPF_PACKET_TYPE_PKT;
3112 bpf_pkt.bpfp_pkt = pkt;
3113 bpf_pkt.bpfp_total_length = kern_packet_get_data_length(pkt);
3114 }
3115 bpf_pkt.bpfp_header = hdr;
3116 bpf_pkt.bpfp_header_length = hlen;
3117 if (hlen != 0) {
3118 bpf_pkt.bpfp_total_length += hlen;
3119 }
3120 bpf_tap_imp(ifp, dlt, &bpf_pkt, outbound);
3121 }
3122
3123 void
bpf_tap_packet_out(ifnet_t ifp,u_int32_t dlt,kern_packet_t pkt,void * hdr,size_t hlen)3124 bpf_tap_packet_out(
3125 ifnet_t ifp,
3126 u_int32_t dlt,
3127 kern_packet_t pkt,
3128 void* hdr,
3129 size_t hlen)
3130 {
3131 bpf_tap_packet(ifp, dlt, pkt, hdr, hlen, 1);
3132 }
3133
3134 void
bpf_tap_packet_in(ifnet_t ifp,u_int32_t dlt,kern_packet_t pkt,void * hdr,size_t hlen)3135 bpf_tap_packet_in(
3136 ifnet_t ifp,
3137 u_int32_t dlt,
3138 kern_packet_t pkt,
3139 void* hdr,
3140 size_t hlen)
3141 {
3142 bpf_tap_packet(ifp, dlt, pkt, hdr, hlen, 0);
3143 }
3144
3145 #endif /* SKYWALK */
3146
3147 static errno_t
bpf_copydata(struct bpf_packet * pkt,size_t off,size_t len,void * out_data)3148 bpf_copydata(struct bpf_packet *pkt, size_t off, size_t len, void* out_data)
3149 {
3150 errno_t err = 0;
3151 if (pkt->bpfp_type == BPF_PACKET_TYPE_MBUF) {
3152 err = mbuf_copydata(pkt->bpfp_mbuf, off, len, out_data);
3153 #if SKYWALK
3154 } else if (pkt->bpfp_type == BPF_PACKET_TYPE_PKT) {
3155 err = kern_packet_copy_bytes(pkt->bpfp_pkt, off, len, out_data);
3156 #endif /* SKYWALK */
3157 } else {
3158 err = EINVAL;
3159 }
3160
3161 return err;
3162 }
3163
3164 static void
copy_bpf_packet_offset(struct bpf_packet * pkt,void * dst,size_t len,size_t offset)3165 copy_bpf_packet_offset(struct bpf_packet * pkt, void * dst, size_t len, size_t offset)
3166 {
3167 /* copy the optional header */
3168 if (offset < pkt->bpfp_header_length) {
3169 size_t count = MIN(len, pkt->bpfp_header_length - offset);
3170 caddr_t src = (caddr_t)pkt->bpfp_header;
3171 bcopy(src + offset, dst, count);
3172 len -= count;
3173 dst = (void *)((uintptr_t)dst + count);
3174 offset = 0;
3175 } else {
3176 offset -= pkt->bpfp_header_length;
3177 }
3178
3179 if (len == 0) {
3180 /* nothing past the header */
3181 return;
3182 }
3183 /* copy the packet */
3184 switch (pkt->bpfp_type) {
3185 case BPF_PACKET_TYPE_MBUF:
3186 bpf_mcopy(pkt->bpfp_mbuf, dst, len, offset);
3187 break;
3188 #if SKYWALK
3189 case BPF_PACKET_TYPE_PKT:
3190 bpf_pktcopy(pkt->bpfp_pkt, dst, len, offset);
3191 break;
3192 #endif /* SKYWALK */
3193 default:
3194 break;
3195 }
3196 }
3197
3198 static void
copy_bpf_packet(struct bpf_packet * pkt,void * dst,size_t len)3199 copy_bpf_packet(struct bpf_packet * pkt, void * dst, size_t len)
3200 {
3201 copy_bpf_packet_offset(pkt, dst, len, 0);
3202 }
3203
3204 static uint32_t
get_esp_trunc_len(__unused struct bpf_packet * pkt,__unused uint32_t off,const uint32_t remaining_caplen)3205 get_esp_trunc_len(__unused struct bpf_packet *pkt, __unused uint32_t off,
3206 const uint32_t remaining_caplen)
3207 {
3208 /*
3209 * For some reason tcpdump expects to have one byte beyond the ESP header
3210 */
3211 uint32_t trunc_len = ESP_HDR_SIZE + 1;
3212
3213 if (trunc_len > remaining_caplen) {
3214 return remaining_caplen;
3215 }
3216
3217 return trunc_len;
3218 }
3219
3220 static uint32_t
get_isakmp_trunc_len(__unused struct bpf_packet * pkt,__unused uint32_t off,const uint32_t remaining_caplen)3221 get_isakmp_trunc_len(__unused struct bpf_packet *pkt, __unused uint32_t off,
3222 const uint32_t remaining_caplen)
3223 {
3224 /*
3225 * Include the payload generic header
3226 */
3227 uint32_t trunc_len = ISAKMP_HDR_SIZE;
3228
3229 if (trunc_len > remaining_caplen) {
3230 return remaining_caplen;
3231 }
3232
3233 return trunc_len;
3234 }
3235
3236 static uint32_t
get_isakmp_natt_trunc_len(struct bpf_packet * pkt,uint32_t off,const uint32_t remaining_caplen)3237 get_isakmp_natt_trunc_len(struct bpf_packet *pkt, uint32_t off,
3238 const uint32_t remaining_caplen)
3239 {
3240 int err = 0;
3241 uint32_t trunc_len = 0;
3242 char payload[remaining_caplen];
3243
3244 err = bpf_copydata(pkt, off, remaining_caplen, payload);
3245 if (err != 0) {
3246 return remaining_caplen;
3247 }
3248 /*
3249 * They are three cases:
3250 * - IKE: payload start with 4 bytes header set to zero before ISAKMP header
3251 * - keep alive: 1 byte payload
3252 * - otherwise it's ESP
3253 */
3254 if (remaining_caplen >= 4 &&
3255 payload[0] == 0 && payload[1] == 0 &&
3256 payload[2] == 0 && payload[3] == 0) {
3257 trunc_len = 4 + get_isakmp_trunc_len(pkt, off + 4, remaining_caplen - 4);
3258 } else if (remaining_caplen == 1) {
3259 trunc_len = 1;
3260 } else {
3261 trunc_len = get_esp_trunc_len(pkt, off, remaining_caplen);
3262 }
3263
3264 if (trunc_len > remaining_caplen) {
3265 return remaining_caplen;
3266 }
3267
3268 return trunc_len;
3269 }
3270
3271 static uint32_t
get_udp_trunc_len(struct bpf_packet * pkt,uint32_t off,const uint32_t remaining_caplen)3272 get_udp_trunc_len(struct bpf_packet *pkt, uint32_t off, const uint32_t remaining_caplen)
3273 {
3274 int err = 0;
3275 uint32_t trunc_len = sizeof(struct udphdr); /* By default no UDP payload */
3276
3277 if (trunc_len >= remaining_caplen) {
3278 return remaining_caplen;
3279 }
3280
3281 struct udphdr udphdr;
3282 err = bpf_copydata(pkt, off, sizeof(struct udphdr), &udphdr);
3283 if (err != 0) {
3284 return remaining_caplen;
3285 }
3286
3287 u_short sport, dport;
3288
3289 sport = EXTRACT_SHORT(&udphdr.uh_sport);
3290 dport = EXTRACT_SHORT(&udphdr.uh_dport);
3291
3292 if (dport == PORT_DNS || sport == PORT_DNS) {
3293 /*
3294 * Full UDP payload for DNS
3295 */
3296 trunc_len = remaining_caplen;
3297 } else if ((sport == PORT_BOOTPS && dport == PORT_BOOTPC) ||
3298 (sport == PORT_BOOTPC && dport == PORT_BOOTPS)) {
3299 /*
3300 * Full UDP payload for BOOTP and DHCP
3301 */
3302 trunc_len = remaining_caplen;
3303 } else if (dport == PORT_ISAKMP && sport == PORT_ISAKMP) {
3304 /*
3305 * Return the ISAKMP header
3306 */
3307 trunc_len += get_isakmp_trunc_len(pkt, off + sizeof(struct udphdr),
3308 remaining_caplen - sizeof(struct udphdr));
3309 } else if (dport == PORT_ISAKMP_NATT && sport == PORT_ISAKMP_NATT) {
3310 trunc_len += get_isakmp_natt_trunc_len(pkt, off + sizeof(struct udphdr),
3311 remaining_caplen - sizeof(struct udphdr));
3312 }
3313 if (trunc_len >= remaining_caplen) {
3314 return remaining_caplen;
3315 }
3316
3317 return trunc_len;
3318 }
3319
3320 static uint32_t
get_tcp_trunc_len(struct bpf_packet * pkt,uint32_t off,const uint32_t remaining_caplen)3321 get_tcp_trunc_len(struct bpf_packet *pkt, uint32_t off, const uint32_t remaining_caplen)
3322 {
3323 int err = 0;
3324 uint32_t trunc_len = sizeof(struct tcphdr); /* By default no TCP payload */
3325 if (trunc_len >= remaining_caplen) {
3326 return remaining_caplen;
3327 }
3328
3329 struct tcphdr tcphdr;
3330 err = bpf_copydata(pkt, off, sizeof(struct tcphdr), &tcphdr);
3331 if (err != 0) {
3332 return remaining_caplen;
3333 }
3334
3335 u_short sport, dport;
3336 sport = EXTRACT_SHORT(&tcphdr.th_sport);
3337 dport = EXTRACT_SHORT(&tcphdr.th_dport);
3338
3339 if (dport == PORT_DNS || sport == PORT_DNS) {
3340 /*
3341 * Full TCP payload for DNS
3342 */
3343 trunc_len = remaining_caplen;
3344 } else {
3345 trunc_len = (uint16_t)(tcphdr.th_off << 2);
3346 }
3347 if (trunc_len >= remaining_caplen) {
3348 return remaining_caplen;
3349 }
3350
3351 return trunc_len;
3352 }
3353
3354 static uint32_t
get_proto_trunc_len(uint8_t proto,struct bpf_packet * pkt,uint32_t off,const uint32_t remaining_caplen)3355 get_proto_trunc_len(uint8_t proto, struct bpf_packet *pkt, uint32_t off, const uint32_t remaining_caplen)
3356 {
3357 uint32_t trunc_len;
3358
3359 switch (proto) {
3360 case IPPROTO_ICMP: {
3361 /*
3362 * Full IMCP payload
3363 */
3364 trunc_len = remaining_caplen;
3365 break;
3366 }
3367 case IPPROTO_ICMPV6: {
3368 /*
3369 * Full IMCPV6 payload
3370 */
3371 trunc_len = remaining_caplen;
3372 break;
3373 }
3374 case IPPROTO_IGMP: {
3375 /*
3376 * Full IGMP payload
3377 */
3378 trunc_len = remaining_caplen;
3379 break;
3380 }
3381 case IPPROTO_UDP: {
3382 trunc_len = get_udp_trunc_len(pkt, off, remaining_caplen);
3383 break;
3384 }
3385 case IPPROTO_TCP: {
3386 trunc_len = get_tcp_trunc_len(pkt, off, remaining_caplen);
3387 break;
3388 }
3389 case IPPROTO_ESP: {
3390 trunc_len = get_esp_trunc_len(pkt, off, remaining_caplen);
3391 break;
3392 }
3393 default: {
3394 /*
3395 * By default we only include the IP header
3396 */
3397 trunc_len = 0;
3398 break;
3399 }
3400 }
3401 if (trunc_len >= remaining_caplen) {
3402 return remaining_caplen;
3403 }
3404
3405 return trunc_len;
3406 }
3407
3408 static uint32_t
get_ip_trunc_len(struct bpf_packet * pkt,uint32_t off,const uint32_t remaining_caplen)3409 get_ip_trunc_len(struct bpf_packet *pkt, uint32_t off, const uint32_t remaining_caplen)
3410 {
3411 int err = 0;
3412 uint32_t iplen = sizeof(struct ip);
3413 if (iplen >= remaining_caplen) {
3414 return remaining_caplen;
3415 }
3416
3417 struct ip iphdr;
3418 err = bpf_copydata(pkt, off, sizeof(struct ip), &iphdr);
3419 if (err != 0) {
3420 return remaining_caplen;
3421 }
3422
3423 uint8_t proto = 0;
3424
3425 iplen = (uint16_t)(iphdr.ip_hl << 2);
3426 if (iplen >= remaining_caplen) {
3427 return remaining_caplen;
3428 }
3429
3430 proto = iphdr.ip_p;
3431 iplen += get_proto_trunc_len(proto, pkt, off + iplen, remaining_caplen - iplen);
3432
3433 if (iplen >= remaining_caplen) {
3434 return remaining_caplen;
3435 }
3436
3437 return iplen;
3438 }
3439
3440 static uint32_t
get_ip6_trunc_len(struct bpf_packet * pkt,uint32_t off,const uint32_t remaining_caplen)3441 get_ip6_trunc_len(struct bpf_packet *pkt, uint32_t off, const uint32_t remaining_caplen)
3442 {
3443 int err = 0;
3444 uint32_t iplen = sizeof(struct ip6_hdr);
3445 if (iplen >= remaining_caplen) {
3446 return remaining_caplen;
3447 }
3448
3449 struct ip6_hdr ip6hdr;
3450 err = bpf_copydata(pkt, off, sizeof(struct ip6_hdr), &ip6hdr);
3451 if (err != 0) {
3452 return remaining_caplen;
3453 }
3454
3455 uint8_t proto = 0;
3456
3457 /*
3458 * TBD: process the extension headers
3459 */
3460 proto = ip6hdr.ip6_nxt;
3461 iplen += get_proto_trunc_len(proto, pkt, off + iplen, remaining_caplen - iplen);
3462
3463 if (iplen >= remaining_caplen) {
3464 return remaining_caplen;
3465 }
3466
3467 return iplen;
3468 }
3469
3470 static uint32_t
get_ether_trunc_len(struct bpf_packet * pkt,uint32_t off,const uint32_t remaining_caplen)3471 get_ether_trunc_len(struct bpf_packet *pkt, uint32_t off, const uint32_t remaining_caplen)
3472 {
3473 int err = 0;
3474 uint32_t ethlen = sizeof(struct ether_header);
3475 if (ethlen >= remaining_caplen) {
3476 return remaining_caplen;
3477 }
3478
3479 struct ether_header eh = {};
3480 err = bpf_copydata(pkt, off, sizeof(struct ether_header), &eh);
3481 if (err != 0) {
3482 return remaining_caplen;
3483 }
3484
3485 u_short type = EXTRACT_SHORT(&eh.ether_type);
3486 /* Include full ARP */
3487 if (type == ETHERTYPE_ARP) {
3488 ethlen = remaining_caplen;
3489 } else if (type == ETHERTYPE_IP) {
3490 ethlen += get_ip_trunc_len(pkt, off + sizeof(struct ether_header),
3491 remaining_caplen - ethlen);
3492 } else if (type == ETHERTYPE_IPV6) {
3493 ethlen += get_ip6_trunc_len(pkt, off + sizeof(struct ether_header),
3494 remaining_caplen - ethlen);
3495 } else {
3496 ethlen = MIN(BPF_MIN_PKT_SIZE, remaining_caplen);
3497 }
3498 return ethlen;
3499 }
3500
3501
/*
 * Compute the truncated capture length for a packet tapped through pktap:
 * the BPF/pktap header plus any L2 pseudo-header plus the protocol-aware
 * truncated payload length.  Falls back to BPF_MIN_PKT_SIZE for protocols
 * we do not know how to parse.  Never returns more than bpfp_total_length.
 */
static uint32_t
get_pkt_trunc_len(struct bpf_packet *pkt)
{
	/* The pktap header sits at the front of the BPF packet header */
	struct pktap_header *pktap = (struct pktap_header *) (pkt->bpfp_header);
	uint32_t in_pkt_len = 0;   /* payload bytes available for parsing */
	uint32_t out_pkt_len = 0;  /* truncated payload length computed below */
	uint32_t tlen = 0;
	uint32_t pre_adjust; // L2 header not in mbuf or kern_packet

	// bpfp_total_length must contain the BPF packet header
	assert3u(pkt->bpfp_total_length, >=, pkt->bpfp_header_length);

	// The BPF packet header must contain the pktap header
	assert3u(pkt->bpfp_header_length, >=, pktap->pth_length);

	// The pre frame length (L2 header) must be contained in the packet
	assert3u(pkt->bpfp_total_length, >=, pktap->pth_length + pktap->pth_frame_pre_length);

	/*
	 * pktap->pth_frame_pre_length is the L2 header length and accounts
	 * for both L2 header in the packet payload and pre_adjust.
	 *
	 * pre_adjust represents an adjustment for a pseudo L2 header that is not
	 * part of packet payload -- not in the mbuf or kern_packet -- and comes
	 * just after the pktap header.
	 *
	 * pktap->pth_length is the size of the pktap header (exclude pre_adjust)
	 *
	 * pkt->bpfp_header_length is (pktap->pth_length + pre_adjust)
	 */
	pre_adjust = (uint32_t)(pkt->bpfp_header_length - pktap->pth_length);

	if (pktap->pth_iftype == IFT_ETHER) {
		/*
		 * We need to parse the Ethernet header to find the network layer
		 * protocol
		 */
		in_pkt_len = (uint32_t)(pkt->bpfp_total_length - pktap->pth_length - pre_adjust);

		out_pkt_len = get_ether_trunc_len(pkt, 0, in_pkt_len);

		tlen = pktap->pth_length + pre_adjust + out_pkt_len;
	} else {
		/*
		 * For other interface types, we only know to parse IPv4 and IPv6.
		 *
		 * To get to the beginning of the IPv4 or IPv6 packet, we need to to skip
		 * over the L2 header that is the actual packet payload (mbuf or kern_packet)
		 */
		uint32_t off; // offset past the L2 header in the actual packet payload

		off = pktap->pth_frame_pre_length - pre_adjust;

		in_pkt_len = (uint32_t)(pkt->bpfp_total_length - pktap->pth_length - pktap->pth_frame_pre_length);

		if (pktap->pth_protocol_family == AF_INET) {
			out_pkt_len = get_ip_trunc_len(pkt, off, in_pkt_len);
		} else if (pktap->pth_protocol_family == AF_INET6) {
			out_pkt_len = get_ip6_trunc_len(pkt, off, in_pkt_len);
		} else {
			/* Unknown protocol family: keep only a minimal snippet */
			out_pkt_len = MIN(BPF_MIN_PKT_SIZE, in_pkt_len);
		}
		tlen = pktap->pth_length + pktap->pth_frame_pre_length + out_pkt_len;
	}

	// Verify we do not overflow the buffer
	if (__improbable(tlen > pkt->bpfp_total_length)) {
		/* Panic in DEBUG builds (or with bpf_debug set), otherwise log and clamp */
		bool do_panic = bpf_debug != 0 ? true : false;

#if DEBUG
		do_panic = true;
#endif /* DEBUG */
		if (do_panic) {
			panic("%s:%d tlen %u > bpfp_total_length %lu bpfp_header_length %lu pth_frame_pre_length %u pre_adjust %u in_pkt_len %u out_pkt_len %u",
			    __func__, __LINE__,
			    tlen, pkt->bpfp_total_length, pkt->bpfp_header_length, pktap->pth_frame_pre_length, pre_adjust, in_pkt_len, out_pkt_len);
		} else {
			os_log(OS_LOG_DEFAULT,
			    "%s:%d tlen %u > bpfp_total_length %lu bpfp_header_length %lu pth_frame_pre_length %u pre_adjust %u in_pkt_len %u out_pkt_len %u",
			    __func__, __LINE__,
			    tlen, pkt->bpfp_total_length, pkt->bpfp_header_length, pktap->pth_frame_pre_length, pre_adjust, in_pkt_len, out_pkt_len);
		}
		bpf_trunc_overflow += 1;
		tlen = (uint32_t)pkt->bpfp_total_length;
	}

	return tlen;
}
3590
/*
 * Return the length, in bytes, of the common prefix of two buffers,
 * comparing 32-bit words at a time.  The result is always a multiple
 * of 4 and never exceeds max_bytes rounded down to a word boundary.
 * Both buffers must hold at least (max_bytes & ~3) readable bytes.
 */
static uint8_t
get_common_prefix_size(const void *a, const void *b, uint8_t max_bytes)
{
	const uint32_t *wa = (const uint32_t *)a;
	const uint32_t *wb = (const uint32_t *)b;
	uint8_t nwords = (uint8_t)(max_bytes >> 2);
	uint8_t matched = 0;

	while (matched < nwords && wa[matched] == wb[matched]) {
		matched++;
	}
	/* convert matching word count back to bytes */
	return (uint8_t)(matched << 2);
}
3606
3607 /*
3608 * Move the packet data from interface memory (pkt) into the
3609 * store buffer. Return 1 if it's time to wakeup a listener (buffer full),
3610 * otherwise 0.
3611 */
3612 static void
catchpacket(struct bpf_d * d,struct bpf_packet * pkt,u_int snaplen,int outbound)3613 catchpacket(struct bpf_d *d, struct bpf_packet * pkt,
3614 u_int snaplen, int outbound)
3615 {
3616 struct bpf_hdr *hp;
3617 struct bpf_hdr_ext *ehp;
3618 uint32_t totlen, curlen;
3619 uint32_t hdrlen, caplen;
3620 int do_wakeup = 0;
3621 u_char *payload;
3622 struct timeval tv;
3623
3624 hdrlen = (d->bd_flags & BPF_EXTENDED_HDR) ? d->bd_bif->bif_exthdrlen :
3625 (d->bd_flags & BPF_COMP_REQ) ? d->bd_bif->bif_comphdrlen:
3626 d->bd_bif->bif_hdrlen;
3627 /*
3628 * Figure out how many bytes to move. If the packet is
3629 * greater or equal to the snapshot length, transfer that
3630 * much. Otherwise, transfer the whole packet (unless
3631 * we hit the buffer size limit).
3632 */
3633 totlen = hdrlen + MIN(snaplen, (int)pkt->bpfp_total_length);
3634 if (totlen > d->bd_bufsize) {
3635 totlen = d->bd_bufsize;
3636 }
3637
3638 if (hdrlen > totlen) {
3639 return;
3640 }
3641
3642 /*
3643 * Round up the end of the previous packet to the next longword.
3644 */
3645 curlen = BPF_WORDALIGN(d->bd_slen);
3646 if (curlen + totlen > d->bd_bufsize) {
3647 /*
3648 * This packet will overflow the storage buffer.
3649 * Rotate the buffers if we can, then wakeup any
3650 * pending reads.
3651 *
3652 * We cannot rotate buffers if a read is in progress
3653 * so drop the packet
3654 */
3655 if (d->bd_hbuf_read != 0) {
3656 ++d->bd_dcount;
3657 return;
3658 }
3659
3660 if (d->bd_fbuf == NULL) {
3661 if (d->bd_headdrop == 0) {
3662 /*
3663 * We haven't completed the previous read yet,
3664 * so drop the packet.
3665 */
3666 ++d->bd_dcount;
3667 return;
3668 }
3669 /*
3670 * Drop the hold buffer as it contains older packets
3671 */
3672 d->bd_dcount += d->bd_hcnt;
3673 d->bd_fbuf = d->bd_hbuf;
3674 ROTATE_BUFFERS(d);
3675 } else {
3676 ROTATE_BUFFERS(d);
3677 }
3678 do_wakeup = 1;
3679 curlen = 0;
3680 } else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT) {
3681 /*
3682 * Immediate mode is set, or the read timeout has
3683 * already expired during a select call. A packet
3684 * arrived, so the reader should be woken up.
3685 */
3686 do_wakeup = 1;
3687 }
3688
3689 /*
3690 * Append the bpf header.
3691 */
3692 microtime(&tv);
3693 if (d->bd_flags & BPF_EXTENDED_HDR) {
3694 ehp = (struct bpf_hdr_ext *)(void *)(d->bd_sbuf + curlen);
3695 memset(ehp, 0, sizeof(*ehp));
3696 ehp->bh_tstamp.tv_sec = (int)tv.tv_sec;
3697 ehp->bh_tstamp.tv_usec = tv.tv_usec;
3698
3699 ehp->bh_datalen = (bpf_u_int32)pkt->bpfp_total_length;
3700 ehp->bh_hdrlen = (u_short)hdrlen;
3701 caplen = ehp->bh_caplen = totlen - hdrlen;
3702 payload = (u_char *)ehp + hdrlen;
3703
3704 if (outbound) {
3705 ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_OUT;
3706 } else {
3707 ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_IN;
3708 }
3709
3710 if (pkt->bpfp_type == BPF_PACKET_TYPE_MBUF) {
3711 struct mbuf *m = pkt->bpfp_mbuf;
3712
3713 if (outbound) {
3714 /* only do lookups on non-raw INPCB */
3715 if ((m->m_pkthdr.pkt_flags & (PKTF_FLOW_ID |
3716 PKTF_FLOW_LOCALSRC | PKTF_FLOW_RAWSOCK)) ==
3717 (PKTF_FLOW_ID | PKTF_FLOW_LOCALSRC) &&
3718 m->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
3719 ehp->bh_flowid = m->m_pkthdr.pkt_flowid;
3720 if (m->m_pkthdr.pkt_proto == IPPROTO_TCP) {
3721 ehp->bh_flags |= BPF_HDR_EXT_FLAGS_TCP;
3722 } else if (m->m_pkthdr.pkt_proto == IPPROTO_UDP) {
3723 ehp->bh_flags |= BPF_HDR_EXT_FLAGS_UDP;
3724 }
3725 }
3726 ehp->bh_svc = so_svc2tc(m->m_pkthdr.pkt_svc);
3727 if (m->m_pkthdr.pkt_flags & PKTF_TCP_REXMT) {
3728 ehp->bh_pktflags |= BPF_PKTFLAGS_TCP_REXMT;
3729 }
3730 if (m->m_pkthdr.pkt_flags & PKTF_START_SEQ) {
3731 ehp->bh_pktflags |= BPF_PKTFLAGS_START_SEQ;
3732 }
3733 if (m->m_pkthdr.pkt_flags & PKTF_LAST_PKT) {
3734 ehp->bh_pktflags |= BPF_PKTFLAGS_LAST_PKT;
3735 }
3736 if (m->m_pkthdr.pkt_flags & PKTF_VALID_UNSENT_DATA) {
3737 ehp->bh_unsent_bytes =
3738 m->m_pkthdr.bufstatus_if;
3739 ehp->bh_unsent_snd =
3740 m->m_pkthdr.bufstatus_sndbuf;
3741 }
3742 } else {
3743 if (m->m_pkthdr.pkt_flags & PKTF_WAKE_PKT) {
3744 ehp->bh_pktflags |= BPF_PKTFLAGS_WAKE_PKT;
3745 }
3746 }
3747 #if SKYWALK
3748 } else {
3749 kern_packet_t kern_pkt = pkt->bpfp_pkt;
3750 packet_flowid_t flowid = 0;
3751
3752 if (outbound) {
3753 /*
3754 * Note: pp_init() asserts that kern_packet_svc_class_t is equivalent
3755 * to mbuf_svc_class_t
3756 */
3757 ehp->bh_svc = so_svc2tc((mbuf_svc_class_t)kern_packet_get_service_class(kern_pkt));
3758 if (kern_packet_get_transport_retransmit(kern_pkt)) {
3759 ehp->bh_pktflags |= BPF_PKTFLAGS_TCP_REXMT;
3760 }
3761 if (kern_packet_get_transport_last_packet(kern_pkt)) {
3762 ehp->bh_pktflags |= BPF_PKTFLAGS_LAST_PKT;
3763 }
3764 } else {
3765 if (kern_packet_get_wake_flag(kern_pkt)) {
3766 ehp->bh_pktflags |= BPF_PKTFLAGS_WAKE_PKT;
3767 }
3768 }
3769 ehp->bh_trace_tag = kern_packet_get_trace_tag(kern_pkt);
3770 if (kern_packet_get_flowid(kern_pkt, &flowid) == 0) {
3771 ehp->bh_flowid = flowid;
3772 }
3773 #endif /* SKYWALK */
3774 }
3775 } else {
3776 hp = (struct bpf_hdr *)(void *)(d->bd_sbuf + curlen);
3777 memset(hp, 0, BPF_WORDALIGN(sizeof(*hp)));
3778 hp->bh_tstamp.tv_sec = (int)tv.tv_sec;
3779 hp->bh_tstamp.tv_usec = tv.tv_usec;
3780 hp->bh_datalen = (bpf_u_int32)pkt->bpfp_total_length;
3781 hp->bh_hdrlen = (u_short)hdrlen;
3782 caplen = hp->bh_caplen = totlen - hdrlen;
3783 payload = (u_char *)hp + hdrlen;
3784 }
3785 if (d->bd_flags & BPF_COMP_REQ) {
3786 uint8_t common_prefix_size = 0;
3787 uint8_t copy_len = MIN((uint8_t)caplen, BPF_HDR_COMP_LEN_MAX);
3788
3789 copy_bpf_packet(pkt, d->bd_prev_fbuf, copy_len);
3790
3791 if (d->bd_prev_slen != 0) {
3792 common_prefix_size = get_common_prefix_size(d->bd_prev_fbuf,
3793 d->bd_prev_sbuf, MIN(copy_len, d->bd_prev_slen));
3794 }
3795
3796 if (d->bd_flags & BPF_COMP_ENABLED) {
3797 assert3u(caplen, >=, common_prefix_size);
3798 copy_bpf_packet_offset(pkt, payload, caplen - common_prefix_size,
3799 common_prefix_size);
3800 d->bd_slen = curlen + totlen - common_prefix_size;
3801 } else {
3802 copy_bpf_packet(pkt, payload, caplen);
3803 d->bd_slen = curlen + totlen;
3804 }
3805
3806 /*
3807 * Update the caplen only if compression is enabled -- the caller
3808 * must pay attention to bpf_hdr_comp_enable
3809 */
3810 if (d->bd_flags & BPF_EXTENDED_HDR) {
3811 ehp->bh_complen = common_prefix_size;
3812 if (d->bd_flags & BPF_COMP_ENABLED) {
3813 ehp->bh_caplen -= common_prefix_size;
3814 }
3815 } else {
3816 struct bpf_comp_hdr *hcp;
3817
3818 hcp = (struct bpf_comp_hdr *)(void *)(d->bd_sbuf + curlen);
3819 hcp->bh_complen = common_prefix_size;
3820 if (d->bd_flags & BPF_COMP_ENABLED) {
3821 hcp->bh_caplen -= common_prefix_size;
3822 }
3823 }
3824
3825 if (common_prefix_size > 0) {
3826 d->bd_bcs.bcs_total_compressed_prefix_size += common_prefix_size;
3827 if (common_prefix_size > d->bd_bcs.bcs_max_compressed_prefix_size) {
3828 d->bd_bcs.bcs_max_compressed_prefix_size = common_prefix_size;
3829 }
3830 d->bd_bcs.bcs_count_compressed_prefix += 1;
3831 } else {
3832 d->bd_bcs.bcs_count_no_common_prefix += 1;
3833 }
3834
3835 /* The current compression buffer becomes the previous one */
3836 caddr_t tmp = d->bd_prev_sbuf;
3837 d->bd_prev_sbuf = d->bd_prev_fbuf;
3838 d->bd_prev_slen = copy_len;
3839 d->bd_prev_fbuf = tmp;
3840 } else {
3841 /*
3842 * Copy the packet data into the store buffer and update its length.
3843 */
3844 copy_bpf_packet(pkt, payload, caplen);
3845 d->bd_slen = curlen + totlen;
3846 }
3847 d->bd_scnt += 1;
3848 d->bd_bcs.bcs_total_hdr_size += pkt->bpfp_header_length;
3849 d->bd_bcs.bcs_total_size += caplen;
3850
3851 if (do_wakeup) {
3852 bpf_wakeup(d);
3853 }
3854 }
3855
3856
/*
 * Release every capture buffer owned by a descriptor: the store (bd_sbuf),
 * hold (bd_hbuf) and free (bd_fbuf) buffers, plus the two scratch buffers
 * used for header compression (bd_prev_sbuf / bd_prev_fbuf).
 * Only non-NULL pointers are freed, so this is safe to call on a
 * partially-allocated descriptor (see bpf_allocbufs()).
 */
static void
bpf_freebufs(struct bpf_d *d)
{
	if (d->bd_sbuf != NULL) {
		kfree_data_addr(d->bd_sbuf);
	}
	if (d->bd_hbuf != NULL) {
		kfree_data_addr(d->bd_hbuf);
	}
	if (d->bd_fbuf != NULL) {
		kfree_data_addr(d->bd_fbuf);
	}

	if (d->bd_prev_sbuf != NULL) {
		kfree_data_addr(d->bd_prev_sbuf);
	}
	if (d->bd_prev_fbuf != NULL) {
		kfree_data_addr(d->bd_prev_fbuf);
	}
}
3877 /*
3878 * Initialize all nonzero fields of a descriptor.
3879 */
3880 static int
bpf_allocbufs(struct bpf_d * d)3881 bpf_allocbufs(struct bpf_d *d)
3882 {
3883 bpf_freebufs(d);
3884
3885 d->bd_fbuf = (caddr_t) kalloc_data(d->bd_bufsize, Z_WAITOK | Z_ZERO);
3886 if (d->bd_fbuf == NULL) {
3887 goto nobufs;
3888 }
3889
3890 d->bd_sbuf = (caddr_t) kalloc_data(d->bd_bufsize, Z_WAITOK | Z_ZERO);
3891 if (d->bd_sbuf == NULL) {
3892 goto nobufs;
3893 }
3894 d->bd_slen = 0;
3895 d->bd_hlen = 0;
3896 d->bd_scnt = 0;
3897 d->bd_hcnt = 0;
3898
3899 d->bd_prev_slen = 0;
3900 if (d->bd_flags & BPF_COMP_REQ) {
3901 d->bd_prev_sbuf = (caddr_t) kalloc_data(BPF_HDR_COMP_LEN_MAX, Z_WAITOK | Z_ZERO);
3902 if (d->bd_prev_sbuf == NULL) {
3903 goto nobufs;
3904 }
3905 d->bd_prev_fbuf = (caddr_t) kalloc_data(BPF_HDR_COMP_LEN_MAX, Z_WAITOK | Z_ZERO);
3906 if (d->bd_prev_fbuf == NULL) {
3907 goto nobufs;
3908 }
3909 }
3910 return 0;
3911 nobufs:
3912 bpf_freebufs(d);
3913 return ENOMEM;
3914 }
3915
3916 /*
3917 * Free buffers currently in use by a descriptor.
3918 * Called on close.
3919 */
3920 static void
bpf_freed(struct bpf_d * d)3921 bpf_freed(struct bpf_d *d)
3922 {
3923 /*
3924 * We don't need to lock out interrupts since this descriptor has
3925 * been detached from its interface and it yet hasn't been marked
3926 * free.
3927 */
3928 if (d->bd_hbuf_read != 0) {
3929 panic("bpf buffer freed during read");
3930 }
3931
3932 bpf_freebufs(d);
3933
3934 if (d->bd_filter) {
3935 kfree_data_addr(d->bd_filter);
3936 }
3937 }
3938
3939 /*
3940 * Attach an interface to bpf. driverp is a pointer to a (struct bpf_if *)
3941 * in the driver's softc; dlt is the link layer type; hdrlen is the fixed
3942 * size of the link header (variable length headers not yet supported).
3943 */
3944 void
bpfattach(struct ifnet * ifp,u_int dlt,u_int hdrlen)3945 bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
3946 {
3947 bpf_attach(ifp, dlt, hdrlen, NULL, NULL);
3948 }
3949
/*
 * Register an (interface, dlt) pair with BPF, with optional send and tap
 * callbacks.  Entries for the same interface are kept contiguous in
 * bpf_iflist, and for an Ethernet interface the DLT_EN10MB entry is put
 * first.  Takes a reference on the interface.
 * Returns 0 on success, EEXIST if the pair is already attached.
 */
errno_t
bpf_attach(
	ifnet_t ifp,
	u_int32_t dlt,
	u_int32_t hdrlen,
	bpf_send_func send,
	bpf_tap_func tap)
{
	struct bpf_if *bp;
	struct bpf_if *bp_new;
	struct bpf_if *bp_before_first = NULL; /* element preceding ifp's run */
	struct bpf_if *bp_first = NULL;        /* first entry for ifp */
	struct bpf_if *bp_last = NULL;         /* last entry for ifp */
	boolean_t found;

	/*
	 * Z_NOFAIL will cause a panic if the allocation fails
	 */
	bp_new = kalloc_type(struct bpf_if, Z_WAITOK | Z_NOFAIL | Z_ZERO);

	lck_mtx_lock(bpf_mlock);

	/*
	 * Check if this interface/dlt is already attached. Remember the
	 * first and last attachment for this interface, as well as the
	 * element before the first attachment.
	 */
	found = FALSE;
	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		if (bp->bif_ifp != ifp) {
			if (bp_first != NULL) {
				/* no more elements for this interface */
				break;
			}
			bp_before_first = bp;
		} else {
			if (bp->bif_dlt == dlt) {
				found = TRUE;
				break;
			}
			if (bp_first == NULL) {
				bp_first = bp;
			}
			bp_last = bp;
		}
	}
	if (found) {
		/* Duplicate attachment: release the lock and the unused element */
		lck_mtx_unlock(bpf_mlock);
		os_log_error(OS_LOG_DEFAULT,
		    "bpfattach - %s with dlt %d is already attached",
		    if_name(ifp), dlt);
		kfree_type(struct bpf_if, bp_new);
		return EEXIST;
	}

	bp_new->bif_ifp = ifp;
	bp_new->bif_dlt = dlt;
	bp_new->bif_send = send;
	bp_new->bif_tap = tap;

	if (bp_first == NULL) {
		/* No other entries for this ifp */
		bp_new->bif_next = bpf_iflist;
		bpf_iflist = bp_new;
	} else {
		if (ifnet_type(ifp) == IFT_ETHER && dlt == DLT_EN10MB) {
			/* Make this the first entry for this interface */
			if (bp_before_first != NULL) {
				/* point the previous to us */
				bp_before_first->bif_next = bp_new;
			} else {
				/* we're the new head */
				bpf_iflist = bp_new;
			}
			bp_new->bif_next = bp_first;
		} else {
			/* Add this after the last entry for this interface */
			bp_new->bif_next = bp_last->bif_next;
			bp_last->bif_next = bp_new;
		}
	}

	/*
	 * Compute the length of the bpf header. This is not necessarily
	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
	 * that the network layer header begins on a longword boundary (for
	 * performance reasons and to alleviate alignment restrictions).
	 */
	bp_new->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;
	bp_new->bif_exthdrlen = BPF_WORDALIGN(hdrlen +
	    sizeof(struct bpf_hdr_ext)) - hdrlen;
	bp_new->bif_comphdrlen = BPF_WORDALIGN(hdrlen +
	    sizeof(struct bpf_comp_hdr)) - hdrlen;

	/* Take a reference on the interface */
	ifnet_reference(ifp);

	lck_mtx_unlock(bpf_mlock);

	return 0;
}
4051
4052 /*
4053 * Detach bpf from an interface. This involves detaching each descriptor
4054 * associated with the interface, and leaving bd_bif NULL. Notify each
4055 * descriptor as it's detached so that any sleepers wake up and get
4056 * ENXIO.
4057 */
4058 void
bpfdetach(struct ifnet * ifp)4059 bpfdetach(struct ifnet *ifp)
4060 {
4061 struct bpf_if *bp, *bp_prev, *bp_next;
4062 struct bpf_d *d;
4063
4064 if (bpf_debug != 0) {
4065 os_log(OS_LOG_DEFAULT, "%s: %s", __func__, if_name(ifp));
4066 }
4067
4068 lck_mtx_lock(bpf_mlock);
4069
4070 /*
4071 * Build the list of devices attached to that interface
4072 * that we need to free while keeping the lock to maintain
4073 * the integrity of the interface list
4074 */
4075 bp_prev = NULL;
4076 for (bp = bpf_iflist; bp != NULL; bp = bp_next) {
4077 bp_next = bp->bif_next;
4078
4079 if (ifp != bp->bif_ifp) {
4080 bp_prev = bp;
4081 continue;
4082 }
4083 /* Unlink from the interface list */
4084 if (bp_prev) {
4085 bp_prev->bif_next = bp->bif_next;
4086 } else {
4087 bpf_iflist = bp->bif_next;
4088 }
4089
4090 /* Detach the devices attached to the interface */
4091 while ((d = bp->bif_dlist) != NULL) {
4092 /*
4093 * Take an extra reference to prevent the device
4094 * from being freed when bpf_detachd() releases
4095 * the reference for the interface list
4096 */
4097 bpf_acquire_d(d);
4098 bpf_detachd(d);
4099 bpf_wakeup(d);
4100 bpf_release_d(d);
4101 }
4102 ifnet_release(ifp);
4103 }
4104
4105 lck_mtx_unlock(bpf_mlock);
4106 }
4107
4108 void
bpf_init(__unused void * unused)4109 bpf_init(__unused void *unused)
4110 {
4111 int maj;
4112
4113 /* bpf_comp_hdr is an overlay of bpf_hdr */
4114 _CASSERT(BPF_WORDALIGN(sizeof(struct bpf_hdr)) ==
4115 BPF_WORDALIGN(sizeof(struct bpf_comp_hdr)));
4116
4117 /* compression length must fits in a byte */
4118 _CASSERT(BPF_HDR_COMP_LEN_MAX <= UCHAR_MAX );
4119
4120 (void) PE_parse_boot_argn("bpf_hdr_comp", &bpf_hdr_comp_enable,
4121 sizeof(bpf_hdr_comp_enable));
4122
4123 if (bpf_devsw_installed == 0) {
4124 bpf_devsw_installed = 1;
4125 maj = cdevsw_add(CDEV_MAJOR, &bpf_cdevsw);
4126 if (maj == -1) {
4127 bpf_devsw_installed = 0;
4128 os_log_error(OS_LOG_DEFAULT,
4129 "bpf_init: failed to allocate a major number");
4130 return;
4131 }
4132
4133 for (int i = 0; i < NBPFILTER; i++) {
4134 bpf_make_dev_t(maj);
4135 }
4136 }
4137 }
4138
4139 static int
4140 sysctl_bpf_maxbufsize SYSCTL_HANDLER_ARGS
4141 {
4142 #pragma unused(arg1, arg2)
4143 int i, err;
4144
4145 i = bpf_maxbufsize;
4146
4147 err = sysctl_handle_int(oidp, &i, 0, req);
4148 if (err != 0 || req->newptr == USER_ADDR_NULL) {
4149 return err;
4150 }
4151
4152 if (i < 0 || i > BPF_BUFSIZE_CAP) {
4153 i = BPF_BUFSIZE_CAP;
4154 }
4155
4156 bpf_maxbufsize = i;
4157 return err;
4158 }
4159
4160 static int
4161 sysctl_bpf_bufsize_cap SYSCTL_HANDLER_ARGS
4162 {
4163 #pragma unused(arg1, arg2)
4164 int i, err;
4165
4166 i = BPF_BUFSIZE_CAP;
4167
4168 err = sysctl_handle_int(oidp, &i, 0, req);
4169 if (err != 0 || req->newptr == USER_ADDR_NULL) {
4170 return err;
4171 }
4172
4173 return err;
4174 }
4175
4176 /*
4177 * Fill filter statistics
4178 */
4179 static void
bpfstats_fill_xbpf(struct xbpf_d * d,struct bpf_d * bd)4180 bpfstats_fill_xbpf(struct xbpf_d *d, struct bpf_d *bd)
4181 {
4182 LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);
4183
4184 d->bd_structsize = sizeof(struct xbpf_d);
4185 d->bd_promisc = bd->bd_promisc != 0 ? 1 : 0;
4186 d->bd_immediate = d->bd_immediate != 0 ? 1 : 0;
4187 d->bd_hdrcmplt = bd->bd_hdrcmplt != 0 ? 1 : 0;
4188 d->bd_async = bd->bd_async != 0 ? 1 : 0;
4189 d->bd_headdrop = bd->bd_headdrop != 0 ? 1 : 0;
4190 d->bd_seesent = bd->bd_seesent != 0 ? 1 : 0;
4191 d->bh_compreq = bd->bd_flags & BPF_COMP_REQ ? 1 : 0;
4192 d->bh_compenabled = bd->bd_flags & BPF_COMP_ENABLED ? 1 : 0;
4193 d->bd_exthdr = bd->bd_flags & BPF_EXTENDED_HDR ? 1 : 0;
4194 d->bd_trunc = bd->bd_flags & BPF_TRUNCATE ? 1 : 0;
4195 d->bd_pkthdrv2 = bd->bd_flags & BPF_PKTHDRV2 ? 1 : 0;
4196
4197 d->bd_dev_minor = (uint8_t)bd->bd_dev_minor;
4198
4199 d->bd_sig = bd->bd_sig;
4200
4201 d->bd_rcount = bd->bd_rcount;
4202 d->bd_dcount = bd->bd_dcount;
4203 d->bd_fcount = bd->bd_fcount;
4204 d->bd_slen = bd->bd_slen;
4205 d->bd_hlen = bd->bd_hlen;
4206 d->bd_bufsize = bd->bd_bufsize;
4207 d->bd_pid = bd->bd_pid;
4208 if (bd->bd_bif != NULL && bd->bd_bif->bif_ifp != NULL) {
4209 strlcpy(d->bd_ifname,
4210 bd->bd_bif->bif_ifp->if_xname, IFNAMSIZ);
4211 }
4212
4213 d->bd_comp_count = bd->bd_bcs.bcs_count_compressed_prefix;
4214 d->bd_comp_size = bd->bd_bcs.bcs_total_compressed_prefix_size;
4215
4216 d->bd_scnt = bd->bd_scnt;
4217 d->bd_hcnt = bd->bd_hcnt;
4218
4219 d->bd_read_count = bd->bd_bcs.bcs_total_read;
4220 d->bd_fsize = bd->bd_bcs.bcs_total_size;
4221 }
4222
4223 /*
4224 * Handle `netstat -B' stats request
4225 */
4226 static int
4227 sysctl_bpf_stats SYSCTL_HANDLER_ARGS
4228 {
4229 int error;
4230 struct xbpf_d *xbdbuf;
4231 unsigned int x_cnt;
4232 vm_size_t buf_size;
4233
4234 if (req->oldptr == USER_ADDR_NULL) {
4235 return SYSCTL_OUT(req, 0, nbpfilter * sizeof(struct xbpf_d));
4236 }
4237 if (nbpfilter == 0) {
4238 return SYSCTL_OUT(req, 0, 0);
4239 }
4240 buf_size = req->oldlen;
4241 xbdbuf = kalloc_data(buf_size, Z_WAITOK | Z_ZERO);
4242
4243 lck_mtx_lock(bpf_mlock);
4244 if (buf_size < (nbpfilter * sizeof(struct xbpf_d))) {
4245 lck_mtx_unlock(bpf_mlock);
4246 kfree_data(xbdbuf, buf_size);
4247 return ENOMEM;
4248 }
4249 x_cnt = 0;
4250 unsigned int i;
4251
4252 for (i = 0; i < nbpfilter; i++) {
4253 struct bpf_d *bd = bpf_dtab[i];
4254 struct xbpf_d *xbd;
4255
4256 if (bd == NULL || bd == BPF_DEV_RESERVED ||
4257 (bd->bd_flags & BPF_CLOSING) != 0) {
4258 continue;
4259 }
4260 VERIFY(x_cnt < nbpfilter);
4261
4262 xbd = &xbdbuf[x_cnt++];
4263 bpfstats_fill_xbpf(xbd, bd);
4264 }
4265 lck_mtx_unlock(bpf_mlock);
4266
4267 error = SYSCTL_OUT(req, xbdbuf, x_cnt * sizeof(struct xbpf_d));
4268 kfree_data(xbdbuf, buf_size);
4269 return error;
4270 }
4271