1 /*
2 * Copyright (c) 2000-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1990, 1991, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * This code is derived from the Stanford/CMU enet packet filter,
33 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
34 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
35 * Berkeley Laboratory.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * @(#)bpf.c 8.2 (Berkeley) 3/28/94
66 *
67 * $FreeBSD: src/sys/net/bpf.c,v 1.59.2.5 2001/01/05 04:49:09 jdp Exp $
68 */
69 /*
70 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
71 * support for mandatory and extensible security protections. This notice
72 * is included in support of clause 2.2 (b) of the Apple Public License,
73 * Version 2.0.
74 */
75
76 #include "bpf.h"
77
78 #ifndef __GNUC__
79 #define inline
80 #else
81 #define inline __inline
82 #endif
83
84 #include <sys/param.h>
85 #include <sys/systm.h>
86 #include <sys/conf.h>
87 #include <sys/malloc.h>
88 #include <sys/mbuf.h>
89 #include <sys/time.h>
90 #include <sys/proc.h>
91 #include <sys/signalvar.h>
92 #include <sys/filio.h>
93 #include <sys/sockio.h>
94 #include <sys/ttycom.h>
95 #include <sys/filedesc.h>
96 #include <sys/uio_internal.h>
97 #include <sys/file_internal.h>
98 #include <sys/event.h>
99
100 #include <sys/poll.h>
101
102 #include <sys/socket.h>
103 #include <sys/socketvar.h>
104 #include <sys/vnode.h>
105
106 #include <net/if.h>
107 #include <net/bpf.h>
108 #include <net/bpfdesc.h>
109
110 #include <netinet/in.h>
111 #include <netinet/ip.h>
112 #include <netinet/ip6.h>
113 #include <netinet/in_pcb.h>
114 #include <netinet/in_var.h>
115 #include <netinet/ip_var.h>
116 #include <netinet/tcp.h>
117 #include <netinet/tcp_var.h>
118 #include <netinet/udp.h>
119 #include <netinet/udp_var.h>
120 #include <netinet/if_ether.h>
121 #include <netinet/isakmp.h>
122 #include <netinet6/esp.h>
123 #include <sys/kernel.h>
124 #include <sys/sysctl.h>
125 #include <net/firewire.h>
126
127 #include <miscfs/devfs/devfs.h>
128 #include <net/dlil.h>
129 #include <net/pktap.h>
130
131 #include <kern/locks.h>
132 #include <kern/thread_call.h>
133 #include <libkern/section_keywords.h>
134
135 #include <os/log.h>
136
extern int tvtohz(struct timeval *);

/* Default per-descriptor capture buffer size (bytes); see bpf_bufsize sysctl. */
#define BPF_BUFSIZE 4096
/* Compatibility shim: the 4.4BSD UIOMOVE took a rw-code argument; ours ignores it. */
#define UIOMOVE(cp, len, code, uio) uiomove(cp, len, uio)

#define PRINET 26 /* interruptible */

/* Header sizes used when computing truncation lengths for IKE/ESP traffic. */
#define ISAKMP_HDR_SIZE (sizeof(struct isakmp) + sizeof(struct isakmp_gen))
#define ESP_HDR_SIZE sizeof(struct newesp)

/* Signature of the copy routine used when capturing packet bytes. */
typedef void (*pktcopyfunc_t)(const void *, void *, size_t);

/*
 * The default read buffer size is patchable.
 */
static unsigned int bpf_bufsize = BPF_BUFSIZE;
SYSCTL_INT(_debug, OID_AUTO, bpf_bufsize, CTLFLAG_RW | CTLFLAG_LOCKED,
    &bpf_bufsize, 0, "");

/*
 * Upper bound a process may set with BIOCSBLEN; writable via sysctl but
 * capped at BPF_MAXSIZE_CAP (half of the copy-size panic limit) by the
 * sysctl handler.
 */
static int sysctl_bpf_maxbufsize SYSCTL_HANDLER_ARGS;
extern const int copysize_limit_panic;
#define BPF_MAXSIZE_CAP (copysize_limit_panic >> 1)
__private_extern__ unsigned int bpf_maxbufsize = BPF_MAXBUFSIZE;
SYSCTL_PROC(_debug, OID_AUTO, bpf_maxbufsize, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &bpf_maxbufsize, 0,
    sysctl_bpf_maxbufsize, "I", "Default BPF max buffer size");

/* Hard limit on how many /dev/bpfN nodes bpf_make_dev_t() will create. */
static unsigned int bpf_maxdevices = 256;
SYSCTL_UINT(_debug, OID_AUTO, bpf_maxdevices, CTLFLAG_RW | CTLFLAG_LOCKED,
    &bpf_maxdevices, 0, "");
/*
 * bpf_wantpktap controls the default visibility of DLT_PKTAP.
 * On OS X it is off by default, so a process needs to use the ioctl
 * BPF_WANT_PKTAP explicitly to be able to use DLT_PKTAP.
 */
#if !XNU_TARGET_OS_OSX
static unsigned int bpf_wantpktap = 1;
#else /* XNU_TARGET_OS_OSX */
static unsigned int bpf_wantpktap = 0;
#endif /* XNU_TARGET_OS_OSX */
SYSCTL_UINT(_debug, OID_AUTO, bpf_wantpktap, CTLFLAG_RW | CTLFLAG_LOCKED,
    &bpf_wantpktap, 0, "");

/* Non-zero enables verbose printf() tracing of open/close/attach/detach. */
static int bpf_debug = 0;
SYSCTL_INT(_debug, OID_AUTO, bpf_debug, CTLFLAG_RW | CTLFLAG_LOCKED,
    &bpf_debug, 0, "");

/* Counts packets whose computed truncation length exceeded the packet size. */
static unsigned long bpf_trunc_overflow = 0;
SYSCTL_ULONG(_debug, OID_AUTO, bpf_trunc_overflow, CTLFLAG_RD | CTLFLAG_LOCKED,
    &bpf_trunc_overflow, "");

/*
 * bpf_iflist is the list of interfaces; each corresponds to an ifnet
 * bpf_dtab holds pointer to the descriptors, indexed by minor device #
 */
static struct bpf_if *bpf_iflist;
#ifdef __APPLE__
/*
 * BSD now stores the bpf_d in the dev_t which is a struct
 * on their system. Our dev_t is an int, so we still store
 * the bpf_d in a separate table indexed by minor device #.
 *
 * The value stored in bpf_dtab[n] represent three states:
 *   NULL: device not opened
 *   BPF_DEV_RESERVED: device opening or closing
 *   other: device <n> opened with pointer to storage
 */
#define BPF_DEV_RESERVED ((struct bpf_d *)(uintptr_t)1)
static struct bpf_d **bpf_dtab = NULL;
static unsigned int bpf_dtab_size = 0;
static unsigned int nbpfilter = 0;

/* Single mutex serializing all BPF state (descriptor table, iflist, bpf_d). */
static LCK_GRP_DECLARE(bpf_mlock_grp, "bpf");
static LCK_MTX_DECLARE(bpf_mlock_data, &bpf_mlock_grp);
static lck_mtx_t *const bpf_mlock = &bpf_mlock_data;

#endif /* __APPLE__ */
214
/* Forward declarations for the internal helpers implemented below. */
static int bpf_allocbufs(struct bpf_d *);
static errno_t bpf_attachd(struct bpf_d *d, struct bpf_if *bp);
static int bpf_detachd(struct bpf_d *d, int);
static void bpf_freed(struct bpf_d *);
static int bpf_movein(struct uio *, int,
    struct mbuf **, struct sockaddr *, int *);
static int bpf_setif(struct bpf_d *, ifnet_t ifp, bool, bool);
static void bpf_timed_out(void *, void *);
static void bpf_wakeup(struct bpf_d *);
static uint32_t get_pkt_trunc_len(struct bpf_packet *);
static void catchpacket(struct bpf_d *, struct bpf_packet *, u_int, int);
static void reset_d(struct bpf_d *);
static int bpf_setf(struct bpf_d *, u_int, user_addr_t, u_long);
static int bpf_getdltlist(struct bpf_d *, caddr_t, struct proc *);
static int bpf_setdlt(struct bpf_d *, u_int);
static int bpf_set_traffic_class(struct bpf_d *, int);
static void bpf_set_packet_service_class(struct mbuf *, int);

/* Reference counting on struct bpf_d; both require bpf_mlock held. */
static void bpf_acquire_d(struct bpf_d *);
static void bpf_release_d(struct bpf_d *);

/* Set once bpf_init() has registered the character device switch. */
static int bpf_devsw_installed;

void bpf_init(void *unused);
static int bpf_tap_callback(struct ifnet *ifp, struct mbuf *m);

/*
 * Darwin differs from BSD here, the following are static
 * on BSD and not static on Darwin.
 */
d_open_t bpfopen;
d_close_t bpfclose;
d_read_t bpfread;
d_write_t bpfwrite;
ioctl_fcn_t bpfioctl;
select_fcn_t bpfselect;

/* Darwin's cdevsw struct differs slightly from BSDs */
#define CDEV_MAJOR 23
static const struct cdevsw bpf_cdevsw = {
	.d_open = bpfopen,
	.d_close = bpfclose,
	.d_read = bpfread,
	.d_write = bpfwrite,
	.d_ioctl = bpfioctl,
	.d_stop = eno_stop,
	.d_reset = eno_reset,
	.d_ttys = NULL,
	.d_select = bpfselect,
	.d_mmap = eno_mmap,
	.d_strategy = eno_strat,
	.d_reserved_1 = eno_getc,
	.d_reserved_2 = eno_putc,
	.d_type = 0
};

/* Size of the fixed sockaddr header that precedes sa_data. */
#define SOCKADDR_HDR_LEN offsetof(struct sockaddr, sa_data)
272
/*
 * bpf_movein - copy a packet supplied by a write(2) on a bpf device from
 * `uio' into a newly allocated mbuf (chain head returned through *mp).
 *
 * `linktype' is the DLT of the attached interface; it selects the
 * link-level header length (hlen) and the address family stored in *sockp.
 * When sockp is non-NULL, the first hlen bytes of the user data (the link
 * header) are copied into sockp->sa_data and stripped from the mbuf; when
 * sockp is NULL the data is sent as-is and hlen is forced to 0.
 * *datlen receives the payload length (total minus hlen).
 *
 * Returns 0 on success, EIO for an unsupported DLT or a bad length, and
 * ENOBUFS if mbuf/cluster allocation fails.
 */
static int
bpf_movein(struct uio *uio, int linktype, struct mbuf **mp,
    struct sockaddr *sockp, int *datlen)
{
	struct mbuf *m;
	int error;
	int len;
	uint8_t sa_family;
	int hlen;

	switch (linktype) {
#if SLIP
	case DLT_SLIP:
		sa_family = AF_INET;
		hlen = 0;
		break;
#endif /* SLIP */

	case DLT_EN10MB:
		sa_family = AF_UNSPEC;
		/* XXX Would MAXLINKHDR be better? */
		hlen = sizeof(struct ether_header);
		break;

#if FDDI
	case DLT_FDDI:
#if defined(__FreeBSD__) || defined(__bsdi__)
		sa_family = AF_IMPLINK;
		hlen = 0;
#else
		sa_family = AF_UNSPEC;
		/* XXX 4(FORMAC)+6(dst)+6(src)+3(LLC)+5(SNAP) */
		hlen = 24;
#endif
		break;
#endif /* FDDI */

	case DLT_RAW:
	case DLT_NULL:
		sa_family = AF_UNSPEC;
		hlen = 0;
		break;

#ifdef __FreeBSD__
	case DLT_ATM_RFC1483:
		/*
		 * en atm driver requires 4-byte atm pseudo header.
		 * though it isn't standard, vpi:vci needs to be
		 * specified anyway.
		 */
		sa_family = AF_UNSPEC;
		hlen = 12;      /* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
		break;
#endif

	case DLT_PPP:
		sa_family = AF_UNSPEC;
		hlen = 4;       /* This should match PPP_HDRLEN */
		break;

	case DLT_APPLE_IP_OVER_IEEE1394:
		sa_family = AF_UNSPEC;
		hlen = sizeof(struct firewire_header);
		break;

	case DLT_IEEE802_11:            /* IEEE 802.11 wireless */
		sa_family = AF_IEEE80211;
		hlen = 0;
		break;

	case DLT_IEEE802_11_RADIO:
		sa_family = AF_IEEE80211;
		hlen = 0;
		break;

	default:
		return EIO;
	}

	// LP64todo - fix this!
	len = (int)uio_resid(uio);
	/* Reject writes shorter than the link header or larger than a cluster. */
	if (len < hlen || (unsigned)len > MCLBYTES || len - hlen > MCLBYTES) {
		return EIO;
	}

	*datlen = len - hlen;

	if (sockp) {
		/*
		 * Build a sockaddr based on the data link layer type.
		 * We do this at this level because the ethernet header
		 * is copied directly into the data field of the sockaddr.
		 * In the case of SLIP, there is no header and the packet
		 * is forwarded as is.
		 * Also, we are careful to leave room at the front of the mbuf
		 * for the link level header.
		 */
		if ((hlen + SOCKADDR_HDR_LEN) > sockp->sa_len) {
			return EIO;
		}
		sockp->sa_family = sa_family;
	} else {
		/*
		 * We're directly sending the packet data supplied by
		 * the user; we don't need to make room for the link
		 * header, and don't need the header length value any
		 * more, so set it to 0.
		 */
		hlen = 0;
	}

	MGETHDR(m, M_WAIT, MT_DATA);
	if (m == 0) {
		return ENOBUFS;
	}
	/* Data won't fit in the mbuf's internal buffer: attach a cluster. */
	if ((unsigned)len > MHLEN) {
		MCLGET(m, M_WAIT);
		if ((m->m_flags & M_EXT) == 0) {
			error = ENOBUFS;
			goto bad;
		}
	}
	m->m_pkthdr.len = m->m_len = len;
	m->m_pkthdr.rcvif = NULL;
	*mp = m;

	/*
	 * Make room for link header.
	 */
	if (hlen != 0) {
		/* Strip the link header from the mbuf and copy it into sockp. */
		m->m_pkthdr.len -= hlen;
		m->m_len -= hlen;
		m->m_data += hlen; /* XXX */
		error = UIOMOVE((caddr_t)sockp->sa_data, hlen, UIO_WRITE, uio);
		if (error) {
			goto bad;
		}
	}
	/* Copy the payload (everything after the link header) into the mbuf. */
	error = UIOMOVE(mtod(m, caddr_t), len - hlen, UIO_WRITE, uio);
	if (error) {
		goto bad;
	}

	/* Check for multicast destination */
	switch (linktype) {
	case DLT_EN10MB: {
		struct ether_header *eh;

		/* Tag the mbuf so the driver sees broadcast/multicast properly. */
		eh = mtod(m, struct ether_header *);
		if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
			if (_ether_cmp(etherbroadcastaddr,
			    eh->ether_dhost) == 0) {
				m->m_flags |= M_BCAST;
			} else {
				m->m_flags |= M_MCAST;
			}
		}
		break;
	}
	}

	return 0;
bad:
	m_freem(m);
	return error;
}
439
440 #ifdef __APPLE__
441
442 /*
443 * The dynamic addition of a new device node must block all processes that
444 * are opening the last device so that no process will get an unexpected
445 * ENOENT
446 */
447 static void
bpf_make_dev_t(int maj)448 bpf_make_dev_t(int maj)
449 {
450 static int bpf_growing = 0;
451 unsigned int cur_size = nbpfilter, i;
452
453 if (nbpfilter >= bpf_maxdevices) {
454 return;
455 }
456
457 while (bpf_growing) {
458 /* Wait until new device has been created */
459 (void) tsleep((caddr_t)&bpf_growing, PZERO, "bpf_growing", 0);
460 }
461 if (nbpfilter > cur_size) {
462 /* other thread grew it already */
463 return;
464 }
465 bpf_growing = 1;
466
467 /* need to grow bpf_dtab first */
468 if (nbpfilter == bpf_dtab_size) {
469 unsigned int new_dtab_size;
470 struct bpf_d **new_dtab = NULL;
471
472 new_dtab_size = bpf_dtab_size + NBPFILTER;
473 new_dtab = krealloc_type(struct bpf_d *,
474 bpf_dtab_size, new_dtab_size, bpf_dtab, Z_WAITOK | Z_ZERO);
475 if (new_dtab == 0) {
476 printf("bpf_make_dev_t: malloc bpf_dtab failed\n");
477 goto done;
478 }
479 bpf_dtab = new_dtab;
480 bpf_dtab_size = new_dtab_size;
481 }
482 i = nbpfilter++;
483 (void) devfs_make_node(makedev(maj, i),
484 DEVFS_CHAR, UID_ROOT, GID_WHEEL, 0600,
485 "bpf%d", i);
486 done:
487 bpf_growing = 0;
488 wakeup((caddr_t)&bpf_growing);
489 }
490
491 #endif
492
493 /*
494 * Attach file to the bpf interface, i.e. make d listen on bp.
495 */
496 static errno_t
bpf_attachd(struct bpf_d * d,struct bpf_if * bp)497 bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
498 {
499 int first = bp->bif_dlist == NULL;
500 int error = 0;
501
502 /*
503 * Point d at bp, and add d to the interface's list of listeners.
504 * Finally, point the driver's bpf cookie at the interface so
505 * it will divert packets to bpf.
506 */
507 d->bd_bif = bp;
508 d->bd_next = bp->bif_dlist;
509 bp->bif_dlist = d;
510
511 /*
512 * Take a reference on the device even if an error is returned
513 * because we keep the device in the interface's list of listeners
514 */
515 bpf_acquire_d(d);
516
517 if (first) {
518 /* Find the default bpf entry for this ifp */
519 if (bp->bif_ifp->if_bpf == NULL) {
520 struct bpf_if *tmp, *primary = NULL;
521
522 for (tmp = bpf_iflist; tmp; tmp = tmp->bif_next) {
523 if (tmp->bif_ifp == bp->bif_ifp) {
524 primary = tmp;
525 break;
526 }
527 }
528 bp->bif_ifp->if_bpf = primary;
529 }
530 /* Only call dlil_set_bpf_tap for primary dlt */
531 if (bp->bif_ifp->if_bpf == bp) {
532 dlil_set_bpf_tap(bp->bif_ifp, BPF_TAP_INPUT_OUTPUT,
533 bpf_tap_callback);
534 }
535
536 if (bp->bif_tap != NULL) {
537 error = bp->bif_tap(bp->bif_ifp, bp->bif_dlt,
538 BPF_TAP_INPUT_OUTPUT);
539 }
540 }
541
542 /*
543 * Reset the detach flags in case we previously detached an interface
544 */
545 d->bd_flags &= ~(BPF_DETACHING | BPF_DETACHED);
546
547 if (bp->bif_dlt == DLT_PKTAP) {
548 d->bd_flags |= BPF_FINALIZE_PKTAP;
549 } else {
550 d->bd_flags &= ~BPF_FINALIZE_PKTAP;
551 }
552 return error;
553 }
554
555 /*
556 * Detach a file from its interface.
557 *
558 * Return 1 if was closed by some thread, 0 otherwise
559 */
560 static int
bpf_detachd(struct bpf_d * d,int closing)561 bpf_detachd(struct bpf_d *d, int closing)
562 {
563 struct bpf_d **p;
564 struct bpf_if *bp;
565 struct ifnet *ifp;
566
567 int bpf_closed = d->bd_flags & BPF_CLOSING;
568 /*
569 * Some other thread already detached
570 */
571 if ((d->bd_flags & (BPF_DETACHED | BPF_DETACHING)) != 0) {
572 goto done;
573 }
574 /*
575 * This thread is doing the detach
576 */
577 d->bd_flags |= BPF_DETACHING;
578
579 ifp = d->bd_bif->bif_ifp;
580 bp = d->bd_bif;
581
582 if (bpf_debug != 0) {
583 printf("%s: %llx %s%s\n",
584 __func__, (uint64_t)VM_KERNEL_ADDRPERM(d),
585 if_name(ifp), closing ? " closing" : "");
586 }
587
588 /* Remove d from the interface's descriptor list. */
589 p = &bp->bif_dlist;
590 while (*p != d) {
591 p = &(*p)->bd_next;
592 if (*p == 0) {
593 panic("bpf_detachd: descriptor not in list");
594 }
595 }
596 *p = (*p)->bd_next;
597 if (bp->bif_dlist == 0) {
598 /*
599 * Let the driver know that there are no more listeners.
600 */
601 /* Only call dlil_set_bpf_tap for primary dlt */
602 if (bp->bif_ifp->if_bpf == bp) {
603 dlil_set_bpf_tap(ifp, BPF_TAP_DISABLE, NULL);
604 }
605 if (bp->bif_tap) {
606 bp->bif_tap(ifp, bp->bif_dlt, BPF_TAP_DISABLE);
607 }
608
609 for (bp = bpf_iflist; bp; bp = bp->bif_next) {
610 if (bp->bif_ifp == ifp && bp->bif_dlist != 0) {
611 break;
612 }
613 }
614 if (bp == NULL) {
615 ifp->if_bpf = NULL;
616 }
617 }
618 d->bd_bif = NULL;
619 /*
620 * Check if this descriptor had requested promiscuous mode.
621 * If so, turn it off.
622 */
623 if (d->bd_promisc) {
624 d->bd_promisc = 0;
625 lck_mtx_unlock(bpf_mlock);
626 if (ifnet_set_promiscuous(ifp, 0)) {
627 /*
628 * Something is really wrong if we were able to put
629 * the driver into promiscuous mode, but can't
630 * take it out.
631 * Most likely the network interface is gone.
632 */
633 printf("%s: ifnet_set_promiscuous failed\n", __func__);
634 }
635 lck_mtx_lock(bpf_mlock);
636 }
637
638 /*
639 * Wake up other thread that are waiting for this thread to finish
640 * detaching
641 */
642 d->bd_flags &= ~BPF_DETACHING;
643 d->bd_flags |= BPF_DETACHED;
644
645 /* Refresh the local variable as d could have been modified */
646 bpf_closed = d->bd_flags & BPF_CLOSING;
647 /*
648 * Note that We've kept the reference because we may have dropped
649 * the lock when turning off promiscuous mode
650 */
651 bpf_release_d(d);
652
653 done:
654 /*
655 * When closing makes sure no other thread refer to the bpf_d
656 */
657 if (bpf_debug != 0) {
658 printf("%s: %llx done\n",
659 __func__, (uint64_t)VM_KERNEL_ADDRPERM(d));
660 }
661 /*
662 * Let the caller know the bpf_d is closed
663 */
664 if (bpf_closed) {
665 return 1;
666 } else {
667 return 0;
668 }
669 }
670
671 /*
672 * Start asynchronous timer, if necessary.
673 * Must be called with bpf_mlock held.
674 */
675 static void
bpf_start_timer(struct bpf_d * d)676 bpf_start_timer(struct bpf_d *d)
677 {
678 uint64_t deadline;
679 struct timeval tv;
680
681 if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
682 tv.tv_sec = d->bd_rtout / hz;
683 tv.tv_usec = (d->bd_rtout % hz) * tick;
684
685 clock_interval_to_deadline(
686 (uint32_t)tv.tv_sec * USEC_PER_SEC + tv.tv_usec,
687 NSEC_PER_USEC, &deadline);
688 /*
689 * The state is BPF_IDLE, so the timer hasn't
690 * been started yet, and hasn't gone off yet;
691 * there is no thread call scheduled, so this
692 * won't change the schedule.
693 *
694 * XXX - what if, by the time it gets entered,
695 * the deadline has already passed?
696 */
697 thread_call_enter_delayed(d->bd_thread_call, deadline);
698 d->bd_state = BPF_WAITING;
699 }
700 }
701
702 /*
703 * Cancel asynchronous timer.
704 * Must be called with bpf_mlock held.
705 */
706 static boolean_t
bpf_stop_timer(struct bpf_d * d)707 bpf_stop_timer(struct bpf_d *d)
708 {
709 /*
710 * If the timer has already gone off, this does nothing.
711 * Our caller is expected to set d->bd_state to BPF_IDLE,
712 * with the bpf_mlock, after we are called. bpf_timed_out()
713 * also grabs bpf_mlock, so, if the timer has gone off and
714 * bpf_timed_out() hasn't finished, it's waiting for the
715 * lock; when this thread releases the lock, it will
716 * find the state is BPF_IDLE, and just release the
717 * lock and return.
718 */
719 return thread_call_cancel(d->bd_thread_call);
720 }
721
722 void
bpf_acquire_d(struct bpf_d * d)723 bpf_acquire_d(struct bpf_d *d)
724 {
725 void *lr_saved = __builtin_return_address(0);
726
727 LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);
728
729 d->bd_refcnt += 1;
730
731 d->bd_ref_lr[d->bd_next_ref_lr] = lr_saved;
732 d->bd_next_ref_lr = (d->bd_next_ref_lr + 1) % BPF_REF_HIST;
733 }
734
735 void
bpf_release_d(struct bpf_d * d)736 bpf_release_d(struct bpf_d *d)
737 {
738 void *lr_saved = __builtin_return_address(0);
739
740 LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);
741
742 if (d->bd_refcnt <= 0) {
743 panic("%s: %p refcnt <= 0", __func__, d);
744 }
745
746 d->bd_refcnt -= 1;
747
748 d->bd_unref_lr[d->bd_next_unref_lr] = lr_saved;
749 d->bd_next_unref_lr = (d->bd_next_unref_lr + 1) % BPF_REF_HIST;
750
751 if (d->bd_refcnt == 0) {
752 /* Assert the device is detached */
753 if ((d->bd_flags & BPF_DETACHED) == 0) {
754 panic("%s: %p BPF_DETACHED not set", __func__, d);
755 }
756
757 kfree_type(struct bpf_d, d);
758 }
759 }
760
761 /*
762 * Open ethernet device. Returns ENXIO for illegal minor device number,
763 * EBUSY if file is open by another process.
764 */
765 /* ARGSUSED */
766 int
bpfopen(dev_t dev,int flags,__unused int fmt,struct proc * p)767 bpfopen(dev_t dev, int flags, __unused int fmt,
768 struct proc *p)
769 {
770 struct bpf_d *d;
771
772 lck_mtx_lock(bpf_mlock);
773 if ((unsigned int) minor(dev) >= nbpfilter) {
774 lck_mtx_unlock(bpf_mlock);
775 return ENXIO;
776 }
777 /*
778 * New device nodes are created on demand when opening the last one.
779 * The programming model is for processes to loop on the minor starting
780 * at 0 as long as EBUSY is returned. The loop stops when either the
781 * open succeeds or an error other that EBUSY is returned. That means
782 * that bpf_make_dev_t() must block all processes that are opening the
783 * last node. If not all processes are blocked, they could unexpectedly
784 * get ENOENT and abort their opening loop.
785 */
786 if ((unsigned int) minor(dev) == (nbpfilter - 1)) {
787 bpf_make_dev_t(major(dev));
788 }
789
790 /*
791 * Each minor can be opened by only one process. If the requested
792 * minor is in use, return EBUSY.
793 *
794 * Important: bpfopen() and bpfclose() have to check and set the status
795 * of a device in the same lockin context otherwise the device may be
796 * leaked because the vnode use count will be unpextectly greater than 1
797 * when close() is called.
798 */
799 if (bpf_dtab[minor(dev)] == NULL) {
800 /* Reserve while opening */
801 bpf_dtab[minor(dev)] = BPF_DEV_RESERVED;
802 } else {
803 lck_mtx_unlock(bpf_mlock);
804 return EBUSY;
805 }
806 d = kalloc_type(struct bpf_d, M_WAIT | Z_ZERO);
807 if (d == NULL) {
808 /* this really is a catastrophic failure */
809 printf("bpfopen: malloc bpf_d failed\n");
810 bpf_dtab[minor(dev)] = NULL;
811 lck_mtx_unlock(bpf_mlock);
812 return ENOMEM;
813 }
814
815 /* Mark "in use" and do most initialization. */
816 bpf_acquire_d(d);
817 d->bd_bufsize = bpf_bufsize;
818 d->bd_sig = SIGIO;
819 d->bd_seesent = 1;
820 d->bd_oflags = flags;
821 d->bd_state = BPF_IDLE;
822 d->bd_traffic_class = SO_TC_BE;
823 d->bd_flags |= BPF_DETACHED;
824 if (bpf_wantpktap) {
825 d->bd_flags |= BPF_WANT_PKTAP;
826 } else {
827 d->bd_flags &= ~BPF_WANT_PKTAP;
828 }
829 d->bd_thread_call = thread_call_allocate(bpf_timed_out, d);
830 if (d->bd_thread_call == NULL) {
831 printf("bpfopen: malloc thread call failed\n");
832 bpf_dtab[minor(dev)] = NULL;
833 bpf_release_d(d);
834 lck_mtx_unlock(bpf_mlock);
835
836 return ENOMEM;
837 }
838 d->bd_opened_by = p;
839 uuid_generate(d->bd_uuid);
840
841 bpf_dtab[minor(dev)] = d; /* Mark opened */
842 lck_mtx_unlock(bpf_mlock);
843
844 return 0;
845 }
846
847 /*
848 * Close the descriptor by detaching it from its interface,
849 * deallocating its buffers, and marking it free.
850 */
851 /* ARGSUSED */
852 int
bpfclose(dev_t dev,__unused int flags,__unused int fmt,__unused struct proc * p)853 bpfclose(dev_t dev, __unused int flags, __unused int fmt,
854 __unused struct proc *p)
855 {
856 struct bpf_d *d;
857
858 /* Take BPF lock to ensure no other thread is using the device */
859 lck_mtx_lock(bpf_mlock);
860
861 d = bpf_dtab[minor(dev)];
862 if (d == NULL || d == BPF_DEV_RESERVED) {
863 lck_mtx_unlock(bpf_mlock);
864 return ENXIO;
865 }
866
867 /*
868 * Other threads may call bpd_detachd() if we drop the bpf_mlock
869 */
870 d->bd_flags |= BPF_CLOSING;
871
872 if (bpf_debug != 0) {
873 printf("%s: %llx\n",
874 __func__, (uint64_t)VM_KERNEL_ADDRPERM(d));
875 }
876
877 bpf_dtab[minor(dev)] = BPF_DEV_RESERVED; /* Reserve while closing */
878
879 /*
880 * Deal with any in-progress timeouts.
881 */
882 switch (d->bd_state) {
883 case BPF_IDLE:
884 /*
885 * Not waiting for a timeout, and no timeout happened.
886 */
887 break;
888
889 case BPF_WAITING:
890 /*
891 * Waiting for a timeout.
892 * Cancel any timer that has yet to go off,
893 * and mark the state as "closing".
894 * Then drop the lock to allow any timers that
895 * *have* gone off to run to completion, and wait
896 * for them to finish.
897 */
898 if (!bpf_stop_timer(d)) {
899 /*
900 * There was no pending call, so the call must
901 * have been in progress. Wait for the call to
902 * complete; we have to drop the lock while
903 * waiting. to let the in-progrss call complete
904 */
905 d->bd_state = BPF_DRAINING;
906 while (d->bd_state == BPF_DRAINING) {
907 msleep((caddr_t)d, bpf_mlock, PRINET,
908 "bpfdraining", NULL);
909 }
910 }
911 d->bd_state = BPF_IDLE;
912 break;
913
914 case BPF_TIMED_OUT:
915 /*
916 * Timer went off, and the timeout routine finished.
917 */
918 d->bd_state = BPF_IDLE;
919 break;
920
921 case BPF_DRAINING:
922 /*
923 * Another thread is blocked on a close waiting for
924 * a timeout to finish.
925 * This "shouldn't happen", as the first thread to enter
926 * bpfclose() will set bpf_dtab[minor(dev)] to 1, and
927 * all subsequent threads should see that and fail with
928 * ENXIO.
929 */
930 panic("Two threads blocked in a BPF close");
931 break;
932 }
933
934 if (d->bd_bif) {
935 bpf_detachd(d, 1);
936 }
937 selthreadclear(&d->bd_sel);
938 thread_call_free(d->bd_thread_call);
939
940 while (d->bd_hbuf_read != 0) {
941 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
942 }
943
944 bpf_freed(d);
945
946 /* Mark free in same context as bpfopen comes to check */
947 bpf_dtab[minor(dev)] = NULL; /* Mark closed */
948
949 bpf_release_d(d);
950
951 lck_mtx_unlock(bpf_mlock);
952
953 return 0;
954 }
955
956 #define BPF_SLEEP bpf_sleep
957
958 static int
bpf_sleep(struct bpf_d * d,int pri,const char * wmesg,int timo)959 bpf_sleep(struct bpf_d *d, int pri, const char *wmesg, int timo)
960 {
961 u_int64_t abstime = 0;
962
963 if (timo != 0) {
964 clock_interval_to_deadline(timo, NSEC_PER_SEC / hz, &abstime);
965 }
966
967 return msleep1((caddr_t)d, bpf_mlock, pri, wmesg, abstime);
968 }
969
970 static void
bpf_finalize_pktap(struct bpf_hdr * hp,struct pktap_header * pktaphdr)971 bpf_finalize_pktap(struct bpf_hdr *hp, struct pktap_header *pktaphdr)
972 {
973 if (pktaphdr->pth_flags & PTH_FLAG_V2_HDR) {
974 struct pktap_v2_hdr *pktap_v2_hdr;
975
976 pktap_v2_hdr = (struct pktap_v2_hdr *)pktaphdr;
977
978 if (pktap_v2_hdr->pth_flags & PTH_FLAG_DELAY_PKTAP) {
979 pktap_v2_finalize_proc_info(pktap_v2_hdr);
980 }
981 } else {
982 if (pktaphdr->pth_flags & PTH_FLAG_DELAY_PKTAP) {
983 pktap_finalize_proc_info(pktaphdr);
984 }
985
986 if (pktaphdr->pth_flags & PTH_FLAG_TSTAMP) {
987 hp->bh_tstamp.tv_sec = pktaphdr->pth_tstamp.tv_sec;
988 hp->bh_tstamp.tv_usec = pktaphdr->pth_tstamp.tv_usec;
989 }
990 }
991 }
992
993 /*
994 * Rotate the packet buffers in descriptor d. Move the store buffer
995 * into the hold slot, and the free buffer into the store slot.
996 * Zero the length of the new store buffer.
997 */
998 #define ROTATE_BUFFERS(d) \
999 if (d->bd_hbuf_read != 0) \
1000 panic("rotating bpf buffers during read"); \
1001 (d)->bd_hbuf = (d)->bd_sbuf; \
1002 (d)->bd_hlen = (d)->bd_slen; \
1003 (d)->bd_hcnt = (d)->bd_scnt; \
1004 (d)->bd_sbuf = (d)->bd_fbuf; \
1005 (d)->bd_slen = 0; \
1006 (d)->bd_scnt = 0; \
1007 (d)->bd_fbuf = NULL;
1008 /*
1009 * bpfread - read next chunk of packets from buffers
1010 */
1011 int
bpfread(dev_t dev,struct uio * uio,int ioflag)1012 bpfread(dev_t dev, struct uio *uio, int ioflag)
1013 {
1014 struct bpf_d *d;
1015 caddr_t hbuf;
1016 int timed_out, hbuf_len;
1017 int error;
1018 int flags;
1019
1020 lck_mtx_lock(bpf_mlock);
1021
1022 d = bpf_dtab[minor(dev)];
1023 if (d == NULL || d == BPF_DEV_RESERVED ||
1024 (d->bd_flags & BPF_CLOSING) != 0) {
1025 lck_mtx_unlock(bpf_mlock);
1026 return ENXIO;
1027 }
1028
1029 bpf_acquire_d(d);
1030
1031 /*
1032 * Restrict application to use a buffer the same size as
1033 * as kernel buffers.
1034 */
1035 if (uio_resid(uio) != d->bd_bufsize) {
1036 bpf_release_d(d);
1037 lck_mtx_unlock(bpf_mlock);
1038 return EINVAL;
1039 }
1040
1041 if (d->bd_state == BPF_WAITING) {
1042 bpf_stop_timer(d);
1043 }
1044
1045 timed_out = (d->bd_state == BPF_TIMED_OUT);
1046 d->bd_state = BPF_IDLE;
1047
1048 while (d->bd_hbuf_read != 0) {
1049 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
1050 }
1051
1052 if ((d->bd_flags & BPF_CLOSING) != 0) {
1053 bpf_release_d(d);
1054 lck_mtx_unlock(bpf_mlock);
1055 return ENXIO;
1056 }
1057 /*
1058 * If the hold buffer is empty, then do a timed sleep, which
1059 * ends when the timeout expires or when enough packets
1060 * have arrived to fill the store buffer.
1061 */
1062 while (d->bd_hbuf == 0) {
1063 if ((d->bd_immediate || timed_out || (ioflag & IO_NDELAY)) &&
1064 d->bd_slen != 0) {
1065 /*
1066 * We're in immediate mode, or are reading
1067 * in non-blocking mode, or a timer was
1068 * started before the read (e.g., by select()
1069 * or poll()) and has expired and a packet(s)
1070 * either arrived since the previous
1071 * read or arrived while we were asleep.
1072 * Rotate the buffers and return what's here.
1073 */
1074 ROTATE_BUFFERS(d);
1075 break;
1076 }
1077
1078 /*
1079 * No data is available, check to see if the bpf device
1080 * is still pointed at a real interface. If not, return
1081 * ENXIO so that the userland process knows to rebind
1082 * it before using it again.
1083 */
1084 if (d->bd_bif == NULL) {
1085 bpf_release_d(d);
1086 lck_mtx_unlock(bpf_mlock);
1087 return ENXIO;
1088 }
1089 if (ioflag & IO_NDELAY) {
1090 bpf_release_d(d);
1091 lck_mtx_unlock(bpf_mlock);
1092 return EWOULDBLOCK;
1093 }
1094 error = BPF_SLEEP(d, PRINET | PCATCH, "bpf", d->bd_rtout);
1095 /*
1096 * Make sure device is still opened
1097 */
1098 if ((d->bd_flags & BPF_CLOSING) != 0) {
1099 bpf_release_d(d);
1100 lck_mtx_unlock(bpf_mlock);
1101 return ENXIO;
1102 }
1103
1104 while (d->bd_hbuf_read != 0) {
1105 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading",
1106 NULL);
1107 }
1108
1109 if ((d->bd_flags & BPF_CLOSING) != 0) {
1110 bpf_release_d(d);
1111 lck_mtx_unlock(bpf_mlock);
1112 return ENXIO;
1113 }
1114
1115 if (error == EINTR || error == ERESTART) {
1116 if (d->bd_hbuf != NULL) {
1117 /*
1118 * Because we msleep, the hold buffer might
1119 * be filled when we wake up. Avoid rotating
1120 * in this case.
1121 */
1122 break;
1123 }
1124 if (d->bd_slen != 0) {
1125 /*
1126 * Sometimes we may be interrupted often and
1127 * the sleep above will not timeout.
1128 * Regardless, we should rotate the buffers
1129 * if there's any new data pending and
1130 * return it.
1131 */
1132 ROTATE_BUFFERS(d);
1133 break;
1134 }
1135 bpf_release_d(d);
1136 lck_mtx_unlock(bpf_mlock);
1137 if (error == ERESTART) {
1138 printf("%s: %llx ERESTART to EINTR\n",
1139 __func__, (uint64_t)VM_KERNEL_ADDRPERM(d));
1140 error = EINTR;
1141 }
1142 return error;
1143 }
1144 if (error == EWOULDBLOCK) {
1145 /*
1146 * On a timeout, return what's in the buffer,
1147 * which may be nothing. If there is something
1148 * in the store buffer, we can rotate the buffers.
1149 */
1150 if (d->bd_hbuf) {
1151 /*
1152 * We filled up the buffer in between
1153 * getting the timeout and arriving
1154 * here, so we don't need to rotate.
1155 */
1156 break;
1157 }
1158
1159 if (d->bd_slen == 0) {
1160 bpf_release_d(d);
1161 lck_mtx_unlock(bpf_mlock);
1162 return 0;
1163 }
1164 ROTATE_BUFFERS(d);
1165 break;
1166 }
1167 }
1168 /*
1169 * At this point, we know we have something in the hold slot.
1170 */
1171
1172 /*
1173 * Set the hold buffer read. So we do not
1174 * rotate the buffers until the hold buffer
1175 * read is complete. Also to avoid issues resulting
1176 * from page faults during disk sleep (<rdar://problem/13436396>).
1177 */
1178 d->bd_hbuf_read = 1;
1179 hbuf = d->bd_hbuf;
1180 hbuf_len = d->bd_hlen;
1181 flags = d->bd_flags;
1182 lck_mtx_unlock(bpf_mlock);
1183
1184 #ifdef __APPLE__
1185 /*
1186 * Before we move data to userland, we fill out the extended
1187 * header fields.
1188 */
1189 if (flags & BPF_EXTENDED_HDR) {
1190 char *p;
1191
1192 p = hbuf;
1193 while (p < hbuf + hbuf_len) {
1194 struct bpf_hdr_ext *ehp;
1195 uint32_t flowid;
1196 struct so_procinfo soprocinfo;
1197 int found = 0;
1198
1199 ehp = (struct bpf_hdr_ext *)(void *)p;
1200 if ((flowid = ehp->bh_flowid) != 0) {
1201 if (ehp->bh_proto == IPPROTO_TCP) {
1202 found = inp_findinpcb_procinfo(&tcbinfo,
1203 flowid, &soprocinfo);
1204 } else if (ehp->bh_proto == IPPROTO_UDP) {
1205 found = inp_findinpcb_procinfo(&udbinfo,
1206 flowid, &soprocinfo);
1207 }
1208 if (found == 1) {
1209 ehp->bh_pid = soprocinfo.spi_pid;
1210 strlcpy(&ehp->bh_comm[0], &soprocinfo.spi_proc_name[0], sizeof(ehp->bh_comm));
1211 }
1212 ehp->bh_flowid = 0;
1213 }
1214
1215 if (flags & BPF_FINALIZE_PKTAP) {
1216 struct pktap_header *pktaphdr;
1217
1218 pktaphdr = (struct pktap_header *)(void *)
1219 (p + BPF_WORDALIGN(ehp->bh_hdrlen));
1220
1221 bpf_finalize_pktap((struct bpf_hdr *) ehp,
1222 pktaphdr);
1223 }
1224 p += BPF_WORDALIGN(ehp->bh_hdrlen + ehp->bh_caplen);
1225 }
1226 } else if (flags & BPF_FINALIZE_PKTAP) {
1227 char *p;
1228
1229 p = hbuf;
1230 while (p < hbuf + hbuf_len) {
1231 struct bpf_hdr *hp;
1232 struct pktap_header *pktaphdr;
1233
1234 hp = (struct bpf_hdr *)(void *)p;
1235 pktaphdr = (struct pktap_header *)(void *)
1236 (p + BPF_WORDALIGN(hp->bh_hdrlen));
1237
1238 bpf_finalize_pktap(hp, pktaphdr);
1239
1240 p += BPF_WORDALIGN(hp->bh_hdrlen + hp->bh_caplen);
1241 }
1242 }
1243 #endif
1244
1245 /*
1246 * Move data from hold buffer into user space.
1247 * We know the entire buffer is transferred since
1248 * we checked above that the read buffer is bpf_bufsize bytes.
1249 */
1250 error = UIOMOVE(hbuf, hbuf_len, UIO_READ, uio);
1251
1252 lck_mtx_lock(bpf_mlock);
1253 /*
1254 * Make sure device is still opened
1255 */
1256 if ((d->bd_flags & BPF_CLOSING) != 0) {
1257 bpf_release_d(d);
1258 lck_mtx_unlock(bpf_mlock);
1259 return ENXIO;
1260 }
1261
1262 d->bd_hbuf_read = 0;
1263 d->bd_fbuf = d->bd_hbuf;
1264 d->bd_hbuf = NULL;
1265 d->bd_hlen = 0;
1266 d->bd_hcnt = 0;
1267 wakeup((caddr_t)d);
1268
1269 bpf_release_d(d);
1270 lck_mtx_unlock(bpf_mlock);
1271 return error;
1272 }
1273
1274 /*
1275 * If there are processes sleeping on this descriptor, wake them up.
1276 */
static void
bpf_wakeup(struct bpf_d *d)
{
	/*
	 * Cancel a pending read timeout: the arrival of data (or whatever
	 * triggered this wakeup) supersedes the timer.
	 */
	if (d->bd_state == BPF_WAITING) {
		bpf_stop_timer(d);
		d->bd_state = BPF_IDLE;
	}
	/* Wake threads sleeping in read()/BIOCSETUP on this descriptor. */
	wakeup((caddr_t)d);
	/* Deliver the async signal (FIOASYNC + BIOCSRSIG) if configured. */
	if (d->bd_async && d->bd_sig && d->bd_sigio) {
		pgsigio(d->bd_sigio, d->bd_sig);
	}

	/* Notify select()/poll() waiters and any attached knotes. */
	selwakeup(&d->bd_sel);
	if ((d->bd_flags & BPF_KNOTE)) {
		KNOTE(&d->bd_sel.si_note, 1);
	}
}
1294
1295 static void
bpf_timed_out(void * arg,__unused void * dummy)1296 bpf_timed_out(void *arg, __unused void *dummy)
1297 {
1298 struct bpf_d *d = (struct bpf_d *)arg;
1299
1300 lck_mtx_lock(bpf_mlock);
1301 if (d->bd_state == BPF_WAITING) {
1302 /*
1303 * There's a select or kqueue waiting for this; if there's
1304 * now stuff to read, wake it up.
1305 */
1306 d->bd_state = BPF_TIMED_OUT;
1307 if (d->bd_slen != 0) {
1308 bpf_wakeup(d);
1309 }
1310 } else if (d->bd_state == BPF_DRAINING) {
1311 /*
1312 * A close is waiting for this to finish.
1313 * Mark it as finished, and wake the close up.
1314 */
1315 d->bd_state = BPF_IDLE;
1316 bpf_wakeup(d);
1317 }
1318 lck_mtx_unlock(bpf_mlock);
1319 }
1320
1321 /* keep in sync with bpf_movein above: */
1322 #define MAX_DATALINK_HDR_LEN (sizeof(struct firewire_header))
1323
1324 int
bpfwrite(dev_t dev,struct uio * uio,__unused int ioflag)1325 bpfwrite(dev_t dev, struct uio *uio, __unused int ioflag)
1326 {
1327 struct bpf_d *d;
1328 struct ifnet *ifp;
1329 struct mbuf *m = NULL;
1330 int error;
1331 char dst_buf[SOCKADDR_HDR_LEN + MAX_DATALINK_HDR_LEN];
1332 int datlen = 0;
1333 int bif_dlt;
1334 int bd_hdrcmplt;
1335
1336 lck_mtx_lock(bpf_mlock);
1337
1338 d = bpf_dtab[minor(dev)];
1339 if (d == NULL || d == BPF_DEV_RESERVED ||
1340 (d->bd_flags & BPF_CLOSING) != 0) {
1341 lck_mtx_unlock(bpf_mlock);
1342 return ENXIO;
1343 }
1344
1345 bpf_acquire_d(d);
1346
1347 if (d->bd_bif == 0) {
1348 bpf_release_d(d);
1349 lck_mtx_unlock(bpf_mlock);
1350 return ENXIO;
1351 }
1352
1353 ifp = d->bd_bif->bif_ifp;
1354
1355 if ((ifp->if_flags & IFF_UP) == 0) {
1356 bpf_release_d(d);
1357 lck_mtx_unlock(bpf_mlock);
1358 return ENETDOWN;
1359 }
1360 if (uio_resid(uio) == 0) {
1361 bpf_release_d(d);
1362 lck_mtx_unlock(bpf_mlock);
1363 return 0;
1364 }
1365 ((struct sockaddr *)dst_buf)->sa_len = sizeof(dst_buf);
1366
1367 /*
1368 * fix for PR-6849527
1369 * geting variables onto stack before dropping lock for bpf_movein()
1370 */
1371 bif_dlt = (int)d->bd_bif->bif_dlt;
1372 bd_hdrcmplt = d->bd_hdrcmplt;
1373
1374 /* bpf_movein allocating mbufs; drop lock */
1375 lck_mtx_unlock(bpf_mlock);
1376
1377 error = bpf_movein(uio, bif_dlt, &m,
1378 bd_hdrcmplt ? NULL : (struct sockaddr *)dst_buf,
1379 &datlen);
1380
1381 /* take the lock again */
1382 lck_mtx_lock(bpf_mlock);
1383 if (error) {
1384 bpf_release_d(d);
1385 lck_mtx_unlock(bpf_mlock);
1386 return error;
1387 }
1388
1389 /* verify the device is still open */
1390 if ((d->bd_flags & BPF_CLOSING) != 0) {
1391 bpf_release_d(d);
1392 lck_mtx_unlock(bpf_mlock);
1393 m_freem(m);
1394 return ENXIO;
1395 }
1396
1397 if (d->bd_bif == NULL) {
1398 bpf_release_d(d);
1399 lck_mtx_unlock(bpf_mlock);
1400 m_free(m);
1401 return ENXIO;
1402 }
1403
1404 if ((unsigned)datlen > ifp->if_mtu) {
1405 bpf_release_d(d);
1406 lck_mtx_unlock(bpf_mlock);
1407 m_freem(m);
1408 return EMSGSIZE;
1409 }
1410
1411 bpf_set_packet_service_class(m, d->bd_traffic_class);
1412
1413 lck_mtx_unlock(bpf_mlock);
1414
1415 /*
1416 * The driver frees the mbuf.
1417 */
1418 if (d->bd_hdrcmplt) {
1419 if (d->bd_bif->bif_send) {
1420 error = d->bd_bif->bif_send(ifp, d->bd_bif->bif_dlt, m);
1421 } else {
1422 error = dlil_output(ifp, 0, m, NULL, NULL, 1, NULL);
1423 }
1424 } else {
1425 error = dlil_output(ifp, PF_INET, m, NULL,
1426 (struct sockaddr *)dst_buf, 0, NULL);
1427 }
1428
1429 lck_mtx_lock(bpf_mlock);
1430 bpf_release_d(d);
1431 lck_mtx_unlock(bpf_mlock);
1432
1433 return error;
1434 }
1435
1436 /*
1437 * Reset a descriptor by flushing its packet buffer and clearing the
1438 * receive and drop counts.
1439 */
1440 static void
reset_d(struct bpf_d * d)1441 reset_d(struct bpf_d *d)
1442 {
1443 if (d->bd_hbuf_read != 0) {
1444 panic("resetting buffers during read");
1445 }
1446
1447 if (d->bd_hbuf) {
1448 /* Free the hold buffer. */
1449 d->bd_fbuf = d->bd_hbuf;
1450 d->bd_hbuf = NULL;
1451 }
1452 d->bd_slen = 0;
1453 d->bd_hlen = 0;
1454 d->bd_scnt = 0;
1455 d->bd_hcnt = 0;
1456 d->bd_rcount = 0;
1457 d->bd_dcount = 0;
1458 }
1459
1460 static struct bpf_d *
bpf_get_device_from_uuid(uuid_t uuid)1461 bpf_get_device_from_uuid(uuid_t uuid)
1462 {
1463 unsigned int i;
1464
1465 for (i = 0; i < nbpfilter; i++) {
1466 struct bpf_d *d = bpf_dtab[i];
1467
1468 if (d == NULL || d == BPF_DEV_RESERVED ||
1469 (d->bd_flags & BPF_CLOSING) != 0) {
1470 continue;
1471 }
1472 if (uuid_compare(uuid, d->bd_uuid) == 0) {
1473 return d;
1474 }
1475 }
1476
1477 return NULL;
1478 }
1479
1480 /*
 * The BIOCSETUP command "atomically" attaches to the interface and
 * copies the buffers from another descriptor. This minimizes the risk
 * of missing packets because the whole operation is performed while
 * holding the BPF global lock
1485 */
static int
bpf_setup(struct bpf_d *d_to, uuid_t uuid_from, ifnet_t ifp)
{
	struct bpf_d *d_from;
	int error = 0;

	/* Caller must hold the global BPF lock for the whole operation. */
	LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);

	/*
	 * Sanity checks
	 */
	d_from = bpf_get_device_from_uuid(uuid_from);
	if (d_from == NULL) {
		error = ENOENT;
		os_log_info(OS_LOG_DEFAULT,
		    "%s: uuids not found error %d",
		    __func__, error);
		return error;
	}
	/* Both devices must have been opened by the same process. */
	if (d_from->bd_opened_by != d_to->bd_opened_by) {
		error = EACCES;
		os_log_info(OS_LOG_DEFAULT,
		    "%s: processes not matching error %d",
		    __func__, error);
		return error;
	}

	/*
	 * Prevent any read while copying
	 *
	 * Claim both descriptors' hold buffers (bd_hbuf_read) so no
	 * concurrent read can rotate buffers under us; both claims are
	 * dropped at 'done:' below, on success and failure alike.
	 *
	 * NOTE(review): this claims d_to first, then d_from, and each
	 * msleep() drops bpf_mlock while waiting — confirm two concurrent
	 * BIOCSETUPs in opposite directions cannot deadlock here.
	 */
	while (d_to->bd_hbuf_read != 0) {
		msleep((caddr_t)d_to, bpf_mlock, PRINET, __func__, NULL);
	}
	d_to->bd_hbuf_read = 1;

	while (d_from->bd_hbuf_read != 0) {
		msleep((caddr_t)d_from, bpf_mlock, PRINET, __func__, NULL);
	}
	d_from->bd_hbuf_read = 1;

	/*
	 * Verify the devices have not been closed
	 * (the msleep()s above released the lock, so re-validate).
	 */
	if (d_to->bd_flags & BPF_CLOSING) {
		error = ENXIO;
		os_log_info(OS_LOG_DEFAULT,
		    "%s: d_to is closing error %d",
		    __func__, error);
		goto done;
	}
	if (d_from->bd_flags & BPF_CLOSING) {
		error = ENXIO;
		os_log_info(OS_LOG_DEFAULT,
		    "%s: d_from is closing error %d",
		    __func__, error);
		goto done;
	}

	/*
	 * For now require the same buffer size
	 * (the memcpy()s below copy d_from's lengths into d_to's buffers).
	 */
	if (d_from->bd_bufsize != d_to->bd_bufsize) {
		error = EINVAL;
		os_log_info(OS_LOG_DEFAULT,
		    "%s: bufsizes not matching error %d",
		    __func__, error);
		goto done;
	}

	/*
	 * Attach to the interface
	 * (has_hbuf_read = true: we already claimed d_to above).
	 */
	error = bpf_setif(d_to, ifp, false, true);
	if (error != 0) {
		os_log_info(OS_LOG_DEFAULT,
		    "%s: bpf_setif() failed error %d",
		    __func__, error);
		goto done;
	}

	/*
	 * Make sure the buffers are setup as expected by bpf_setif()
	 */
	ASSERT(d_to->bd_hbuf == NULL);
	ASSERT(d_to->bd_sbuf != NULL);
	ASSERT(d_to->bd_fbuf != NULL);

	/*
	 * Copy the buffers and update the pointers and counts
	 */
	memcpy(d_to->bd_sbuf, d_from->bd_sbuf, d_from->bd_slen);
	d_to->bd_slen = d_from->bd_slen;
	d_to->bd_scnt = d_from->bd_scnt;

	if (d_from->bd_hbuf != NULL) {
		/* Repurpose d_to's free buffer as its new hold buffer. */
		d_to->bd_hbuf = d_to->bd_fbuf;
		d_to->bd_fbuf = NULL;
		memcpy(d_to->bd_hbuf, d_from->bd_hbuf, d_from->bd_hlen);
	}
	d_to->bd_hlen = d_from->bd_hlen;
	d_to->bd_hcnt = d_from->bd_hcnt;

	if (bpf_debug > 0) {
		os_log_info(OS_LOG_DEFAULT,
		    "%s: done slen %u scnt %u hlen %u hcnt %u",
		    __func__, d_to->bd_slen, d_to->bd_scnt,
		    d_to->bd_hlen, d_to->bd_hcnt);
	}
done:
	/* Release both hold-buffer claims and wake any waiting readers. */
	d_from->bd_hbuf_read = 0;
	wakeup((caddr_t)d_from);

	d_to->bd_hbuf_read = 0;
	wakeup((caddr_t)d_to);

	return error;
}
1603
1604 /*
1605 * FIONREAD Check for read packet available.
1606 * SIOCGIFADDR Get interface address - convenient hook to driver.
1607 * BIOCGBLEN Get buffer len [for read()].
1608 * BIOCSETF Set ethernet read filter.
1609 * BIOCFLUSH Flush read packet buffer.
1610 * BIOCPROMISC Put interface into promiscuous mode.
1611 * BIOCGDLT Get link layer type.
1612 * BIOCGETIF Get interface name.
1613 * BIOCSETIF Set interface.
1614 * BIOCSRTIMEOUT Set read timeout.
1615 * BIOCGRTIMEOUT Get read timeout.
1616 * BIOCGSTATS Get packet stats.
1617 * BIOCIMMEDIATE Set immediate mode.
1618 * BIOCVERSION Get filter language version.
1619 * BIOCGHDRCMPLT Get "header already complete" flag
1620 * BIOCSHDRCMPLT Set "header already complete" flag
1621 * BIOCGSEESENT Get "see packets sent" flag
1622 * BIOCSSEESENT Set "see packets sent" flag
1623 * BIOCSETTC Set traffic class.
1624 * BIOCGETTC Get traffic class.
1625 * BIOCSEXTHDR Set "extended header" flag
1626 * BIOCSHEADDROP Drop head of the buffer if user is not reading
1627 * BIOCGHEADDROP Get "head-drop" flag
1628 */
1629 /* ARGSUSED */
int
bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags,
    struct proc *p)
{
	struct bpf_d *d;
	int error = 0;
	u_int int_arg;
	struct ifreq ifr;

	lck_mtx_lock(bpf_mlock);

	d = bpf_dtab[minor(dev)];
	if (d == NULL || d == BPF_DEV_RESERVED ||
	    (d->bd_flags & BPF_CLOSING) != 0) {
		lck_mtx_unlock(bpf_mlock);
		return ENXIO;
	}

	/* Hold a reference so a concurrent close cannot free 'd' under us. */
	bpf_acquire_d(d);

	/* Any ioctl cancels a pending select/poll read timeout. */
	if (d->bd_state == BPF_WAITING) {
		bpf_stop_timer(d);
	}
	d->bd_state = BPF_IDLE;

	switch (cmd) {
	default:
		error = EINVAL;
		break;

	/*
	 * Check for read packet available.
	 */
	case FIONREAD:                  /* int */
	{
		int n;

		/* Count hold-buffer bytes only if no read has claimed it. */
		n = d->bd_slen;
		if (d->bd_hbuf && d->bd_hbuf_read == 0) {
			n += d->bd_hlen;
		}

		bcopy(&n, addr, sizeof(n));
		break;
	}

	case SIOCGIFADDR:               /* struct ifreq */
	{
		struct ifnet *ifp;

		if (d->bd_bif == 0) {
			error = EINVAL;
		} else {
			/* Forward the request to the attached interface. */
			ifp = d->bd_bif->bif_ifp;
			error = ifnet_ioctl(ifp, 0, cmd, addr);
		}
		break;
	}

	/*
	 * Get buffer len [for read()].
	 */
	case BIOCGBLEN:                 /* u_int */
		bcopy(&d->bd_bufsize, addr, sizeof(u_int));
		break;

	/*
	 * Set buffer length.
	 */
	case BIOCSBLEN: {               /* u_int */
		u_int size;
		unsigned int maxbufsize = bpf_maxbufsize;

		/*
		 * Allow larger buffer in head drop mode to with the
		 * assumption the reading process may be low priority but
		 * is interested in the most recent traffic
		 */
		if (d->bd_headdrop != 0) {
			maxbufsize = 2 * bpf_maxbufsize;
		}

		if (d->bd_bif != 0 || (d->bd_flags & BPF_DETACHING)) {
			/*
			 * Interface already attached, unable to change buffers
			 */
			error = EINVAL;
			break;
		}
		bcopy(addr, &size, sizeof(size));

		/* Clamp the requested size to [BPF_MINBUFSIZE, maxbufsize]. */
		if (size > maxbufsize) {
			d->bd_bufsize = maxbufsize;

			os_log_info(OS_LOG_DEFAULT,
			    "%s bufsize capped to %u from %u",
			    __func__, d->bd_bufsize, size);
		} else if (size < BPF_MINBUFSIZE) {
			d->bd_bufsize = BPF_MINBUFSIZE;

			os_log_info(OS_LOG_DEFAULT,
			    "%s bufsize bumped to %u from %u",
			    __func__, d->bd_bufsize, size);
		} else {
			d->bd_bufsize = size;
		}

		/* It's a read/write ioctl */
		bcopy(&d->bd_bufsize, addr, sizeof(u_int));
		break;
	}
	/*
	 * Set link layer read filter.
	 */
	case BIOCSETF32:
	case BIOCSETFNR32: {            /* struct bpf_program32 */
		struct bpf_program32 prg32;

		bcopy(addr, &prg32, sizeof(prg32));
		error = bpf_setf(d, prg32.bf_len,
		    CAST_USER_ADDR_T(prg32.bf_insns), cmd);
		break;
	}

	case BIOCSETF64:
	case BIOCSETFNR64: {            /* struct bpf_program64 */
		struct bpf_program64 prg64;

		bcopy(addr, &prg64, sizeof(prg64));
		error = bpf_setf(d, prg64.bf_len, CAST_USER_ADDR_T(prg64.bf_insns), cmd);
		break;
	}

	/*
	 * Flush read packet buffer.
	 */
	case BIOCFLUSH:
		/* Wait for any in-flight read to release the hold buffer. */
		while (d->bd_hbuf_read != 0) {
			msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading",
			    NULL);
		}
		/* msleep() dropped the lock; re-check for a racing close. */
		if ((d->bd_flags & BPF_CLOSING) != 0) {
			error = ENXIO;
			break;
		}
		reset_d(d);
		break;

	/*
	 * Put interface into promiscuous mode.
	 */
	case BIOCPROMISC:
		if (d->bd_bif == 0) {
			/*
			 * No interface attached yet.
			 */
			error = EINVAL;
			break;
		}
		if (d->bd_promisc == 0) {
			/* Drop the lock: ifnet_set_promiscuous may block. */
			lck_mtx_unlock(bpf_mlock);
			error = ifnet_set_promiscuous(d->bd_bif->bif_ifp, 1);
			lck_mtx_lock(bpf_mlock);
			if (error == 0) {
				d->bd_promisc = 1;
			}
		}
		break;

	/*
	 * Get device parameters.
	 */
	case BIOCGDLT:                  /* u_int */
		if (d->bd_bif == 0) {
			error = EINVAL;
		} else {
			bcopy(&d->bd_bif->bif_dlt, addr, sizeof(u_int));
		}
		break;

	/*
	 * Get a list of supported data link types.
	 */
	case BIOCGDLTLIST:              /* struct bpf_dltlist */
		if (d->bd_bif == NULL) {
			error = EINVAL;
		} else {
			error = bpf_getdltlist(d, addr, p);
		}
		break;

	/*
	 * Set data link type.
	 */
	case BIOCSDLT:                  /* u_int */
		if (d->bd_bif == NULL) {
			error = EINVAL;
		} else {
			u_int dlt;

			bcopy(addr, &dlt, sizeof(dlt));

			/* DLT_PKTAP is only honored when explicitly wanted. */
			if (dlt == DLT_PKTAP &&
			    !(d->bd_flags & BPF_WANT_PKTAP)) {
				dlt = DLT_RAW;
			}
			error = bpf_setdlt(d, dlt);
		}
		break;

	/*
	 * Get interface name.
	 */
	case BIOCGETIF:                 /* struct ifreq */
		if (d->bd_bif == 0) {
			error = EINVAL;
		} else {
			struct ifnet *const ifp = d->bd_bif->bif_ifp;

			snprintf(((struct ifreq *)(void *)addr)->ifr_name,
			    sizeof(ifr.ifr_name), "%s", if_name(ifp));
		}
		break;

	/*
	 * Set interface.
	 */
	case BIOCSETIF: {               /* struct ifreq */
		ifnet_t ifp;

		bcopy(addr, &ifr, sizeof(ifr));
		ifr.ifr_name[IFNAMSIZ - 1] = '\0';
		ifp = ifunit(ifr.ifr_name);
		if (ifp == NULL) {
			error = ENXIO;
		} else {
			error = bpf_setif(d, ifp, true, false);
		}
		break;
	}

	/*
	 * Set read timeout.
	 */
	case BIOCSRTIMEOUT32: {         /* struct user32_timeval */
		struct user32_timeval _tv;
		struct timeval tv;

		bcopy(addr, &_tv, sizeof(_tv));
		tv.tv_sec = _tv.tv_sec;
		tv.tv_usec = _tv.tv_usec;

		/*
		 * Subtract 1 tick from tvtohz() since this isn't
		 * a one-shot timer.
		 */
		if ((error = itimerfix(&tv)) == 0) {
			d->bd_rtout = tvtohz(&tv) - 1;
		}
		break;
	}

	case BIOCSRTIMEOUT64: {         /* struct user64_timeval */
		struct user64_timeval _tv;
		struct timeval tv;

		bcopy(addr, &_tv, sizeof(_tv));
		tv.tv_sec = (__darwin_time_t)_tv.tv_sec;
		tv.tv_usec = _tv.tv_usec;

		/*
		 * Subtract 1 tick from tvtohz() since this isn't
		 * a one-shot timer.
		 */
		if ((error = itimerfix(&tv)) == 0) {
			d->bd_rtout = tvtohz(&tv) - 1;
		}
		break;
	}

	/*
	 * Get read timeout.
	 */
	case BIOCGRTIMEOUT32: {         /* struct user32_timeval */
		struct user32_timeval tv;

		bzero(&tv, sizeof(tv));
		/* Convert the stored tick count back to seconds/microseconds. */
		tv.tv_sec = d->bd_rtout / hz;
		tv.tv_usec = (d->bd_rtout % hz) * tick;
		bcopy(&tv, addr, sizeof(tv));
		break;
	}

	case BIOCGRTIMEOUT64: {         /* struct user64_timeval */
		struct user64_timeval tv;

		bzero(&tv, sizeof(tv));
		tv.tv_sec = d->bd_rtout / hz;
		tv.tv_usec = (d->bd_rtout % hz) * tick;
		bcopy(&tv, addr, sizeof(tv));
		break;
	}

	/*
	 * Get packet stats.
	 */
	case BIOCGSTATS: {              /* struct bpf_stat */
		struct bpf_stat bs;

		bzero(&bs, sizeof(bs));
		bs.bs_recv = d->bd_rcount;
		bs.bs_drop = d->bd_dcount;
		bcopy(&bs, addr, sizeof(bs));
		break;
	}

	/*
	 * Set immediate mode.
	 */
	case BIOCIMMEDIATE:             /* u_int */
		/*
		 * NOTE(review): only the first byte of the user's word is
		 * read here although the argument is documented as u_int —
		 * confirm this is intentional (little-endian only?).
		 */
		d->bd_immediate = *(u_char *)(void *)addr;
		break;

	case BIOCVERSION: {             /* struct bpf_version */
		struct bpf_version bv;

		bzero(&bv, sizeof(bv));
		bv.bv_major = BPF_MAJOR_VERSION;
		bv.bv_minor = BPF_MINOR_VERSION;
		bcopy(&bv, addr, sizeof(bv));
		break;
	}

	/*
	 * Get "header already complete" flag
	 */
	case BIOCGHDRCMPLT:             /* u_int */
		bcopy(&d->bd_hdrcmplt, addr, sizeof(u_int));
		break;

	/*
	 * Set "header already complete" flag
	 */
	case BIOCSHDRCMPLT:             /* u_int */
		bcopy(addr, &int_arg, sizeof(int_arg));
		d->bd_hdrcmplt = int_arg ? 1 : 0;
		break;

	/*
	 * Get "see sent packets" flag
	 */
	case BIOCGSEESENT:              /* u_int */
		bcopy(&d->bd_seesent, addr, sizeof(u_int));
		break;

	/*
	 * Set "see sent packets" flag
	 */
	case BIOCSSEESENT:              /* u_int */
		bcopy(addr, &d->bd_seesent, sizeof(u_int));
		break;

	/*
	 * Set traffic service class
	 */
	case BIOCSETTC: {               /* int */
		int tc;

		bcopy(addr, &tc, sizeof(int));
		error = bpf_set_traffic_class(d, tc);
		break;
	}

	/*
	 * Get traffic service class
	 */
	case BIOCGETTC:                 /* int */
		bcopy(&d->bd_traffic_class, addr, sizeof(int));
		break;

	case FIONBIO:           /* Non-blocking I/O; int */
		break;

	case FIOASYNC:          /* Send signal on receive packets; int */
		bcopy(addr, &d->bd_async, sizeof(int));
		break;
#ifndef __APPLE__
	case FIOSETOWN:
		error = fsetown(*(int *)addr, &d->bd_sigio);
		break;

	case FIOGETOWN:
		*(int *)addr = fgetown(d->bd_sigio);
		break;

	/* This is deprecated, FIOSETOWN should be used instead. */
	case TIOCSPGRP:
		error = fsetown(-(*(int *)addr), &d->bd_sigio);
		break;

	/* This is deprecated, FIOGETOWN should be used instead. */
	case TIOCGPGRP:
		*(int *)addr = -fgetown(d->bd_sigio);
		break;
#endif
	case BIOCSRSIG: {       /* Set receive signal; u_int */
		u_int sig;

		bcopy(addr, &sig, sizeof(u_int));

		if (sig >= NSIG) {
			error = EINVAL;
		} else {
			d->bd_sig = sig;
		}
		break;
	}
	case BIOCGRSIG:                 /* u_int */
		bcopy(&d->bd_sig, addr, sizeof(u_int));
		break;
#ifdef __APPLE__
	case BIOCSEXTHDR:               /* u_int */
		bcopy(addr, &int_arg, sizeof(int_arg));
		if (int_arg) {
			d->bd_flags |= BPF_EXTENDED_HDR;
		} else {
			d->bd_flags &= ~BPF_EXTENDED_HDR;
		}
		break;

	case BIOCGIFATTACHCOUNT: {              /* struct ifreq */
		ifnet_t ifp;
		struct bpf_if *bp;

		bcopy(addr, &ifr, sizeof(ifr));
		ifr.ifr_name[IFNAMSIZ - 1] = '\0';
		ifp = ifunit(ifr.ifr_name);
		if (ifp == NULL) {
			error = ENXIO;
			break;
		}
		/* Count descriptors attached to this interface (all DLTs). */
		ifr.ifr_intval = 0;
		for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
			struct bpf_d *bpf_d;

			if (bp->bif_ifp == NULL || bp->bif_ifp != ifp) {
				continue;
			}
			for (bpf_d = bp->bif_dlist; bpf_d;
			    bpf_d = bpf_d->bd_next) {
				ifr.ifr_intval += 1;
			}
		}
		bcopy(&ifr, addr, sizeof(ifr));
		break;
	}
	case BIOCGWANTPKTAP:            /* u_int */
		int_arg = d->bd_flags & BPF_WANT_PKTAP ? 1 : 0;
		bcopy(&int_arg, addr, sizeof(int_arg));
		break;

	case BIOCSWANTPKTAP:            /* u_int */
		bcopy(addr, &int_arg, sizeof(int_arg));
		if (int_arg) {
			d->bd_flags |= BPF_WANT_PKTAP;
		} else {
			d->bd_flags &= ~BPF_WANT_PKTAP;
		}
		break;
#endif

	case BIOCSHEADDROP:
		bcopy(addr, &int_arg, sizeof(int_arg));
		d->bd_headdrop = int_arg ? 1 : 0;
		break;

	case BIOCGHEADDROP:
		bcopy(&d->bd_headdrop, addr, sizeof(int));
		break;

	case BIOCSTRUNCATE:
		bcopy(addr, &int_arg, sizeof(int_arg));
		if (int_arg) {
			d->bd_flags |= BPF_TRUNCATE;
		} else {
			d->bd_flags &= ~BPF_TRUNCATE;
		}
		break;

	case BIOCGETUUID:
		bcopy(&d->bd_uuid, addr, sizeof(uuid_t));
		break;

	case BIOCSETUP: {
		struct bpf_setup_args bsa;
		ifnet_t ifp;

		bcopy(addr, &bsa, sizeof(struct bpf_setup_args));
		bsa.bsa_ifname[IFNAMSIZ - 1] = 0;
		ifp = ifunit(bsa.bsa_ifname);
		if (ifp == NULL) {
			error = ENXIO;
			os_log_info(OS_LOG_DEFAULT,
			    "%s: ifnet not found for %s error %d",
			    __func__, bsa.bsa_ifname, error);
			break;
		}

		error = bpf_setup(d, bsa.bsa_uuid, ifp);
		break;
	}
	case BIOCSPKTHDRV2:
		bcopy(addr, &int_arg, sizeof(int_arg));
		if (int_arg != 0) {
			d->bd_flags |= BPF_PKTHDRV2;
		} else {
			d->bd_flags &= ~BPF_PKTHDRV2;
		}
		break;

	case BIOCGPKTHDRV2:
		int_arg = d->bd_flags & BPF_PKTHDRV2 ? 1 : 0;
		bcopy(&int_arg, addr, sizeof(int));
		break;
	}

	bpf_release_d(d);
	lck_mtx_unlock(bpf_mlock);

	return error;
}
2161
2162 /*
2163 * Set d's packet filter program to fp. If this file already has a filter,
2164 * free it and replace it. Returns EINVAL for bogus requests.
2165 */
2166 static int
bpf_setf(struct bpf_d * d,u_int bf_len,user_addr_t bf_insns,u_long cmd)2167 bpf_setf(struct bpf_d *d, u_int bf_len, user_addr_t bf_insns,
2168 u_long cmd)
2169 {
2170 struct bpf_insn *fcode, *old;
2171 u_int flen, size;
2172
2173 while (d->bd_hbuf_read != 0) {
2174 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
2175 }
2176
2177 if ((d->bd_flags & BPF_CLOSING) != 0) {
2178 return ENXIO;
2179 }
2180
2181 old = d->bd_filter;
2182 if (bf_insns == USER_ADDR_NULL) {
2183 if (bf_len != 0) {
2184 return EINVAL;
2185 }
2186 d->bd_filter = NULL;
2187 reset_d(d);
2188 if (old != 0) {
2189 kfree_data_addr(old);
2190 }
2191 return 0;
2192 }
2193 flen = bf_len;
2194 if (flen > BPF_MAXINSNS) {
2195 return EINVAL;
2196 }
2197
2198 size = flen * sizeof(struct bpf_insn);
2199 fcode = (struct bpf_insn *) kalloc_data(size, M_WAIT);
2200 #ifdef __APPLE__
2201 if (fcode == NULL) {
2202 return ENOBUFS;
2203 }
2204 #endif
2205 if (copyin(bf_insns, (caddr_t)fcode, size) == 0 &&
2206 bpf_validate(fcode, (int)flen)) {
2207 d->bd_filter = fcode;
2208
2209 if (cmd == BIOCSETF32 || cmd == BIOCSETF64) {
2210 reset_d(d);
2211 }
2212
2213 if (old != 0) {
2214 kfree_data_addr(old);
2215 }
2216
2217 return 0;
2218 }
2219 kfree_data(fcode, size);
2220 return EINVAL;
2221 }
2222
2223 /*
2224 * Detach a file from its current interface (if attached at all) and attach
2225 * to the interface indicated by the name stored in ifr.
2226 * Return an errno or 0.
2227 */
2228 static int
bpf_setif(struct bpf_d * d,ifnet_t theywant,bool do_reset,bool has_hbuf_read)2229 bpf_setif(struct bpf_d *d, ifnet_t theywant, bool do_reset, bool has_hbuf_read)
2230 {
2231 struct bpf_if *bp;
2232 int error;
2233
2234 while (d->bd_hbuf_read != 0 && !has_hbuf_read) {
2235 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
2236 }
2237
2238 if ((d->bd_flags & BPF_CLOSING) != 0) {
2239 return ENXIO;
2240 }
2241
2242 /*
2243 * Look through attached interfaces for the named one.
2244 */
2245 for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
2246 struct ifnet *ifp = bp->bif_ifp;
2247
2248 if (ifp == 0 || ifp != theywant) {
2249 continue;
2250 }
2251 /*
2252 * Do not use DLT_PKTAP, unless requested explicitly
2253 */
2254 if (bp->bif_dlt == DLT_PKTAP && !(d->bd_flags & BPF_WANT_PKTAP)) {
2255 continue;
2256 }
2257 /*
2258 * Skip the coprocessor interface
2259 */
2260 if (!intcoproc_unrestricted && IFNET_IS_INTCOPROC(ifp)) {
2261 continue;
2262 }
2263 /*
2264 * We found the requested interface.
2265 * Allocate the packet buffers.
2266 */
2267 error = bpf_allocbufs(d);
2268 if (error != 0) {
2269 return error;
2270 }
2271 /*
2272 * Detach if attached to something else.
2273 */
2274 if (bp != d->bd_bif) {
2275 if (d->bd_bif != NULL) {
2276 if (bpf_detachd(d, 0) != 0) {
2277 return ENXIO;
2278 }
2279 }
2280 if (bpf_attachd(d, bp) != 0) {
2281 return ENXIO;
2282 }
2283 }
2284 if (do_reset) {
2285 reset_d(d);
2286 }
2287 return 0;
2288 }
2289 /* Not found. */
2290 return ENXIO;
2291 }
2292
2293 /*
2294 * Get a list of available data link type of the interface.
2295 */
2296 static int
bpf_getdltlist(struct bpf_d * d,caddr_t addr,struct proc * p)2297 bpf_getdltlist(struct bpf_d *d, caddr_t addr, struct proc *p)
2298 {
2299 u_int n;
2300 int error;
2301 struct ifnet *ifp;
2302 struct bpf_if *bp;
2303 user_addr_t dlist;
2304 struct bpf_dltlist bfl;
2305
2306 bcopy(addr, &bfl, sizeof(bfl));
2307 if (proc_is64bit(p)) {
2308 dlist = (user_addr_t)bfl.bfl_u.bflu_pad;
2309 } else {
2310 dlist = CAST_USER_ADDR_T(bfl.bfl_u.bflu_list);
2311 }
2312
2313 ifp = d->bd_bif->bif_ifp;
2314 n = 0;
2315 error = 0;
2316
2317 for (bp = bpf_iflist; bp; bp = bp->bif_next) {
2318 if (bp->bif_ifp != ifp) {
2319 continue;
2320 }
2321 /*
2322 * Do not use DLT_PKTAP, unless requested explicitly
2323 */
2324 if (bp->bif_dlt == DLT_PKTAP && !(d->bd_flags & BPF_WANT_PKTAP)) {
2325 continue;
2326 }
2327 if (dlist != USER_ADDR_NULL) {
2328 if (n >= bfl.bfl_len) {
2329 return ENOMEM;
2330 }
2331 error = copyout(&bp->bif_dlt, dlist,
2332 sizeof(bp->bif_dlt));
2333 if (error != 0) {
2334 break;
2335 }
2336 dlist += sizeof(bp->bif_dlt);
2337 }
2338 n++;
2339 }
2340 bfl.bfl_len = n;
2341 bcopy(&bfl, addr, sizeof(bfl));
2342
2343 return error;
2344 }
2345
2346 /*
2347 * Set the data link type of a BPF instance.
2348 */
static int
bpf_setdlt(struct bpf_d *d, uint32_t dlt)
{
	int error, opromisc;
	struct ifnet *ifp;
	struct bpf_if *bp;

	/* Already using the requested DLT: nothing to do. */
	if (d->bd_bif->bif_dlt == dlt) {
		return 0;
	}

	/* Wait for any in-flight read, then re-check for a racing close. */
	while (d->bd_hbuf_read != 0) {
		msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
	}

	if ((d->bd_flags & BPF_CLOSING) != 0) {
		return ENXIO;
	}

	/* Find an attachment on the same interface with the requested DLT. */
	ifp = d->bd_bif->bif_ifp;
	for (bp = bpf_iflist; bp; bp = bp->bif_next) {
		if (bp->bif_ifp == ifp && bp->bif_dlt == dlt) {
			/*
			 * Do not use DLT_PKTAP, unless requested explicitly
			 */
			if (bp->bif_dlt == DLT_PKTAP &&
			    !(d->bd_flags & BPF_WANT_PKTAP)) {
				continue;
			}
			break;
		}
	}
	if (bp != NULL) {
		/* Remember promiscuous state: detach clears it. */
		opromisc = d->bd_promisc;
		if (bpf_detachd(d, 0) != 0) {
			return ENXIO;
		}
		error = bpf_attachd(d, bp);
		if (error) {
			printf("bpf_setdlt: bpf_attachd %s%d failed (%d)\n",
			    ifnet_name(bp->bif_ifp), ifnet_unit(bp->bif_ifp),
			    error);
			return error;
		}
		reset_d(d);
		/* Re-enable promiscuous mode; the call may block, so drop
		 * the global lock around it. */
		if (opromisc) {
			lck_mtx_unlock(bpf_mlock);
			error = ifnet_set_promiscuous(bp->bif_ifp, 1);
			lck_mtx_lock(bpf_mlock);
			if (error) {
				printf("%s: ifpromisc %s%d failed (%d)\n",
				    __func__, ifnet_name(bp->bif_ifp),
				    ifnet_unit(bp->bif_ifp), error);
			} else {
				d->bd_promisc = 1;
			}
		}
	}
	/* No attachment with that DLT exists on this interface: EINVAL. */
	return bp == NULL ? EINVAL : 0;
}
2409
2410 static int
bpf_set_traffic_class(struct bpf_d * d,int tc)2411 bpf_set_traffic_class(struct bpf_d *d, int tc)
2412 {
2413 int error = 0;
2414
2415 if (!SO_VALID_TC(tc)) {
2416 error = EINVAL;
2417 } else {
2418 d->bd_traffic_class = tc;
2419 }
2420
2421 return error;
2422 }
2423
2424 static void
bpf_set_packet_service_class(struct mbuf * m,int tc)2425 bpf_set_packet_service_class(struct mbuf *m, int tc)
2426 {
2427 if (!(m->m_flags & M_PKTHDR)) {
2428 return;
2429 }
2430
2431 VERIFY(SO_VALID_TC(tc));
2432 (void) m_set_service_class(m, so_tc2msc(tc));
2433 }
2434
2435 /*
2436 * Support for select()
2437 *
2438 * Return true iff the specific operation will not block indefinitely.
2439 * Otherwise, return false but make a note that a selwakeup() must be done.
2440 */
int
bpfselect(dev_t dev, int which, void * wql, struct proc *p)
{
	struct bpf_d *d;
	int ret = 0;

	lck_mtx_lock(bpf_mlock);

	/* The minor device must be open and not in the middle of closing */
	d = bpf_dtab[minor(dev)];
	if (d == NULL || d == BPF_DEV_RESERVED ||
	    (d->bd_flags & BPF_CLOSING) != 0) {
		lck_mtx_unlock(bpf_mlock);
		return ENXIO;
	}

	/* Take a reference so the descriptor cannot go away under us */
	bpf_acquire_d(d);

	/* Not attached to an interface: nothing to select on */
	if (d->bd_bif == NULL) {
		bpf_release_d(d);
		lck_mtx_unlock(bpf_mlock);
		return ENXIO;
	}

	/* Wait for any in-progress read to release the hold buffer */
	while (d->bd_hbuf_read != 0) {
		msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
	}

	/* The descriptor may have started closing while we slept */
	if ((d->bd_flags & BPF_CLOSING) != 0) {
		bpf_release_d(d);
		lck_mtx_unlock(bpf_mlock);
		return ENXIO;
	}

	switch (which) {
	case FREAD:
		/*
		 * Readable if the hold buffer has data, or the store buffer
		 * has data and immediate mode is set or the timer expired.
		 */
		if (d->bd_hlen != 0 ||
		    ((d->bd_immediate ||
		    d->bd_state == BPF_TIMED_OUT) && d->bd_slen != 0)) {
			ret = 1; /* read has data to return */
		} else {
			/*
			 * Read has no data to return.
			 * Make the select wait, and start a timer if
			 * necessary.
			 */
			selrecord(p, &d->bd_sel, wql);
			bpf_start_timer(d);
		}
		break;

	case FWRITE:
		/* can't determine whether a write would block */
		ret = 1;
		break;
	}

	bpf_release_d(d);
	lck_mtx_unlock(bpf_mlock);

	return ret;
}
2502
2503 /*
2504 * Support for kevent() system call. Register EVFILT_READ filters and
2505 * reject all others.
2506 */
int bpfkqfilter(dev_t dev, struct knote *kn);
static void filt_bpfdetach(struct knote *);
static int filt_bpfread(struct knote *, long);
static int filt_bpftouch(struct knote *kn, struct kevent_qos_s *kev);
static int filt_bpfprocess(struct knote *kn, struct kevent_qos_s *kev);

/* Filter operations vector for EVFILT_READ knotes on bpf devices */
SECURITY_READ_ONLY_EARLY(struct filterops) bpfread_filtops = {
	.f_isfd = 1,
	.f_detach = filt_bpfdetach,
	.f_event = filt_bpfread,
	.f_touch = filt_bpftouch,
	.f_process = filt_bpfprocess,
};
2520
/*
 * Shared EVFILT_READ logic: decide whether a read on 'd' would return
 * data, and when 'kev' is non-NULL fill it in with the byte count.
 * When not ready, arms the read timer.  Callers in this file other
 * than filt_bpfread() acquire bpf_mlock around this call.
 */
static int
filt_bpfread_common(struct knote *kn, struct kevent_qos_s *kev, struct bpf_d *d)
{
	int ready = 0;
	int64_t data = 0;

	if (d->bd_immediate) {
		/*
		 * If there's data in the hold buffer, it's the
		 * amount of data a read will return.
		 *
		 * If there's no data in the hold buffer, but
		 * there's data in the store buffer, a read will
		 * immediately rotate the store buffer to the
		 * hold buffer, the amount of data in the store
		 * buffer is the amount of data a read will
		 * return.
		 *
		 * If there's no data in either buffer, we're not
		 * ready to read.
		 */
		/* bd_hbuf_read != 0 means a reader owns the hold buffer */
		data = (d->bd_hlen == 0 || d->bd_hbuf_read != 0 ?
		    d->bd_slen : d->bd_hlen);
		/* Low watermark is capped at the buffer size */
		int64_t lowwat = knote_low_watermark(kn);
		if (lowwat > d->bd_bufsize) {
			lowwat = d->bd_bufsize;
		}
		ready = (data >= lowwat);
	} else {
		/*
		 * If there's data in the hold buffer, it's the
		 * amount of data a read will return.
		 *
		 * If there's no data in the hold buffer, but
		 * there's data in the store buffer, if the
		 * timer has expired a read will immediately
		 * rotate the store buffer to the hold buffer,
		 * so the amount of data in the store buffer is
		 * the amount of data a read will return.
		 *
		 * If there's no data in either buffer, or there's
		 * no data in the hold buffer and the timer hasn't
		 * expired, we're not ready to read.
		 */
		data = ((d->bd_hlen == 0 || d->bd_hbuf_read != 0) &&
		    d->bd_state == BPF_TIMED_OUT ? d->bd_slen : d->bd_hlen);
		ready = (data > 0);
	}
	if (!ready) {
		bpf_start_timer(d);
	} else if (kev) {
		knote_fill_kevent(kn, kev, data);
	}

	return ready;
}
2577
int
bpfkqfilter(dev_t dev, struct knote *kn)
{
	struct bpf_d *d;
	int res;

	/*
	 * Is this device a bpf?
	 */
	if (major(dev) != CDEV_MAJOR || kn->kn_filter != EVFILT_READ) {
		knote_set_error(kn, EINVAL);
		return 0;
	}

	lck_mtx_lock(bpf_mlock);

	d = bpf_dtab[minor(dev)];

	/* Device must be open, not closing, and attached to an interface */
	if (d == NULL || d == BPF_DEV_RESERVED ||
	    (d->bd_flags & BPF_CLOSING) != 0 ||
	    d->bd_bif == NULL) {
		lck_mtx_unlock(bpf_mlock);
		knote_set_error(kn, ENXIO);
		return 0;
	}

	/* Attach the knote and remember that one is attached */
	kn->kn_hook = d;
	kn->kn_filtid = EVFILTID_BPFREAD;
	KNOTE_ATTACH(&d->bd_sel.si_note, kn);
	d->bd_flags |= BPF_KNOTE;

	/* capture the current state */
	res = filt_bpfread_common(kn, NULL, d);

	lck_mtx_unlock(bpf_mlock);

	return res;
}
2616
2617 static void
filt_bpfdetach(struct knote * kn)2618 filt_bpfdetach(struct knote *kn)
2619 {
2620 struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2621
2622 lck_mtx_lock(bpf_mlock);
2623 if (d->bd_flags & BPF_KNOTE) {
2624 KNOTE_DETACH(&d->bd_sel.si_note, kn);
2625 d->bd_flags &= ~BPF_KNOTE;
2626 }
2627 lck_mtx_unlock(bpf_mlock);
2628 }
2629
2630 static int
filt_bpfread(struct knote * kn,long hint)2631 filt_bpfread(struct knote *kn, long hint)
2632 {
2633 #pragma unused(hint)
2634 struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2635
2636 return filt_bpfread_common(kn, NULL, d);
2637 }
2638
2639 static int
filt_bpftouch(struct knote * kn,struct kevent_qos_s * kev)2640 filt_bpftouch(struct knote *kn, struct kevent_qos_s *kev)
2641 {
2642 struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2643 int res;
2644
2645 lck_mtx_lock(bpf_mlock);
2646
2647 /* save off the lowat threshold and flag */
2648 kn->kn_sdata = kev->data;
2649 kn->kn_sfflags = kev->fflags;
2650
2651 /* output data will be re-generated here */
2652 res = filt_bpfread_common(kn, NULL, d);
2653
2654 lck_mtx_unlock(bpf_mlock);
2655
2656 return res;
2657 }
2658
2659 static int
filt_bpfprocess(struct knote * kn,struct kevent_qos_s * kev)2660 filt_bpfprocess(struct knote *kn, struct kevent_qos_s *kev)
2661 {
2662 struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2663 int res;
2664
2665 lck_mtx_lock(bpf_mlock);
2666 res = filt_bpfread_common(kn, kev, d);
2667 lck_mtx_unlock(bpf_mlock);
2668
2669 return res;
2670 }
2671
2672 /*
2673 * Copy data from an mbuf chain into a buffer. This code is derived
2674 * from m_copydata in kern/uipc_mbuf.c.
2675 */
2676 static void
bpf_mcopy(struct mbuf * m,void * dst_arg,size_t len)2677 bpf_mcopy(struct mbuf * m, void *dst_arg, size_t len)
2678 {
2679 u_int count;
2680 u_char *dst;
2681
2682 dst = dst_arg;
2683 while (len > 0) {
2684 if (m == 0) {
2685 panic("bpf_mcopy");
2686 }
2687 count = MIN(m->m_len, (u_int)len);
2688 bcopy(mbuf_data(m), dst, count);
2689 m = m->m_next;
2690 dst += count;
2691 len -= count;
2692 }
2693 }
2694
/*
 * Core tap routine: match the packet's interface/DLT to a bpf
 * attachment, run each attached descriptor's filter, and capture the
 * packet into every descriptor whose filter accepts it.
 */
static inline void
bpf_tap_imp(
	ifnet_t ifp,
	u_int32_t dlt,
	struct bpf_packet *bpf_pkt,
	int outbound)
{
	struct bpf_d *d;
	u_int slen;
	struct bpf_if *bp;

	/*
	 * It's possible that we get here after the bpf descriptor has been
	 * detached from the interface; in such a case we simply return.
	 * Lock ordering is important since we can be called asynchronously
	 * (from IOKit) to process an inbound packet; when that happens
	 * we would have been holding its "gateLock" and will be acquiring
	 * "bpf_mlock" upon entering this routine. Due to that, we release
	 * "bpf_mlock" prior to calling ifnet_set_promiscuous (which will
	 * acquire "gateLock" in the IOKit), in order to avoid a deadlock
	 * when a ifnet_set_promiscuous request simultaneously collides with
	 * an inbound packet being passed into the tap callback.
	 */
	lck_mtx_lock(bpf_mlock);
	if (ifp->if_bpf == NULL) {
		lck_mtx_unlock(bpf_mlock);
		return;
	}
	/* Find the attachment on this interface matching the DLT */
	for (bp = ifp->if_bpf; bp != NULL; bp = bp->bif_next) {
		if (bp->bif_ifp != ifp) {
			/* wrong interface */
			bp = NULL;
			break;
		}
		if (dlt == 0 || bp->bif_dlt == dlt) {
			/* tapping default DLT or DLT matches */
			break;
		}
	}
	if (bp == NULL) {
		goto done;
	}
	/* Offer the packet to every descriptor attached at this DLT */
	for (d = bp->bif_dlist; d != NULL; d = d->bd_next) {
		struct bpf_packet *bpf_pkt_saved = bpf_pkt;
		struct bpf_packet bpf_pkt_tmp = {};
		struct pktap_header_buffer bpfp_header_tmp = {};

		/* Skip outbound packets if the descriptor opted out */
		if (outbound && !d->bd_seesent) {
			continue;
		}

		++d->bd_rcount;
		/* slen is the number of bytes the filter wants captured */
		slen = bpf_filter(d->bd_filter, (u_char *)bpf_pkt,
		    (u_int)bpf_pkt->bpfp_total_length, 0);

		if (bp->bif_ifp->if_type == IFT_PKTAP &&
		    bp->bif_dlt == DLT_PKTAP) {
			if (d->bd_flags & BPF_TRUNCATE) {
				slen = min(slen, get_pkt_trunc_len(bpf_pkt));
			}
			/*
			 * Need to copy the bpf_pkt because the conversion
			 * to v2 pktap header modifies the content of the
			 * bpfp_header
			 */
			if ((d->bd_flags & BPF_PKTHDRV2) &&
			    bpf_pkt->bpfp_header_length <= sizeof(bpfp_header_tmp)) {
				bpf_pkt_tmp = *bpf_pkt;

				bpf_pkt = &bpf_pkt_tmp;

				memcpy(&bpfp_header_tmp, bpf_pkt->bpfp_header,
				    bpf_pkt->bpfp_header_length);

				bpf_pkt->bpfp_header = &bpfp_header_tmp;

				convert_to_pktap_header_to_v2(bpf_pkt,
				    !!(d->bd_flags & BPF_TRUNCATE));
			}
		}
		if (slen != 0) {
			catchpacket(d, bpf_pkt, slen, outbound);
		}
		/* Restore the original packet for the next descriptor */
		bpf_pkt = bpf_pkt_saved;
	}

done:
	lck_mtx_unlock(bpf_mlock);
}
2784
2785 static inline void
bpf_tap_mbuf(ifnet_t ifp,u_int32_t dlt,mbuf_t m,void * hdr,size_t hlen,int outbound)2786 bpf_tap_mbuf(
2787 ifnet_t ifp,
2788 u_int32_t dlt,
2789 mbuf_t m,
2790 void* hdr,
2791 size_t hlen,
2792 int outbound)
2793 {
2794 struct bpf_packet bpf_pkt;
2795 struct mbuf *m0;
2796
2797 if (ifp->if_bpf == NULL) {
2798 /* quickly check without taking lock */
2799 return;
2800 }
2801 bpf_pkt.bpfp_type = BPF_PACKET_TYPE_MBUF;
2802 bpf_pkt.bpfp_mbuf = m;
2803 bpf_pkt.bpfp_total_length = 0;
2804 for (m0 = m; m0 != NULL; m0 = m0->m_next) {
2805 bpf_pkt.bpfp_total_length += m0->m_len;
2806 }
2807 bpf_pkt.bpfp_header = hdr;
2808 if (hdr != NULL) {
2809 bpf_pkt.bpfp_total_length += hlen;
2810 bpf_pkt.bpfp_header_length = hlen;
2811 } else {
2812 bpf_pkt.bpfp_header_length = 0;
2813 }
2814 bpf_tap_imp(ifp, dlt, &bpf_pkt, outbound);
2815 }
2816
2817 void
bpf_tap_out(ifnet_t ifp,u_int32_t dlt,mbuf_t m,void * hdr,size_t hlen)2818 bpf_tap_out(
2819 ifnet_t ifp,
2820 u_int32_t dlt,
2821 mbuf_t m,
2822 void* hdr,
2823 size_t hlen)
2824 {
2825 bpf_tap_mbuf(ifp, dlt, m, hdr, hlen, 1);
2826 }
2827
2828 void
bpf_tap_in(ifnet_t ifp,u_int32_t dlt,mbuf_t m,void * hdr,size_t hlen)2829 bpf_tap_in(
2830 ifnet_t ifp,
2831 u_int32_t dlt,
2832 mbuf_t m,
2833 void* hdr,
2834 size_t hlen)
2835 {
2836 bpf_tap_mbuf(ifp, dlt, m, hdr, hlen, 0);
2837 }
2838
2839 /* Callback registered with Ethernet driver. */
2840 static int
bpf_tap_callback(struct ifnet * ifp,struct mbuf * m)2841 bpf_tap_callback(struct ifnet *ifp, struct mbuf *m)
2842 {
2843 bpf_tap_mbuf(ifp, 0, m, NULL, 0, mbuf_pkthdr_rcvif(m) == NULL);
2844
2845 return 0;
2846 }
2847
2848 #if SKYWALK
2849 #include <skywalk/os_skywalk_private.h>
2850
2851 static void
bpf_pktcopy(kern_packet_t pkt,void * dst_arg,size_t len)2852 bpf_pktcopy(kern_packet_t pkt, void *dst_arg, size_t len)
2853 {
2854 kern_buflet_t buflet = NULL;
2855 size_t count;
2856 u_char *dst;
2857
2858 dst = dst_arg;
2859 while (len > 0) {
2860 uint8_t *addr;
2861
2862 u_int32_t buflet_length;
2863
2864 buflet = kern_packet_get_next_buflet(pkt, buflet);
2865 VERIFY(buflet != NULL);
2866 addr = kern_buflet_get_data_address(buflet);
2867 VERIFY(addr != NULL);
2868 addr += kern_buflet_get_data_offset(buflet);
2869 buflet_length = kern_buflet_get_data_length(buflet);
2870 count = MIN(buflet_length, len);
2871 bcopy((void *)addr, (void *)dst, count);
2872 dst += count;
2873 len -= count;
2874 }
2875 }
2876
/*
 * Tap a skywalk kern_packet: wrap it (or its attached mbuf when one
 * exists) in a bpf_packet and hand it to bpf_tap_imp().
 */
static inline void
bpf_tap_packet(
	ifnet_t ifp,
	u_int32_t dlt,
	kern_packet_t pkt,
	void* hdr,
	size_t hlen,
	int outbound)
{
	struct bpf_packet bpf_pkt;
	struct mbuf * m;

	if (ifp->if_bpf == NULL) {
		/* quickly check without taking lock */
		return;
	}
	m = kern_packet_get_mbuf(pkt);
	if (m != NULL) {
		/* Prefer the attached mbuf representation when present */
		bpf_pkt.bpfp_type = BPF_PACKET_TYPE_MBUF;
		bpf_pkt.bpfp_mbuf = m;
		bpf_pkt.bpfp_total_length = m_length(m);
	} else {
		bpf_pkt.bpfp_type = BPF_PACKET_TYPE_PKT;
		bpf_pkt.bpfp_pkt = pkt;
		bpf_pkt.bpfp_total_length = kern_packet_get_data_length(pkt);
	}
	/* Account for the optional prepended header */
	bpf_pkt.bpfp_header = hdr;
	bpf_pkt.bpfp_header_length = hlen;
	if (hlen != 0) {
		bpf_pkt.bpfp_total_length += hlen;
	}
	bpf_tap_imp(ifp, dlt, &bpf_pkt, outbound);
}
2910
2911 void
bpf_tap_packet_out(ifnet_t ifp,u_int32_t dlt,kern_packet_t pkt,void * hdr,size_t hlen)2912 bpf_tap_packet_out(
2913 ifnet_t ifp,
2914 u_int32_t dlt,
2915 kern_packet_t pkt,
2916 void* hdr,
2917 size_t hlen)
2918 {
2919 bpf_tap_packet(ifp, dlt, pkt, hdr, hlen, 1);
2920 }
2921
2922 void
bpf_tap_packet_in(ifnet_t ifp,u_int32_t dlt,kern_packet_t pkt,void * hdr,size_t hlen)2923 bpf_tap_packet_in(
2924 ifnet_t ifp,
2925 u_int32_t dlt,
2926 kern_packet_t pkt,
2927 void* hdr,
2928 size_t hlen)
2929 {
2930 bpf_tap_packet(ifp, dlt, pkt, hdr, hlen, 0);
2931 }
2932
2933 #endif /* SKYWALK */
2934
2935 static errno_t
bpf_copydata(struct bpf_packet * pkt,size_t off,size_t len,void * out_data)2936 bpf_copydata(struct bpf_packet *pkt, size_t off, size_t len, void* out_data)
2937 {
2938 errno_t err = 0;
2939 if (pkt->bpfp_type == BPF_PACKET_TYPE_MBUF) {
2940 err = mbuf_copydata(pkt->bpfp_mbuf, off, len, out_data);
2941 #if SKYWALK
2942 } else if (pkt->bpfp_type == BPF_PACKET_TYPE_PKT) {
2943 err = kern_packet_copy_bytes(pkt->bpfp_pkt, off, len, out_data);
2944 #endif /* SKYWALK */
2945 } else {
2946 err = EINVAL;
2947 }
2948
2949 return err;
2950 }
2951
/*
 * Flatten up to 'len' bytes of a bpf_packet (optional prepended header
 * followed by the mbuf chain or kern_packet payload) into 'dst'.
 */
static void
copy_bpf_packet(struct bpf_packet * pkt, void * dst, size_t len)
{
	/* copy the optional header */
	if (pkt->bpfp_header_length != 0) {
		size_t count = MIN(len, pkt->bpfp_header_length);
		bcopy(pkt->bpfp_header, dst, count);
		len -= count;
		dst = (void *)((uintptr_t)dst + count);
	}
	if (len == 0) {
		/* nothing past the header */
		return;
	}
	/* copy the packet */
	switch (pkt->bpfp_type) {
	case BPF_PACKET_TYPE_MBUF:
		bpf_mcopy(pkt->bpfp_mbuf, dst, len);
		break;
#if SKYWALK
	case BPF_PACKET_TYPE_PKT:
		bpf_pktcopy(pkt->bpfp_pkt, dst, len);
		break;
#endif /* SKYWALK */
	default:
		break;
	}
}
2980
2981 static uint32_t
get_esp_trunc_len(__unused struct bpf_packet * pkt,__unused uint32_t off,const uint32_t remaining_caplen)2982 get_esp_trunc_len(__unused struct bpf_packet *pkt, __unused uint32_t off,
2983 const uint32_t remaining_caplen)
2984 {
2985 /*
2986 * For some reason tcpdump expects to have one byte beyond the ESP header
2987 */
2988 uint32_t trunc_len = ESP_HDR_SIZE + 1;
2989
2990 if (trunc_len > remaining_caplen) {
2991 return remaining_caplen;
2992 }
2993
2994 return trunc_len;
2995 }
2996
2997 static uint32_t
get_isakmp_trunc_len(__unused struct bpf_packet * pkt,__unused uint32_t off,const uint32_t remaining_caplen)2998 get_isakmp_trunc_len(__unused struct bpf_packet *pkt, __unused uint32_t off,
2999 const uint32_t remaining_caplen)
3000 {
3001 /*
3002 * Include the payload generic header
3003 */
3004 uint32_t trunc_len = ISAKMP_HDR_SIZE;
3005
3006 if (trunc_len > remaining_caplen) {
3007 return remaining_caplen;
3008 }
3009
3010 return trunc_len;
3011 }
3012
3013 static uint32_t
get_isakmp_natt_trunc_len(struct bpf_packet * pkt,uint32_t off,const uint32_t remaining_caplen)3014 get_isakmp_natt_trunc_len(struct bpf_packet *pkt, uint32_t off,
3015 const uint32_t remaining_caplen)
3016 {
3017 int err = 0;
3018 uint32_t trunc_len = 0;
3019 char payload[remaining_caplen];
3020
3021 err = bpf_copydata(pkt, off, remaining_caplen, payload);
3022 if (err != 0) {
3023 return remaining_caplen;
3024 }
3025 /*
3026 * They are three cases:
3027 * - IKE: payload start with 4 bytes header set to zero before ISAKMP header
3028 * - keep alive: 1 byte payload
3029 * - otherwise it's ESP
3030 */
3031 if (remaining_caplen >= 4 &&
3032 payload[0] == 0 && payload[1] == 0 &&
3033 payload[2] == 0 && payload[3] == 0) {
3034 trunc_len = 4 + get_isakmp_trunc_len(pkt, off + 4, remaining_caplen - 4);
3035 } else if (remaining_caplen == 1) {
3036 trunc_len = 1;
3037 } else {
3038 trunc_len = get_esp_trunc_len(pkt, off, remaining_caplen);
3039 }
3040
3041 if (trunc_len > remaining_caplen) {
3042 return remaining_caplen;
3043 }
3044
3045 return trunc_len;
3046 }
3047
/*
 * Truncation length for a UDP datagram starting at 'off': the full
 * payload for DNS, BOOTP and DHCP; UDP header plus nested ISAKMP or
 * NAT-T data for IKE ports; otherwise just the UDP header.
 */
static uint32_t
get_udp_trunc_len(struct bpf_packet *pkt, uint32_t off, const uint32_t remaining_caplen)
{
	int err = 0;
	uint32_t trunc_len = sizeof(struct udphdr); /* By default no UDP payload */

	if (trunc_len >= remaining_caplen) {
		return remaining_caplen;
	}

	struct udphdr udphdr;
	err = bpf_copydata(pkt, off, sizeof(struct udphdr), &udphdr);
	if (err != 0) {
		return remaining_caplen;
	}

	u_short sport, dport;

	sport = EXTRACT_SHORT(&udphdr.uh_sport);
	dport = EXTRACT_SHORT(&udphdr.uh_dport);

	if (dport == PORT_DNS || sport == PORT_DNS) {
		/*
		 * Full UDP payload for DNS
		 */
		trunc_len = remaining_caplen;
	} else if ((sport == PORT_BOOTPS && dport == PORT_BOOTPC) ||
	    (sport == PORT_BOOTPC && dport == PORT_BOOTPS)) {
		/*
		 * Full UDP payload for BOOTP and DHCP
		 */
		trunc_len = remaining_caplen;
	} else if (dport == PORT_ISAKMP && sport == PORT_ISAKMP) {
		/*
		 * Return the ISAKMP header
		 */
		trunc_len += get_isakmp_trunc_len(pkt, off + sizeof(struct udphdr),
		    remaining_caplen - sizeof(struct udphdr));
	} else if (dport == PORT_ISAKMP_NATT && sport == PORT_ISAKMP_NATT) {
		trunc_len += get_isakmp_natt_trunc_len(pkt, off + sizeof(struct udphdr),
		    remaining_caplen - sizeof(struct udphdr));
	}
	/* Never keep more than what was captured */
	if (trunc_len >= remaining_caplen) {
		return remaining_caplen;
	}

	return trunc_len;
}
3096
/*
 * Truncation length for a TCP segment starting at 'off': the full
 * payload for DNS, otherwise the TCP header including options
 * (derived from th_off).
 */
static uint32_t
get_tcp_trunc_len(struct bpf_packet *pkt, uint32_t off, const uint32_t remaining_caplen)
{
	int err = 0;
	uint32_t trunc_len = sizeof(struct tcphdr); /* By default no TCP payload */
	if (trunc_len >= remaining_caplen) {
		return remaining_caplen;
	}

	struct tcphdr tcphdr;
	err = bpf_copydata(pkt, off, sizeof(struct tcphdr), &tcphdr);
	if (err != 0) {
		return remaining_caplen;
	}

	u_short sport, dport;
	sport = EXTRACT_SHORT(&tcphdr.th_sport);
	dport = EXTRACT_SHORT(&tcphdr.th_dport);

	if (dport == PORT_DNS || sport == PORT_DNS) {
		/*
		 * Full TCP payload for DNS
		 */
		trunc_len = remaining_caplen;
	} else {
		/* th_off counts 32-bit words: shift to get bytes */
		trunc_len = (uint16_t)(tcphdr.th_off << 2);
	}
	/* Never keep more than what was captured */
	if (trunc_len >= remaining_caplen) {
		return remaining_caplen;
	}

	return trunc_len;
}
3130
3131 static uint32_t
get_proto_trunc_len(uint8_t proto,struct bpf_packet * pkt,uint32_t off,const uint32_t remaining_caplen)3132 get_proto_trunc_len(uint8_t proto, struct bpf_packet *pkt, uint32_t off, const uint32_t remaining_caplen)
3133 {
3134 uint32_t trunc_len;
3135
3136 switch (proto) {
3137 case IPPROTO_ICMP: {
3138 /*
3139 * Full IMCP payload
3140 */
3141 trunc_len = remaining_caplen;
3142 break;
3143 }
3144 case IPPROTO_ICMPV6: {
3145 /*
3146 * Full IMCPV6 payload
3147 */
3148 trunc_len = remaining_caplen;
3149 break;
3150 }
3151 case IPPROTO_IGMP: {
3152 /*
3153 * Full IGMP payload
3154 */
3155 trunc_len = remaining_caplen;
3156 break;
3157 }
3158 case IPPROTO_UDP: {
3159 trunc_len = get_udp_trunc_len(pkt, off, remaining_caplen);
3160 break;
3161 }
3162 case IPPROTO_TCP: {
3163 trunc_len = get_tcp_trunc_len(pkt, off, remaining_caplen);
3164 break;
3165 }
3166 case IPPROTO_ESP: {
3167 trunc_len = get_esp_trunc_len(pkt, off, remaining_caplen);
3168 break;
3169 }
3170 default: {
3171 /*
3172 * By default we only include the IP header
3173 */
3174 trunc_len = 0;
3175 break;
3176 }
3177 }
3178 if (trunc_len >= remaining_caplen) {
3179 return remaining_caplen;
3180 }
3181
3182 return trunc_len;
3183 }
3184
/*
 * Truncation length for an IPv4 packet starting at 'off': the IP
 * header (including options, from ip_hl) plus the protocol-specific
 * truncation of the payload, clamped to the remaining capture length.
 */
static uint32_t
get_ip_trunc_len(struct bpf_packet *pkt, uint32_t off, const uint32_t remaining_caplen)
{
	int err = 0;
	uint32_t iplen = sizeof(struct ip);
	if (iplen >= remaining_caplen) {
		return remaining_caplen;
	}

	struct ip iphdr;
	err = bpf_copydata(pkt, off, sizeof(struct ip), &iphdr);
	if (err != 0) {
		return remaining_caplen;
	}

	uint8_t proto = 0;

	/* ip_hl counts 32-bit words: shift to get the header byte length */
	iplen = (uint16_t)(iphdr.ip_hl << 2);
	if (iplen >= remaining_caplen) {
		return remaining_caplen;
	}

	proto = iphdr.ip_p;
	iplen += get_proto_trunc_len(proto, pkt, off + iplen, remaining_caplen - iplen);

	if (iplen >= remaining_caplen) {
		return remaining_caplen;
	}

	return iplen;
}
3216
/*
 * Truncation length for an IPv6 packet starting at 'off': the fixed
 * IPv6 header plus the protocol-specific truncation of the payload,
 * clamped to the remaining capture length.  Extension headers are
 * not walked (see TBD below).
 */
static uint32_t
get_ip6_trunc_len(struct bpf_packet *pkt, uint32_t off, const uint32_t remaining_caplen)
{
	int err = 0;
	uint32_t iplen = sizeof(struct ip6_hdr);
	if (iplen >= remaining_caplen) {
		return remaining_caplen;
	}

	struct ip6_hdr ip6hdr;
	err = bpf_copydata(pkt, off, sizeof(struct ip6_hdr), &ip6hdr);
	if (err != 0) {
		return remaining_caplen;
	}

	uint8_t proto = 0;

	/*
	 * TBD: process the extension headers
	 */
	proto = ip6hdr.ip6_nxt;
	iplen += get_proto_trunc_len(proto, pkt, off + iplen, remaining_caplen - iplen);

	if (iplen >= remaining_caplen) {
		return remaining_caplen;
	}

	return iplen;
}
3246
/*
 * Truncation length for an Ethernet frame starting at 'off': the
 * Ethernet header plus full ARP, an IPv4/IPv6-specific truncation,
 * or BPF_MIN_PKT_SIZE for other ethertypes.
 */
static uint32_t
get_ether_trunc_len(struct bpf_packet *pkt, uint32_t off, const uint32_t remaining_caplen)
{
	int err = 0;
	uint32_t ethlen = sizeof(struct ether_header);
	if (ethlen >= remaining_caplen) {
		return remaining_caplen;
	}

	struct ether_header eh = {};
	err = bpf_copydata(pkt, off, sizeof(struct ether_header), &eh);
	if (err != 0) {
		return remaining_caplen;
	}

	u_short type = EXTRACT_SHORT(&eh.ether_type);
	/* Include full ARP */
	if (type == ETHERTYPE_ARP) {
		ethlen = remaining_caplen;
	} else if (type == ETHERTYPE_IP) {
		ethlen += get_ip_trunc_len(pkt, off + sizeof(struct ether_header),
		    remaining_caplen - ethlen);
	} else if (type == ETHERTYPE_IPV6) {
		ethlen += get_ip6_trunc_len(pkt, off + sizeof(struct ether_header),
		    remaining_caplen - ethlen);
	} else {
		/* Unknown ethertype: keep a minimal capture */
		ethlen = MIN(BPF_MIN_PKT_SIZE, remaining_caplen);
	}
	return ethlen;
}
3277
3278 #include <kern/assert.h>
3279
/*
 * Compute the truncated capture length of a pktap packet: the pktap
 * header plus the L2 header plus a protocol-aware truncation of the
 * payload.  On overflow the result is clamped to bpfp_total_length
 * and bpf_trunc_overflow is incremented.
 */
static uint32_t
get_pkt_trunc_len(struct bpf_packet *pkt)
{
	struct pktap_header *pktap = (struct pktap_header *) (pkt->bpfp_header);
	uint32_t in_pkt_len = 0;
	uint32_t out_pkt_len = 0;
	uint32_t tlen = 0;
	uint32_t pre_adjust; // L2 header not in mbuf or kern_packet

	// bpfp_total_length must contain the BPF packet header
	assert3u(pkt->bpfp_total_length, >=, pkt->bpfp_header_length);

	// The BPF packet header must contain the pktap header
	assert3u(pkt->bpfp_header_length, >=, pktap->pth_length);

	// The pre frame length (L2 header) must be contained in the packet
	assert3u(pkt->bpfp_total_length, >=, pktap->pth_length + pktap->pth_frame_pre_length);

	/*
	 * pktap->pth_frame_pre_length is the L2 header length and accounts
	 * for both L2 header in the packet payload and pre_adjust.
	 *
	 * pre_adjust represents an adjustment for a pseudo L2 header that is not
	 * part of packet payload -- not in the mbuf or kern_packet -- and comes
	 * just after the pktap header.
	 *
	 * pktap->pth_length is the size of the pktap header (exclude pre_adjust)
	 *
	 * pkt->bpfp_header_length is (pktap->pth_length + pre_adjust)
	 */
	pre_adjust = (uint32_t)(pkt->bpfp_header_length - pktap->pth_length);

	if (pktap->pth_iftype == IFT_ETHER) {
		/*
		 * We need to parse the Ethernet header to find the network layer
		 * protocol
		 */
		in_pkt_len = (uint32_t)(pkt->bpfp_total_length - pktap->pth_length - pre_adjust);

		out_pkt_len = get_ether_trunc_len(pkt, 0, in_pkt_len);

		tlen = pktap->pth_length + pre_adjust + out_pkt_len;
	} else {
		/*
		 * For other interface types, we only know to parse IPv4 and IPv6.
		 *
		 * To get to the beginning of the IPv4 or IPv6 packet, we need to to skip
		 * over the L2 header that is the actual packet payload (mbuf or kern_packet)
		 */
		uint32_t off; // offset past the L2 header in the actual packet payload

		off = pktap->pth_frame_pre_length - pre_adjust;

		in_pkt_len = (uint32_t)(pkt->bpfp_total_length - pktap->pth_length - pktap->pth_frame_pre_length);

		if (pktap->pth_protocol_family == AF_INET) {
			out_pkt_len = get_ip_trunc_len(pkt, off, in_pkt_len);
		} else if (pktap->pth_protocol_family == AF_INET6) {
			out_pkt_len = get_ip6_trunc_len(pkt, off, in_pkt_len);
		} else {
			/* Unknown protocol family: keep a minimal capture */
			out_pkt_len = MIN(BPF_MIN_PKT_SIZE, in_pkt_len);
		}
		tlen = pktap->pth_length + pktap->pth_frame_pre_length + out_pkt_len;
	}

	// Verify we do not overflow the buffer
	if (__improbable(tlen > pkt->bpfp_total_length)) {
		bool do_panic = bpf_debug != 0 ? true : false;

#if DEBUG
		do_panic = true;
#endif /* DEBUG */
		if (do_panic) {
			panic("%s:%d tlen %u > bpfp_total_length %lu bpfp_header_length %lu pth_frame_pre_length %u pre_adjust %u in_pkt_len %u out_pkt_len %u\n",
			    __func__, __LINE__,
			    tlen, pkt->bpfp_total_length, pkt->bpfp_header_length, pktap->pth_frame_pre_length, pre_adjust, in_pkt_len, out_pkt_len);
		} else {
			os_log(OS_LOG_DEFAULT,
			    "%s:%d tlen %u > bpfp_total_length %lu bpfp_header_length %lu pth_frame_pre_length %u pre_adjust %u in_pkt_len %u out_pkt_len %u",
			    __func__, __LINE__,
			    tlen, pkt->bpfp_total_length, pkt->bpfp_header_length, pktap->pth_frame_pre_length, pre_adjust, in_pkt_len, out_pkt_len);
		}
		bpf_trunc_overflow += 1;
		tlen = (uint32_t)pkt->bpfp_total_length;
	}

	return tlen;
}
3368
3369 /*
3370 * Move the packet data from interface memory (pkt) into the
3371 * store buffer. Return 1 if it's time to wakeup a listener (buffer full),
3372 * otherwise 0.
3373 */
static void
catchpacket(struct bpf_d *d, struct bpf_packet * pkt,
    u_int snaplen, int outbound)
{
	struct bpf_hdr *hp;
	struct bpf_hdr_ext *ehp;
	int totlen, curlen;
	int hdrlen, caplen;
	int do_wakeup = 0;
	u_char *payload;
	struct timeval tv;

	/* Per-record header size depends on the extended-header option */
	hdrlen = (d->bd_flags & BPF_EXTENDED_HDR) ? d->bd_bif->bif_exthdrlen :
	    d->bd_bif->bif_hdrlen;
	/*
	 * Figure out how many bytes to move.  If the packet is
	 * greater or equal to the snapshot length, transfer that
	 * much.  Otherwise, transfer the whole packet (unless
	 * we hit the buffer size limit).
	 */
	totlen = hdrlen + MIN(snaplen, (int)pkt->bpfp_total_length);
	if (totlen > d->bd_bufsize) {
		totlen = d->bd_bufsize;
	}

	/* Buffer cannot even hold the record header: drop the packet */
	if (hdrlen > totlen) {
		return;
	}

	/*
	 * Round up the end of the previous packet to the next longword.
	 */
	curlen = BPF_WORDALIGN(d->bd_slen);
	if (curlen + totlen > d->bd_bufsize) {
		/*
		 * This packet will overflow the storage buffer.
		 * Rotate the buffers if we can, then wakeup any
		 * pending reads.
		 *
		 * We cannot rotate buffers if a read is in progress
		 * so drop the packet
		 */
		if (d->bd_hbuf_read != 0) {
			++d->bd_dcount;
			return;
		}

		if (d->bd_fbuf == NULL) {
			if (d->bd_headdrop == 0) {
				/*
				 * We haven't completed the previous read yet,
				 * so drop the packet.
				 */
				++d->bd_dcount;
				return;
			}
			/*
			 * Drop the hold buffer as it contains older packets
			 */
			d->bd_dcount += d->bd_hcnt;
			d->bd_fbuf = d->bd_hbuf;
			ROTATE_BUFFERS(d);
		} else {
			ROTATE_BUFFERS(d);
		}
		do_wakeup = 1;
		curlen = 0;
	} else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT) {
		/*
		 * Immediate mode is set, or the read timeout has
		 * already expired during a select call.  A packet
		 * arrived, so the reader should be woken up.
		 */
		do_wakeup = 1;
	}

	/*
	 * Append the bpf header.
	 */
	microtime(&tv);
	if (d->bd_flags & BPF_EXTENDED_HDR) {
		/* Extended header: timestamp, direction and flow metadata */
		ehp = (struct bpf_hdr_ext *)(void *)(d->bd_sbuf + curlen);
		memset(ehp, 0, sizeof(*ehp));
		ehp->bh_tstamp.tv_sec = (int)tv.tv_sec;
		ehp->bh_tstamp.tv_usec = tv.tv_usec;

		ehp->bh_datalen = (bpf_u_int32)pkt->bpfp_total_length;
		ehp->bh_hdrlen = (u_short)hdrlen;
		caplen = ehp->bh_caplen = totlen - hdrlen;
		payload = (u_char *)ehp + hdrlen;

		if (outbound) {
			ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_OUT;
		} else {
			ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_IN;
		}

		if (pkt->bpfp_type == BPF_PACKET_TYPE_MBUF) {
			struct mbuf *m = pkt->bpfp_mbuf;

			if (outbound) {
				/* only do lookups on non-raw INPCB */
				if ((m->m_pkthdr.pkt_flags & (PKTF_FLOW_ID |
				    PKTF_FLOW_LOCALSRC | PKTF_FLOW_RAWSOCK)) ==
				    (PKTF_FLOW_ID | PKTF_FLOW_LOCALSRC) &&
				    m->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
					ehp->bh_flowid = m->m_pkthdr.pkt_flowid;
					ehp->bh_proto = m->m_pkthdr.pkt_proto;
				}
				ehp->bh_svc = so_svc2tc(m->m_pkthdr.pkt_svc);
				/* Translate mbuf packet flags to bpf flags */
				if (m->m_pkthdr.pkt_flags & PKTF_TCP_REXMT) {
					ehp->bh_pktflags |= BPF_PKTFLAGS_TCP_REXMT;
				}
				if (m->m_pkthdr.pkt_flags & PKTF_START_SEQ) {
					ehp->bh_pktflags |= BPF_PKTFLAGS_START_SEQ;
				}
				if (m->m_pkthdr.pkt_flags & PKTF_LAST_PKT) {
					ehp->bh_pktflags |= BPF_PKTFLAGS_LAST_PKT;
				}
				if (m->m_pkthdr.pkt_flags & PKTF_VALID_UNSENT_DATA) {
					ehp->bh_unsent_bytes =
					    m->m_pkthdr.bufstatus_if;
					ehp->bh_unsent_snd =
					    m->m_pkthdr.bufstatus_sndbuf;
				}
			} else {
				if (m->m_pkthdr.pkt_flags & PKTF_WAKE_PKT) {
					ehp->bh_pktflags |= BPF_PKTFLAGS_WAKE_PKT;
				}
			}
#if SKYWALK
		} else {
			kern_packet_t kern_pkt = pkt->bpfp_pkt;

			if (outbound) {
				/*
				 * Note: pp_init() asserts that kern_packet_svc_class_t is equivalent
				 * to mbuf_svc_class_t
				 */
				ehp->bh_svc = so_svc2tc((mbuf_svc_class_t)kern_packet_get_service_class(kern_pkt));
				if (kern_packet_get_transport_retransmit(kern_pkt)) {
					ehp->bh_pktflags |= BPF_PKTFLAGS_TCP_REXMT;
				}
				if (kern_packet_get_transport_last_packet(kern_pkt)) {
					ehp->bh_pktflags |= BPF_PKTFLAGS_LAST_PKT;
				}
			} else {
				if (kern_packet_get_wake_flag(kern_pkt)) {
					ehp->bh_pktflags |= BPF_PKTFLAGS_WAKE_PKT;
				}
			}
#endif /* SKYWALK */
		}
	} else {
		/* Classic bpf header: timestamp and lengths only */
		hp = (struct bpf_hdr *)(void *)(d->bd_sbuf + curlen);
		hp->bh_tstamp.tv_sec = (int)tv.tv_sec;
		hp->bh_tstamp.tv_usec = tv.tv_usec;
		hp->bh_datalen = (bpf_u_int32)pkt->bpfp_total_length;
		hp->bh_hdrlen = (u_short)hdrlen;
		caplen = hp->bh_caplen = totlen - hdrlen;
		payload = (u_char *)hp + hdrlen;
	}
	/*
	 * Copy the packet data into the store buffer and update its length.
	 */
	copy_bpf_packet(pkt, payload, caplen);
	d->bd_slen = curlen + totlen;
	d->bd_scnt += 1;

	if (do_wakeup) {
		bpf_wakeup(d);
	}
}
3547
3548 /*
3549 * Initialize all nonzero fields of a descriptor.
3550 */
3551 static int
bpf_allocbufs(struct bpf_d * d)3552 bpf_allocbufs(struct bpf_d *d)
3553 {
3554 if (d->bd_sbuf != NULL) {
3555 kfree_data_addr(d->bd_sbuf);
3556 d->bd_sbuf = NULL;
3557 }
3558 if (d->bd_hbuf != NULL) {
3559 kfree_data_addr(d->bd_hbuf);
3560 d->bd_hbuf = NULL;
3561 }
3562 if (d->bd_fbuf != NULL) {
3563 kfree_data_addr(d->bd_fbuf);
3564 d->bd_fbuf = NULL;
3565 }
3566
3567 d->bd_fbuf = (caddr_t) kalloc_data(d->bd_bufsize, M_WAIT);
3568 if (d->bd_fbuf == NULL) {
3569 return ENOBUFS;
3570 }
3571
3572 d->bd_sbuf = (caddr_t) kalloc_data(d->bd_bufsize, M_WAIT);
3573 if (d->bd_sbuf == NULL) {
3574 kfree_data(d->bd_fbuf, d->bd_bufsize);
3575 d->bd_fbuf = NULL;
3576 return ENOBUFS;
3577 }
3578 d->bd_slen = 0;
3579 d->bd_hlen = 0;
3580 d->bd_scnt = 0;
3581 d->bd_hcnt = 0;
3582 return 0;
3583 }
3584
3585 /*
3586 * Free buffers currently in use by a descriptor.
3587 * Called on close.
3588 */
3589 static void
bpf_freed(struct bpf_d * d)3590 bpf_freed(struct bpf_d *d)
3591 {
3592 /*
3593 * We don't need to lock out interrupts since this descriptor has
3594 * been detached from its interface and it yet hasn't been marked
3595 * free.
3596 */
3597 if (d->bd_hbuf_read != 0) {
3598 panic("bpf buffer freed during read");
3599 }
3600
3601 if (d->bd_sbuf != 0) {
3602 kfree_data_addr(d->bd_sbuf);
3603 if (d->bd_hbuf != 0) {
3604 kfree_data_addr(d->bd_hbuf);
3605 }
3606 if (d->bd_fbuf != 0) {
3607 kfree_data_addr(d->bd_fbuf);
3608 }
3609 }
3610 if (d->bd_filter) {
3611 kfree_data_addr(d->bd_filter);
3612 }
3613 }
3614
3615 /*
3616 * Attach an interface to bpf. driverp is a pointer to a (struct bpf_if *)
3617 * in the driver's softc; dlt is the link layer type; hdrlen is the fixed
3618 * size of the link header (variable length headers not yet supported).
3619 */
3620 void
bpfattach(struct ifnet * ifp,u_int dlt,u_int hdrlen)3621 bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
3622 {
3623 bpf_attach(ifp, dlt, hdrlen, NULL, NULL);
3624 }
3625
/*
 * Attach an (interface, dlt) pair to BPF, with optional driver callbacks
 * for sending packets (send) and controlling the tap (tap). Entries for
 * a given interface are kept contiguous in bpf_iflist, which the
 * insertion logic below relies on and maintains.
 * Returns 0 on success or EEXIST if the pair is already attached.
 */
errno_t
bpf_attach(
	ifnet_t ifp,
	u_int32_t dlt,
	u_int32_t hdrlen,
	bpf_send_func send,
	bpf_tap_func tap)
{
	struct bpf_if *bp;                      /* list cursor */
	struct bpf_if *bp_new;                  /* entry being inserted */
	struct bpf_if *bp_before_first = NULL;  /* predecessor of first entry for ifp */
	struct bpf_if *bp_first = NULL;         /* first existing entry for ifp */
	struct bpf_if *bp_last = NULL;          /* last existing entry for ifp */
	boolean_t found;

	/* Blocking, zero-filled allocation; a NULL return is unexpected. */
	bp_new = kalloc_type(struct bpf_if, M_WAIT | Z_ZERO);
	if (bp_new == 0) {
		panic("bpfattach");
	}

	lck_mtx_lock(bpf_mlock);

	/*
	 * Check if this interface/dlt is already attached. Remember the
	 * first and last attachment for this interface, as well as the
	 * element before the first attachment.
	 */
	found = FALSE;
	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		if (bp->bif_ifp != ifp) {
			if (bp_first != NULL) {
				/* no more elements for this interface */
				break;
			}
			bp_before_first = bp;
		} else {
			if (bp->bif_dlt == dlt) {
				found = TRUE;
				break;
			}
			if (bp_first == NULL) {
				bp_first = bp;
			}
			bp_last = bp;
		}
	}
	if (found) {
		/* Duplicate attachment: discard the unused new entry. */
		lck_mtx_unlock(bpf_mlock);
		printf("bpfattach - %s with dlt %d is already attached\n",
		    if_name(ifp), dlt);
		kfree_type(struct bpf_if, bp_new);
		return EEXIST;
	}

	bp_new->bif_ifp = ifp;
	bp_new->bif_dlt = dlt;
	bp_new->bif_send = send;
	bp_new->bif_tap = tap;

	if (bp_first == NULL) {
		/* No other entries for this ifp */
		bp_new->bif_next = bpf_iflist;
		bpf_iflist = bp_new;
	} else {
		/*
		 * For Ethernet interfaces, DLT_EN10MB is placed first so it
		 * is the default DLT found for the interface.
		 */
		if (ifnet_type(ifp) == IFT_ETHER && dlt == DLT_EN10MB) {
			/* Make this the first entry for this interface */
			if (bp_before_first != NULL) {
				/* point the previous to us */
				bp_before_first->bif_next = bp_new;
			} else {
				/* we're the new head */
				bpf_iflist = bp_new;
			}
			bp_new->bif_next = bp_first;
		} else {
			/* Add this after the last entry for this interface */
			bp_new->bif_next = bp_last->bif_next;
			bp_last->bif_next = bp_new;
		}
	}

	/*
	 * Compute the length of the bpf header. This is not necessarily
	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
	 * that the network layer header begins on a longword boundary (for
	 * performance reasons and to alleviate alignment restrictions).
	 */
	bp_new->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;
	bp_new->bif_exthdrlen = BPF_WORDALIGN(hdrlen +
	    sizeof(struct bpf_hdr_ext)) - hdrlen;

	/* Take a reference on the interface */
	ifnet_reference(ifp);

	lck_mtx_unlock(bpf_mlock);

#ifndef __APPLE__
	if (bootverbose) {
		printf("bpf: %s attached\n", if_name(ifp));
	}
#endif

	return 0;
}
3730
3731 /*
3732 * Detach bpf from an interface. This involves detaching each descriptor
3733 * associated with the interface, and leaving bd_bif NULL. Notify each
3734 * descriptor as it's detached so that any sleepers wake up and get
3735 * ENXIO.
3736 */
void
bpfdetach(struct ifnet *ifp)
{
	struct bpf_if *bp, *bp_prev, *bp_next;
	struct bpf_d *d;

	if (bpf_debug != 0) {
		printf("%s: %s\n", __func__, if_name(ifp));
	}

	lck_mtx_lock(bpf_mlock);

	/*
	 * Build the list of devices attached to that interface
	 * that we need to free while keeping the lock to maintain
	 * the integrity of the interface list
	 */
	bp_prev = NULL;
	for (bp = bpf_iflist; bp != NULL; bp = bp_next) {
		/* Save the successor now: bp is unlinked below. */
		bp_next = bp->bif_next;

		if (ifp != bp->bif_ifp) {
			bp_prev = bp;
			continue;
		}
		/* Unlink from the interface list */
		if (bp_prev) {
			bp_prev->bif_next = bp->bif_next;
		} else {
			bpf_iflist = bp->bif_next;
		}

		/* Detach the devices attached to the interface */
		while ((d = bp->bif_dlist) != NULL) {
			/*
			 * Take an extra reference to prevent the device
			 * from being freed when bpf_detachd() releases
			 * the reference for the interface list
			 */
			bpf_acquire_d(d);
			bpf_detachd(d, 0);
			bpf_wakeup(d);
			bpf_release_d(d);
		}
		/*
		 * Drop the ifnet reference taken by bpf_attach().
		 * NOTE(review): bp itself is not freed here — presumably
		 * released elsewhere (e.g. by bpf_detachd); confirm.
		 */
		ifnet_release(ifp);
	}

	lck_mtx_unlock(bpf_mlock);
}
3786
3787 void
bpf_init(__unused void * unused)3788 bpf_init(__unused void *unused)
3789 {
3790 #ifdef __APPLE__
3791 int maj;
3792
3793 if (bpf_devsw_installed == 0) {
3794 bpf_devsw_installed = 1;
3795 maj = cdevsw_add(CDEV_MAJOR, &bpf_cdevsw);
3796 if (maj == -1) {
3797 bpf_devsw_installed = 0;
3798 printf("bpf_init: failed to allocate a major number\n");
3799 return;
3800 }
3801
3802 for (int i = 0; i < NBPFILTER; i++) {
3803 bpf_make_dev_t(maj);
3804 }
3805 }
3806 #else
3807 cdevsw_add(&bpf_cdevsw);
3808 #endif
3809 }
3810
3811 #ifndef __APPLE__
3812 SYSINIT(bpfdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE + CDEV_MAJOR, bpf_drvinit, NULL);
3813 #endif
3814
/*
 * Sysctl handler that reads or updates bpf_maxbufsize, the global upper
 * bound on a BPF device's buffer size. Out-of-range requests (negative
 * or greater than BPF_MAXSIZE_CAP) are silently clamped to
 * BPF_MAXSIZE_CAP.
 */
static int
sysctl_bpf_maxbufsize SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	int i, err;

	/* Work on a copy so a failed update leaves the global untouched. */
	i = bpf_maxbufsize;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL) {
		/* Error, or a read-only request: nothing to store. */
		return err;
	}

	/* Clamp; note a negative request becomes the maximum. */
	if (i < 0 || i > BPF_MAXSIZE_CAP) {
		i = BPF_MAXSIZE_CAP;
	}

	bpf_maxbufsize = i;
	return err;
}
3835