xref: /xnu-8792.41.9/bsd/net/bpf.c (revision 5c2921b07a2480ab43ec66f5b9e41cb872bc554f)
1 /*
2  * Copyright (c) 2000-2022 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * Copyright (c) 1990, 1991, 1993
30  *	The Regents of the University of California.  All rights reserved.
31  *
32  * This code is derived from the Stanford/CMU enet packet filter,
33  * (net/enet.c) distributed as part of 4.3BSD, and code contributed
34  * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
35  * Berkeley Laboratory.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. All advertising materials mentioning features or use of this software
46  *    must display the following acknowledgement:
47  *	This product includes software developed by the University of
48  *	California, Berkeley and its contributors.
49  * 4. Neither the name of the University nor the names of its contributors
50  *    may be used to endorse or promote products derived from this software
51  *    without specific prior written permission.
52  *
53  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63  * SUCH DAMAGE.
64  *
65  *	@(#)bpf.c	8.2 (Berkeley) 3/28/94
66  *
67  * $FreeBSD: src/sys/net/bpf.c,v 1.59.2.5 2001/01/05 04:49:09 jdp Exp $
68  */
69 /*
70  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
71  * support for mandatory and extensible security protections.  This notice
72  * is included in support of clause 2.2 (b) of the Apple Public License,
73  * Version 2.0.
74  */
75 
76 #include "bpf.h"
77 
78 #ifndef __GNUC__
79 #define inline
80 #else
81 #define inline __inline
82 #endif
83 
84 #include <sys/param.h>
85 #include <sys/systm.h>
86 #include <sys/conf.h>
87 #include <sys/malloc.h>
88 #include <sys/mbuf.h>
89 #include <sys/time.h>
90 #include <sys/proc.h>
91 #include <sys/signalvar.h>
92 #include <sys/filio.h>
93 #include <sys/sockio.h>
94 #include <sys/ttycom.h>
95 #include <sys/filedesc.h>
96 #include <sys/uio_internal.h>
97 #include <sys/file_internal.h>
98 #include <sys/event.h>
99 
100 #include <sys/poll.h>
101 
102 #include <sys/socket.h>
103 #include <sys/socketvar.h>
104 #include <sys/vnode.h>
105 
106 #include <net/if.h>
107 #include <net/bpf.h>
108 #include <net/bpfdesc.h>
109 
110 #include <netinet/in.h>
111 #include <netinet/ip.h>
112 #include <netinet/ip6.h>
113 #include <netinet/in_pcb.h>
114 #include <netinet/in_var.h>
115 #include <netinet/ip_var.h>
116 #include <netinet/tcp.h>
117 #include <netinet/tcp_var.h>
118 #include <netinet/udp.h>
119 #include <netinet/udp_var.h>
120 #include <netinet/if_ether.h>
121 #include <netinet/isakmp.h>
122 #include <netinet6/esp.h>
123 #include <sys/kernel.h>
124 #include <sys/sysctl.h>
125 #include <net/firewire.h>
126 
127 #include <miscfs/devfs/devfs.h>
128 #include <net/dlil.h>
129 #include <net/pktap.h>
130 
131 #include <kern/assert.h>
132 #include <kern/locks.h>
133 #include <kern/thread_call.h>
134 #include <libkern/section_keywords.h>
135 
136 #include <os/log.h>
137 
/* Kernel routines used by this file that are declared here directly. */
138 extern int tvtohz(struct timeval *);
139 extern char *proc_name_address(void *p);
140 
/* Initial value of the bpf_bufsize tunable defined below. */
141 #define BPF_BUFSIZE 4096
/* Thin wrapper around uiomove(); the 'code' argument is accepted but ignored. */
142 #define UIOMOVE(cp, len, code, uio) uiomove(cp, len, uio)
143 
/* Sleep priority passed to msleep() by the waits in this file. */
144 #define PRINET  26                      /* interruptible */
145 
146 #define ISAKMP_HDR_SIZE (sizeof(struct isakmp) + sizeof(struct isakmp_gen))
147 #define ESP_HDR_SIZE sizeof(struct newesp)
148 
/* Signature of a (source, destination, length) copy routine. */
149 typedef void (*pktcopyfunc_t)(const void *, void *, size_t);
150 
151 /*
152  * The default read buffer size is patchable.
153  * bpfopen() copies this value into bd_bufsize of every new descriptor.
153  */
154 static unsigned int bpf_bufsize = BPF_BUFSIZE;
155 SYSCTL_INT(_debug, OID_AUTO, bpf_bufsize, CTLFLAG_RW | CTLFLAG_LOCKED,
156     &bpf_bufsize, 0, "");
157 
/*
 * debug.bpf_maxbufsize: read/write, routed through the
 * sysctl_bpf_maxbufsize handler (defined elsewhere in this file).
 */
158 __private_extern__ unsigned int bpf_maxbufsize = BPF_MAXBUFSIZE;
159 static int sysctl_bpf_maxbufsize SYSCTL_HANDLER_ARGS;
160 SYSCTL_PROC(_debug, OID_AUTO, bpf_maxbufsize, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
161     &bpf_maxbufsize, 0,
162     sysctl_bpf_maxbufsize, "I", "Default BPF max buffer size");
163 
/*
 * debug.bpf_bufsize_cap: read-only; BPF_BUFSIZE_CAP is half of
 * copysize_limit_panic.
 */
164 extern const int copysize_limit_panic;
165 #define BPF_BUFSIZE_CAP (copysize_limit_panic >> 1)
166 static int sysctl_bpf_bufsize_cap SYSCTL_HANDLER_ARGS;
167 SYSCTL_PROC(_debug, OID_AUTO, bpf_bufsize_cap, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
168     0, 0,
169     sysctl_bpf_bufsize_cap, "I", "Upper limit on BPF max buffer size");
170 
/*
 * debug.bpf_maxdevices: read-only; bpf_make_dev_t() stops creating device
 * nodes once nbpfilter reaches this value.
 */
171 static unsigned int bpf_maxdevices = 256;
172 SYSCTL_UINT(_debug, OID_AUTO, bpf_maxdevices, CTLFLAG_RD | CTLFLAG_LOCKED,
173     &bpf_maxdevices, 0, "");
174 /*
175  * bpf_wantpktap controls the default visibility of DLT_PKTAP.
176  * On OS X it is off by default, so a process needs to use the ioctl
177  * BPF_WANT_PKTAP explicitly to be able to use DLT_PKTAP.
177  * bpfopen() sets BPF_WANT_PKTAP on a new descriptor when this is non-zero.
178  */
179 #if !XNU_TARGET_OS_OSX
180 static unsigned int bpf_wantpktap = 1;
181 #else /* XNU_TARGET_OS_OSX */
182 static unsigned int bpf_wantpktap = 0;
183 #endif /* XNU_TARGET_OS_OSX */
184 SYSCTL_UINT(_debug, OID_AUTO, bpf_wantpktap, CTLFLAG_RW | CTLFLAG_LOCKED,
185     &bpf_wantpktap, 0, "");
186 
/* debug.bpf_debug: when non-zero, bpfopen() and bpfclose() log each open/close. */
187 static int bpf_debug = 0;
188 SYSCTL_INT(_debug, OID_AUTO, bpf_debug, CTLFLAG_RW | CTLFLAG_LOCKED,
189     &bpf_debug, 0, "");
190 
/*
 * debug.bpf_trunc_overflow: read-only counter.
 * NOTE(review): presumably counts truncation-length overflows; the code
 * that updates it is not in this part of the file - confirm.
 */
191 static unsigned long bpf_trunc_overflow = 0;
192 SYSCTL_ULONG(_debug, OID_AUTO, bpf_trunc_overflow, CTLFLAG_RD | CTLFLAG_LOCKED,
193     &bpf_trunc_overflow, "");
194 
/*
 * debug.bpf_hdr_comp_enable: read/write switch, on by default.
 * NOTE(review): presumably gates header compression; its consumer is not
 * in this part of the file - confirm.
 */
195 static int bpf_hdr_comp_enable = 1;
196 SYSCTL_INT(_debug, OID_AUTO, bpf_hdr_comp_enable, CTLFLAG_RW | CTLFLAG_LOCKED,
197     &bpf_hdr_comp_enable, 1, "");
198 
/* debug.bpf_stats: read-only structure produced by the sysctl_bpf_stats handler. */
199 static int sysctl_bpf_stats SYSCTL_HANDLER_ARGS;
200 SYSCTL_PROC(_debug, OID_AUTO, bpf_stats, CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
201     0, 0,
202     sysctl_bpf_stats, "S", "BPF statistics");
203 
204 /*
205  *  bpf_iflist is the list of interfaces; each corresponds to an ifnet
206  *  bpf_dtab holds pointer to the descriptors, indexed by minor device #
207  */
208 static struct bpf_if    *bpf_iflist;
209 /*
210  * BSD now stores the bpf_d in the dev_t which is a struct
211  * on their system. Our dev_t is an int, so we still store
212  * the bpf_d in a separate table indexed by minor device #.
213  *
214  * The value stored in bpf_dtab[n] represents one of three states:
215  *  NULL: device not opened
216  *  BPF_DEV_RESERVED: device opening or closing
217  *  other: device <n> opened with pointer to storage
218  */
219 #define BPF_DEV_RESERVED ((struct bpf_d *)(uintptr_t)1)
220 static struct bpf_d **bpf_dtab = NULL;
/* Number of entries allocated in bpf_dtab; grown by bpf_make_dev_t(). */
221 static unsigned int bpf_dtab_size = 0;
/* Number of device nodes created so far; bpfopen() rejects minors >= this. */
222 static unsigned int nbpfilter = 0;
/* Descriptors currently on an interface list (bpf_attachd ++, bpf_detachd --). */
223 static unsigned bpf_bpfd_cnt = 0;
224 
/*
 * Single mutex for this file: the device entry points take it, and
 * bpf_acquire_d()/bpf_release_d() assert that it is owned.
 */
225 static LCK_GRP_DECLARE(bpf_mlock_grp, "bpf");
226 static LCK_MTX_DECLARE(bpf_mlock_data, &bpf_mlock_grp);
227 static lck_mtx_t *const bpf_mlock = &bpf_mlock_data;
228 
/* Forward declarations of file-local helpers. */
229 static int      bpf_allocbufs(struct bpf_d *);
230 static errno_t  bpf_attachd(struct bpf_d *d, struct bpf_if *bp);
231 static int      bpf_detachd(struct bpf_d *d);
232 static void     bpf_freed(struct bpf_d *);
233 static int      bpf_movein(struct uio *, int,
234     struct mbuf **, struct sockaddr *, int *);
235 static int      bpf_setif(struct bpf_d *, ifnet_t ifp, bool, bool, bool);
236 static void     bpf_timed_out(void *, void *);
237 static void     bpf_wakeup(struct bpf_d *);
238 static uint32_t get_pkt_trunc_len(struct bpf_packet *);
239 static void     catchpacket(struct bpf_d *, struct bpf_packet *, u_int, int);
240 static void     reset_d(struct bpf_d *);
241 static int      bpf_setf(struct bpf_d *, u_int, user_addr_t, u_long);
242 static int      bpf_getdltlist(struct bpf_d *, caddr_t, struct proc *);
243 static int      bpf_setdlt(struct bpf_d *, u_int);
244 static int      bpf_set_traffic_class(struct bpf_d *, int);
245 static void     bpf_set_packet_service_class(struct mbuf *, int);
246 
/* Reference counting on a descriptor; both require bpf_mlock to be held. */
247 static void     bpf_acquire_d(struct bpf_d *);
248 static void     bpf_release_d(struct bpf_d *);
249 
250 static  int bpf_devsw_installed;
251 
252 void bpf_init(void *unused);
253 static int bpf_tap_callback(struct ifnet *ifp, struct mbuf *m);
254 
255 /*
256  * Darwin differs from BSD here, the following are static
257  * on BSD and not static on Darwin.
258  */
259 d_open_t            bpfopen;
260 d_close_t           bpfclose;
261 d_read_t            bpfread;
262 d_write_t           bpfwrite;
263 ioctl_fcn_t         bpfioctl;
264 select_fcn_t        bpfselect;
265 
266 /* Darwin's cdevsw struct differs slightly from BSDs */
/*
 * Character-device switch entry for /dev/bpf*: open, close, read, write,
 * ioctl and select point at the entry points declared above; every other
 * slot is an eno_* stub or NULL.
 */
267 #define CDEV_MAJOR 23
268 static const struct cdevsw bpf_cdevsw = {
269 	.d_open       = bpfopen,
270 	.d_close      = bpfclose,
271 	.d_read       = bpfread,
272 	.d_write      = bpfwrite,
273 	.d_ioctl      = bpfioctl,
274 	.d_stop       = eno_stop,
275 	.d_reset      = eno_reset,
276 	.d_ttys       = NULL,
277 	.d_select     = bpfselect,
278 	.d_mmap       = eno_mmap,
279 	.d_strategy   = eno_strat,
280 	.d_reserved_1 = eno_getc,
281 	.d_reserved_2 = eno_putc,
282 	.d_type       = 0
283 };
284 
/* Bytes that precede sa_data in a struct sockaddr; used by bpf_movein(). */
285 #define SOCKADDR_HDR_LEN           offsetof(struct sockaddr, sa_data)
286 
287 static int
bpf_movein(struct uio * uio,int linktype,struct mbuf ** mp,struct sockaddr * sockp,int * datlen)288 bpf_movein(struct uio *uio, int linktype, struct mbuf **mp,
289     struct sockaddr *sockp, int *datlen)
290 {
291 	struct mbuf *m;
292 	int error;
293 	int len;
294 	uint8_t sa_family;
295 	int hlen;
296 
297 	switch (linktype) {
298 #if SLIP
299 	case DLT_SLIP:
300 		sa_family = AF_INET;
301 		hlen = 0;
302 		break;
303 #endif /* SLIP */
304 
305 	case DLT_EN10MB:
306 		sa_family = AF_UNSPEC;
307 		/* XXX Would MAXLINKHDR be better? */
308 		hlen = sizeof(struct ether_header);
309 		break;
310 
311 #if FDDI
312 	case DLT_FDDI:
313 #if defined(__FreeBSD__) || defined(__bsdi__)
314 		sa_family = AF_IMPLINK;
315 		hlen = 0;
316 #else
317 		sa_family = AF_UNSPEC;
318 		/* XXX 4(FORMAC)+6(dst)+6(src)+3(LLC)+5(SNAP) */
319 		hlen = 24;
320 #endif
321 		break;
322 #endif /* FDDI */
323 
324 	case DLT_RAW:
325 	case DLT_NULL:
326 		sa_family = AF_UNSPEC;
327 		hlen = 0;
328 		break;
329 
330 #ifdef __FreeBSD__
331 	case DLT_ATM_RFC1483:
332 		/*
333 		 * en atm driver requires 4-byte atm pseudo header.
334 		 * though it isn't standard, vpi:vci needs to be
335 		 * specified anyway.
336 		 */
337 		sa_family = AF_UNSPEC;
338 		hlen = 12;      /* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
339 		break;
340 #endif
341 
342 	case DLT_PPP:
343 		sa_family = AF_UNSPEC;
344 		hlen = 4;       /* This should match PPP_HDRLEN */
345 		break;
346 
347 	case DLT_APPLE_IP_OVER_IEEE1394:
348 		sa_family = AF_UNSPEC;
349 		hlen = sizeof(struct firewire_header);
350 		break;
351 
352 	case DLT_IEEE802_11:            /* IEEE 802.11 wireless */
353 		sa_family = AF_IEEE80211;
354 		hlen = 0;
355 		break;
356 
357 	case DLT_IEEE802_11_RADIO:
358 		sa_family = AF_IEEE80211;
359 		hlen = 0;
360 		break;
361 
362 	default:
363 		return EIO;
364 	}
365 
366 	// LP64todo - fix this!
367 	len = (int)uio_resid(uio);
368 	if (len < hlen || (unsigned)len > MCLBYTES || len - hlen > MCLBYTES) {
369 		return EIO;
370 	}
371 
372 	*datlen = len - hlen;
373 
374 	if (sockp) {
375 		/*
376 		 * Build a sockaddr based on the data link layer type.
377 		 * We do this at this level because the ethernet header
378 		 * is copied directly into the data field of the sockaddr.
379 		 * In the case of SLIP, there is no header and the packet
380 		 * is forwarded as is.
381 		 * Also, we are careful to leave room at the front of the mbuf
382 		 * for the link level header.
383 		 */
384 		if ((hlen + SOCKADDR_HDR_LEN) > sockp->sa_len) {
385 			return EIO;
386 		}
387 		sockp->sa_family = sa_family;
388 	} else {
389 		/*
390 		 * We're directly sending the packet data supplied by
391 		 * the user; we don't need to make room for the link
392 		 * header, and don't need the header length value any
393 		 * more, so set it to 0.
394 		 */
395 		hlen = 0;
396 	}
397 
398 	MGETHDR(m, M_WAIT, MT_DATA);
399 	if (m == 0) {
400 		return ENOBUFS;
401 	}
402 	if ((unsigned)len > MHLEN) {
403 		MCLGET(m, M_WAIT);
404 		if ((m->m_flags & M_EXT) == 0) {
405 			error = ENOBUFS;
406 			goto bad;
407 		}
408 	}
409 	m->m_pkthdr.len = m->m_len = len;
410 	m->m_pkthdr.rcvif = NULL;
411 	*mp = m;
412 
413 	/*
414 	 * Make room for link header.
415 	 */
416 	if (hlen != 0) {
417 		m->m_pkthdr.len -= hlen;
418 		m->m_len -= hlen;
419 		m->m_data += hlen; /* XXX */
420 		error = UIOMOVE((caddr_t)sockp->sa_data, hlen, UIO_WRITE, uio);
421 		if (error) {
422 			goto bad;
423 		}
424 	}
425 	error = UIOMOVE(mtod(m, caddr_t), len - hlen, UIO_WRITE, uio);
426 	if (error) {
427 		goto bad;
428 	}
429 
430 	/* Check for multicast destination */
431 	switch (linktype) {
432 	case DLT_EN10MB: {
433 		struct ether_header *eh;
434 
435 		eh = mtod(m, struct ether_header *);
436 		if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
437 			if (_ether_cmp(etherbroadcastaddr,
438 			    eh->ether_dhost) == 0) {
439 				m->m_flags |= M_BCAST;
440 			} else {
441 				m->m_flags |= M_MCAST;
442 			}
443 		}
444 		break;
445 	}
446 	}
447 
448 	return 0;
449 bad:
450 	m_freem(m);
451 	return error;
452 }
453 
454 /*
455  * The dynamic addition of a new device node must block all processes that
456  * are opening the last device so that no process will get an unexpected
457  * ENOENT
458  */
459 static void
bpf_make_dev_t(int maj)460 bpf_make_dev_t(int maj)
461 {
462 	static int              bpf_growing = 0;
463 	unsigned int    cur_size = nbpfilter, i;
464 
465 	if (nbpfilter >= bpf_maxdevices) {
466 		return;
467 	}
468 
469 	while (bpf_growing) {
470 		/* Wait until new device has been created */
471 		(void) tsleep((caddr_t)&bpf_growing, PZERO, "bpf_growing", 0);
472 	}
473 	if (nbpfilter > cur_size) {
474 		/* other thread grew it already */
475 		return;
476 	}
477 	bpf_growing = 1;
478 
479 	/* need to grow bpf_dtab first */
480 	if (nbpfilter == bpf_dtab_size) {
481 		unsigned int new_dtab_size;
482 		struct bpf_d **new_dtab = NULL;
483 
484 		new_dtab_size = bpf_dtab_size + NBPFILTER;
485 		new_dtab = krealloc_type(struct bpf_d *,
486 		    bpf_dtab_size, new_dtab_size, bpf_dtab, Z_WAITOK | Z_ZERO);
487 		if (new_dtab == 0) {
488 			os_log_error(OS_LOG_DEFAULT, "bpf_make_dev_t: malloc bpf_dtab failed");
489 			goto done;
490 		}
491 		bpf_dtab = new_dtab;
492 		bpf_dtab_size = new_dtab_size;
493 	}
494 	i = nbpfilter++;
495 	(void) devfs_make_node(makedev(maj, i),
496 	    DEVFS_CHAR, UID_ROOT, GID_WHEEL, 0600,
497 	    "bpf%d", i);
498 done:
499 	bpf_growing = 0;
500 	wakeup((caddr_t)&bpf_growing);
501 }
502 
503 /*
504  * Attach file to the bpf interface, i.e. make d listen on bp.
505  */
506 static errno_t
bpf_attachd(struct bpf_d * d,struct bpf_if * bp)507 bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
508 {
509 	int first = bp->bif_dlist == NULL;
510 	int     error = 0;
511 
512 	/*
513 	 * Point d at bp, and add d to the interface's list of listeners.
514 	 * Finally, point the driver's bpf cookie at the interface so
515 	 * it will divert packets to bpf.
516 	 */
517 	d->bd_bif = bp;
518 	d->bd_next = bp->bif_dlist;
519 	bp->bif_dlist = d;
520 	bpf_bpfd_cnt++;
521 
522 	/*
523 	 * Take a reference on the device even if an error is returned
524 	 * because we keep the device in the interface's list of listeners
525 	 */
526 	bpf_acquire_d(d);
527 
528 	if (first) {
529 		/* Find the default bpf entry for this ifp */
530 		if (bp->bif_ifp->if_bpf == NULL) {
531 			struct bpf_if   *tmp, *primary = NULL;
532 
533 			for (tmp = bpf_iflist; tmp; tmp = tmp->bif_next) {
534 				if (tmp->bif_ifp == bp->bif_ifp) {
535 					primary = tmp;
536 					break;
537 				}
538 			}
539 			bp->bif_ifp->if_bpf = primary;
540 		}
541 		/* Only call dlil_set_bpf_tap for primary dlt */
542 		if (bp->bif_ifp->if_bpf == bp) {
543 			dlil_set_bpf_tap(bp->bif_ifp, BPF_TAP_INPUT_OUTPUT,
544 			    bpf_tap_callback);
545 		}
546 
547 		if (bp->bif_tap != NULL) {
548 			error = bp->bif_tap(bp->bif_ifp, bp->bif_dlt,
549 			    BPF_TAP_INPUT_OUTPUT);
550 		}
551 	}
552 
553 	/*
554 	 * Reset the detach flags in case we previously detached an interface
555 	 */
556 	d->bd_flags &= ~(BPF_DETACHING | BPF_DETACHED);
557 
558 	if (bp->bif_dlt == DLT_PKTAP) {
559 		d->bd_flags |= BPF_FINALIZE_PKTAP;
560 	} else {
561 		d->bd_flags &= ~BPF_FINALIZE_PKTAP;
562 	}
563 	return error;
564 }
565 
566 /*
567  * Detach a file from its interface.
568  *
569  * Return 1 if was closed by some thread, 0 otherwise
570  */
571 static int
bpf_detachd(struct bpf_d * d)572 bpf_detachd(struct bpf_d *d)
573 {
574 	struct bpf_d **p;
575 	struct bpf_if *bp;
576 	struct ifnet  *ifp;
577 
578 	int bpf_closed = d->bd_flags & BPF_CLOSING;
579 	/*
580 	 * Some other thread already detached
581 	 */
582 	if ((d->bd_flags & (BPF_DETACHED | BPF_DETACHING)) != 0) {
583 		goto done;
584 	}
585 	/*
586 	 * This thread is doing the detach
587 	 */
588 	d->bd_flags |= BPF_DETACHING;
589 
590 	ifp = d->bd_bif->bif_ifp;
591 	bp = d->bd_bif;
592 
593 	/* Remove d from the interface's descriptor list. */
594 	p = &bp->bif_dlist;
595 	while (*p != d) {
596 		p = &(*p)->bd_next;
597 		if (*p == 0) {
598 			panic("bpf_detachd: descriptor not in list");
599 		}
600 	}
601 	*p = (*p)->bd_next;
602 	bpf_bpfd_cnt--;
603 	if (bp->bif_dlist == 0) {
604 		/*
605 		 * Let the driver know that there are no more listeners.
606 		 */
607 		/* Only call dlil_set_bpf_tap for primary dlt */
608 		if (bp->bif_ifp->if_bpf == bp) {
609 			dlil_set_bpf_tap(ifp, BPF_TAP_DISABLE, NULL);
610 		}
611 		if (bp->bif_tap) {
612 			bp->bif_tap(ifp, bp->bif_dlt, BPF_TAP_DISABLE);
613 		}
614 
615 		for (bp = bpf_iflist; bp; bp = bp->bif_next) {
616 			if (bp->bif_ifp == ifp && bp->bif_dlist != 0) {
617 				break;
618 			}
619 		}
620 		if (bp == NULL) {
621 			ifp->if_bpf = NULL;
622 		}
623 	}
624 	d->bd_bif = NULL;
625 	/*
626 	 * Check if this descriptor had requested promiscuous mode.
627 	 * If so, turn it off.
628 	 */
629 	if (d->bd_promisc) {
630 		d->bd_promisc = 0;
631 		lck_mtx_unlock(bpf_mlock);
632 		if (ifnet_set_promiscuous(ifp, 0)) {
633 			/*
634 			 * Something is really wrong if we were able to put
635 			 * the driver into promiscuous mode, but can't
636 			 * take it out.
637 			 * Most likely the network interface is gone.
638 			 */
639 			os_log_error(OS_LOG_DEFAULT,
640 			    "%s: bpf%d ifnet_set_promiscuous %s failed",
641 			    __func__, d->bd_dev_minor, if_name(ifp));
642 		}
643 		lck_mtx_lock(bpf_mlock);
644 	}
645 
646 	/*
647 	 * Wake up other thread that are waiting for this thread to finish
648 	 * detaching
649 	 */
650 	d->bd_flags &= ~BPF_DETACHING;
651 	d->bd_flags |= BPF_DETACHED;
652 
653 	/* Refresh the local variable as d could have been modified */
654 	bpf_closed = d->bd_flags & BPF_CLOSING;
655 
656 	os_log(OS_LOG_DEFAULT, "bpf%d%s detached from %s fcount %llu dcount %llu",
657 	    d->bd_dev_minor, bpf_closed ? " closed and" : "", if_name(ifp),
658 	    d->bd_fcount, d->bd_dcount);
659 
660 	/*
661 	 * Note that We've kept the reference because we may have dropped
662 	 * the lock when turning off promiscuous mode
663 	 */
664 	bpf_release_d(d);
665 done:
666 	/*
667 	 * Let the caller know the bpf_d is closed
668 	 */
669 	if (bpf_closed) {
670 		return 1;
671 	} else {
672 		return 0;
673 	}
674 }
675 
676 /*
677  * Start asynchronous timer, if necessary.
678  * Must be called with bpf_mlock held.
679  */
680 static void
bpf_start_timer(struct bpf_d * d)681 bpf_start_timer(struct bpf_d *d)
682 {
683 	uint64_t deadline;
684 	struct timeval tv;
685 
686 	if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
687 		tv.tv_sec = d->bd_rtout / hz;
688 		tv.tv_usec = (d->bd_rtout % hz) * tick;
689 
690 		clock_interval_to_deadline(
691 			(uint32_t)tv.tv_sec * USEC_PER_SEC + tv.tv_usec,
692 			NSEC_PER_USEC, &deadline);
693 		/*
694 		 * The state is BPF_IDLE, so the timer hasn't
695 		 * been started yet, and hasn't gone off yet;
696 		 * there is no thread call scheduled, so this
697 		 * won't change the schedule.
698 		 *
699 		 * XXX - what if, by the time it gets entered,
700 		 * the deadline has already passed?
701 		 */
702 		thread_call_enter_delayed(d->bd_thread_call, deadline);
703 		d->bd_state = BPF_WAITING;
704 	}
705 }
706 
707 /*
708  * Cancel asynchronous timer.
709  * Must be called with bpf_mlock held.
710  */
711 static boolean_t
bpf_stop_timer(struct bpf_d * d)712 bpf_stop_timer(struct bpf_d *d)
713 {
714 	/*
715 	 * If the timer has already gone off, this does nothing.
716 	 * Our caller is expected to set d->bd_state to BPF_IDLE,
717 	 * with the bpf_mlock, after we are called. bpf_timed_out()
718 	 * also grabs bpf_mlock, so, if the timer has gone off and
719 	 * bpf_timed_out() hasn't finished, it's waiting for the
720 	 * lock; when this thread releases the lock, it will
721 	 * find the state is BPF_IDLE, and just release the
722 	 * lock and return.
723 	 */
724 	return thread_call_cancel(d->bd_thread_call);
725 }
726 
727 void
bpf_acquire_d(struct bpf_d * d)728 bpf_acquire_d(struct bpf_d *d)
729 {
730 	void *lr_saved =  __builtin_return_address(0);
731 
732 	LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);
733 
734 	d->bd_refcnt += 1;
735 
736 	d->bd_ref_lr[d->bd_next_ref_lr] = lr_saved;
737 	d->bd_next_ref_lr = (d->bd_next_ref_lr + 1) % BPF_REF_HIST;
738 }
739 
740 void
bpf_release_d(struct bpf_d * d)741 bpf_release_d(struct bpf_d *d)
742 {
743 	void *lr_saved =  __builtin_return_address(0);
744 
745 	LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);
746 
747 	if (d->bd_refcnt <= 0) {
748 		panic("%s: %p refcnt <= 0", __func__, d);
749 	}
750 
751 	d->bd_refcnt -= 1;
752 
753 	d->bd_unref_lr[d->bd_next_unref_lr] = lr_saved;
754 	d->bd_next_unref_lr = (d->bd_next_unref_lr + 1) % BPF_REF_HIST;
755 
756 	if (d->bd_refcnt == 0) {
757 		/* Assert the device is detached */
758 		if ((d->bd_flags & BPF_DETACHED) == 0) {
759 			panic("%s: %p BPF_DETACHED not set", __func__, d);
760 		}
761 
762 		kfree_type(struct bpf_d, d);
763 	}
764 }
765 
766 /*
767  * Open ethernet device.  Returns ENXIO for illegal minor device number,
768  * EBUSY if file is open by another process.
769  */
770 /* ARGSUSED */
771 int
bpfopen(dev_t dev,int flags,__unused int fmt,struct proc * p)772 bpfopen(dev_t dev, int flags, __unused int fmt,
773     struct proc *p)
774 {
775 	struct bpf_d *d;
776 
777 	lck_mtx_lock(bpf_mlock);
778 	if ((unsigned int) minor(dev) >= nbpfilter) {
779 		lck_mtx_unlock(bpf_mlock);
780 		return ENXIO;
781 	}
782 	/*
783 	 * New device nodes are created on demand when opening the last one.
784 	 * The programming model is for processes to loop on the minor starting
785 	 * at 0 as long as EBUSY is returned. The loop stops when either the
786 	 * open succeeds or an error other that EBUSY is returned. That means
787 	 * that bpf_make_dev_t() must block all processes that are opening the
788 	 * last  node. If not all processes are blocked, they could unexpectedly
789 	 * get ENOENT and abort their opening loop.
790 	 */
791 	if ((unsigned int) minor(dev) == (nbpfilter - 1)) {
792 		bpf_make_dev_t(major(dev));
793 	}
794 
795 	/*
796 	 * Each minor can be opened by only one process.  If the requested
797 	 * minor is in use, return EBUSY.
798 	 *
799 	 * Important: bpfopen() and bpfclose() have to check and set the status
800 	 * of a device in the same lockin context otherwise the device may be
801 	 * leaked because the vnode use count will be unpextectly greater than 1
802 	 * when close() is called.
803 	 */
804 	if (bpf_dtab[minor(dev)] == NULL) {
805 		/* Reserve while opening */
806 		bpf_dtab[minor(dev)] = BPF_DEV_RESERVED;
807 	} else {
808 		lck_mtx_unlock(bpf_mlock);
809 		return EBUSY;
810 	}
811 	d = kalloc_type(struct bpf_d, Z_WAITOK | Z_ZERO);
812 	if (d == NULL) {
813 		/* this really is a catastrophic failure */
814 		os_log_error(OS_LOG_DEFAULT,
815 		    "bpfopen: bpf%d kalloc_type bpf_d failed", minor(dev));
816 		bpf_dtab[minor(dev)] = NULL;
817 		lck_mtx_unlock(bpf_mlock);
818 		return ENOMEM;
819 	}
820 
821 	/* Mark "in use" and do most initialization. */
822 	bpf_acquire_d(d);
823 	d->bd_bufsize = bpf_bufsize;
824 	d->bd_sig = SIGIO;
825 	d->bd_seesent = 1;
826 	d->bd_oflags = flags;
827 	d->bd_state = BPF_IDLE;
828 	d->bd_traffic_class = SO_TC_BE;
829 	d->bd_flags |= BPF_DETACHED;
830 	if (bpf_wantpktap) {
831 		d->bd_flags |= BPF_WANT_PKTAP;
832 	} else {
833 		d->bd_flags &= ~BPF_WANT_PKTAP;
834 	}
835 
836 	d->bd_thread_call = thread_call_allocate(bpf_timed_out, d);
837 	if (d->bd_thread_call == NULL) {
838 		os_log_error(OS_LOG_DEFAULT, "bpfopen: bpf%d malloc thread call failed",
839 		    minor(dev));
840 		bpf_dtab[minor(dev)] = NULL;
841 		bpf_release_d(d);
842 		lck_mtx_unlock(bpf_mlock);
843 
844 		return ENOMEM;
845 	}
846 	d->bd_opened_by = p;
847 	uuid_generate(d->bd_uuid);
848 	d->bd_pid = proc_pid(p);
849 
850 	d->bd_dev_minor = minor(dev);
851 	bpf_dtab[minor(dev)] = d; /* Mark opened */
852 	lck_mtx_unlock(bpf_mlock);
853 
854 	if (bpf_debug) {
855 		os_log(OS_LOG_DEFAULT, "bpf%u opened by %s.%u",
856 		    d->bd_dev_minor, proc_name_address(p), d->bd_pid);
857 	}
858 	return 0;
859 }
860 
861 /*
862  * Close the descriptor by detaching it from its interface,
863  * deallocating its buffers, and marking it free.
864  */
865 /* ARGSUSED */
866 int
bpfclose(dev_t dev,__unused int flags,__unused int fmt,__unused struct proc * p)867 bpfclose(dev_t dev, __unused int flags, __unused int fmt,
868     __unused struct proc *p)
869 {
870 	struct bpf_d *d;
871 
872 	/* Take BPF lock to ensure no other thread is using the device */
873 	lck_mtx_lock(bpf_mlock);
874 
875 	d = bpf_dtab[minor(dev)];
876 	if (d == NULL || d == BPF_DEV_RESERVED) {
877 		lck_mtx_unlock(bpf_mlock);
878 		return ENXIO;
879 	}
880 
881 	/*
882 	 * Other threads may call bpd_detachd() if we drop the bpf_mlock
883 	 */
884 	d->bd_flags |= BPF_CLOSING;
885 
886 	if (bpf_debug != 0) {
887 		os_log(OS_LOG_DEFAULT, "%s: bpf%d",
888 		    __func__, d->bd_dev_minor);
889 	}
890 
891 	bpf_dtab[minor(dev)] = BPF_DEV_RESERVED; /* Reserve while closing */
892 
893 	/*
894 	 * Deal with any in-progress timeouts.
895 	 */
896 	switch (d->bd_state) {
897 	case BPF_IDLE:
898 		/*
899 		 * Not waiting for a timeout, and no timeout happened.
900 		 */
901 		break;
902 
903 	case BPF_WAITING:
904 		/*
905 		 * Waiting for a timeout.
906 		 * Cancel any timer that has yet to go off,
907 		 * and mark the state as "closing".
908 		 * Then drop the lock to allow any timers that
909 		 * *have* gone off to run to completion, and wait
910 		 * for them to finish.
911 		 */
912 		if (!bpf_stop_timer(d)) {
913 			/*
914 			 * There was no pending call, so the call must
915 			 * have been in progress. Wait for the call to
916 			 * complete; we have to drop the lock while
917 			 * waiting. to let the in-progrss call complete
918 			 */
919 			d->bd_state = BPF_DRAINING;
920 			while (d->bd_state == BPF_DRAINING) {
921 				msleep((caddr_t)d, bpf_mlock, PRINET,
922 				    "bpfdraining", NULL);
923 			}
924 		}
925 		d->bd_state = BPF_IDLE;
926 		break;
927 
928 	case BPF_TIMED_OUT:
929 		/*
930 		 * Timer went off, and the timeout routine finished.
931 		 */
932 		d->bd_state = BPF_IDLE;
933 		break;
934 
935 	case BPF_DRAINING:
936 		/*
937 		 * Another thread is blocked on a close waiting for
938 		 * a timeout to finish.
939 		 * This "shouldn't happen", as the first thread to enter
940 		 * bpfclose() will set bpf_dtab[minor(dev)] to 1, and
941 		 * all subsequent threads should see that and fail with
942 		 * ENXIO.
943 		 */
944 		panic("Two threads blocked in a BPF close");
945 		break;
946 	}
947 
948 	if (d->bd_bif) {
949 		bpf_detachd(d);
950 	}
951 	selthreadclear(&d->bd_sel);
952 	thread_call_free(d->bd_thread_call);
953 
954 	while (d->bd_hbuf_read != 0) {
955 		msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
956 	}
957 
958 	if (bpf_debug) {
959 		os_log(OS_LOG_DEFAULT,
960 		    "bpf%u closed by %s.%u dcount %llu fcount %llu ccount %llu",
961 		    d->bd_dev_minor, proc_name_address(p), d->bd_pid,
962 		    d->bd_dcount, d->bd_fcount, d->bd_bcs.bcs_count_compressed_prefix);
963 	}
964 
965 	bpf_freed(d);
966 
967 	/* Mark free in same context as bpfopen comes to check */
968 	bpf_dtab[minor(dev)] = NULL;                    /* Mark closed */
969 
970 	bpf_release_d(d);
971 
972 	lck_mtx_unlock(bpf_mlock);
973 
974 	return 0;
975 }
976 
977 #define BPF_SLEEP bpf_sleep
978 
979 static int
bpf_sleep(struct bpf_d * d,int pri,const char * wmesg,int timo)980 bpf_sleep(struct bpf_d *d, int pri, const char *wmesg, int timo)
981 {
982 	u_int64_t abstime = 0;
983 
984 	if (timo != 0) {
985 		clock_interval_to_deadline(timo, NSEC_PER_SEC / hz, &abstime);
986 	}
987 
988 	return msleep1((caddr_t)d, bpf_mlock, pri, wmesg, abstime);
989 }
990 
991 static void
bpf_finalize_pktap(struct bpf_hdr * hp,struct pktap_header * pktaphdr)992 bpf_finalize_pktap(struct bpf_hdr *hp, struct pktap_header *pktaphdr)
993 {
994 	if (pktaphdr->pth_flags & PTH_FLAG_V2_HDR) {
995 		struct pktap_v2_hdr *pktap_v2_hdr;
996 
997 		pktap_v2_hdr = (struct pktap_v2_hdr *)pktaphdr;
998 
999 		if (pktap_v2_hdr->pth_flags & PTH_FLAG_DELAY_PKTAP) {
1000 			pktap_v2_finalize_proc_info(pktap_v2_hdr);
1001 		}
1002 	} else {
1003 		if (pktaphdr->pth_flags & PTH_FLAG_DELAY_PKTAP) {
1004 			pktap_finalize_proc_info(pktaphdr);
1005 		}
1006 
1007 		if (pktaphdr->pth_flags & PTH_FLAG_TSTAMP) {
1008 			hp->bh_tstamp.tv_sec = pktaphdr->pth_tstamp.tv_sec;
1009 			hp->bh_tstamp.tv_usec = pktaphdr->pth_tstamp.tv_usec;
1010 		}
1011 	}
1012 }
1013 
/*
 * Rotate the packet buffers in descriptor d.  Move the store buffer
 * into the hold slot, and the free buffer into the store slot.
 * Zero the length of the new store buffer.
 *
 * Panics if a read of the hold buffer is in progress (bd_hbuf_read != 0).
 *
 * Note: in head drop mode, the hold buffer can be dropped so the first
 * packet of the store buffer cannot be compressed as it otherwise would
 * refer to deleted data in a dropped hold buffer that the reader process
 * does not know about.
 *
 * The argument is evaluated several times: pass a plain pointer
 * expression without side effects.
 */
#define ROTATE_BUFFERS(d) do { \
	if ((d)->bd_hbuf_read != 0) \
	        panic("rotating bpf buffers during read"); \
	(d)->bd_hbuf = (d)->bd_sbuf; \
	(d)->bd_hlen = (d)->bd_slen; \
	(d)->bd_hcnt = (d)->bd_scnt; \
	(d)->bd_sbuf = (d)->bd_fbuf; \
	(d)->bd_slen = 0; \
	(d)->bd_scnt = 0; \
	(d)->bd_fbuf = NULL; \
	if ((d)->bd_headdrop != 0) \
	        (d)->bd_prev_slen = 0; \
} while(false)
1036 
1037 /*
1038  *  bpfread - read next chunk of packets from buffers
1039  */
1040 int
bpfread(dev_t dev,struct uio * uio,int ioflag)1041 bpfread(dev_t dev, struct uio *uio, int ioflag)
1042 {
1043 	struct bpf_d *d;
1044 	caddr_t hbuf;
1045 	int timed_out, hbuf_len;
1046 	int error;
1047 	int flags;
1048 
1049 	lck_mtx_lock(bpf_mlock);
1050 
1051 	d = bpf_dtab[minor(dev)];
1052 	if (d == NULL || d == BPF_DEV_RESERVED ||
1053 	    (d->bd_flags & BPF_CLOSING) != 0) {
1054 		lck_mtx_unlock(bpf_mlock);
1055 		return ENXIO;
1056 	}
1057 
1058 	bpf_acquire_d(d);
1059 
1060 	/*
1061 	 * Restrict application to use a buffer the same size as
1062 	 * as kernel buffers.
1063 	 */
1064 	if (uio_resid(uio) != d->bd_bufsize) {
1065 		bpf_release_d(d);
1066 		lck_mtx_unlock(bpf_mlock);
1067 		return EINVAL;
1068 	}
1069 
1070 	if (d->bd_state == BPF_WAITING) {
1071 		bpf_stop_timer(d);
1072 	}
1073 
1074 	timed_out = (d->bd_state == BPF_TIMED_OUT);
1075 	d->bd_state = BPF_IDLE;
1076 
1077 	while (d->bd_hbuf_read != 0) {
1078 		msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
1079 	}
1080 
1081 	if ((d->bd_flags & BPF_CLOSING) != 0) {
1082 		bpf_release_d(d);
1083 		lck_mtx_unlock(bpf_mlock);
1084 		return ENXIO;
1085 	}
1086 	/*
1087 	 * If the hold buffer is empty, then do a timed sleep, which
1088 	 * ends when the timeout expires or when enough packets
1089 	 * have arrived to fill the store buffer.
1090 	 */
1091 	while (d->bd_hbuf == 0) {
1092 		if ((d->bd_immediate || timed_out || (ioflag & IO_NDELAY)) &&
1093 		    d->bd_slen != 0) {
1094 			/*
1095 			 * We're in immediate mode, or are reading
1096 			 * in non-blocking mode, or a timer was
1097 			 * started before the read (e.g., by select()
1098 			 * or poll()) and has expired and a packet(s)
1099 			 * either arrived since the previous
1100 			 * read or arrived while we were asleep.
1101 			 * Rotate the buffers and return what's here.
1102 			 */
1103 			ROTATE_BUFFERS(d);
1104 			break;
1105 		}
1106 
1107 		/*
1108 		 * No data is available, check to see if the bpf device
1109 		 * is still pointed at a real interface.  If not, return
1110 		 * ENXIO so that the userland process knows to rebind
1111 		 * it before using it again.
1112 		 */
1113 		if (d->bd_bif == NULL) {
1114 			bpf_release_d(d);
1115 			lck_mtx_unlock(bpf_mlock);
1116 			return ENXIO;
1117 		}
1118 		if (ioflag & IO_NDELAY) {
1119 			bpf_release_d(d);
1120 			lck_mtx_unlock(bpf_mlock);
1121 			return EWOULDBLOCK;
1122 		}
1123 		error = BPF_SLEEP(d, PRINET | PCATCH, "bpf", d->bd_rtout);
1124 		/*
1125 		 * Make sure device is still opened
1126 		 */
1127 		if ((d->bd_flags & BPF_CLOSING) != 0) {
1128 			bpf_release_d(d);
1129 			lck_mtx_unlock(bpf_mlock);
1130 			return ENXIO;
1131 		}
1132 
1133 		while (d->bd_hbuf_read != 0) {
1134 			msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading",
1135 			    NULL);
1136 		}
1137 
1138 		if ((d->bd_flags & BPF_CLOSING) != 0) {
1139 			bpf_release_d(d);
1140 			lck_mtx_unlock(bpf_mlock);
1141 			return ENXIO;
1142 		}
1143 
1144 		if (error == EINTR || error == ERESTART) {
1145 			if (d->bd_hbuf != NULL) {
1146 				/*
1147 				 * Because we msleep, the hold buffer might
1148 				 * be filled when we wake up.  Avoid rotating
1149 				 * in this case.
1150 				 */
1151 				break;
1152 			}
1153 			if (d->bd_slen != 0) {
1154 				/*
1155 				 * Sometimes we may be interrupted often and
1156 				 * the sleep above will not timeout.
1157 				 * Regardless, we should rotate the buffers
1158 				 * if there's any new data pending and
1159 				 * return it.
1160 				 */
1161 				ROTATE_BUFFERS(d);
1162 				break;
1163 			}
1164 			bpf_release_d(d);
1165 			lck_mtx_unlock(bpf_mlock);
1166 			if (error == ERESTART) {
1167 				os_log(OS_LOG_DEFAULT, "%s: bpf%d ERESTART to EINTR",
1168 				    __func__, d->bd_dev_minor);
1169 				error = EINTR;
1170 			}
1171 			return error;
1172 		}
1173 		if (error == EWOULDBLOCK) {
1174 			/*
1175 			 * On a timeout, return what's in the buffer,
1176 			 * which may be nothing.  If there is something
1177 			 * in the store buffer, we can rotate the buffers.
1178 			 */
1179 			if (d->bd_hbuf) {
1180 				/*
1181 				 * We filled up the buffer in between
1182 				 * getting the timeout and arriving
1183 				 * here, so we don't need to rotate.
1184 				 */
1185 				break;
1186 			}
1187 
1188 			if (d->bd_slen == 0) {
1189 				bpf_release_d(d);
1190 				lck_mtx_unlock(bpf_mlock);
1191 				return 0;
1192 			}
1193 			ROTATE_BUFFERS(d);
1194 			break;
1195 		}
1196 	}
1197 	/*
1198 	 * At this point, we know we have something in the hold slot.
1199 	 */
1200 
1201 	/*
1202 	 * Set the hold buffer read. So we do not
1203 	 * rotate the buffers until the hold buffer
1204 	 * read is complete. Also to avoid issues resulting
1205 	 * from page faults during disk sleep (<rdar://problem/13436396>).
1206 	 */
1207 	d->bd_hbuf_read = 1;
1208 	hbuf = d->bd_hbuf;
1209 	hbuf_len = d->bd_hlen;
1210 	flags = d->bd_flags;
1211 	d->bd_bcs.bcs_total_read += d->bd_hcnt;
1212 	lck_mtx_unlock(bpf_mlock);
1213 
1214 	/*
1215 	 * Before we move data to userland, we fill out the extended
1216 	 * header fields.
1217 	 */
1218 	if (flags & BPF_EXTENDED_HDR) {
1219 		char *p;
1220 
1221 		p = hbuf;
1222 		while (p < hbuf + hbuf_len) {
1223 			struct bpf_hdr_ext *ehp;
1224 			uint32_t flowid;
1225 			struct so_procinfo soprocinfo;
1226 			int found = 0;
1227 
1228 			ehp = (struct bpf_hdr_ext *)(void *)p;
1229 			if ((flowid = ehp->bh_flowid) != 0) {
1230 				if (ehp->bh_flags & BPF_HDR_EXT_FLAGS_TCP) {
1231 					ehp->bh_flags &= ~BPF_HDR_EXT_FLAGS_TCP;
1232 					found = inp_findinpcb_procinfo(&tcbinfo,
1233 					    flowid, &soprocinfo);
1234 				} else if (ehp->bh_flags == BPF_HDR_EXT_FLAGS_UDP) {
1235 					ehp->bh_flags &= ~BPF_HDR_EXT_FLAGS_UDP;
1236 					found = inp_findinpcb_procinfo(&udbinfo,
1237 					    flowid, &soprocinfo);
1238 				}
1239 				if (found == 1) {
1240 					ehp->bh_pid = soprocinfo.spi_pid;
1241 					strlcpy(&ehp->bh_comm[0], &soprocinfo.spi_proc_name[0], sizeof(ehp->bh_comm));
1242 				}
1243 				ehp->bh_flowid = 0;
1244 			}
1245 
1246 			if ((flags & BPF_FINALIZE_PKTAP) != 0 && ehp->bh_complen == 0) {
1247 				struct pktap_header *pktaphdr;
1248 
1249 				pktaphdr = (struct pktap_header *)(void *)
1250 				    (p + BPF_WORDALIGN(ehp->bh_hdrlen));
1251 
1252 				bpf_finalize_pktap((struct bpf_hdr *) ehp,
1253 				    pktaphdr);
1254 			}
1255 			p += BPF_WORDALIGN(ehp->bh_hdrlen + ehp->bh_caplen);
1256 		}
1257 	} else if (flags & BPF_FINALIZE_PKTAP) {
1258 		char *p;
1259 
1260 		p = hbuf;
1261 
1262 		while (p < hbuf + hbuf_len) {
1263 			struct bpf_hdr *hp;
1264 			struct pktap_header *pktaphdr;
1265 
1266 			hp = (struct bpf_hdr *)(void *)p;
1267 
1268 			/*
1269 			 * Cannot finalize a compressed pktap header as we may not have
1270 			 * all the fields present
1271 			 */
1272 			if (d->bd_flags & BPF_COMP_ENABLED) {
1273 				struct bpf_comp_hdr *hcp;
1274 
1275 				hcp = (struct bpf_comp_hdr *)(void *)p;
1276 
1277 				if (hcp->bh_complen != 0) {
1278 					p += BPF_WORDALIGN(hcp->bh_hdrlen + hcp->bh_caplen);
1279 					continue;
1280 				}
1281 			}
1282 
1283 			pktaphdr = (struct pktap_header *)(void *)
1284 			    (p + BPF_WORDALIGN(hp->bh_hdrlen));
1285 
1286 			bpf_finalize_pktap(hp, pktaphdr);
1287 
1288 			p += BPF_WORDALIGN(hp->bh_hdrlen + hp->bh_caplen);
1289 		}
1290 	}
1291 
1292 	/*
1293 	 * Move data from hold buffer into user space.
1294 	 * We know the entire buffer is transferred since
1295 	 * we checked above that the read buffer is bpf_bufsize bytes.
1296 	 */
1297 	error = UIOMOVE(hbuf, hbuf_len, UIO_READ, uio);
1298 
1299 	lck_mtx_lock(bpf_mlock);
1300 	/*
1301 	 * Make sure device is still opened
1302 	 */
1303 	if ((d->bd_flags & BPF_CLOSING) != 0) {
1304 		bpf_release_d(d);
1305 		lck_mtx_unlock(bpf_mlock);
1306 		return ENXIO;
1307 	}
1308 
1309 	d->bd_hbuf_read = 0;
1310 	d->bd_fbuf = d->bd_hbuf;
1311 	d->bd_hbuf = NULL;
1312 	d->bd_hlen = 0;
1313 	d->bd_hcnt = 0;
1314 	wakeup((caddr_t)d);
1315 
1316 	bpf_release_d(d);
1317 	lck_mtx_unlock(bpf_mlock);
1318 	return error;
1319 }
1320 
1321 /*
1322  * If there are processes sleeping on this descriptor, wake them up.
1323  */
1324 static void
bpf_wakeup(struct bpf_d * d)1325 bpf_wakeup(struct bpf_d *d)
1326 {
1327 	if (d->bd_state == BPF_WAITING) {
1328 		bpf_stop_timer(d);
1329 		d->bd_state = BPF_IDLE;
1330 	}
1331 	wakeup((caddr_t)d);
1332 	if (d->bd_async && d->bd_sig && d->bd_sigio) {
1333 		pgsigio(d->bd_sigio, d->bd_sig);
1334 	}
1335 
1336 	selwakeup(&d->bd_sel);
1337 	if ((d->bd_flags & BPF_KNOTE)) {
1338 		KNOTE(&d->bd_sel.si_note, 1);
1339 	}
1340 }
1341 
1342 static void
bpf_timed_out(void * arg,__unused void * dummy)1343 bpf_timed_out(void *arg, __unused void *dummy)
1344 {
1345 	struct bpf_d *d = (struct bpf_d *)arg;
1346 
1347 	lck_mtx_lock(bpf_mlock);
1348 	if (d->bd_state == BPF_WAITING) {
1349 		/*
1350 		 * There's a select or kqueue waiting for this; if there's
1351 		 * now stuff to read, wake it up.
1352 		 */
1353 		d->bd_state = BPF_TIMED_OUT;
1354 		if (d->bd_slen != 0) {
1355 			bpf_wakeup(d);
1356 		}
1357 	} else if (d->bd_state == BPF_DRAINING) {
1358 		/*
1359 		 * A close is waiting for this to finish.
1360 		 * Mark it as finished, and wake the close up.
1361 		 */
1362 		d->bd_state = BPF_IDLE;
1363 		bpf_wakeup(d);
1364 	}
1365 	lck_mtx_unlock(bpf_mlock);
1366 }
1367 
1368 /* keep in sync with bpf_movein above: */
1369 #define MAX_DATALINK_HDR_LEN    (sizeof(struct firewire_header))
1370 
1371 int
bpfwrite(dev_t dev,struct uio * uio,__unused int ioflag)1372 bpfwrite(dev_t dev, struct uio *uio, __unused int ioflag)
1373 {
1374 	struct bpf_d *d;
1375 	struct ifnet *ifp;
1376 	struct mbuf *m = NULL;
1377 	int error;
1378 	char              dst_buf[SOCKADDR_HDR_LEN + MAX_DATALINK_HDR_LEN];
1379 	int datlen = 0;
1380 	int bif_dlt;
1381 	int bd_hdrcmplt;
1382 
1383 	lck_mtx_lock(bpf_mlock);
1384 
1385 	d = bpf_dtab[minor(dev)];
1386 	if (d == NULL || d == BPF_DEV_RESERVED ||
1387 	    (d->bd_flags & BPF_CLOSING) != 0) {
1388 		lck_mtx_unlock(bpf_mlock);
1389 		return ENXIO;
1390 	}
1391 
1392 	bpf_acquire_d(d);
1393 
1394 	++d->bd_wcount;
1395 
1396 	if (d->bd_bif == 0) {
1397 		++d->bd_wdcount;
1398 		bpf_release_d(d);
1399 		lck_mtx_unlock(bpf_mlock);
1400 		return ENXIO;
1401 	}
1402 
1403 	ifp = d->bd_bif->bif_ifp;
1404 
1405 	if ((ifp->if_flags & IFF_UP) == 0) {
1406 		++d->bd_wdcount;
1407 		bpf_release_d(d);
1408 		lck_mtx_unlock(bpf_mlock);
1409 		return ENETDOWN;
1410 	}
1411 	if (uio_resid(uio) == 0) {
1412 		bpf_release_d(d);
1413 		lck_mtx_unlock(bpf_mlock);
1414 		return 0;
1415 	}
1416 	((struct sockaddr *)dst_buf)->sa_len = sizeof(dst_buf);
1417 
1418 	/*
1419 	 * fix for PR-6849527
1420 	 * geting variables onto stack before dropping lock for bpf_movein()
1421 	 */
1422 	bif_dlt = (int)d->bd_bif->bif_dlt;
1423 	bd_hdrcmplt  = d->bd_hdrcmplt;
1424 
1425 	/* bpf_movein allocating mbufs; drop lock */
1426 	lck_mtx_unlock(bpf_mlock);
1427 
1428 	error = bpf_movein(uio, bif_dlt, &m,
1429 	    bd_hdrcmplt ? NULL : (struct sockaddr *)dst_buf,
1430 	    &datlen);
1431 
1432 	/* take the lock again */
1433 	lck_mtx_lock(bpf_mlock);
1434 	if (error != 0) {
1435 		++d->bd_wdcount;
1436 		bpf_release_d(d);
1437 		lck_mtx_unlock(bpf_mlock);
1438 		return error;
1439 	}
1440 
1441 	/* verify the device is still open */
1442 	if ((d->bd_flags & BPF_CLOSING) != 0) {
1443 		++d->bd_wdcount;
1444 		bpf_release_d(d);
1445 		lck_mtx_unlock(bpf_mlock);
1446 		m_freem(m);
1447 		return ENXIO;
1448 	}
1449 
1450 	if (d->bd_bif == NULL) {
1451 		++d->bd_wdcount;
1452 		bpf_release_d(d);
1453 		lck_mtx_unlock(bpf_mlock);
1454 		m_free(m);
1455 		return ENXIO;
1456 	}
1457 
1458 	if ((unsigned)datlen > ifp->if_mtu) {
1459 		++d->bd_wdcount;
1460 		bpf_release_d(d);
1461 		lck_mtx_unlock(bpf_mlock);
1462 		m_freem(m);
1463 		return EMSGSIZE;
1464 	}
1465 
1466 	bpf_set_packet_service_class(m, d->bd_traffic_class);
1467 
1468 	lck_mtx_unlock(bpf_mlock);
1469 
1470 	/*
1471 	 * The driver frees the mbuf.
1472 	 */
1473 	if (d->bd_hdrcmplt) {
1474 		if (d->bd_bif->bif_send) {
1475 			error = d->bd_bif->bif_send(ifp, d->bd_bif->bif_dlt, m);
1476 		} else {
1477 			error = dlil_output(ifp, 0, m, NULL, NULL, 1, NULL);
1478 		}
1479 	} else {
1480 		error = dlil_output(ifp, PF_INET, m, NULL,
1481 		    (struct sockaddr *)dst_buf, 0, NULL);
1482 	}
1483 
1484 	lck_mtx_lock(bpf_mlock);
1485 	if (error != 0) {
1486 		++d->bd_wdcount;
1487 	}
1488 	bpf_release_d(d);
1489 	lck_mtx_unlock(bpf_mlock);
1490 
1491 	return error;
1492 }
1493 
1494 /*
1495  * Reset a descriptor by flushing its packet buffer and clearing the
1496  * receive and drop counts.
1497  */
1498 static void
reset_d(struct bpf_d * d)1499 reset_d(struct bpf_d *d)
1500 {
1501 	if (d->bd_hbuf_read != 0) {
1502 		panic("resetting buffers during read");
1503 	}
1504 
1505 	if (d->bd_hbuf) {
1506 		/* Free the hold buffer. */
1507 		d->bd_fbuf = d->bd_hbuf;
1508 		d->bd_hbuf = NULL;
1509 	}
1510 	d->bd_slen = 0;
1511 	d->bd_hlen = 0;
1512 	d->bd_scnt = 0;
1513 	d->bd_hcnt = 0;
1514 	d->bd_rcount = 0;
1515 	d->bd_dcount = 0;
1516 	d->bd_fcount = 0;
1517 	d->bd_wcount = 0;
1518 	d->bd_wdcount = 0;
1519 
1520 	d->bd_prev_slen = 0;
1521 }
1522 
1523 static struct bpf_d *
bpf_get_device_from_uuid(uuid_t uuid)1524 bpf_get_device_from_uuid(uuid_t uuid)
1525 {
1526 	unsigned int i;
1527 
1528 	for (i = 0; i < nbpfilter; i++) {
1529 		struct bpf_d *d = bpf_dtab[i];
1530 
1531 		if (d == NULL || d == BPF_DEV_RESERVED ||
1532 		    (d->bd_flags & BPF_CLOSING) != 0) {
1533 			continue;
1534 		}
1535 		if (uuid_compare(uuid, d->bd_uuid) == 0) {
1536 			return d;
1537 		}
1538 	}
1539 
1540 	return NULL;
1541 }
1542 
1543 /*
1544  * The BIOCSETUP command "atomically" attach to the interface and
1545  * copy the buffer from another interface. This minimizes the risk
1546  * of missing packet because this is done while holding
1547  * the BPF global lock
1548  */
1549 static int
bpf_setup(struct bpf_d * d_to,uuid_t uuid_from,ifnet_t ifp)1550 bpf_setup(struct bpf_d *d_to, uuid_t uuid_from, ifnet_t ifp)
1551 {
1552 	struct bpf_d *d_from;
1553 	int error = 0;
1554 
1555 	LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);
1556 
1557 	/*
1558 	 * Sanity checks
1559 	 */
1560 	d_from = bpf_get_device_from_uuid(uuid_from);
1561 	if (d_from == NULL) {
1562 		error = ENOENT;
1563 		os_log_error(OS_LOG_DEFAULT,
1564 		    "%s: uuids not found error %d",
1565 		    __func__, error);
1566 		return error;
1567 	}
1568 	if (d_from->bd_opened_by != d_to->bd_opened_by) {
1569 		error = EACCES;
1570 		os_log_error(OS_LOG_DEFAULT,
1571 		    "%s: processes not matching error %d",
1572 		    __func__, error);
1573 		return error;
1574 	}
1575 
1576 	/*
1577 	 * Prevent any read while copying
1578 	 */
1579 	while (d_to->bd_hbuf_read != 0) {
1580 		msleep((caddr_t)d_to, bpf_mlock, PRINET, __func__, NULL);
1581 	}
1582 	d_to->bd_hbuf_read = 1;
1583 
1584 	while (d_from->bd_hbuf_read != 0) {
1585 		msleep((caddr_t)d_from, bpf_mlock, PRINET, __func__, NULL);
1586 	}
1587 	d_from->bd_hbuf_read = 1;
1588 
1589 	/*
1590 	 * Verify the devices have not been closed
1591 	 */
1592 	if (d_to->bd_flags & BPF_CLOSING) {
1593 		error = ENXIO;
1594 		os_log_error(OS_LOG_DEFAULT,
1595 		    "%s: d_to is closing error %d",
1596 		    __func__, error);
1597 		goto done;
1598 	}
1599 	if (d_from->bd_flags & BPF_CLOSING) {
1600 		error = ENXIO;
1601 		os_log_error(OS_LOG_DEFAULT,
1602 		    "%s: d_from is closing error %d",
1603 		    __func__, error);
1604 		goto done;
1605 	}
1606 
1607 	/*
1608 	 * For now require the same buffer size
1609 	 */
1610 	if (d_from->bd_bufsize != d_to->bd_bufsize) {
1611 		error = EINVAL;
1612 		os_log_error(OS_LOG_DEFAULT,
1613 		    "%s: bufsizes not matching error %d",
1614 		    __func__, error);
1615 		goto done;
1616 	}
1617 
1618 	/*
1619 	 * Copy relevant options and flags
1620 	 */
1621 	d_to->bd_flags = d_from->bd_flags & (BPF_EXTENDED_HDR | BPF_WANT_PKTAP |
1622 	    BPF_FINALIZE_PKTAP | BPF_TRUNCATE | BPF_PKTHDRV2 |
1623 	    BPF_COMP_REQ | BPF_COMP_ENABLED);
1624 
1625 	d_to->bd_headdrop = d_from->bd_headdrop;
1626 
1627 	/*
1628 	 * Allocate and copy the buffers
1629 	 */
1630 	error = bpf_allocbufs(d_to);
1631 	if (error != 0) {
1632 		goto done;
1633 	}
1634 
1635 	/*
1636 	 * Make sure the buffers are setup as expected by bpf_setif()
1637 	 */
1638 	ASSERT(d_to->bd_hbuf == NULL);
1639 	ASSERT(d_to->bd_sbuf != NULL);
1640 	ASSERT(d_to->bd_fbuf != NULL);
1641 
1642 	/*
1643 	 * Copy the buffers and update the pointers and counts
1644 	 */
1645 	memcpy(d_to->bd_sbuf, d_from->bd_sbuf, d_from->bd_slen);
1646 	d_to->bd_slen = d_from->bd_slen;
1647 	d_to->bd_scnt = d_from->bd_scnt;
1648 
1649 	if (d_from->bd_hbuf != NULL) {
1650 		d_to->bd_hbuf = d_to->bd_fbuf;
1651 		d_to->bd_fbuf = NULL;
1652 		memcpy(d_to->bd_hbuf, d_from->bd_hbuf, d_from->bd_hlen);
1653 	}
1654 	d_to->bd_hlen = d_from->bd_hlen;
1655 	d_to->bd_hcnt = d_from->bd_hcnt;
1656 
1657 	if (d_to->bd_flags & BPF_COMP_REQ) {
1658 		ASSERT(d_to->bd_prev_sbuf != NULL);
1659 		ASSERT(d_to->bd_prev_fbuf != NULL);
1660 
1661 		d_to->bd_prev_slen = d_from->bd_prev_slen;
1662 		ASSERT(d_to->bd_prev_slen <= BPF_HDR_COMP_LEN_MAX);
1663 		memcpy(d_to->bd_prev_sbuf, d_from->bd_prev_sbuf, BPF_HDR_COMP_LEN_MAX);
1664 	}
1665 
1666 	d_to->bd_bcs = d_from->bd_bcs;
1667 
1668 	/*
1669 	 * Attach to the interface:
1670 	 * - don't reset the buffers
1671 	 * - we already prevent reads
1672 	 * - the buffers are already allocated
1673 	 */
1674 	error = bpf_setif(d_to, ifp, false, true, true);
1675 	if (error != 0) {
1676 		os_log_error(OS_LOG_DEFAULT,
1677 		    "%s: bpf_setif() failed error %d",
1678 		    __func__, error);
1679 		goto done;
1680 	}
1681 done:
1682 	d_from->bd_hbuf_read = 0;
1683 	wakeup((caddr_t)d_from);
1684 
1685 	d_to->bd_hbuf_read = 0;
1686 	wakeup((caddr_t)d_to);
1687 
1688 	return error;
1689 }
1690 
#if DEVELOPMENT || DEBUG
/* X-macro list of the ioctl commands that are logged by name. */
#define BPF_IOC_LIST \
	X(FIONREAD) \
	X(SIOCGIFADDR) \
	X(BIOCGBLEN) \
	X(BIOCSBLEN) \
	X(BIOCSETF32) \
	X(BIOCSETFNR32) \
	X(BIOCSETF64) \
	X(BIOCSETFNR64) \
	X(BIOCFLUSH) \
	X(BIOCPROMISC) \
	X(BIOCGDLT) \
	X(BIOCGDLTLIST) \
	X(BIOCSDLT) \
	X(BIOCGETIF) \
	X(BIOCSETIF) \
	X(BIOCSRTIMEOUT32) \
	X(BIOCSRTIMEOUT64) \
	X(BIOCGRTIMEOUT32) \
	X(BIOCGRTIMEOUT64) \
	X(BIOCGSTATS) \
	X(BIOCIMMEDIATE) \
	X(BIOCVERSION) \
	X(BIOCGHDRCMPLT) \
	X(BIOCSHDRCMPLT) \
	X(BIOCGSEESENT) \
	X(BIOCSSEESENT) \
	X(BIOCSETTC) \
	X(BIOCGETTC) \
	X(FIONBIO) \
	X(FIOASYNC) \
	X(BIOCSRSIG) \
	X(BIOCGRSIG) \
	X(BIOCSEXTHDR) \
	X(BIOCGIFATTACHCOUNT) \
	X(BIOCGWANTPKTAP) \
	X(BIOCSWANTPKTAP) \
	X(BIOCSHEADDROP) \
	X(BIOCGHEADDROP) \
	X(BIOCSTRUNCATE) \
	X(BIOCGETUUID) \
	X(BIOCSETUP) \
	X(BIOCSPKTHDRV2) \
	X(BIOCGHDRCOMP) \
	X(BIOCSHDRCOMP) \
	X(BIOCGHDRCOMPSTATS) \
	X(BIOCGHDRCOMPON)

/*
 * Log the ioctl command issued on descriptor d.
 *
 * Commands listed in BPF_IOC_LIST are printed and logged by their symbolic
 * name; any other command is logged as an 8-digit hexadecimal value.
 */
static void
log_bpf_ioctl_str(struct bpf_d *d, u_long cmd)
{
	char hexbuf[32];
	const char *name = NULL;

	/* One case per listed command: record and print its name. */
#define X(x) case x: { name = #x ; printf("%s\n", name); break; }
	switch (cmd) {
		BPF_IOC_LIST
	}
#undef X

	if (name == NULL) {
		/* Unknown command: fall back to its numeric value. */
		snprintf(hexbuf, sizeof(hexbuf), "0x%08x", (unsigned int)cmd);
		name = hexbuf;
	}

	os_log(OS_LOG_DEFAULT, "bpfioctl bpf%u %s",
	    d->bd_dev_minor, name);
}
#endif /* DEVELOPMENT || DEBUG */
1759 
1760 /*
1761  *  FIONREAD		Check for read packet available.
1762  *  SIOCGIFADDR		Get interface address - convenient hook to driver.
1763  *  BIOCGBLEN		Get buffer len [for read()].
1764  *  BIOCSETF		Set ethernet read filter.
1765  *  BIOCFLUSH		Flush read packet buffer.
1766  *  BIOCPROMISC		Put interface into promiscuous mode.
1767  *  BIOCGDLT		Get link layer type.
1768  *  BIOCGETIF		Get interface name.
1769  *  BIOCSETIF		Set interface.
1770  *  BIOCSRTIMEOUT	Set read timeout.
1771  *  BIOCGRTIMEOUT	Get read timeout.
1772  *  BIOCGSTATS		Get packet stats.
1773  *  BIOCIMMEDIATE	Set immediate mode.
1774  *  BIOCVERSION		Get filter language version.
1775  *  BIOCGHDRCMPLT	Get "header already complete" flag
1776  *  BIOCSHDRCMPLT	Set "header already complete" flag
1777  *  BIOCGSEESENT	Get "see packets sent" flag
1778  *  BIOCSSEESENT	Set "see packets sent" flag
1779  *  BIOCSETTC		Set traffic class.
1780  *  BIOCGETTC		Get traffic class.
1781  *  BIOCSEXTHDR		Set "extended header" flag
1782  *  BIOCSHEADDROP	Drop head of the buffer if user is not reading
1783  *  BIOCGHEADDROP	Get "head-drop" flag
1784  */
1785 /* ARGSUSED */
1786 int
bpfioctl(dev_t dev,u_long cmd,caddr_t addr,__unused int flags,struct proc * p)1787 bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags,
1788     struct proc *p)
1789 {
1790 	struct bpf_d *d;
1791 	int error = 0;
1792 	u_int int_arg;
1793 	struct ifreq ifr = {};
1794 
1795 	lck_mtx_lock(bpf_mlock);
1796 
1797 	d = bpf_dtab[minor(dev)];
1798 	if (d == NULL || d == BPF_DEV_RESERVED ||
1799 	    (d->bd_flags & BPF_CLOSING) != 0) {
1800 		lck_mtx_unlock(bpf_mlock);
1801 		return ENXIO;
1802 	}
1803 
1804 	bpf_acquire_d(d);
1805 
1806 	if (d->bd_state == BPF_WAITING) {
1807 		bpf_stop_timer(d);
1808 	}
1809 	d->bd_state = BPF_IDLE;
1810 
1811 #if DEVELOPMENT || DEBUG
1812 	if (bpf_debug > 0) {
1813 		log_bpf_ioctl_str(d, cmd);
1814 	}
1815 #endif /* DEVELOPMENT || DEBUG */
1816 
1817 	switch (cmd) {
1818 	default:
1819 		error = EINVAL;
1820 		break;
1821 
1822 	/*
1823 	 * Check for read packet available.
1824 	 */
1825 	case FIONREAD:                  /* int */
1826 	{
1827 		int n;
1828 
1829 		n = d->bd_slen;
1830 		if (d->bd_hbuf && d->bd_hbuf_read == 0) {
1831 			n += d->bd_hlen;
1832 		}
1833 
1834 		bcopy(&n, addr, sizeof(n));
1835 		break;
1836 	}
1837 
1838 	case SIOCGIFADDR:               /* struct ifreq */
1839 	{
1840 		struct ifnet *ifp;
1841 
1842 		if (d->bd_bif == 0) {
1843 			error = EINVAL;
1844 		} else {
1845 			ifp = d->bd_bif->bif_ifp;
1846 			error = ifnet_ioctl(ifp, 0, cmd, addr);
1847 		}
1848 		break;
1849 	}
1850 
1851 	/*
1852 	 * Get buffer len [for read()].
1853 	 */
1854 	case BIOCGBLEN:                 /* u_int */
1855 		bcopy(&d->bd_bufsize, addr, sizeof(u_int));
1856 		break;
1857 
1858 	/*
1859 	 * Set buffer length.
1860 	 */
1861 	case BIOCSBLEN: {               /* u_int */
1862 		u_int size;
1863 
1864 		if (d->bd_bif != 0 || (d->bd_flags & BPF_DETACHING)) {
1865 			/*
1866 			 * Interface already attached, unable to change buffers
1867 			 */
1868 			error = EINVAL;
1869 			break;
1870 		}
1871 		bcopy(addr, &size, sizeof(size));
1872 
1873 		if (size > BPF_BUFSIZE_CAP) {
1874 			d->bd_bufsize = BPF_BUFSIZE_CAP;
1875 
1876 			os_log_info(OS_LOG_DEFAULT,
1877 			    "bpf%d BIOCSBLEN capped to %u from %u",
1878 			    minor(dev), d->bd_bufsize, size);
1879 		} else if (size < BPF_MINBUFSIZE) {
1880 			d->bd_bufsize = BPF_MINBUFSIZE;
1881 
1882 			os_log_info(OS_LOG_DEFAULT,
1883 			    "bpf%d BIOCSBLEN bumped to %u from %u",
1884 			    minor(dev), d->bd_bufsize, size);
1885 		} else {
1886 			d->bd_bufsize = size;
1887 
1888 			os_log_info(OS_LOG_DEFAULT,
1889 			    "bpf%d BIOCSBLEN %u",
1890 			    minor(dev), d->bd_bufsize);
1891 		}
1892 
1893 		/* It's a read/write ioctl */
1894 		bcopy(&d->bd_bufsize, addr, sizeof(u_int));
1895 		break;
1896 	}
1897 	/*
1898 	 * Set link layer read filter.
1899 	 */
1900 	case BIOCSETF32:
1901 	case BIOCSETFNR32: {            /* struct bpf_program32 */
1902 		struct bpf_program32 prg32;
1903 
1904 		bcopy(addr, &prg32, sizeof(prg32));
1905 		error = bpf_setf(d, prg32.bf_len,
1906 		    CAST_USER_ADDR_T(prg32.bf_insns), cmd);
1907 		break;
1908 	}
1909 
1910 	case BIOCSETF64:
1911 	case BIOCSETFNR64: {            /* struct bpf_program64 */
1912 		struct bpf_program64 prg64;
1913 
1914 		bcopy(addr, &prg64, sizeof(prg64));
1915 		error = bpf_setf(d, prg64.bf_len, CAST_USER_ADDR_T(prg64.bf_insns), cmd);
1916 		break;
1917 	}
1918 
1919 	/*
1920 	 * Flush read packet buffer.
1921 	 */
1922 	case BIOCFLUSH:
1923 		while (d->bd_hbuf_read != 0) {
1924 			msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading",
1925 			    NULL);
1926 		}
1927 		if ((d->bd_flags & BPF_CLOSING) != 0) {
1928 			error = ENXIO;
1929 			break;
1930 		}
1931 		reset_d(d);
1932 		break;
1933 
1934 	/*
1935 	 * Put interface into promiscuous mode.
1936 	 */
1937 	case BIOCPROMISC:
1938 		if (d->bd_bif == 0) {
1939 			/*
1940 			 * No interface attached yet.
1941 			 */
1942 			error = EINVAL;
1943 			break;
1944 		}
1945 		if (d->bd_promisc == 0) {
1946 			lck_mtx_unlock(bpf_mlock);
1947 			error = ifnet_set_promiscuous(d->bd_bif->bif_ifp, 1);
1948 			lck_mtx_lock(bpf_mlock);
1949 			if (error == 0) {
1950 				d->bd_promisc = 1;
1951 			}
1952 		}
1953 		break;
1954 
1955 	/*
1956 	 * Get device parameters.
1957 	 */
1958 	case BIOCGDLT:                  /* u_int */
1959 		if (d->bd_bif == 0) {
1960 			error = EINVAL;
1961 		} else {
1962 			bcopy(&d->bd_bif->bif_dlt, addr, sizeof(u_int));
1963 		}
1964 		break;
1965 
1966 	/*
1967 	 * Get a list of supported data link types.
1968 	 */
1969 	case BIOCGDLTLIST:              /* struct bpf_dltlist */
1970 		if (d->bd_bif == NULL) {
1971 			error = EINVAL;
1972 		} else {
1973 			error = bpf_getdltlist(d, addr, p);
1974 		}
1975 		break;
1976 
1977 	/*
1978 	 * Set data link type.
1979 	 */
1980 	case BIOCSDLT:                  /* u_int */
1981 		if (d->bd_bif == NULL) {
1982 			error = EINVAL;
1983 		} else {
1984 			u_int dlt;
1985 
1986 			bcopy(addr, &dlt, sizeof(dlt));
1987 
1988 			if (dlt == DLT_PKTAP &&
1989 			    !(d->bd_flags & BPF_WANT_PKTAP)) {
1990 				dlt = DLT_RAW;
1991 			}
1992 			error = bpf_setdlt(d, dlt);
1993 		}
1994 		break;
1995 
1996 	/*
1997 	 * Get interface name.
1998 	 */
1999 	case BIOCGETIF:                 /* struct ifreq */
2000 		if (d->bd_bif == 0) {
2001 			error = EINVAL;
2002 		} else {
2003 			struct ifnet *const ifp = d->bd_bif->bif_ifp;
2004 
2005 			snprintf(((struct ifreq *)(void *)addr)->ifr_name,
2006 			    sizeof(ifr.ifr_name), "%s", if_name(ifp));
2007 		}
2008 		break;
2009 
2010 	/*
2011 	 * Set interface.
2012 	 */
2013 	case BIOCSETIF: {               /* struct ifreq */
2014 		ifnet_t ifp;
2015 
2016 		bcopy(addr, &ifr, sizeof(ifr));
2017 		ifr.ifr_name[IFNAMSIZ - 1] = '\0';
2018 		ifp = ifunit(ifr.ifr_name);
2019 		if (ifp == NULL) {
2020 			error = ENXIO;
2021 		} else {
2022 			error = bpf_setif(d, ifp, true, false, false);
2023 		}
2024 		break;
2025 	}
2026 
2027 	/*
2028 	 * Set read timeout.
2029 	 */
2030 	case BIOCSRTIMEOUT32: {         /* struct user32_timeval */
2031 		struct user32_timeval _tv;
2032 		struct timeval tv;
2033 
2034 		bcopy(addr, &_tv, sizeof(_tv));
2035 		tv.tv_sec  = _tv.tv_sec;
2036 		tv.tv_usec = _tv.tv_usec;
2037 
2038 		/*
2039 		 * Subtract 1 tick from tvtohz() since this isn't
2040 		 * a one-shot timer.
2041 		 */
2042 		if ((error = itimerfix(&tv)) == 0) {
2043 			d->bd_rtout = tvtohz(&tv) - 1;
2044 		}
2045 		break;
2046 	}
2047 
2048 	case BIOCSRTIMEOUT64: {         /* struct user64_timeval */
2049 		struct user64_timeval _tv;
2050 		struct timeval tv;
2051 
2052 		bcopy(addr, &_tv, sizeof(_tv));
2053 		tv.tv_sec  = (__darwin_time_t)_tv.tv_sec;
2054 		tv.tv_usec = _tv.tv_usec;
2055 
2056 		/*
2057 		 * Subtract 1 tick from tvtohz() since this isn't
2058 		 * a one-shot timer.
2059 		 */
2060 		if ((error = itimerfix(&tv)) == 0) {
2061 			d->bd_rtout = tvtohz(&tv) - 1;
2062 		}
2063 		break;
2064 	}
2065 
2066 	/*
2067 	 * Get read timeout.
2068 	 */
2069 	case BIOCGRTIMEOUT32: {         /* struct user32_timeval */
2070 		struct user32_timeval tv;
2071 
2072 		bzero(&tv, sizeof(tv));
2073 		tv.tv_sec = d->bd_rtout / hz;
2074 		tv.tv_usec = (d->bd_rtout % hz) * tick;
2075 		bcopy(&tv, addr, sizeof(tv));
2076 		break;
2077 	}
2078 
2079 	case BIOCGRTIMEOUT64: {         /* struct user64_timeval */
2080 		struct user64_timeval tv;
2081 
2082 		bzero(&tv, sizeof(tv));
2083 		tv.tv_sec = d->bd_rtout / hz;
2084 		tv.tv_usec = (d->bd_rtout % hz) * tick;
2085 		bcopy(&tv, addr, sizeof(tv));
2086 		break;
2087 	}
2088 
2089 	/*
2090 	 * Get packet stats.
2091 	 */
2092 	case BIOCGSTATS: {              /* struct bpf_stat */
2093 		struct bpf_stat bs;
2094 
2095 		bzero(&bs, sizeof(bs));
2096 		bs.bs_recv = (u_int)d->bd_rcount;
2097 		bs.bs_drop = (u_int)d->bd_dcount;
2098 		bcopy(&bs, addr, sizeof(bs));
2099 		break;
2100 	}
2101 
2102 	/*
2103 	 * Set immediate mode.
2104 	 */
2105 	case BIOCIMMEDIATE:             /* u_int */
2106 		d->bd_immediate = *(u_char *)(void *)addr;
2107 		break;
2108 
2109 	case BIOCVERSION: {             /* struct bpf_version */
2110 		struct bpf_version bv;
2111 
2112 		bzero(&bv, sizeof(bv));
2113 		bv.bv_major = BPF_MAJOR_VERSION;
2114 		bv.bv_minor = BPF_MINOR_VERSION;
2115 		bcopy(&bv, addr, sizeof(bv));
2116 		break;
2117 	}
2118 
2119 	/*
2120 	 * Get "header already complete" flag
2121 	 */
2122 	case BIOCGHDRCMPLT:             /* u_int */
2123 		bcopy(&d->bd_hdrcmplt, addr, sizeof(u_int));
2124 		break;
2125 
2126 	/*
2127 	 * Set "header already complete" flag
2128 	 */
2129 	case BIOCSHDRCMPLT:             /* u_int */
2130 		bcopy(addr, &int_arg, sizeof(int_arg));
2131 		d->bd_hdrcmplt = int_arg ? 1 : 0;
2132 		break;
2133 
2134 	/*
2135 	 * Get "see sent packets" flag
2136 	 */
2137 	case BIOCGSEESENT:              /* u_int */
2138 		bcopy(&d->bd_seesent, addr, sizeof(u_int));
2139 		break;
2140 
2141 	/*
2142 	 * Set "see sent packets" flag
2143 	 */
2144 	case BIOCSSEESENT:              /* u_int */
2145 		bcopy(addr, &d->bd_seesent, sizeof(u_int));
2146 		break;
2147 
2148 	/*
2149 	 * Set traffic service class
2150 	 */
2151 	case BIOCSETTC: {               /* int */
2152 		int tc;
2153 
2154 		bcopy(addr, &tc, sizeof(int));
2155 		error = bpf_set_traffic_class(d, tc);
2156 		break;
2157 	}
2158 
2159 	/*
2160 	 * Get traffic service class
2161 	 */
2162 	case BIOCGETTC:                 /* int */
2163 		bcopy(&d->bd_traffic_class, addr, sizeof(int));
2164 		break;
2165 
2166 	case FIONBIO:           /* Non-blocking I/O; int */
2167 		break;
2168 
2169 	case FIOASYNC:          /* Send signal on receive packets; int */
2170 		bcopy(addr, &d->bd_async, sizeof(int));
2171 		break;
2172 
2173 	case BIOCSRSIG: {       /* Set receive signal; u_int */
2174 		u_int sig;
2175 
2176 		bcopy(addr, &sig, sizeof(u_int));
2177 
2178 		if (sig >= NSIG) {
2179 			error = EINVAL;
2180 		} else {
2181 			d->bd_sig = sig;
2182 		}
2183 		break;
2184 	}
2185 	case BIOCGRSIG:                 /* u_int */
2186 		bcopy(&d->bd_sig, addr, sizeof(u_int));
2187 		break;
2188 
2189 	case BIOCSEXTHDR:               /* u_int */
2190 		bcopy(addr, &int_arg, sizeof(int_arg));
2191 		if (int_arg) {
2192 			d->bd_flags |= BPF_EXTENDED_HDR;
2193 		} else {
2194 			d->bd_flags &= ~BPF_EXTENDED_HDR;
2195 		}
2196 		break;
2197 
2198 	case BIOCGIFATTACHCOUNT: {              /* struct ifreq */
2199 		ifnet_t ifp;
2200 		struct bpf_if *bp;
2201 
2202 		bcopy(addr, &ifr, sizeof(ifr));
2203 		ifr.ifr_name[IFNAMSIZ - 1] = '\0';
2204 		ifp = ifunit(ifr.ifr_name);
2205 		if (ifp == NULL) {
2206 			error = ENXIO;
2207 			break;
2208 		}
2209 		ifr.ifr_intval = 0;
2210 		for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
2211 			struct bpf_d *bpf_d;
2212 
2213 			if (bp->bif_ifp == NULL || bp->bif_ifp != ifp) {
2214 				continue;
2215 			}
2216 			for (bpf_d = bp->bif_dlist; bpf_d;
2217 			    bpf_d = bpf_d->bd_next) {
2218 				ifr.ifr_intval += 1;
2219 			}
2220 		}
2221 		bcopy(&ifr, addr, sizeof(ifr));
2222 		break;
2223 	}
2224 	case BIOCGWANTPKTAP:                    /* u_int */
2225 		int_arg = d->bd_flags & BPF_WANT_PKTAP ? 1 : 0;
2226 		bcopy(&int_arg, addr, sizeof(int_arg));
2227 		break;
2228 
2229 	case BIOCSWANTPKTAP:                    /* u_int */
2230 		bcopy(addr, &int_arg, sizeof(int_arg));
2231 		if (int_arg) {
2232 			d->bd_flags |= BPF_WANT_PKTAP;
2233 		} else {
2234 			d->bd_flags &= ~BPF_WANT_PKTAP;
2235 		}
2236 		break;
2237 
2238 	case BIOCSHEADDROP:
2239 		bcopy(addr, &int_arg, sizeof(int_arg));
2240 		d->bd_headdrop = int_arg ? 1 : 0;
2241 		break;
2242 
2243 	case BIOCGHEADDROP:
2244 		bcopy(&d->bd_headdrop, addr, sizeof(int));
2245 		break;
2246 
2247 	case BIOCSTRUNCATE:
2248 		bcopy(addr, &int_arg, sizeof(int_arg));
2249 		if (int_arg) {
2250 			d->bd_flags |=  BPF_TRUNCATE;
2251 		} else {
2252 			d->bd_flags &= ~BPF_TRUNCATE;
2253 		}
2254 		break;
2255 
2256 	case BIOCGETUUID:
2257 		bcopy(&d->bd_uuid, addr, sizeof(uuid_t));
2258 		break;
2259 
2260 	case BIOCSETUP: {
2261 		struct bpf_setup_args bsa;
2262 		ifnet_t ifp;
2263 
2264 		bcopy(addr, &bsa, sizeof(struct bpf_setup_args));
2265 		bsa.bsa_ifname[IFNAMSIZ - 1] = 0;
2266 		ifp = ifunit(bsa.bsa_ifname);
2267 		if (ifp == NULL) {
2268 			error = ENXIO;
2269 			os_log_error(OS_LOG_DEFAULT,
2270 			    "%s: ifnet not found for %s error %d",
2271 			    __func__, bsa.bsa_ifname, error);
2272 			break;
2273 		}
2274 
2275 		error = bpf_setup(d, bsa.bsa_uuid, ifp);
2276 		break;
2277 	}
2278 	case BIOCSPKTHDRV2:
2279 		bcopy(addr, &int_arg, sizeof(int_arg));
2280 		if (int_arg != 0) {
2281 			d->bd_flags |= BPF_PKTHDRV2;
2282 		} else {
2283 			d->bd_flags &= ~BPF_PKTHDRV2;
2284 		}
2285 		break;
2286 
2287 	case BIOCGPKTHDRV2:
2288 		int_arg = d->bd_flags & BPF_PKTHDRV2 ? 1 : 0;
2289 		bcopy(&int_arg, addr, sizeof(int_arg));
2290 		break;
2291 
2292 	case BIOCGHDRCOMP:
2293 		int_arg = d->bd_flags & BPF_COMP_REQ ? 1 : 0;
2294 		bcopy(&int_arg, addr, sizeof(int_arg));
2295 		break;
2296 
2297 	case BIOCSHDRCOMP:
2298 		bcopy(addr, &int_arg, sizeof(int_arg));
2299 		if (int_arg != 0 && int_arg != 1) {
2300 			return EINVAL;
2301 		}
2302 		if (d->bd_bif != 0 || (d->bd_flags & BPF_DETACHING)) {
2303 			/*
2304 			 * Interface already attached, unable to change buffers
2305 			 */
2306 			error = EINVAL;
2307 			break;
2308 		}
2309 		if (int_arg != 0) {
2310 			d->bd_flags |= BPF_COMP_REQ;
2311 			if (bpf_hdr_comp_enable != 0) {
2312 				d->bd_flags |= BPF_COMP_ENABLED;
2313 			}
2314 		} else {
2315 			d->bd_flags &= ~(BPF_COMP_REQ | BPF_COMP_ENABLED);
2316 		}
2317 		break;
2318 
2319 	case BIOCGHDRCOMPON:
2320 		int_arg = d->bd_flags & BPF_COMP_ENABLED ? 1 : 0;
2321 		bcopy(&int_arg, addr, sizeof(int_arg));
2322 		break;
2323 
2324 	case BIOCGHDRCOMPSTATS: {
2325 		struct bpf_comp_stats bcs = {};
2326 
2327 		bcs = d->bd_bcs;
2328 
2329 		bcopy(&bcs, addr, sizeof(bcs));
2330 		break;
2331 	}
2332 	}
2333 
2334 	bpf_release_d(d);
2335 	lck_mtx_unlock(bpf_mlock);
2336 
2337 	return error;
2338 }
2339 
2340 /*
2341  * Set d's packet filter program to fp.  If this file already has a filter,
2342  * free it and replace it.  Returns EINVAL for bogus requests.
2343  */
2344 static int
bpf_setf(struct bpf_d * d,u_int bf_len,user_addr_t bf_insns,u_long cmd)2345 bpf_setf(struct bpf_d *d, u_int bf_len, user_addr_t bf_insns,
2346     u_long cmd)
2347 {
2348 	struct bpf_insn *fcode, *old;
2349 	u_int flen, size;
2350 
2351 	while (d->bd_hbuf_read != 0) {
2352 		msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
2353 	}
2354 
2355 	if ((d->bd_flags & BPF_CLOSING) != 0) {
2356 		return ENXIO;
2357 	}
2358 
2359 	old = d->bd_filter;
2360 	if (bf_insns == USER_ADDR_NULL) {
2361 		if (bf_len != 0) {
2362 			return EINVAL;
2363 		}
2364 		d->bd_filter = NULL;
2365 		reset_d(d);
2366 		if (old != 0) {
2367 			kfree_data_addr(old);
2368 		}
2369 		return 0;
2370 	}
2371 	flen = bf_len;
2372 	if (flen > BPF_MAXINSNS) {
2373 		return EINVAL;
2374 	}
2375 
2376 	size = flen * sizeof(struct bpf_insn);
2377 	fcode = (struct bpf_insn *) kalloc_data(size, Z_WAITOK | Z_ZERO);
2378 	if (fcode == NULL) {
2379 		return ENOMEM;
2380 	}
2381 	if (copyin(bf_insns, (caddr_t)fcode, size) == 0 &&
2382 	    bpf_validate(fcode, (int)flen)) {
2383 		d->bd_filter = fcode;
2384 
2385 		if (cmd == BIOCSETF32 || cmd == BIOCSETF64) {
2386 			reset_d(d);
2387 		}
2388 
2389 		if (old != 0) {
2390 			kfree_data_addr(old);
2391 		}
2392 
2393 		return 0;
2394 	}
2395 	kfree_data(fcode, size);
2396 	return EINVAL;
2397 }
2398 
2399 /*
2400  * Detach a file from its current interface (if attached at all) and attach
2401  * to the interface indicated by the name stored in ifr.
2402  * Return an errno or 0.
2403  */
2404 static int
bpf_setif(struct bpf_d * d,ifnet_t theywant,bool do_reset,bool has_hbuf_read,bool has_bufs_allocated)2405 bpf_setif(struct bpf_d *d, ifnet_t theywant, bool do_reset, bool has_hbuf_read,
2406     bool has_bufs_allocated)
2407 {
2408 	struct bpf_if *bp;
2409 	int error;
2410 
2411 	while (d->bd_hbuf_read != 0 && !has_hbuf_read) {
2412 		msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
2413 	}
2414 
2415 	if ((d->bd_flags & BPF_CLOSING) != 0) {
2416 		return ENXIO;
2417 	}
2418 
2419 	/*
2420 	 * Look through attached interfaces for the named one.
2421 	 */
2422 	for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
2423 		struct ifnet *ifp = bp->bif_ifp;
2424 
2425 		if (ifp == 0 || ifp != theywant) {
2426 			continue;
2427 		}
2428 		/*
2429 		 * Do not use DLT_PKTAP, unless requested explicitly
2430 		 */
2431 		if (bp->bif_dlt == DLT_PKTAP && !(d->bd_flags & BPF_WANT_PKTAP)) {
2432 			continue;
2433 		}
2434 		/*
2435 		 * Skip the coprocessor interface
2436 		 */
2437 		if (!intcoproc_unrestricted && IFNET_IS_INTCOPROC(ifp)) {
2438 			continue;
2439 		}
2440 		/*
2441 		 * We found the requested interface.
2442 		 * Allocate the packet buffers.
2443 		 */
2444 		if (has_bufs_allocated == false) {
2445 			error = bpf_allocbufs(d);
2446 			if (error != 0) {
2447 				return error;
2448 			}
2449 		}
2450 		/*
2451 		 * Detach if attached to something else.
2452 		 */
2453 		if (bp != d->bd_bif) {
2454 			if (d->bd_bif != NULL) {
2455 				if (bpf_detachd(d) != 0) {
2456 					return ENXIO;
2457 				}
2458 			}
2459 			if (bpf_attachd(d, bp) != 0) {
2460 				return ENXIO;
2461 			}
2462 		}
2463 		if (do_reset) {
2464 			reset_d(d);
2465 		}
2466 		os_log(OS_LOG_DEFAULT, "bpf%u attached to %s",
2467 		    d->bd_dev_minor, if_name(theywant));
2468 		return 0;
2469 	}
2470 	/* Not found. */
2471 	return ENXIO;
2472 }
2473 
2474 /*
2475  * Get a list of available data link type of the interface.
2476  */
2477 static int
bpf_getdltlist(struct bpf_d * d,caddr_t addr,struct proc * p)2478 bpf_getdltlist(struct bpf_d *d, caddr_t addr, struct proc *p)
2479 {
2480 	u_int           n;
2481 	int             error;
2482 	struct ifnet    *ifp;
2483 	struct bpf_if   *bp;
2484 	user_addr_t     dlist;
2485 	struct bpf_dltlist bfl;
2486 
2487 	bcopy(addr, &bfl, sizeof(bfl));
2488 	if (proc_is64bit(p)) {
2489 		dlist = (user_addr_t)bfl.bfl_u.bflu_pad;
2490 	} else {
2491 		dlist = CAST_USER_ADDR_T(bfl.bfl_u.bflu_list);
2492 	}
2493 
2494 	ifp = d->bd_bif->bif_ifp;
2495 	n = 0;
2496 	error = 0;
2497 
2498 	for (bp = bpf_iflist; bp; bp = bp->bif_next) {
2499 		if (bp->bif_ifp != ifp) {
2500 			continue;
2501 		}
2502 		/*
2503 		 * Do not use DLT_PKTAP, unless requested explicitly
2504 		 */
2505 		if (bp->bif_dlt == DLT_PKTAP && !(d->bd_flags & BPF_WANT_PKTAP)) {
2506 			continue;
2507 		}
2508 		if (dlist != USER_ADDR_NULL) {
2509 			if (n >= bfl.bfl_len) {
2510 				return ENOMEM;
2511 			}
2512 			error = copyout(&bp->bif_dlt, dlist,
2513 			    sizeof(bp->bif_dlt));
2514 			if (error != 0) {
2515 				break;
2516 			}
2517 			dlist += sizeof(bp->bif_dlt);
2518 		}
2519 		n++;
2520 	}
2521 	bfl.bfl_len = n;
2522 	bcopy(&bfl, addr, sizeof(bfl));
2523 
2524 	return error;
2525 }
2526 
2527 /*
2528  * Set the data link type of a BPF instance.
2529  */
2530 static int
bpf_setdlt(struct bpf_d * d,uint32_t dlt)2531 bpf_setdlt(struct bpf_d *d, uint32_t dlt)
2532 {
2533 	int error, opromisc;
2534 	struct ifnet *ifp;
2535 	struct bpf_if *bp;
2536 
2537 	if (d->bd_bif->bif_dlt == dlt) {
2538 		return 0;
2539 	}
2540 
2541 	while (d->bd_hbuf_read != 0) {
2542 		msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
2543 	}
2544 
2545 	if ((d->bd_flags & BPF_CLOSING) != 0) {
2546 		return ENXIO;
2547 	}
2548 
2549 	ifp = d->bd_bif->bif_ifp;
2550 	for (bp = bpf_iflist; bp; bp = bp->bif_next) {
2551 		if (bp->bif_ifp == ifp && bp->bif_dlt == dlt) {
2552 			/*
2553 			 * Do not use DLT_PKTAP, unless requested explicitly
2554 			 */
2555 			if (bp->bif_dlt == DLT_PKTAP &&
2556 			    !(d->bd_flags & BPF_WANT_PKTAP)) {
2557 				continue;
2558 			}
2559 			break;
2560 		}
2561 	}
2562 	if (bp != NULL) {
2563 		opromisc = d->bd_promisc;
2564 		if (bpf_detachd(d) != 0) {
2565 			return ENXIO;
2566 		}
2567 		error = bpf_attachd(d, bp);
2568 		if (error != 0) {
2569 			os_log_error(OS_LOG_DEFAULT,
2570 			    "bpf_setdlt: bpf%d bpf_attachd %s error %d",
2571 			    d->bd_dev_minor, if_name(bp->bif_ifp),
2572 			    error);
2573 			return error;
2574 		}
2575 		reset_d(d);
2576 		if (opromisc) {
2577 			lck_mtx_unlock(bpf_mlock);
2578 			error = ifnet_set_promiscuous(bp->bif_ifp, 1);
2579 			lck_mtx_lock(bpf_mlock);
2580 			if (error != 0) {
2581 				os_log_error(OS_LOG_DEFAULT,
2582 				    "bpf_setdlt: bpf%d ifpromisc %s error %d",
2583 				    d->bd_dev_minor, if_name(bp->bif_ifp), error);
2584 			} else {
2585 				d->bd_promisc = 1;
2586 			}
2587 		}
2588 	}
2589 	return bp == NULL ? EINVAL : 0;
2590 }
2591 
2592 static int
bpf_set_traffic_class(struct bpf_d * d,int tc)2593 bpf_set_traffic_class(struct bpf_d *d, int tc)
2594 {
2595 	int error = 0;
2596 
2597 	if (!SO_VALID_TC(tc)) {
2598 		error = EINVAL;
2599 	} else {
2600 		d->bd_traffic_class = tc;
2601 	}
2602 
2603 	return error;
2604 }
2605 
2606 static void
bpf_set_packet_service_class(struct mbuf * m,int tc)2607 bpf_set_packet_service_class(struct mbuf *m, int tc)
2608 {
2609 	if (!(m->m_flags & M_PKTHDR)) {
2610 		return;
2611 	}
2612 
2613 	VERIFY(SO_VALID_TC(tc));
2614 	(void) m_set_service_class(m, so_tc2msc(tc));
2615 }
2616 
2617 /*
2618  * Support for select()
2619  *
2620  * Return true iff the specific operation will not block indefinitely.
2621  * Otherwise, return false but make a note that a selwakeup() must be done.
2622  */
2623 int
bpfselect(dev_t dev,int which,void * wql,struct proc * p)2624 bpfselect(dev_t dev, int which, void * wql, struct proc *p)
2625 {
2626 	struct bpf_d *d;
2627 	int ret = 0;
2628 
2629 	lck_mtx_lock(bpf_mlock);
2630 
2631 	d = bpf_dtab[minor(dev)];
2632 	if (d == NULL || d == BPF_DEV_RESERVED ||
2633 	    (d->bd_flags & BPF_CLOSING) != 0) {
2634 		lck_mtx_unlock(bpf_mlock);
2635 		return ENXIO;
2636 	}
2637 
2638 	bpf_acquire_d(d);
2639 
2640 	if (d->bd_bif == NULL) {
2641 		bpf_release_d(d);
2642 		lck_mtx_unlock(bpf_mlock);
2643 		return ENXIO;
2644 	}
2645 
2646 	while (d->bd_hbuf_read != 0) {
2647 		msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
2648 	}
2649 
2650 	if ((d->bd_flags & BPF_CLOSING) != 0) {
2651 		bpf_release_d(d);
2652 		lck_mtx_unlock(bpf_mlock);
2653 		return ENXIO;
2654 	}
2655 
2656 	switch (which) {
2657 	case FREAD:
2658 		if (d->bd_hlen != 0 ||
2659 		    ((d->bd_immediate ||
2660 		    d->bd_state == BPF_TIMED_OUT) && d->bd_slen != 0)) {
2661 			ret = 1;         /* read has data to return */
2662 		} else {
2663 			/*
2664 			 * Read has no data to return.
2665 			 * Make the select wait, and start a timer if
2666 			 * necessary.
2667 			 */
2668 			selrecord(p, &d->bd_sel, wql);
2669 			bpf_start_timer(d);
2670 		}
2671 		break;
2672 
2673 	case FWRITE:
2674 		/* can't determine whether a write would block */
2675 		ret = 1;
2676 		break;
2677 	}
2678 
2679 	bpf_release_d(d);
2680 	lck_mtx_unlock(bpf_mlock);
2681 
2682 	return ret;
2683 }
2684 
2685 /*
2686  * Support for kevent() system call.  Register EVFILT_READ filters and
2687  * reject all others.
2688  */
2689 int bpfkqfilter(dev_t dev, struct knote *kn);
2690 static void filt_bpfdetach(struct knote *);
2691 static int filt_bpfread(struct knote *, long);
2692 static int filt_bpftouch(struct knote *kn, struct kevent_qos_s *kev);
2693 static int filt_bpfprocess(struct knote *kn, struct kevent_qos_s *kev);
2694 
2695 SECURITY_READ_ONLY_EARLY(struct filterops) bpfread_filtops = {
2696 	.f_isfd = 1,
2697 	.f_detach = filt_bpfdetach,
2698 	.f_event = filt_bpfread,
2699 	.f_touch = filt_bpftouch,
2700 	.f_process = filt_bpfprocess,
2701 };
2702 
2703 static int
filt_bpfread_common(struct knote * kn,struct kevent_qos_s * kev,struct bpf_d * d)2704 filt_bpfread_common(struct knote *kn, struct kevent_qos_s *kev, struct bpf_d *d)
2705 {
2706 	int ready = 0;
2707 	int64_t data = 0;
2708 
2709 	if (d->bd_immediate) {
2710 		/*
2711 		 * If there's data in the hold buffer, it's the
2712 		 * amount of data a read will return.
2713 		 *
2714 		 * If there's no data in the hold buffer, but
2715 		 * there's data in the store buffer, a read will
2716 		 * immediately rotate the store buffer to the
2717 		 * hold buffer, the amount of data in the store
2718 		 * buffer is the amount of data a read will
2719 		 * return.
2720 		 *
2721 		 * If there's no data in either buffer, we're not
2722 		 * ready to read.
2723 		 */
2724 		data = (d->bd_hlen == 0 || d->bd_hbuf_read != 0 ?
2725 		    d->bd_slen : d->bd_hlen);
2726 		int64_t lowwat = knote_low_watermark(kn);
2727 		if (lowwat > d->bd_bufsize) {
2728 			lowwat = d->bd_bufsize;
2729 		}
2730 		ready = (data >= lowwat);
2731 	} else {
2732 		/*
2733 		 * If there's data in the hold buffer, it's the
2734 		 * amount of data a read will return.
2735 		 *
2736 		 * If there's no data in the hold buffer, but
2737 		 * there's data in the store buffer, if the
2738 		 * timer has expired a read will immediately
2739 		 * rotate the store buffer to the hold buffer,
2740 		 * so the amount of data in the store buffer is
2741 		 * the amount of data a read will return.
2742 		 *
2743 		 * If there's no data in either buffer, or there's
2744 		 * no data in the hold buffer and the timer hasn't
2745 		 * expired, we're not ready to read.
2746 		 */
2747 		data = ((d->bd_hlen == 0 || d->bd_hbuf_read != 0) &&
2748 		    d->bd_state == BPF_TIMED_OUT ? d->bd_slen : d->bd_hlen);
2749 		ready = (data > 0);
2750 	}
2751 	if (!ready) {
2752 		bpf_start_timer(d);
2753 	} else if (kev) {
2754 		knote_fill_kevent(kn, kev, data);
2755 	}
2756 
2757 	return ready;
2758 }
2759 
2760 int
bpfkqfilter(dev_t dev,struct knote * kn)2761 bpfkqfilter(dev_t dev, struct knote *kn)
2762 {
2763 	struct bpf_d *d;
2764 	int res;
2765 
2766 	/*
2767 	 * Is this device a bpf?
2768 	 */
2769 	if (major(dev) != CDEV_MAJOR || kn->kn_filter != EVFILT_READ) {
2770 		knote_set_error(kn, EINVAL);
2771 		return 0;
2772 	}
2773 
2774 	lck_mtx_lock(bpf_mlock);
2775 
2776 	d = bpf_dtab[minor(dev)];
2777 
2778 	if (d == NULL || d == BPF_DEV_RESERVED ||
2779 	    (d->bd_flags & BPF_CLOSING) != 0 ||
2780 	    d->bd_bif == NULL) {
2781 		lck_mtx_unlock(bpf_mlock);
2782 		knote_set_error(kn, ENXIO);
2783 		return 0;
2784 	}
2785 
2786 	kn->kn_hook = d;
2787 	kn->kn_filtid = EVFILTID_BPFREAD;
2788 	KNOTE_ATTACH(&d->bd_sel.si_note, kn);
2789 	d->bd_flags |= BPF_KNOTE;
2790 
2791 	/* capture the current state */
2792 	res = filt_bpfread_common(kn, NULL, d);
2793 
2794 	lck_mtx_unlock(bpf_mlock);
2795 
2796 	return res;
2797 }
2798 
2799 static void
filt_bpfdetach(struct knote * kn)2800 filt_bpfdetach(struct knote *kn)
2801 {
2802 	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2803 
2804 	lck_mtx_lock(bpf_mlock);
2805 	if (d->bd_flags & BPF_KNOTE) {
2806 		KNOTE_DETACH(&d->bd_sel.si_note, kn);
2807 		d->bd_flags &= ~BPF_KNOTE;
2808 	}
2809 	lck_mtx_unlock(bpf_mlock);
2810 }
2811 
2812 static int
filt_bpfread(struct knote * kn,long hint)2813 filt_bpfread(struct knote *kn, long hint)
2814 {
2815 #pragma unused(hint)
2816 	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2817 
2818 	return filt_bpfread_common(kn, NULL, d);
2819 }
2820 
2821 static int
filt_bpftouch(struct knote * kn,struct kevent_qos_s * kev)2822 filt_bpftouch(struct knote *kn, struct kevent_qos_s *kev)
2823 {
2824 	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2825 	int res;
2826 
2827 	lck_mtx_lock(bpf_mlock);
2828 
2829 	/* save off the lowat threshold and flag */
2830 	kn->kn_sdata = kev->data;
2831 	kn->kn_sfflags = kev->fflags;
2832 
2833 	/* output data will be re-generated here */
2834 	res = filt_bpfread_common(kn, NULL, d);
2835 
2836 	lck_mtx_unlock(bpf_mlock);
2837 
2838 	return res;
2839 }
2840 
2841 static int
filt_bpfprocess(struct knote * kn,struct kevent_qos_s * kev)2842 filt_bpfprocess(struct knote *kn, struct kevent_qos_s *kev)
2843 {
2844 	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2845 	int res;
2846 
2847 	lck_mtx_lock(bpf_mlock);
2848 	res = filt_bpfread_common(kn, kev, d);
2849 	lck_mtx_unlock(bpf_mlock);
2850 
2851 	return res;
2852 }
2853 
2854 /*
2855  * Copy data from an mbuf chain into a buffer.	This code is derived
2856  * from m_copydata in kern/uipc_mbuf.c.
2857  */
2858 static void
bpf_mcopy(struct mbuf * m,void * dst_arg,size_t len,size_t offset)2859 bpf_mcopy(struct mbuf *m, void *dst_arg, size_t len, size_t offset)
2860 {
2861 	u_int count;
2862 	u_char *dst;
2863 
2864 	dst = dst_arg;
2865 
2866 	while (offset >= m->m_len) {
2867 		offset -= m->m_len;
2868 		m = m->m_next;
2869 		if (m == NULL) {
2870 			panic("bpf_mcopy");
2871 		}
2872 		continue;
2873 	}
2874 
2875 	while (len > 0) {
2876 		if (m == NULL) {
2877 			panic("bpf_mcopy");
2878 		}
2879 		count = MIN(m->m_len - (u_int)offset, (u_int)len);
2880 		bcopy((u_char *)mbuf_data(m) + offset, dst, count);
2881 		m = m->m_next;
2882 		dst += count;
2883 		len -= count;
2884 		offset = 0;
2885 	}
2886 }
2887 
2888 static inline void
bpf_tap_imp(ifnet_t ifp,u_int32_t dlt,struct bpf_packet * bpf_pkt,int outbound)2889 bpf_tap_imp(
2890 	ifnet_t         ifp,
2891 	u_int32_t       dlt,
2892 	struct bpf_packet *bpf_pkt,
2893 	int             outbound)
2894 {
2895 	struct bpf_d    *d;
2896 	u_int slen;
2897 	struct bpf_if *bp;
2898 
2899 	/*
2900 	 * It's possible that we get here after the bpf descriptor has been
2901 	 * detached from the interface; in such a case we simply return.
2902 	 * Lock ordering is important since we can be called asynchronously
2903 	 * (from IOKit) to process an inbound packet; when that happens
2904 	 * we would have been holding its "gateLock" and will be acquiring
2905 	 * "bpf_mlock" upon entering this routine.  Due to that, we release
2906 	 * "bpf_mlock" prior to calling ifnet_set_promiscuous (which will
2907 	 * acquire "gateLock" in the IOKit), in order to avoid a deadlock
2908 	 * when a ifnet_set_promiscuous request simultaneously collides with
2909 	 * an inbound packet being passed into the tap callback.
2910 	 */
2911 	lck_mtx_lock(bpf_mlock);
2912 	if (ifp->if_bpf == NULL) {
2913 		lck_mtx_unlock(bpf_mlock);
2914 		return;
2915 	}
2916 	for (bp = ifp->if_bpf; bp != NULL; bp = bp->bif_next) {
2917 		if (bp->bif_ifp != ifp) {
2918 			/* wrong interface */
2919 			bp = NULL;
2920 			break;
2921 		}
2922 		if (dlt == 0 || bp->bif_dlt == dlt) {
2923 			/* tapping default DLT or DLT matches */
2924 			break;
2925 		}
2926 	}
2927 	if (bp == NULL) {
2928 		goto done;
2929 	}
2930 	for (d = bp->bif_dlist; d != NULL; d = d->bd_next) {
2931 		struct bpf_packet *bpf_pkt_saved = bpf_pkt;
2932 		struct bpf_packet bpf_pkt_tmp = {};
2933 		struct pktap_header_buffer bpfp_header_tmp = {};
2934 
2935 		if (outbound && !d->bd_seesent) {
2936 			continue;
2937 		}
2938 
2939 		++d->bd_rcount;
2940 		slen = bpf_filter(d->bd_filter, (u_char *)bpf_pkt,
2941 		    (u_int)bpf_pkt->bpfp_total_length, 0);
2942 
2943 		if (slen != 0) {
2944 			if (bp->bif_ifp->if_type == IFT_PKTAP &&
2945 			    bp->bif_dlt == DLT_PKTAP) {
2946 				if (d->bd_flags & BPF_TRUNCATE) {
2947 					slen = min(slen, get_pkt_trunc_len(bpf_pkt));
2948 				}
2949 				/*
2950 				 * Need to copy the bpf_pkt because the conversion
2951 				 * to v2 pktap header modifies the content of the
2952 				 * bpfp_header
2953 				 */
2954 				if ((d->bd_flags & BPF_PKTHDRV2) &&
2955 				    bpf_pkt->bpfp_header_length <= sizeof(bpfp_header_tmp)) {
2956 					bpf_pkt_tmp = *bpf_pkt;
2957 
2958 					bpf_pkt = &bpf_pkt_tmp;
2959 
2960 					memcpy(&bpfp_header_tmp, bpf_pkt->bpfp_header,
2961 					    bpf_pkt->bpfp_header_length);
2962 
2963 					bpf_pkt->bpfp_header = &bpfp_header_tmp;
2964 
2965 					convert_to_pktap_header_to_v2(bpf_pkt,
2966 					    !!(d->bd_flags & BPF_TRUNCATE));
2967 				}
2968 			}
2969 			++d->bd_fcount;
2970 			catchpacket(d, bpf_pkt, slen, outbound);
2971 		}
2972 		bpf_pkt = bpf_pkt_saved;
2973 	}
2974 
2975 done:
2976 	lck_mtx_unlock(bpf_mlock);
2977 }
2978 
2979 static inline void
bpf_tap_mbuf(ifnet_t ifp,u_int32_t dlt,mbuf_t m,void * hdr,size_t hlen,int outbound)2980 bpf_tap_mbuf(
2981 	ifnet_t         ifp,
2982 	u_int32_t       dlt,
2983 	mbuf_t          m,
2984 	void*           hdr,
2985 	size_t          hlen,
2986 	int             outbound)
2987 {
2988 	struct bpf_packet bpf_pkt;
2989 	struct mbuf *m0;
2990 
2991 	if (ifp->if_bpf == NULL) {
2992 		/* quickly check without taking lock */
2993 		return;
2994 	}
2995 	bpf_pkt.bpfp_type = BPF_PACKET_TYPE_MBUF;
2996 	bpf_pkt.bpfp_mbuf = m;
2997 	bpf_pkt.bpfp_total_length = 0;
2998 	for (m0 = m; m0 != NULL; m0 = m0->m_next) {
2999 		bpf_pkt.bpfp_total_length += m0->m_len;
3000 	}
3001 	bpf_pkt.bpfp_header = hdr;
3002 	if (hdr != NULL) {
3003 		bpf_pkt.bpfp_total_length += hlen;
3004 		bpf_pkt.bpfp_header_length = hlen;
3005 	} else {
3006 		bpf_pkt.bpfp_header_length = 0;
3007 	}
3008 	bpf_tap_imp(ifp, dlt, &bpf_pkt, outbound);
3009 }
3010 
3011 void
bpf_tap_out(ifnet_t ifp,u_int32_t dlt,mbuf_t m,void * hdr,size_t hlen)3012 bpf_tap_out(
3013 	ifnet_t         ifp,
3014 	u_int32_t       dlt,
3015 	mbuf_t          m,
3016 	void*           hdr,
3017 	size_t          hlen)
3018 {
3019 	bpf_tap_mbuf(ifp, dlt, m, hdr, hlen, 1);
3020 }
3021 
3022 void
bpf_tap_in(ifnet_t ifp,u_int32_t dlt,mbuf_t m,void * hdr,size_t hlen)3023 bpf_tap_in(
3024 	ifnet_t         ifp,
3025 	u_int32_t       dlt,
3026 	mbuf_t          m,
3027 	void*           hdr,
3028 	size_t          hlen)
3029 {
3030 	bpf_tap_mbuf(ifp, dlt, m, hdr, hlen, 0);
3031 }
3032 
/*
 * Callback registered with Ethernet driver.
 * Taps m with the default DLT and no separate header; the packet is
 * treated as outbound when the mbuf has no receive interface.
 * Always returns 0.
 */
static int
bpf_tap_callback(struct ifnet *ifp, struct mbuf *m)
{
	int outbound = (mbuf_pkthdr_rcvif(m) == NULL);

	bpf_tap_mbuf(ifp, 0, m, NULL, 0, outbound);

	return 0;
}
3041 
3042 #if SKYWALK
3043 #include <skywalk/os_skywalk_private.h>
3044 
3045 static void
bpf_pktcopy(kern_packet_t pkt,void * dst_arg,size_t len,size_t offset)3046 bpf_pktcopy(kern_packet_t pkt, void *dst_arg, size_t len, size_t offset)
3047 {
3048 	kern_buflet_t   buflet = NULL;
3049 	size_t count;
3050 	u_char *dst;
3051 
3052 	dst = dst_arg;
3053 	while (len > 0) {
3054 		uint8_t         *addr;
3055 
3056 		u_int32_t       buflet_length;
3057 
3058 		buflet = kern_packet_get_next_buflet(pkt, buflet);
3059 		VERIFY(buflet != NULL);
3060 		addr = kern_buflet_get_data_address(buflet);
3061 		VERIFY(addr != NULL);
3062 		addr += kern_buflet_get_data_offset(buflet);
3063 		buflet_length = kern_buflet_get_data_length(buflet);
3064 		if (offset >= buflet_length) {
3065 			offset -= buflet_length;
3066 			continue;
3067 		}
3068 		count = MIN(buflet_length - offset, len);
3069 		bcopy((void *)(addr + offset), (void *)dst, count);
3070 		dst += count;
3071 		len -= count;
3072 		offset = 0;
3073 	}
3074 }
3075 
3076 static inline void
bpf_tap_packet(ifnet_t ifp,u_int32_t dlt,kern_packet_t pkt,void * hdr,size_t hlen,int outbound)3077 bpf_tap_packet(
3078 	ifnet_t         ifp,
3079 	u_int32_t       dlt,
3080 	kern_packet_t   pkt,
3081 	void*           hdr,
3082 	size_t          hlen,
3083 	int             outbound)
3084 {
3085 	struct bpf_packet       bpf_pkt;
3086 	struct mbuf *           m;
3087 
3088 	if (ifp->if_bpf == NULL) {
3089 		/* quickly check without taking lock */
3090 		return;
3091 	}
3092 	m = kern_packet_get_mbuf(pkt);
3093 	if (m != NULL) {
3094 		bpf_pkt.bpfp_type = BPF_PACKET_TYPE_MBUF;
3095 		bpf_pkt.bpfp_mbuf = m;
3096 		bpf_pkt.bpfp_total_length = m_length(m);
3097 	} else {
3098 		bpf_pkt.bpfp_type = BPF_PACKET_TYPE_PKT;
3099 		bpf_pkt.bpfp_pkt = pkt;
3100 		bpf_pkt.bpfp_total_length = kern_packet_get_data_length(pkt);
3101 	}
3102 	bpf_pkt.bpfp_header = hdr;
3103 	bpf_pkt.bpfp_header_length = hlen;
3104 	if (hlen != 0) {
3105 		bpf_pkt.bpfp_total_length += hlen;
3106 	}
3107 	bpf_tap_imp(ifp, dlt, &bpf_pkt, outbound);
3108 }
3109 
3110 void
bpf_tap_packet_out(ifnet_t ifp,u_int32_t dlt,kern_packet_t pkt,void * hdr,size_t hlen)3111 bpf_tap_packet_out(
3112 	ifnet_t         ifp,
3113 	u_int32_t       dlt,
3114 	kern_packet_t   pkt,
3115 	void*           hdr,
3116 	size_t          hlen)
3117 {
3118 	bpf_tap_packet(ifp, dlt, pkt, hdr, hlen, 1);
3119 }
3120 
3121 void
bpf_tap_packet_in(ifnet_t ifp,u_int32_t dlt,kern_packet_t pkt,void * hdr,size_t hlen)3122 bpf_tap_packet_in(
3123 	ifnet_t         ifp,
3124 	u_int32_t       dlt,
3125 	kern_packet_t   pkt,
3126 	void*           hdr,
3127 	size_t          hlen)
3128 {
3129 	bpf_tap_packet(ifp, dlt, pkt, hdr, hlen, 0);
3130 }
3131 
3132 #endif /* SKYWALK */
3133 
3134 static errno_t
bpf_copydata(struct bpf_packet * pkt,size_t off,size_t len,void * out_data)3135 bpf_copydata(struct bpf_packet *pkt, size_t off, size_t len, void* out_data)
3136 {
3137 	errno_t err = 0;
3138 	if (pkt->bpfp_type == BPF_PACKET_TYPE_MBUF) {
3139 		err = mbuf_copydata(pkt->bpfp_mbuf, off, len, out_data);
3140 #if SKYWALK
3141 	} else if (pkt->bpfp_type == BPF_PACKET_TYPE_PKT) {
3142 		err = kern_packet_copy_bytes(pkt->bpfp_pkt, off, len, out_data);
3143 #endif /* SKYWALK */
3144 	} else {
3145 		err = EINVAL;
3146 	}
3147 
3148 	return err;
3149 }
3150 
3151 static void
copy_bpf_packet_offset(struct bpf_packet * pkt,void * dst,size_t len,size_t offset)3152 copy_bpf_packet_offset(struct bpf_packet * pkt, void * dst, size_t len, size_t offset)
3153 {
3154 	/* copy the optional header */
3155 	if (offset < pkt->bpfp_header_length) {
3156 		size_t  count = MIN(len, pkt->bpfp_header_length - offset);
3157 		caddr_t src = (caddr_t)pkt->bpfp_header;
3158 		bcopy(src + offset, dst, count);
3159 		len -= count;
3160 		dst = (void *)((uintptr_t)dst + count);
3161 		offset = 0;
3162 	} else {
3163 		offset -= pkt->bpfp_header_length;
3164 	}
3165 
3166 	if (len == 0) {
3167 		/* nothing past the header */
3168 		return;
3169 	}
3170 	/* copy the packet */
3171 	switch (pkt->bpfp_type) {
3172 	case BPF_PACKET_TYPE_MBUF:
3173 		bpf_mcopy(pkt->bpfp_mbuf, dst, len, offset);
3174 		break;
3175 #if SKYWALK
3176 	case BPF_PACKET_TYPE_PKT:
3177 		bpf_pktcopy(pkt->bpfp_pkt, dst, len, offset);
3178 		break;
3179 #endif /* SKYWALK */
3180 	default:
3181 		break;
3182 	}
3183 }
3184 
/* Copy the first len bytes of the packet described by pkt to dst. */
static void
copy_bpf_packet(struct bpf_packet * pkt, void * dst, size_t len)
{
	const size_t from_start = 0;

	copy_bpf_packet_offset(pkt, dst, len, from_start);
}
3190 
3191 static uint32_t
get_esp_trunc_len(__unused struct bpf_packet * pkt,__unused uint32_t off,const uint32_t remaining_caplen)3192 get_esp_trunc_len(__unused struct bpf_packet *pkt, __unused uint32_t off,
3193     const uint32_t remaining_caplen)
3194 {
3195 	/*
3196 	 * For some reason tcpdump expects to have one byte beyond the ESP header
3197 	 */
3198 	uint32_t trunc_len = ESP_HDR_SIZE + 1;
3199 
3200 	if (trunc_len > remaining_caplen) {
3201 		return remaining_caplen;
3202 	}
3203 
3204 	return trunc_len;
3205 }
3206 
3207 static uint32_t
get_isakmp_trunc_len(__unused struct bpf_packet * pkt,__unused uint32_t off,const uint32_t remaining_caplen)3208 get_isakmp_trunc_len(__unused struct bpf_packet *pkt, __unused uint32_t off,
3209     const uint32_t remaining_caplen)
3210 {
3211 	/*
3212 	 * Include the payload generic header
3213 	 */
3214 	uint32_t trunc_len = ISAKMP_HDR_SIZE;
3215 
3216 	if (trunc_len > remaining_caplen) {
3217 		return remaining_caplen;
3218 	}
3219 
3220 	return trunc_len;
3221 }
3222 
/*
 * Number of bytes to keep for a UDP payload exchanged on the ISAKMP NAT
 * traversal port.
 *
 * pkt:              packet being captured
 * off:              offset of the UDP payload, passed to bpf_copydata()
 * remaining_caplen: number of capture bytes left from off onwards
 *
 * Returns a length that never exceeds remaining_caplen.  If the start of
 * the payload cannot be read, remaining_caplen is returned unchanged.
 *
 * Only the first four bytes of the payload are ever examined, so just
 * those are copied into a fixed-size buffer.  The stack usage therefore
 * does not depend on the length of the captured packet.
 */
static uint32_t
get_isakmp_natt_trunc_len(struct bpf_packet *pkt, uint32_t off,
    const uint32_t remaining_caplen)
{
	int err = 0;
	uint32_t trunc_len = 0;
	char marker[4] = {};
	const uint32_t probe_len = MIN(remaining_caplen,
	    (uint32_t)sizeof(marker));

	err = bpf_copydata(pkt, off, probe_len, marker);
	if (err != 0) {
		return remaining_caplen;
	}
	/*
	 * There are three cases:
	 * - IKE: payload start with 4 bytes header set to zero before ISAKMP header
	 * - keep alive: 1 byte payload
	 * - otherwise it's ESP
	 */
	if (remaining_caplen >= sizeof(marker) &&
	    marker[0] == 0 && marker[1] == 0 &&
	    marker[2] == 0 && marker[3] == 0) {
		trunc_len = 4 + get_isakmp_trunc_len(pkt, off + 4, remaining_caplen - 4);
	} else if (remaining_caplen == 1) {
		trunc_len = 1;
	} else {
		trunc_len = get_esp_trunc_len(pkt, off, remaining_caplen);
	}

	if (trunc_len > remaining_caplen) {
		return remaining_caplen;
	}

	return trunc_len;
}
3257 
3258 static uint32_t
get_udp_trunc_len(struct bpf_packet * pkt,uint32_t off,const uint32_t remaining_caplen)3259 get_udp_trunc_len(struct bpf_packet *pkt, uint32_t off, const uint32_t remaining_caplen)
3260 {
3261 	int err = 0;
3262 	uint32_t trunc_len = sizeof(struct udphdr); /* By default no UDP payload */
3263 
3264 	if (trunc_len >= remaining_caplen) {
3265 		return remaining_caplen;
3266 	}
3267 
3268 	struct udphdr udphdr;
3269 	err = bpf_copydata(pkt, off, sizeof(struct udphdr), &udphdr);
3270 	if (err != 0) {
3271 		return remaining_caplen;
3272 	}
3273 
3274 	u_short sport, dport;
3275 
3276 	sport = EXTRACT_SHORT(&udphdr.uh_sport);
3277 	dport = EXTRACT_SHORT(&udphdr.uh_dport);
3278 
3279 	if (dport == PORT_DNS || sport == PORT_DNS) {
3280 		/*
3281 		 * Full UDP payload for DNS
3282 		 */
3283 		trunc_len = remaining_caplen;
3284 	} else if ((sport == PORT_BOOTPS && dport == PORT_BOOTPC) ||
3285 	    (sport == PORT_BOOTPC && dport == PORT_BOOTPS)) {
3286 		/*
3287 		 * Full UDP payload for BOOTP and DHCP
3288 		 */
3289 		trunc_len = remaining_caplen;
3290 	} else if (dport == PORT_ISAKMP && sport == PORT_ISAKMP) {
3291 		/*
3292 		 * Return the ISAKMP header
3293 		 */
3294 		trunc_len += get_isakmp_trunc_len(pkt, off + sizeof(struct udphdr),
3295 		    remaining_caplen - sizeof(struct udphdr));
3296 	} else if (dport == PORT_ISAKMP_NATT && sport == PORT_ISAKMP_NATT) {
3297 		trunc_len += get_isakmp_natt_trunc_len(pkt, off + sizeof(struct udphdr),
3298 		    remaining_caplen - sizeof(struct udphdr));
3299 	}
3300 	if (trunc_len >= remaining_caplen) {
3301 		return remaining_caplen;
3302 	}
3303 
3304 	return trunc_len;
3305 }
3306 
3307 static uint32_t
get_tcp_trunc_len(struct bpf_packet * pkt,uint32_t off,const uint32_t remaining_caplen)3308 get_tcp_trunc_len(struct bpf_packet *pkt, uint32_t off, const uint32_t remaining_caplen)
3309 {
3310 	int err = 0;
3311 	uint32_t trunc_len = sizeof(struct tcphdr); /* By default no TCP payload */
3312 	if (trunc_len >= remaining_caplen) {
3313 		return remaining_caplen;
3314 	}
3315 
3316 	struct tcphdr tcphdr;
3317 	err = bpf_copydata(pkt, off, sizeof(struct tcphdr), &tcphdr);
3318 	if (err != 0) {
3319 		return remaining_caplen;
3320 	}
3321 
3322 	u_short sport, dport;
3323 	sport = EXTRACT_SHORT(&tcphdr.th_sport);
3324 	dport = EXTRACT_SHORT(&tcphdr.th_dport);
3325 
3326 	if (dport == PORT_DNS || sport == PORT_DNS) {
3327 		/*
3328 		 * Full TCP payload  for DNS
3329 		 */
3330 		trunc_len = remaining_caplen;
3331 	} else {
3332 		trunc_len = (uint16_t)(tcphdr.th_off << 2);
3333 	}
3334 	if (trunc_len >= remaining_caplen) {
3335 		return remaining_caplen;
3336 	}
3337 
3338 	return trunc_len;
3339 }
3340 
3341 static uint32_t
get_proto_trunc_len(uint8_t proto,struct bpf_packet * pkt,uint32_t off,const uint32_t remaining_caplen)3342 get_proto_trunc_len(uint8_t proto, struct bpf_packet *pkt, uint32_t off, const uint32_t remaining_caplen)
3343 {
3344 	uint32_t trunc_len;
3345 
3346 	switch (proto) {
3347 	case IPPROTO_ICMP: {
3348 		/*
3349 		 * Full IMCP payload
3350 		 */
3351 		trunc_len = remaining_caplen;
3352 		break;
3353 	}
3354 	case IPPROTO_ICMPV6: {
3355 		/*
3356 		 * Full IMCPV6 payload
3357 		 */
3358 		trunc_len = remaining_caplen;
3359 		break;
3360 	}
3361 	case IPPROTO_IGMP: {
3362 		/*
3363 		 * Full IGMP payload
3364 		 */
3365 		trunc_len = remaining_caplen;
3366 		break;
3367 	}
3368 	case IPPROTO_UDP: {
3369 		trunc_len = get_udp_trunc_len(pkt, off, remaining_caplen);
3370 		break;
3371 	}
3372 	case IPPROTO_TCP: {
3373 		trunc_len = get_tcp_trunc_len(pkt, off, remaining_caplen);
3374 		break;
3375 	}
3376 	case IPPROTO_ESP: {
3377 		trunc_len = get_esp_trunc_len(pkt, off, remaining_caplen);
3378 		break;
3379 	}
3380 	default: {
3381 		/*
3382 		 * By default we only include the IP header
3383 		 */
3384 		trunc_len = 0;
3385 		break;
3386 	}
3387 	}
3388 	if (trunc_len >= remaining_caplen) {
3389 		return remaining_caplen;
3390 	}
3391 
3392 	return trunc_len;
3393 }
3394 
3395 static uint32_t
get_ip_trunc_len(struct bpf_packet * pkt,uint32_t off,const uint32_t remaining_caplen)3396 get_ip_trunc_len(struct bpf_packet *pkt, uint32_t off, const uint32_t remaining_caplen)
3397 {
3398 	int err = 0;
3399 	uint32_t iplen = sizeof(struct ip);
3400 	if (iplen >= remaining_caplen) {
3401 		return remaining_caplen;
3402 	}
3403 
3404 	struct ip iphdr;
3405 	err =  bpf_copydata(pkt, off, sizeof(struct ip), &iphdr);
3406 	if (err != 0) {
3407 		return remaining_caplen;
3408 	}
3409 
3410 	uint8_t proto = 0;
3411 
3412 	iplen = (uint16_t)(iphdr.ip_hl << 2);
3413 	if (iplen >= remaining_caplen) {
3414 		return remaining_caplen;
3415 	}
3416 
3417 	proto = iphdr.ip_p;
3418 	iplen += get_proto_trunc_len(proto, pkt, off + iplen, remaining_caplen - iplen);
3419 
3420 	if (iplen >= remaining_caplen) {
3421 		return remaining_caplen;
3422 	}
3423 
3424 	return iplen;
3425 }
3426 
3427 static uint32_t
get_ip6_trunc_len(struct bpf_packet * pkt,uint32_t off,const uint32_t remaining_caplen)3428 get_ip6_trunc_len(struct bpf_packet *pkt, uint32_t off, const uint32_t remaining_caplen)
3429 {
3430 	int err = 0;
3431 	uint32_t iplen = sizeof(struct ip6_hdr);
3432 	if (iplen >= remaining_caplen) {
3433 		return remaining_caplen;
3434 	}
3435 
3436 	struct ip6_hdr ip6hdr;
3437 	err = bpf_copydata(pkt, off, sizeof(struct ip6_hdr), &ip6hdr);
3438 	if (err != 0) {
3439 		return remaining_caplen;
3440 	}
3441 
3442 	uint8_t proto = 0;
3443 
3444 	/*
3445 	 * TBD: process the extension headers
3446 	 */
3447 	proto = ip6hdr.ip6_nxt;
3448 	iplen += get_proto_trunc_len(proto, pkt, off + iplen, remaining_caplen - iplen);
3449 
3450 	if (iplen >= remaining_caplen) {
3451 		return remaining_caplen;
3452 	}
3453 
3454 	return iplen;
3455 }
3456 
3457 static uint32_t
get_ether_trunc_len(struct bpf_packet * pkt,uint32_t off,const uint32_t remaining_caplen)3458 get_ether_trunc_len(struct bpf_packet *pkt, uint32_t off, const uint32_t remaining_caplen)
3459 {
3460 	int err = 0;
3461 	uint32_t ethlen = sizeof(struct ether_header);
3462 	if (ethlen >= remaining_caplen) {
3463 		return remaining_caplen;
3464 	}
3465 
3466 	struct ether_header eh = {};
3467 	err = bpf_copydata(pkt, off, sizeof(struct ether_header), &eh);
3468 	if (err != 0) {
3469 		return remaining_caplen;
3470 	}
3471 
3472 	u_short type = EXTRACT_SHORT(&eh.ether_type);
3473 	/* Include full ARP */
3474 	if (type == ETHERTYPE_ARP) {
3475 		ethlen = remaining_caplen;
3476 	} else if (type == ETHERTYPE_IP) {
3477 		ethlen += get_ip_trunc_len(pkt, off + sizeof(struct ether_header),
3478 		    remaining_caplen - ethlen);
3479 	} else if (type == ETHERTYPE_IPV6) {
3480 		ethlen += get_ip6_trunc_len(pkt, off + sizeof(struct ether_header),
3481 		    remaining_caplen - ethlen);
3482 	} else {
3483 		ethlen = MIN(BPF_MIN_PKT_SIZE, remaining_caplen);
3484 	}
3485 	return ethlen;
3486 }
3487 
3488 
3489 static uint32_t
get_pkt_trunc_len(struct bpf_packet * pkt)3490 get_pkt_trunc_len(struct bpf_packet *pkt)
3491 {
3492 	struct pktap_header *pktap = (struct pktap_header *) (pkt->bpfp_header);
3493 	uint32_t in_pkt_len = 0;
3494 	uint32_t out_pkt_len = 0;
3495 	uint32_t tlen = 0;
3496 	uint32_t pre_adjust;    // L2 header not in mbuf or kern_packet
3497 
3498 	// bpfp_total_length must contain the BPF packet header
3499 	assert3u(pkt->bpfp_total_length, >=, pkt->bpfp_header_length);
3500 
3501 	// The BPF packet header must contain the pktap header
3502 	assert3u(pkt->bpfp_header_length, >=, pktap->pth_length);
3503 
3504 	// The pre frame length (L2 header) must be contained in the packet
3505 	assert3u(pkt->bpfp_total_length, >=, pktap->pth_length + pktap->pth_frame_pre_length);
3506 
3507 	/*
3508 	 * pktap->pth_frame_pre_length is the L2 header length and accounts
3509 	 * for both L2 header in the packet payload and pre_adjust.
3510 	 *
3511 	 * pre_adjust represents an adjustment for a pseudo L2 header that is not
3512 	 * part of packet payload -- not in the mbuf or kern_packet -- and comes
3513 	 * just after the pktap header.
3514 	 *
3515 	 * pktap->pth_length is the size of the pktap header (exclude pre_adjust)
3516 	 *
3517 	 * pkt->bpfp_header_length is (pktap->pth_length + pre_adjust)
3518 	 */
3519 	pre_adjust = (uint32_t)(pkt->bpfp_header_length - pktap->pth_length);
3520 
3521 	if (pktap->pth_iftype == IFT_ETHER) {
3522 		/*
3523 		 * We need to parse the Ethernet header to find the network layer
3524 		 * protocol
3525 		 */
3526 		in_pkt_len = (uint32_t)(pkt->bpfp_total_length - pktap->pth_length - pre_adjust);
3527 
3528 		out_pkt_len = get_ether_trunc_len(pkt, 0, in_pkt_len);
3529 
3530 		tlen = pktap->pth_length + pre_adjust + out_pkt_len;
3531 	} else {
3532 		/*
3533 		 * For other interface types, we only know to parse IPv4 and IPv6.
3534 		 *
3535 		 * To get to the beginning of the IPv4 or IPv6 packet, we need to to skip
3536 		 * over the L2 header that is the actual packet payload (mbuf or kern_packet)
3537 		 */
3538 		uint32_t off;   // offset past the L2 header in the actual packet payload
3539 
3540 		off = pktap->pth_frame_pre_length - pre_adjust;
3541 
3542 		in_pkt_len = (uint32_t)(pkt->bpfp_total_length - pktap->pth_length - pktap->pth_frame_pre_length);
3543 
3544 		if (pktap->pth_protocol_family == AF_INET) {
3545 			out_pkt_len = get_ip_trunc_len(pkt, off, in_pkt_len);
3546 		} else if (pktap->pth_protocol_family == AF_INET6) {
3547 			out_pkt_len = get_ip6_trunc_len(pkt, off, in_pkt_len);
3548 		} else {
3549 			out_pkt_len = MIN(BPF_MIN_PKT_SIZE, in_pkt_len);
3550 		}
3551 		tlen = pktap->pth_length + pktap->pth_frame_pre_length + out_pkt_len;
3552 	}
3553 
3554 	// Verify we do not overflow the buffer
3555 	if (__improbable(tlen > pkt->bpfp_total_length)) {
3556 		bool do_panic = bpf_debug != 0 ? true : false;
3557 
3558 #if DEBUG
3559 		do_panic = true;
3560 #endif /* DEBUG */
3561 		if (do_panic) {
3562 			panic("%s:%d tlen %u > bpfp_total_length %lu bpfp_header_length %lu pth_frame_pre_length %u pre_adjust %u in_pkt_len %u out_pkt_len %u",
3563 			    __func__, __LINE__,
3564 			    tlen, pkt->bpfp_total_length, pkt->bpfp_header_length, pktap->pth_frame_pre_length, pre_adjust, in_pkt_len, out_pkt_len);
3565 		} else {
3566 			os_log(OS_LOG_DEFAULT,
3567 			    "%s:%d tlen %u > bpfp_total_length %lu bpfp_header_length %lu pth_frame_pre_length %u pre_adjust %u in_pkt_len %u out_pkt_len %u",
3568 			    __func__, __LINE__,
3569 			    tlen, pkt->bpfp_total_length, pkt->bpfp_header_length, pktap->pth_frame_pre_length, pre_adjust, in_pkt_len, out_pkt_len);
3570 		}
3571 		bpf_trunc_overflow += 1;
3572 		tlen = (uint32_t)pkt->bpfp_total_length;
3573 	}
3574 
3575 	return tlen;
3576 }
3577 
/*
 * Return the size in bytes of the prefix shared by buffers `a' and `b',
 * looking at no more than `max_bytes' bytes.
 *
 * The comparison is done one 32-bit word at a time, so the result is
 * always a multiple of 4 and a trailing partial word of `max_bytes' is
 * ignored.  Both buffers are read through uint32_t pointers.
 */
static uint8_t
get_common_prefix_size(const void *a, const void *b, uint8_t max_bytes)
{
	const uint32_t *lhs = (const uint32_t *)a;
	const uint32_t *rhs = (const uint32_t *)b;
	const uint8_t word_limit = max_bytes >> 2;
	uint8_t matched = 0;    /* number of leading words that are equal */

	while (matched < word_limit && lhs[matched] == rhs[matched]) {
		matched++;
	}

	return (uint8_t)(matched << 2);
}
3593 
3594 /*
3595  * Move the packet data from interface memory (pkt) into the
3596  * store buffer.  Return 1 if it's time to wakeup a listener (buffer full),
3597  * otherwise 0.
3598  */
3599 static void
catchpacket(struct bpf_d * d,struct bpf_packet * pkt,u_int snaplen,int outbound)3600 catchpacket(struct bpf_d *d, struct bpf_packet * pkt,
3601     u_int snaplen, int outbound)
3602 {
3603 	struct bpf_hdr *hp;
3604 	struct bpf_hdr_ext *ehp;
3605 	uint32_t totlen, curlen;
3606 	uint32_t hdrlen, caplen;
3607 	int do_wakeup = 0;
3608 	u_char *payload;
3609 	struct timeval tv;
3610 
3611 	hdrlen = (d->bd_flags & BPF_EXTENDED_HDR) ? d->bd_bif->bif_exthdrlen :
3612 	    (d->bd_flags & BPF_COMP_REQ) ? d->bd_bif->bif_comphdrlen:
3613 	    d->bd_bif->bif_hdrlen;
3614 	/*
3615 	 * Figure out how many bytes to move.  If the packet is
3616 	 * greater or equal to the snapshot length, transfer that
3617 	 * much.  Otherwise, transfer the whole packet (unless
3618 	 * we hit the buffer size limit).
3619 	 */
3620 	totlen = hdrlen + MIN(snaplen, (int)pkt->bpfp_total_length);
3621 	if (totlen > d->bd_bufsize) {
3622 		totlen = d->bd_bufsize;
3623 	}
3624 
3625 	if (hdrlen > totlen) {
3626 		return;
3627 	}
3628 
3629 	/*
3630 	 * Round up the end of the previous packet to the next longword.
3631 	 */
3632 	curlen = BPF_WORDALIGN(d->bd_slen);
3633 	if (curlen + totlen > d->bd_bufsize) {
3634 		/*
3635 		 * This packet will overflow the storage buffer.
3636 		 * Rotate the buffers if we can, then wakeup any
3637 		 * pending reads.
3638 		 *
3639 		 * We cannot rotate buffers if a read is in progress
3640 		 * so drop the packet
3641 		 */
3642 		if (d->bd_hbuf_read != 0) {
3643 			++d->bd_dcount;
3644 			return;
3645 		}
3646 
3647 		if (d->bd_fbuf == NULL) {
3648 			if (d->bd_headdrop == 0) {
3649 				/*
3650 				 * We haven't completed the previous read yet,
3651 				 * so drop the packet.
3652 				 */
3653 				++d->bd_dcount;
3654 				return;
3655 			}
3656 			/*
3657 			 * Drop the hold buffer as it contains older packets
3658 			 */
3659 			d->bd_dcount += d->bd_hcnt;
3660 			d->bd_fbuf = d->bd_hbuf;
3661 			ROTATE_BUFFERS(d);
3662 		} else {
3663 			ROTATE_BUFFERS(d);
3664 		}
3665 		do_wakeup = 1;
3666 		curlen = 0;
3667 	} else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT) {
3668 		/*
3669 		 * Immediate mode is set, or the read timeout has
3670 		 * already expired during a select call. A packet
3671 		 * arrived, so the reader should be woken up.
3672 		 */
3673 		do_wakeup = 1;
3674 	}
3675 
3676 	/*
3677 	 * Append the bpf header.
3678 	 */
3679 	microtime(&tv);
3680 	if (d->bd_flags & BPF_EXTENDED_HDR) {
3681 		ehp = (struct bpf_hdr_ext *)(void *)(d->bd_sbuf + curlen);
3682 		memset(ehp, 0, sizeof(*ehp));
3683 		ehp->bh_tstamp.tv_sec = (int)tv.tv_sec;
3684 		ehp->bh_tstamp.tv_usec = tv.tv_usec;
3685 
3686 		ehp->bh_datalen = (bpf_u_int32)pkt->bpfp_total_length;
3687 		ehp->bh_hdrlen = (u_short)hdrlen;
3688 		caplen = ehp->bh_caplen = totlen - hdrlen;
3689 		payload = (u_char *)ehp + hdrlen;
3690 
3691 		if (outbound) {
3692 			ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_OUT;
3693 		} else {
3694 			ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_IN;
3695 		}
3696 
3697 		if (pkt->bpfp_type == BPF_PACKET_TYPE_MBUF) {
3698 			struct mbuf *m = pkt->bpfp_mbuf;
3699 
3700 			if (outbound) {
3701 				/* only do lookups on non-raw INPCB */
3702 				if ((m->m_pkthdr.pkt_flags & (PKTF_FLOW_ID |
3703 				    PKTF_FLOW_LOCALSRC | PKTF_FLOW_RAWSOCK)) ==
3704 				    (PKTF_FLOW_ID | PKTF_FLOW_LOCALSRC) &&
3705 				    m->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
3706 					ehp->bh_flowid = m->m_pkthdr.pkt_flowid;
3707 					if (m->m_pkthdr.pkt_proto == IPPROTO_TCP) {
3708 						ehp->bh_flags |= BPF_HDR_EXT_FLAGS_TCP;
3709 					} else if (m->m_pkthdr.pkt_proto == IPPROTO_UDP) {
3710 						ehp->bh_flags |= BPF_HDR_EXT_FLAGS_UDP;
3711 					}
3712 				}
3713 				ehp->bh_svc = so_svc2tc(m->m_pkthdr.pkt_svc);
3714 				if (m->m_pkthdr.pkt_flags & PKTF_TCP_REXMT) {
3715 					ehp->bh_pktflags |= BPF_PKTFLAGS_TCP_REXMT;
3716 				}
3717 				if (m->m_pkthdr.pkt_flags & PKTF_START_SEQ) {
3718 					ehp->bh_pktflags |= BPF_PKTFLAGS_START_SEQ;
3719 				}
3720 				if (m->m_pkthdr.pkt_flags & PKTF_LAST_PKT) {
3721 					ehp->bh_pktflags |= BPF_PKTFLAGS_LAST_PKT;
3722 				}
3723 				if (m->m_pkthdr.pkt_flags & PKTF_VALID_UNSENT_DATA) {
3724 					ehp->bh_unsent_bytes =
3725 					    m->m_pkthdr.bufstatus_if;
3726 					ehp->bh_unsent_snd =
3727 					    m->m_pkthdr.bufstatus_sndbuf;
3728 				}
3729 			} else {
3730 				if (m->m_pkthdr.pkt_flags & PKTF_WAKE_PKT) {
3731 					ehp->bh_pktflags |= BPF_PKTFLAGS_WAKE_PKT;
3732 				}
3733 			}
3734 #if SKYWALK
3735 		} else {
3736 			kern_packet_t kern_pkt = pkt->bpfp_pkt;
3737 			packet_flowid_t flowid = 0;
3738 
3739 			if (outbound) {
3740 				/*
3741 				 * Note: pp_init() asserts that kern_packet_svc_class_t is equivalent
3742 				 * to mbuf_svc_class_t
3743 				 */
3744 				ehp->bh_svc = so_svc2tc((mbuf_svc_class_t)kern_packet_get_service_class(kern_pkt));
3745 				if (kern_packet_get_transport_retransmit(kern_pkt)) {
3746 					ehp->bh_pktflags |= BPF_PKTFLAGS_TCP_REXMT;
3747 				}
3748 				if (kern_packet_get_transport_last_packet(kern_pkt)) {
3749 					ehp->bh_pktflags |= BPF_PKTFLAGS_LAST_PKT;
3750 				}
3751 			} else {
3752 				if (kern_packet_get_wake_flag(kern_pkt)) {
3753 					ehp->bh_pktflags |= BPF_PKTFLAGS_WAKE_PKT;
3754 				}
3755 			}
3756 			ehp->bh_trace_tag = kern_packet_get_trace_tag(kern_pkt);
3757 			if (kern_packet_get_flowid(kern_pkt, &flowid) == 0) {
3758 				ehp->bh_flowid = flowid;
3759 			}
3760 #endif /* SKYWALK */
3761 		}
3762 	} else {
3763 		hp = (struct bpf_hdr *)(void *)(d->bd_sbuf + curlen);
3764 		memset(hp, 0, BPF_WORDALIGN(sizeof(*hp)));
3765 		hp->bh_tstamp.tv_sec = (int)tv.tv_sec;
3766 		hp->bh_tstamp.tv_usec = tv.tv_usec;
3767 		hp->bh_datalen = (bpf_u_int32)pkt->bpfp_total_length;
3768 		hp->bh_hdrlen = (u_short)hdrlen;
3769 		caplen = hp->bh_caplen = totlen - hdrlen;
3770 		payload = (u_char *)hp + hdrlen;
3771 	}
3772 	if (d->bd_flags & BPF_COMP_REQ) {
3773 		uint8_t common_prefix_size = 0;
3774 		uint8_t copy_len = MIN((uint8_t)caplen, BPF_HDR_COMP_LEN_MAX);
3775 
3776 		copy_bpf_packet(pkt, d->bd_prev_fbuf, copy_len);
3777 
3778 		if (d->bd_prev_slen != 0) {
3779 			common_prefix_size = get_common_prefix_size(d->bd_prev_fbuf,
3780 			    d->bd_prev_sbuf, MIN(copy_len, d->bd_prev_slen));
3781 		}
3782 
3783 		if (d->bd_flags & BPF_COMP_ENABLED) {
3784 			assert3u(caplen, >=, common_prefix_size);
3785 			copy_bpf_packet_offset(pkt, payload, caplen - common_prefix_size,
3786 			    common_prefix_size);
3787 			d->bd_slen = curlen + totlen - common_prefix_size;
3788 		} else {
3789 			copy_bpf_packet(pkt, payload, caplen);
3790 			d->bd_slen = curlen + totlen;
3791 		}
3792 
3793 		/*
3794 		 * Update the caplen only if compression is enabled -- the caller
3795 		 * must pay attention to bpf_hdr_comp_enable
3796 		 */
3797 		if (d->bd_flags & BPF_EXTENDED_HDR) {
3798 			ehp->bh_complen = common_prefix_size;
3799 			if (d->bd_flags & BPF_COMP_ENABLED) {
3800 				ehp->bh_caplen -= common_prefix_size;
3801 			}
3802 		} else {
3803 			struct bpf_comp_hdr *hcp;
3804 
3805 			hcp = (struct bpf_comp_hdr *)(void *)(d->bd_sbuf + curlen);
3806 			hcp->bh_complen = common_prefix_size;
3807 			if (d->bd_flags & BPF_COMP_ENABLED) {
3808 				hcp->bh_caplen -= common_prefix_size;
3809 			}
3810 		}
3811 
3812 		if (common_prefix_size > 0) {
3813 			d->bd_bcs.bcs_total_compressed_prefix_size += common_prefix_size;
3814 			if (common_prefix_size > d->bd_bcs.bcs_max_compressed_prefix_size) {
3815 				d->bd_bcs.bcs_max_compressed_prefix_size = common_prefix_size;
3816 			}
3817 			d->bd_bcs.bcs_count_compressed_prefix += 1;
3818 		} else {
3819 			d->bd_bcs.bcs_count_no_common_prefix += 1;
3820 		}
3821 
3822 		/* The current compression buffer becomes the previous one */
3823 		caddr_t tmp = d->bd_prev_sbuf;
3824 		d->bd_prev_sbuf = d->bd_prev_fbuf;
3825 		d->bd_prev_slen = copy_len;
3826 		d->bd_prev_fbuf = tmp;
3827 	} else {
3828 		/*
3829 		 * Copy the packet data into the store buffer and update its length.
3830 		 */
3831 		copy_bpf_packet(pkt, payload, caplen);
3832 		d->bd_slen = curlen + totlen;
3833 	}
3834 	d->bd_scnt += 1;
3835 	d->bd_bcs.bcs_total_hdr_size += pkt->bpfp_header_length;
3836 	d->bd_bcs.bcs_total_size += caplen;
3837 
3838 	if (do_wakeup) {
3839 		bpf_wakeup(d);
3840 	}
3841 }
3842 
3843 
3844 static void
bpf_freebufs(struct bpf_d * d)3845 bpf_freebufs(struct bpf_d *d)
3846 {
3847 	if (d->bd_sbuf != NULL) {
3848 		kfree_data_addr(d->bd_sbuf);
3849 	}
3850 	if (d->bd_hbuf != NULL) {
3851 		kfree_data_addr(d->bd_hbuf);
3852 	}
3853 	if (d->bd_fbuf != NULL) {
3854 		kfree_data_addr(d->bd_fbuf);
3855 	}
3856 
3857 	if (d->bd_prev_sbuf != NULL) {
3858 		kfree_data_addr(d->bd_prev_sbuf);
3859 	}
3860 	if (d->bd_prev_fbuf != NULL) {
3861 		kfree_data_addr(d->bd_prev_fbuf);
3862 	}
3863 }
3864 /*
3865  * Initialize all nonzero fields of a descriptor.
3866  */
3867 static int
bpf_allocbufs(struct bpf_d * d)3868 bpf_allocbufs(struct bpf_d *d)
3869 {
3870 	bpf_freebufs(d);
3871 
3872 	d->bd_fbuf = (caddr_t) kalloc_data(d->bd_bufsize, Z_WAITOK | Z_ZERO);
3873 	if (d->bd_fbuf == NULL) {
3874 		goto nobufs;
3875 	}
3876 
3877 	d->bd_sbuf = (caddr_t) kalloc_data(d->bd_bufsize, Z_WAITOK | Z_ZERO);
3878 	if (d->bd_sbuf == NULL) {
3879 		goto nobufs;
3880 	}
3881 	d->bd_slen = 0;
3882 	d->bd_hlen = 0;
3883 	d->bd_scnt = 0;
3884 	d->bd_hcnt = 0;
3885 
3886 	d->bd_prev_slen = 0;
3887 	if (d->bd_flags & BPF_COMP_REQ) {
3888 		d->bd_prev_sbuf = (caddr_t) kalloc_data(BPF_HDR_COMP_LEN_MAX, Z_WAITOK | Z_ZERO);
3889 		if (d->bd_prev_sbuf == NULL) {
3890 			goto nobufs;
3891 		}
3892 		d->bd_prev_fbuf = (caddr_t) kalloc_data(BPF_HDR_COMP_LEN_MAX, Z_WAITOK | Z_ZERO);
3893 		if (d->bd_prev_fbuf == NULL) {
3894 			goto nobufs;
3895 		}
3896 	}
3897 	return 0;
3898 nobufs:
3899 	bpf_freebufs(d);
3900 	return ENOMEM;
3901 }
3902 
3903 /*
3904  * Free buffers currently in use by a descriptor.
3905  * Called on close.
3906  */
3907 static void
bpf_freed(struct bpf_d * d)3908 bpf_freed(struct bpf_d *d)
3909 {
3910 	/*
3911 	 * We don't need to lock out interrupts since this descriptor has
3912 	 * been detached from its interface and it yet hasn't been marked
3913 	 * free.
3914 	 */
3915 	if (d->bd_hbuf_read != 0) {
3916 		panic("bpf buffer freed during read");
3917 	}
3918 
3919 	bpf_freebufs(d);
3920 
3921 	if (d->bd_filter) {
3922 		kfree_data_addr(d->bd_filter);
3923 	}
3924 }
3925 
3926 /*
3927  * Attach an interface to bpf.	driverp is a pointer to a (struct bpf_if *)
3928  * in the driver's softc; dlt is the link layer type; hdrlen is the fixed
3929  * size of the link header (variable length headers not yet supported).
3930  */
3931 void
bpfattach(struct ifnet * ifp,u_int dlt,u_int hdrlen)3932 bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
3933 {
3934 	bpf_attach(ifp, dlt, hdrlen, NULL, NULL);
3935 }
3936 
3937 errno_t
bpf_attach(ifnet_t ifp,u_int32_t dlt,u_int32_t hdrlen,bpf_send_func send,bpf_tap_func tap)3938 bpf_attach(
3939 	ifnet_t ifp,
3940 	u_int32_t dlt,
3941 	u_int32_t hdrlen,
3942 	bpf_send_func send,
3943 	bpf_tap_func tap)
3944 {
3945 	struct bpf_if *bp;
3946 	struct bpf_if *bp_new;
3947 	struct bpf_if *bp_before_first = NULL;
3948 	struct bpf_if *bp_first = NULL;
3949 	struct bpf_if *bp_last = NULL;
3950 	boolean_t found;
3951 
3952 	/*
3953 	 * Z_NOFAIL will cause a panic if the allocation fails
3954 	 */
3955 	bp_new = kalloc_type(struct bpf_if, Z_WAITOK | Z_NOFAIL | Z_ZERO);
3956 
3957 	lck_mtx_lock(bpf_mlock);
3958 
3959 	/*
3960 	 * Check if this interface/dlt is already attached. Remember the
3961 	 * first and last attachment for this interface, as well as the
3962 	 * element before the first attachment.
3963 	 */
3964 	found = FALSE;
3965 	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
3966 		if (bp->bif_ifp != ifp) {
3967 			if (bp_first != NULL) {
3968 				/* no more elements for this interface */
3969 				break;
3970 			}
3971 			bp_before_first = bp;
3972 		} else {
3973 			if (bp->bif_dlt == dlt) {
3974 				found = TRUE;
3975 				break;
3976 			}
3977 			if (bp_first == NULL) {
3978 				bp_first = bp;
3979 			}
3980 			bp_last = bp;
3981 		}
3982 	}
3983 	if (found) {
3984 		lck_mtx_unlock(bpf_mlock);
3985 		os_log_error(OS_LOG_DEFAULT,
3986 		    "bpfattach - %s with dlt %d is already attached",
3987 		    if_name(ifp), dlt);
3988 		kfree_type(struct bpf_if, bp_new);
3989 		return EEXIST;
3990 	}
3991 
3992 	bp_new->bif_ifp = ifp;
3993 	bp_new->bif_dlt = dlt;
3994 	bp_new->bif_send = send;
3995 	bp_new->bif_tap = tap;
3996 
3997 	if (bp_first == NULL) {
3998 		/* No other entries for this ifp */
3999 		bp_new->bif_next = bpf_iflist;
4000 		bpf_iflist = bp_new;
4001 	} else {
4002 		if (ifnet_type(ifp) == IFT_ETHER && dlt == DLT_EN10MB) {
4003 			/* Make this the first entry for this interface */
4004 			if (bp_before_first != NULL) {
4005 				/*  point the previous to us */
4006 				bp_before_first->bif_next = bp_new;
4007 			} else {
4008 				/* we're the new head */
4009 				bpf_iflist = bp_new;
4010 			}
4011 			bp_new->bif_next = bp_first;
4012 		} else {
4013 			/* Add this after the last entry for this interface */
4014 			bp_new->bif_next = bp_last->bif_next;
4015 			bp_last->bif_next = bp_new;
4016 		}
4017 	}
4018 
4019 	/*
4020 	 * Compute the length of the bpf header.  This is not necessarily
4021 	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
4022 	 * that the network layer header begins on a longword boundary (for
4023 	 * performance reasons and to alleviate alignment restrictions).
4024 	 */
4025 	bp_new->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;
4026 	bp_new->bif_exthdrlen = BPF_WORDALIGN(hdrlen +
4027 	    sizeof(struct bpf_hdr_ext)) - hdrlen;
4028 	bp_new->bif_comphdrlen = BPF_WORDALIGN(hdrlen +
4029 	    sizeof(struct bpf_comp_hdr)) - hdrlen;
4030 
4031 	/* Take a reference on the interface */
4032 	ifnet_reference(ifp);
4033 
4034 	lck_mtx_unlock(bpf_mlock);
4035 
4036 	return 0;
4037 }
4038 
4039 /*
4040  * Detach bpf from an interface.  This involves detaching each descriptor
4041  * associated with the interface, and leaving bd_bif NULL.  Notify each
4042  * descriptor as it's detached so that any sleepers wake up and get
4043  * ENXIO.
4044  */
4045 void
bpfdetach(struct ifnet * ifp)4046 bpfdetach(struct ifnet *ifp)
4047 {
4048 	struct bpf_if   *bp, *bp_prev, *bp_next;
4049 	struct bpf_d    *d;
4050 
4051 	if (bpf_debug != 0) {
4052 		os_log(OS_LOG_DEFAULT, "%s: %s", __func__, if_name(ifp));
4053 	}
4054 
4055 	lck_mtx_lock(bpf_mlock);
4056 
4057 	/*
4058 	 * Build the list of devices attached to that interface
4059 	 * that we need to free while keeping the lock to maintain
4060 	 * the integrity of the interface list
4061 	 */
4062 	bp_prev = NULL;
4063 	for (bp = bpf_iflist; bp != NULL; bp = bp_next) {
4064 		bp_next = bp->bif_next;
4065 
4066 		if (ifp != bp->bif_ifp) {
4067 			bp_prev = bp;
4068 			continue;
4069 		}
4070 		/* Unlink from the interface list */
4071 		if (bp_prev) {
4072 			bp_prev->bif_next = bp->bif_next;
4073 		} else {
4074 			bpf_iflist = bp->bif_next;
4075 		}
4076 
4077 		/* Detach the devices attached to the interface */
4078 		while ((d = bp->bif_dlist) != NULL) {
4079 			/*
4080 			 * Take an extra reference to prevent the device
4081 			 * from being freed when bpf_detachd() releases
4082 			 * the reference for the interface list
4083 			 */
4084 			bpf_acquire_d(d);
4085 			bpf_detachd(d);
4086 			bpf_wakeup(d);
4087 			bpf_release_d(d);
4088 		}
4089 		ifnet_release(ifp);
4090 	}
4091 
4092 	lck_mtx_unlock(bpf_mlock);
4093 }
4094 
4095 void
bpf_init(__unused void * unused)4096 bpf_init(__unused void *unused)
4097 {
4098 	int     maj;
4099 
4100 	/* bpf_comp_hdr is an overlay of bpf_hdr */
4101 	_CASSERT(BPF_WORDALIGN(sizeof(struct bpf_hdr)) ==
4102 	    BPF_WORDALIGN(sizeof(struct bpf_comp_hdr)));
4103 
4104 	/* compression length must fits in a byte */
4105 	_CASSERT(BPF_HDR_COMP_LEN_MAX <= UCHAR_MAX );
4106 
4107 	(void) PE_parse_boot_argn("bpf_hdr_comp", &bpf_hdr_comp_enable,
4108 	    sizeof(bpf_hdr_comp_enable));
4109 
4110 	if (bpf_devsw_installed == 0) {
4111 		bpf_devsw_installed = 1;
4112 		maj = cdevsw_add(CDEV_MAJOR, &bpf_cdevsw);
4113 		if (maj == -1) {
4114 			bpf_devsw_installed = 0;
4115 			os_log_error(OS_LOG_DEFAULT,
4116 			    "bpf_init: failed to allocate a major number");
4117 			return;
4118 		}
4119 
4120 		for (int i = 0; i < NBPFILTER; i++) {
4121 			bpf_make_dev_t(maj);
4122 		}
4123 	}
4124 }
4125 
4126 static int
4127 sysctl_bpf_maxbufsize SYSCTL_HANDLER_ARGS
4128 {
4129 #pragma unused(arg1, arg2)
4130 	int i, err;
4131 
4132 	i = bpf_maxbufsize;
4133 
4134 	err = sysctl_handle_int(oidp, &i, 0, req);
4135 	if (err != 0 || req->newptr == USER_ADDR_NULL) {
4136 		return err;
4137 	}
4138 
4139 	if (i < 0 || i > BPF_BUFSIZE_CAP) {
4140 		i = BPF_BUFSIZE_CAP;
4141 	}
4142 
4143 	bpf_maxbufsize = i;
4144 	return err;
4145 }
4146 
4147 static int
4148 sysctl_bpf_bufsize_cap SYSCTL_HANDLER_ARGS
4149 {
4150 #pragma unused(arg1, arg2)
4151 	int i, err;
4152 
4153 	i = BPF_BUFSIZE_CAP;
4154 
4155 	err = sysctl_handle_int(oidp, &i, 0, req);
4156 	if (err != 0 || req->newptr == USER_ADDR_NULL) {
4157 		return err;
4158 	}
4159 
4160 	return err;
4161 }
4162 
4163 /*
4164  * Fill filter statistics
4165  */
4166 static void
bpfstats_fill_xbpf(struct xbpf_d * d,struct bpf_d * bd)4167 bpfstats_fill_xbpf(struct xbpf_d *d, struct bpf_d *bd)
4168 {
4169 	LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);
4170 
4171 	d->bd_structsize = sizeof(struct xbpf_d);
4172 	d->bd_promisc = bd->bd_promisc != 0 ? 1 : 0;
4173 	d->bd_immediate = d->bd_immediate != 0 ? 1 : 0;
4174 	d->bd_hdrcmplt = bd->bd_hdrcmplt != 0 ? 1 : 0;
4175 	d->bd_async = bd->bd_async != 0 ? 1 : 0;
4176 	d->bd_headdrop = bd->bd_headdrop != 0 ? 1 : 0;
4177 	d->bd_seesent = bd->bd_seesent != 0 ? 1 : 0;
4178 	d->bh_compreq = bd->bd_flags & BPF_COMP_REQ ? 1 : 0;
4179 	d->bh_compenabled = bd->bd_flags & BPF_COMP_ENABLED ? 1 : 0;
4180 	d->bd_exthdr = bd->bd_flags & BPF_EXTENDED_HDR ? 1 : 0;
4181 	d->bd_trunc = bd->bd_flags & BPF_TRUNCATE ? 1 : 0;
4182 	d->bd_pkthdrv2 = bd->bd_flags & BPF_PKTHDRV2 ? 1 : 0;
4183 
4184 	d->bd_dev_minor = (uint8_t)bd->bd_dev_minor;
4185 
4186 	d->bd_sig = bd->bd_sig;
4187 
4188 	d->bd_rcount = bd->bd_rcount;
4189 	d->bd_dcount = bd->bd_dcount;
4190 	d->bd_fcount = bd->bd_fcount;
4191 	d->bd_slen = bd->bd_slen;
4192 	d->bd_hlen = bd->bd_hlen;
4193 	d->bd_bufsize = bd->bd_bufsize;
4194 	d->bd_pid = bd->bd_pid;
4195 	if (bd->bd_bif != NULL && bd->bd_bif->bif_ifp != NULL) {
4196 		strlcpy(d->bd_ifname,
4197 		    bd->bd_bif->bif_ifp->if_xname, IFNAMSIZ);
4198 	}
4199 
4200 	d->bd_comp_count = bd->bd_bcs.bcs_count_compressed_prefix;
4201 	d->bd_comp_size = bd->bd_bcs.bcs_total_compressed_prefix_size;
4202 
4203 	d->bd_scnt = bd->bd_scnt;
4204 	d->bd_hcnt = bd->bd_hcnt;
4205 
4206 	d->bd_read_count = bd->bd_bcs.bcs_total_read;
4207 	d->bd_fsize = bd->bd_bcs.bcs_total_size;
4208 }
4209 
4210 /*
4211  * Handle `netstat -B' stats request
4212  */
4213 static int
4214 sysctl_bpf_stats SYSCTL_HANDLER_ARGS
4215 {
4216 	int error;
4217 	struct xbpf_d *xbdbuf;
4218 	unsigned int x_cnt;
4219 	vm_size_t buf_size;
4220 
4221 	if (req->oldptr == USER_ADDR_NULL) {
4222 		return SYSCTL_OUT(req, 0, nbpfilter * sizeof(struct xbpf_d));
4223 	}
4224 	if (nbpfilter == 0) {
4225 		return SYSCTL_OUT(req, 0, 0);
4226 	}
4227 	buf_size = req->oldlen;
4228 	xbdbuf = kalloc_data(buf_size, Z_WAITOK | Z_ZERO);
4229 
4230 	lck_mtx_lock(bpf_mlock);
4231 	if (buf_size < (nbpfilter * sizeof(struct xbpf_d))) {
4232 		lck_mtx_unlock(bpf_mlock);
4233 		kfree_data(xbdbuf, buf_size);
4234 		return ENOMEM;
4235 	}
4236 	x_cnt = 0;
4237 	unsigned int i;
4238 
4239 	for (i = 0; i < nbpfilter; i++) {
4240 		struct bpf_d *bd = bpf_dtab[i];
4241 		struct xbpf_d *xbd;
4242 
4243 		if (bd == NULL || bd == BPF_DEV_RESERVED ||
4244 		    (bd->bd_flags & BPF_CLOSING) != 0) {
4245 			continue;
4246 		}
4247 		VERIFY(x_cnt < nbpfilter);
4248 
4249 		xbd = &xbdbuf[x_cnt++];
4250 		bpfstats_fill_xbpf(xbd, bd);
4251 	}
4252 	lck_mtx_unlock(bpf_mlock);
4253 
4254 	error = SYSCTL_OUT(req, xbdbuf, x_cnt * sizeof(struct xbpf_d));
4255 	kfree_data(xbdbuf, buf_size);
4256 	return error;
4257 }
4258