xref: /xnu-8792.81.2/bsd/net/bpf.c (revision 19c3b8c28c31cb8130e034cfb5df6bf9ba342d90)
1 /*
2  * Copyright (c) 2000-2022 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * Copyright (c) 1990, 1991, 1993
30  *	The Regents of the University of California.  All rights reserved.
31  *
32  * This code is derived from the Stanford/CMU enet packet filter,
33  * (net/enet.c) distributed as part of 4.3BSD, and code contributed
34  * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
35  * Berkeley Laboratory.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. All advertising materials mentioning features or use of this software
46  *    must display the following acknowledgement:
47  *	This product includes software developed by the University of
48  *	California, Berkeley and its contributors.
49  * 4. Neither the name of the University nor the names of its contributors
50  *    may be used to endorse or promote products derived from this software
51  *    without specific prior written permission.
52  *
53  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63  * SUCH DAMAGE.
64  *
65  *	@(#)bpf.c	8.2 (Berkeley) 3/28/94
66  *
67  * $FreeBSD: src/sys/net/bpf.c,v 1.59.2.5 2001/01/05 04:49:09 jdp Exp $
68  */
69 /*
70  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
71  * support for mandatory and extensible security protections.  This notice
72  * is included in support of clause 2.2 (b) of the Apple Public License,
73  * Version 2.0.
74  */
75 
76 #include "bpf.h"
77 
78 #ifndef __GNUC__
79 #define inline
80 #else
81 #define inline __inline
82 #endif
83 
84 #include <sys/param.h>
85 #include <sys/systm.h>
86 #include <sys/conf.h>
87 #include <sys/malloc.h>
88 #include <sys/mbuf.h>
89 #include <sys/time.h>
90 #include <sys/proc.h>
91 #include <sys/signalvar.h>
92 #include <sys/filio.h>
93 #include <sys/sockio.h>
94 #include <sys/ttycom.h>
95 #include <sys/filedesc.h>
96 #include <sys/uio_internal.h>
97 #include <sys/file_internal.h>
98 #include <sys/event.h>
99 
100 #include <sys/poll.h>
101 
102 #include <sys/socket.h>
103 #include <sys/socketvar.h>
104 #include <sys/vnode.h>
105 
106 #include <net/if.h>
107 #include <net/bpf.h>
108 #include <net/bpfdesc.h>
109 
110 #include <netinet/in.h>
111 #include <netinet/ip.h>
112 #include <netinet/ip6.h>
113 #include <netinet/in_pcb.h>
114 #include <netinet/in_var.h>
115 #include <netinet/ip_var.h>
116 #include <netinet/tcp.h>
117 #include <netinet/tcp_var.h>
118 #include <netinet/udp.h>
119 #include <netinet/udp_var.h>
120 #include <netinet/if_ether.h>
121 #include <netinet/isakmp.h>
122 #include <netinet6/esp.h>
123 #include <sys/kernel.h>
124 #include <sys/sysctl.h>
125 #include <net/firewire.h>
126 
127 #include <miscfs/devfs/devfs.h>
128 #include <net/dlil.h>
129 #include <net/pktap.h>
130 
131 #include <kern/assert.h>
132 #include <kern/locks.h>
133 #include <kern/thread_call.h>
134 #include <libkern/section_keywords.h>
135 
136 #include <os/log.h>
137 
138 extern int tvtohz(struct timeval *);
139 extern char *proc_name_address(void *p);
140 
141 #define BPF_BUFSIZE 4096
142 #define UIOMOVE(cp, len, code, uio) uiomove(cp, len, uio)
143 
144 #define PRINET  26                      /* interruptible */
145 
146 #define ISAKMP_HDR_SIZE (sizeof(struct isakmp) + sizeof(struct isakmp_gen))
147 #define ESP_HDR_SIZE sizeof(struct newesp)
148 
149 typedef void (*pktcopyfunc_t)(const void *, void *, size_t);
150 
151 /*
152  * The default read buffer size is patchable.
153  */
154 static unsigned int bpf_bufsize = BPF_BUFSIZE;
155 SYSCTL_INT(_debug, OID_AUTO, bpf_bufsize, CTLFLAG_RW | CTLFLAG_LOCKED,
156     &bpf_bufsize, 0, "");
157 
158 __private_extern__ unsigned int bpf_maxbufsize = BPF_MAXBUFSIZE;
159 static int sysctl_bpf_maxbufsize SYSCTL_HANDLER_ARGS;
160 SYSCTL_PROC(_debug, OID_AUTO, bpf_maxbufsize, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
161     &bpf_maxbufsize, 0,
162     sysctl_bpf_maxbufsize, "I", "Default BPF max buffer size");
163 
164 extern const int copysize_limit_panic;
165 #define BPF_BUFSIZE_CAP (copysize_limit_panic >> 1)
166 static int sysctl_bpf_bufsize_cap SYSCTL_HANDLER_ARGS;
167 SYSCTL_PROC(_debug, OID_AUTO, bpf_bufsize_cap, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
168     0, 0,
169     sysctl_bpf_bufsize_cap, "I", "Upper limit on BPF max buffer size");
170 
171 static unsigned int bpf_maxdevices = 256;
172 SYSCTL_UINT(_debug, OID_AUTO, bpf_maxdevices, CTLFLAG_RD | CTLFLAG_LOCKED,
173     &bpf_maxdevices, 0, "");
/*
 * bpf_wantpktap controls the default visibility of DLT_PKTAP.
 * On OS X it is off by default, so a process needs to use the ioctl
 * BPF_WANT_PKTAP explicitly to be able to use DLT_PKTAP.
 */
179 #if !XNU_TARGET_OS_OSX
180 static unsigned int bpf_wantpktap = 1;
181 #else /* XNU_TARGET_OS_OSX */
182 static unsigned int bpf_wantpktap = 0;
183 #endif /* XNU_TARGET_OS_OSX */
184 SYSCTL_UINT(_debug, OID_AUTO, bpf_wantpktap, CTLFLAG_RW | CTLFLAG_LOCKED,
185     &bpf_wantpktap, 0, "");
186 
187 static int bpf_debug = 0;
188 SYSCTL_INT(_debug, OID_AUTO, bpf_debug, CTLFLAG_RW | CTLFLAG_LOCKED,
189     &bpf_debug, 0, "");
190 
191 static unsigned long bpf_trunc_overflow = 0;
192 SYSCTL_ULONG(_debug, OID_AUTO, bpf_trunc_overflow, CTLFLAG_RD | CTLFLAG_LOCKED,
193     &bpf_trunc_overflow, "");
194 
195 static int bpf_hdr_comp_enable = 1;
196 SYSCTL_INT(_debug, OID_AUTO, bpf_hdr_comp_enable, CTLFLAG_RW | CTLFLAG_LOCKED,
197     &bpf_hdr_comp_enable, 1, "");
198 
199 static int sysctl_bpf_stats SYSCTL_HANDLER_ARGS;
200 SYSCTL_PROC(_debug, OID_AUTO, bpf_stats, CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
201     0, 0,
202     sysctl_bpf_stats, "S", "BPF statistics");
203 
204 /*
205  *  bpf_iflist is the list of interfaces; each corresponds to an ifnet
206  *  bpf_dtab holds pointer to the descriptors, indexed by minor device #
207  */
208 static struct bpf_if    *bpf_iflist;
209 /*
210  * BSD now stores the bpf_d in the dev_t which is a struct
211  * on their system. Our dev_t is an int, so we still store
212  * the bpf_d in a separate table indexed by minor device #.
213  *
214  * The value stored in bpf_dtab[n] represent three states:
215  *  NULL: device not opened
216  *  BPF_DEV_RESERVED: device opening or closing
217  *  other: device <n> opened with pointer to storage
218  */
219 #define BPF_DEV_RESERVED ((struct bpf_d *)(uintptr_t)1)
220 static struct bpf_d **bpf_dtab = NULL;
221 static unsigned int bpf_dtab_size = 0;
222 static unsigned int nbpfilter = 0;
223 static unsigned bpf_bpfd_cnt = 0;
224 
225 static LCK_GRP_DECLARE(bpf_mlock_grp, "bpf");
226 static LCK_MTX_DECLARE(bpf_mlock_data, &bpf_mlock_grp);
227 static lck_mtx_t *const bpf_mlock = &bpf_mlock_data;
228 
229 static int      bpf_allocbufs(struct bpf_d *);
230 static errno_t  bpf_attachd(struct bpf_d *d, struct bpf_if *bp);
231 static int      bpf_detachd(struct bpf_d *d);
232 static void     bpf_freed(struct bpf_d *);
233 static int      bpf_movein(struct uio *, int,
234     struct mbuf **, struct sockaddr *, int *);
235 static int      bpf_setif(struct bpf_d *, ifnet_t ifp, bool, bool, bool);
236 static void     bpf_timed_out(void *, void *);
237 static void     bpf_wakeup(struct bpf_d *);
238 static uint32_t get_pkt_trunc_len(struct bpf_packet *);
239 static void     catchpacket(struct bpf_d *, struct bpf_packet *, u_int, int);
240 static void     reset_d(struct bpf_d *);
241 static int      bpf_setf(struct bpf_d *, u_int, user_addr_t, u_long);
242 static int      bpf_getdltlist(struct bpf_d *, caddr_t, struct proc *);
243 static int      bpf_setdlt(struct bpf_d *, u_int);
244 static int      bpf_set_traffic_class(struct bpf_d *, int);
245 static void     bpf_set_packet_service_class(struct mbuf *, int);
246 
247 static void     bpf_acquire_d(struct bpf_d *);
248 static void     bpf_release_d(struct bpf_d *);
249 
250 static  int bpf_devsw_installed;
251 
252 void bpf_init(void *unused);
253 static int bpf_tap_callback(struct ifnet *ifp, struct mbuf *m);
254 
255 /*
256  * Darwin differs from BSD here, the following are static
257  * on BSD and not static on Darwin.
258  */
259 d_open_t            bpfopen;
260 d_close_t           bpfclose;
261 d_read_t            bpfread;
262 d_write_t           bpfwrite;
263 ioctl_fcn_t         bpfioctl;
264 select_fcn_t        bpfselect;
265 
266 /* Darwin's cdevsw struct differs slightly from BSDs */
267 #define CDEV_MAJOR 23
268 static const struct cdevsw bpf_cdevsw = {
269 	.d_open       = bpfopen,
270 	.d_close      = bpfclose,
271 	.d_read       = bpfread,
272 	.d_write      = bpfwrite,
273 	.d_ioctl      = bpfioctl,
274 	.d_stop       = eno_stop,
275 	.d_reset      = eno_reset,
276 	.d_ttys       = NULL,
277 	.d_select     = bpfselect,
278 	.d_mmap       = eno_mmap,
279 	.d_strategy   = eno_strat,
280 	.d_reserved_1 = eno_getc,
281 	.d_reserved_2 = eno_putc,
282 	.d_type       = 0
283 };
284 
285 #define SOCKADDR_HDR_LEN           offsetof(struct sockaddr, sa_data)
286 
/*
 * bpf_movein
 *
 * Copy a packet written by userland (write(2) on a bpf device) from the
 * uio into a newly allocated mbuf, ready for transmission on the attached
 * interface.
 *
 * Parameters:
 *	uio	user data; fully consumed on success
 *	linktype	DLT_* data-link type of the attached interface; it
 *		determines how many leading bytes are link-level header
 *	mp	output: the allocated mbuf holding the packet data
 *	sockp	if non-NULL, the link-level header bytes are copied into
 *		sockp->sa_data and sa_family is set according to the DLT;
 *		if NULL, the header stays in the mbuf data
 *	datlen	output: payload length, excluding the link-level header
 *
 * Returns 0 on success; EIO for an unsupported DLT, a bad length, or a
 * sockaddr too small for the header; ENOBUFS if mbuf allocation fails;
 * otherwise an error from uiomove().
 */
static int
bpf_movein(struct uio *uio, int linktype, struct mbuf **mp,
    struct sockaddr *sockp, int *datlen)
{
	struct mbuf *m;
	int error;
	int len;
	uint8_t sa_family;
	int hlen;	/* link-level header length implied by the DLT */

	switch (linktype) {
#if SLIP
	case DLT_SLIP:
		sa_family = AF_INET;
		hlen = 0;
		break;
#endif /* SLIP */

	case DLT_EN10MB:
		sa_family = AF_UNSPEC;
		/* XXX Would MAXLINKHDR be better? */
		hlen = sizeof(struct ether_header);
		break;

#if FDDI
	case DLT_FDDI:
#if defined(__FreeBSD__) || defined(__bsdi__)
		sa_family = AF_IMPLINK;
		hlen = 0;
#else
		sa_family = AF_UNSPEC;
		/* XXX 4(FORMAC)+6(dst)+6(src)+3(LLC)+5(SNAP) */
		hlen = 24;
#endif
		break;
#endif /* FDDI */

	case DLT_RAW:
	case DLT_NULL:
		sa_family = AF_UNSPEC;
		hlen = 0;
		break;

#ifdef __FreeBSD__
	case DLT_ATM_RFC1483:
		/*
		 * en atm driver requires 4-byte atm pseudo header.
		 * though it isn't standard, vpi:vci needs to be
		 * specified anyway.
		 */
		sa_family = AF_UNSPEC;
		hlen = 12;      /* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
		break;
#endif

	case DLT_PPP:
		sa_family = AF_UNSPEC;
		hlen = 4;       /* This should match PPP_HDRLEN */
		break;

	case DLT_APPLE_IP_OVER_IEEE1394:
		sa_family = AF_UNSPEC;
		hlen = sizeof(struct firewire_header);
		break;

	case DLT_IEEE802_11:            /* IEEE 802.11 wireless */
		sa_family = AF_IEEE80211;
		hlen = 0;
		break;

	case DLT_IEEE802_11_RADIO:
		sa_family = AF_IEEE80211;
		hlen = 0;
		break;

	default:
		return EIO;
	}

	// LP64todo - fix this!
	/* Reject packets larger than a cluster or shorter than the header. */
	len = (int)uio_resid(uio);
	if (len < hlen || (unsigned)len > MCLBYTES || len - hlen > MCLBYTES) {
		return EIO;
	}

	*datlen = len - hlen;

	if (sockp) {
		/*
		 * Build a sockaddr based on the data link layer type.
		 * We do this at this level because the ethernet header
		 * is copied directly into the data field of the sockaddr.
		 * In the case of SLIP, there is no header and the packet
		 * is forwarded as is.
		 * Also, we are careful to leave room at the front of the mbuf
		 * for the link level header.
		 */
		if ((hlen + SOCKADDR_HDR_LEN) > sockp->sa_len) {
			return EIO;
		}
		sockp->sa_family = sa_family;
	} else {
		/*
		 * We're directly sending the packet data supplied by
		 * the user; we don't need to make room for the link
		 * header, and don't need the header length value any
		 * more, so set it to 0.
		 */
		hlen = 0;
	}

	MGETHDR(m, M_WAIT, MT_DATA);
	if (m == 0) {
		return ENOBUFS;
	}
	/* Payload doesn't fit the mbuf header storage: attach a cluster. */
	if ((unsigned)len > MHLEN) {
		MCLGET(m, M_WAIT);
		if ((m->m_flags & M_EXT) == 0) {
			error = ENOBUFS;
			goto bad;
		}
	}
	m->m_pkthdr.len = m->m_len = len;
	m->m_pkthdr.rcvif = NULL;
	*mp = m;

	/*
	 * Make room for link header.
	 */
	if (hlen != 0) {
		/* Header goes into the sockaddr, not the mbuf data. */
		m->m_pkthdr.len -= hlen;
		m->m_len -= hlen;
		m->m_data += hlen; /* XXX */
		error = UIOMOVE((caddr_t)sockp->sa_data, hlen, UIO_WRITE, uio);
		if (error) {
			goto bad;
		}
	}
	/* Copy the (remaining) payload into the mbuf. */
	error = UIOMOVE(mtod(m, caddr_t), len - hlen, UIO_WRITE, uio);
	if (error) {
		goto bad;
	}

	/* Check for multicast destination */
	switch (linktype) {
	case DLT_EN10MB: {
		struct ether_header *eh;

		/* Tag broadcast/multicast frames so drivers handle them right. */
		eh = mtod(m, struct ether_header *);
		if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
			if (_ether_cmp(etherbroadcastaddr,
			    eh->ether_dhost) == 0) {
				m->m_flags |= M_BCAST;
			} else {
				m->m_flags |= M_MCAST;
			}
		}
		break;
	}
	}

	return 0;
bad:
	m_freem(m);
	return error;
}
453 
454 /*
455  * The dynamic addition of a new device node must block all processes that
456  * are opening the last device so that no process will get an unexpected
457  * ENOENT
458  */
/*
 * bpf_make_dev_t
 *
 * Create the next /dev/bpfN device node (growing the bpf_dtab descriptor
 * table by NBPFILTER entries when it is full).
 *
 * The dynamic addition of a new device node must block all processes that
 * are opening the last device so that no process will get an unexpected
 * ENOENT.  Concurrent callers serialize on the file-local bpf_growing
 * flag via tsleep()/wakeup().
 *
 * Parameters:
 *	maj	character-device major number to use for the new node
 */
static void
bpf_make_dev_t(int maj)
{
	static int              bpf_growing = 0;	/* 1 while a grow is in flight */
	unsigned int    cur_size = nbpfilter, i;

	/* Hard cap on the number of bpf devices (debug.bpf_maxdevices). */
	if (nbpfilter >= bpf_maxdevices) {
		return;
	}

	while (bpf_growing) {
		/* Wait until new device has been created */
		(void) tsleep((caddr_t)&bpf_growing, PZERO, "bpf_growing", 0);
	}
	if (nbpfilter > cur_size) {
		/* other thread grew it already */
		return;
	}
	bpf_growing = 1;

	/* need to grow bpf_dtab first */
	if (nbpfilter == bpf_dtab_size) {
		unsigned int new_dtab_size;
		struct bpf_d **new_dtab = NULL;

		new_dtab_size = bpf_dtab_size + NBPFILTER;
		new_dtab = krealloc_type(struct bpf_d *,
		    bpf_dtab_size, new_dtab_size, bpf_dtab, Z_WAITOK | Z_ZERO);
		if (new_dtab == 0) {
			os_log_error(OS_LOG_DEFAULT, "bpf_make_dev_t: malloc bpf_dtab failed");
			goto done;
		}
		bpf_dtab = new_dtab;
		bpf_dtab_size = new_dtab_size;
	}
	/* Claim the next minor and publish its device node. */
	i = nbpfilter++;
	(void) devfs_make_node(makedev(maj, i),
	    DEVFS_CHAR, UID_ROOT, GID_WHEEL, 0600,
	    "bpf%d", i);
done:
	/* Release the grow lock and wake any opener blocked above. */
	bpf_growing = 0;
	wakeup((caddr_t)&bpf_growing);
}
502 
503 /*
504  * Attach file to the bpf interface, i.e. make d listen on bp.
505  */
/*
 * Attach file to the bpf interface, i.e. make d listen on bp.
 *
 * Called with bpf_mlock held.  Links d onto bp's listener list, takes a
 * reference on d, and — when d is the first listener for this (ifp, dlt)
 * pair — turns the driver's tap on.
 *
 * Returns 0 on success or the error from the interface's bif_tap callback.
 */
static errno_t
bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
{
	/* Remember whether the list was empty before we add d below. */
	int first = bp->bif_dlist == NULL;
	int     error = 0;

	/*
	 * Point d at bp, and add d to the interface's list of listeners.
	 * Finally, point the driver's bpf cookie at the interface so
	 * it will divert packets to bpf.
	 */
	d->bd_bif = bp;
	d->bd_next = bp->bif_dlist;
	bp->bif_dlist = d;
	bpf_bpfd_cnt++;

	/*
	 * Take a reference on the device even if an error is returned
	 * because we keep the device in the interface's list of listeners
	 */
	bpf_acquire_d(d);

	if (first) {
		/* Find the default bpf entry for this ifp */
		if (bp->bif_ifp->if_bpf == NULL) {
			struct bpf_if   *tmp, *primary = NULL;

			/* First bpf_if on bpf_iflist matching ifp is primary. */
			for (tmp = bpf_iflist; tmp; tmp = tmp->bif_next) {
				if (tmp->bif_ifp == bp->bif_ifp) {
					primary = tmp;
					break;
				}
			}
			bp->bif_ifp->if_bpf = primary;
		}
		/* Only call dlil_set_bpf_tap for primary dlt */
		if (bp->bif_ifp->if_bpf == bp) {
			dlil_set_bpf_tap(bp->bif_ifp, BPF_TAP_INPUT_OUTPUT,
			    bpf_tap_callback);
		}

		/* Give the driver's own tap callback a chance to enable. */
		if (bp->bif_tap != NULL) {
			error = bp->bif_tap(bp->bif_ifp, bp->bif_dlt,
			    BPF_TAP_INPUT_OUTPUT);
		}
	}

	/*
	 * Reset the detach flags in case we previously detached an interface
	 */
	d->bd_flags &= ~(BPF_DETACHING | BPF_DETACHED);

	/* PKTAP headers need per-read finalization; remember that on d. */
	if (bp->bif_dlt == DLT_PKTAP) {
		d->bd_flags |= BPF_FINALIZE_PKTAP;
	} else {
		d->bd_flags &= ~BPF_FINALIZE_PKTAP;
	}
	return error;
}
565 
566 /*
567  * Detach a file from its interface.
568  *
569  * Return 1 if was closed by some thread, 0 otherwise
570  */
/*
 * Detach a file from its interface.
 *
 * Called with bpf_mlock held; NOTE: the lock is dropped and re-acquired
 * around ifnet_set_promiscuous() and the driver tap-disable callback, so
 * d and the interface lists may be modified by other threads in between.
 * The BPF_DETACHING flag guards against a concurrent second detach.
 *
 * Return 1 if was closed by some thread, 0 otherwise
 */
static int
bpf_detachd(struct bpf_d *d)
{
	struct bpf_d **p;
	struct bpf_if *bp;
	struct ifnet  *ifp;
	uint32_t dlt;
	bpf_tap_func disable_tap;
	uint8_t bd_promisc;


	int bpf_closed = d->bd_flags & BPF_CLOSING;
	/*
	 * Some other thread already detached
	 */
	if ((d->bd_flags & (BPF_DETACHED | BPF_DETACHING)) != 0) {
		goto done;
	}
	/*
	 * This thread is doing the detach
	 */
	d->bd_flags |= BPF_DETACHING;

	ifp = d->bd_bif->bif_ifp;
	bp = d->bd_bif;

	/* Remove d from the interface's descriptor list. */
	p = &bp->bif_dlist;
	while (*p != d) {
		p = &(*p)->bd_next;
		if (*p == 0) {
			panic("bpf_detachd: descriptor not in list");
		}
	}
	*p = (*p)->bd_next;
	bpf_bpfd_cnt--;
	disable_tap = NULL;
	if (bp->bif_dlist == 0) {
		/*
		 * Let the driver know that there are no more listeners.
		 */
		/* Only call dlil_set_bpf_tap for primary dlt */
		if (bp->bif_ifp->if_bpf == bp) {
			dlil_set_bpf_tap(ifp, BPF_TAP_DISABLE, NULL);
		}

		/* Defer the driver callback until after the lock is dropped. */
		disable_tap = bp->bif_tap;
		if (disable_tap) {
			dlt = bp->bif_dlt;
		}

		/*
		 * If no other DLT on this ifp still has listeners, clear the
		 * interface's bpf cookie entirely.
		 */
		for (bp = bpf_iflist; bp; bp = bp->bif_next) {
			if (bp->bif_ifp == ifp && bp->bif_dlist != 0) {
				break;
			}
		}
		if (bp == NULL) {
			ifp->if_bpf = NULL;
		}
	}
	d->bd_bif = NULL;
	/*
	 * Check if this descriptor had requested promiscuous mode.
	 * If so, turn it off.
	 */
	bd_promisc = d->bd_promisc;
	d->bd_promisc = 0;

	/* ifnet calls below may block; they must run without bpf_mlock. */
	lck_mtx_unlock(bpf_mlock);
	if (bd_promisc) {
		if (ifnet_set_promiscuous(ifp, 0)) {
			/*
			 * Something is really wrong if we were able to put
			 * the driver into promiscuous mode, but can't
			 * take it out.
			 * Most likely the network interface is gone.
			 */
			os_log_error(OS_LOG_DEFAULT,
			    "%s: bpf%d ifnet_set_promiscuous %s failed",
			    __func__, d->bd_dev_minor, if_name(ifp));
		}
	}

	if (disable_tap) {
		disable_tap(ifp, dlt, BPF_TAP_DISABLE);
	}
	lck_mtx_lock(bpf_mlock);

	/*
	 * Wake up other thread that are waiting for this thread to finish
	 * detaching
	 */
	d->bd_flags &= ~BPF_DETACHING;
	d->bd_flags |= BPF_DETACHED;

	/* Refresh the local variable as d could have been modified */
	bpf_closed = d->bd_flags & BPF_CLOSING;

	os_log(OS_LOG_DEFAULT, "bpf%d%s detached from %s fcount %llu dcount %llu",
	    d->bd_dev_minor, bpf_closed ? " closed and" : "", if_name(ifp),
	    d->bd_fcount, d->bd_dcount);

	/*
	 * Note that We've kept the reference because we may have dropped
	 * the lock when turning off promiscuous mode
	 */
	bpf_release_d(d);
done:
	/*
	 * Let the caller know the bpf_d is closed
	 */
	if (bpf_closed) {
		return 1;
	} else {
		return 0;
	}
}
688 
689 /*
690  * Start asynchronous timer, if necessary.
691  * Must be called with bpf_mlock held.
692  */
/*
 * Start asynchronous timer, if necessary.
 * Must be called with bpf_mlock held.
 *
 * Arms d->bd_thread_call to fire bpf_timed_out() after the read timeout
 * d->bd_rtout (expressed in ticks) and moves the state machine from
 * BPF_IDLE to BPF_WAITING.  No-op if no timeout is set or a timer is
 * already pending.
 */
static void
bpf_start_timer(struct bpf_d *d)
{
	uint64_t deadline;
	struct timeval tv;

	if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
		/* Convert the tick-based timeout into seconds+microseconds. */
		tv.tv_sec = d->bd_rtout / hz;
		tv.tv_usec = (d->bd_rtout % hz) * tick;

		clock_interval_to_deadline(
			(uint32_t)tv.tv_sec * USEC_PER_SEC + tv.tv_usec,
			NSEC_PER_USEC, &deadline);
		/*
		 * The state is BPF_IDLE, so the timer hasn't
		 * been started yet, and hasn't gone off yet;
		 * there is no thread call scheduled, so this
		 * won't change the schedule.
		 *
		 * XXX - what if, by the time it gets entered,
		 * the deadline has already passed?
		 */
		thread_call_enter_delayed(d->bd_thread_call, deadline);
		d->bd_state = BPF_WAITING;
	}
}
719 
720 /*
721  * Cancel asynchronous timer.
722  * Must be called with bpf_mlock held.
723  */
/*
 * Cancel asynchronous timer.
 * Must be called with bpf_mlock held.
 *
 * Returns TRUE if a pending call was cancelled, FALSE if the timer had
 * already fired (in which case bpf_timed_out() may be running or queued).
 */
static boolean_t
bpf_stop_timer(struct bpf_d *d)
{
	/*
	 * If the timer has already gone off, this does nothing.
	 * Our caller is expected to set d->bd_state to BPF_IDLE,
	 * with the bpf_mlock, after we are called. bpf_timed_out()
	 * also grabs bpf_mlock, so, if the timer has gone off and
	 * bpf_timed_out() hasn't finished, it's waiting for the
	 * lock; when this thread releases the lock, it will
	 * find the state is BPF_IDLE, and just release the
	 * lock and return.
	 */
	return thread_call_cancel(d->bd_thread_call);
}
739 
740 void
bpf_acquire_d(struct bpf_d * d)741 bpf_acquire_d(struct bpf_d *d)
742 {
743 	void *lr_saved =  __builtin_return_address(0);
744 
745 	LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);
746 
747 	d->bd_refcnt += 1;
748 
749 	d->bd_ref_lr[d->bd_next_ref_lr] = lr_saved;
750 	d->bd_next_ref_lr = (d->bd_next_ref_lr + 1) % BPF_REF_HIST;
751 }
752 
/*
 * Drop a reference on the descriptor d, freeing it when the count hits 0.
 * Must be called with bpf_mlock held.
 *
 * Panics on refcount underflow and if the final release happens while the
 * descriptor is still attached (BPF_DETACHED not set).  The releaser's
 * return address is recorded in a ring buffer for refcount debugging.
 */
void
bpf_release_d(struct bpf_d *d)
{
	void *lr_saved =  __builtin_return_address(0);

	LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);

	if (d->bd_refcnt <= 0) {
		panic("%s: %p refcnt <= 0", __func__, d);
	}

	d->bd_refcnt -= 1;

	d->bd_unref_lr[d->bd_next_unref_lr] = lr_saved;
	d->bd_next_unref_lr = (d->bd_next_unref_lr + 1) % BPF_REF_HIST;

	if (d->bd_refcnt == 0) {
		/* Assert the device is detached */
		if ((d->bd_flags & BPF_DETACHED) == 0) {
			panic("%s: %p BPF_DETACHED not set", __func__, d);
		}

		kfree_type(struct bpf_d, d);
	}
}
778 
779 /*
780  * Open ethernet device.  Returns ENXIO for illegal minor device number,
781  * EBUSY if file is open by another process.
782  */
783 /* ARGSUSED */
/*
 * Open ethernet device.  Returns ENXIO for illegal minor device number,
 * EBUSY if file is open by another process.
 *
 * Allocates and initializes a struct bpf_d for the minor, publishes it in
 * bpf_dtab[minor], and — when the last existing node is being opened —
 * creates the next device node via bpf_make_dev_t().
 */
/* ARGSUSED */
int
bpfopen(dev_t dev, int flags, __unused int fmt,
    struct proc *p)
{
	struct bpf_d *d;

	lck_mtx_lock(bpf_mlock);
	if ((unsigned int) minor(dev) >= nbpfilter) {
		lck_mtx_unlock(bpf_mlock);
		return ENXIO;
	}
	/*
	 * New device nodes are created on demand when opening the last one.
	 * The programming model is for processes to loop on the minor starting
	 * at 0 as long as EBUSY is returned. The loop stops when either the
	 * open succeeds or an error other than EBUSY is returned. That means
	 * that bpf_make_dev_t() must block all processes that are opening the
	 * last node. If not all processes are blocked, they could unexpectedly
	 * get ENOENT and abort their opening loop.
	 */
	if ((unsigned int) minor(dev) == (nbpfilter - 1)) {
		bpf_make_dev_t(major(dev));
	}

	/*
	 * Each minor can be opened by only one process.  If the requested
	 * minor is in use, return EBUSY.
	 *
	 * Important: bpfopen() and bpfclose() have to check and set the status
	 * of a device in the same locking context otherwise the device may be
	 * leaked because the vnode use count will be unexpectedly greater than 1
	 * when close() is called.
	 */
	if (bpf_dtab[minor(dev)] == NULL) {
		/* Reserve while opening */
		bpf_dtab[minor(dev)] = BPF_DEV_RESERVED;
	} else {
		lck_mtx_unlock(bpf_mlock);
		return EBUSY;
	}
	d = kalloc_type(struct bpf_d, Z_WAITOK | Z_ZERO);
	if (d == NULL) {
		/* this really is a catastrophic failure */
		os_log_error(OS_LOG_DEFAULT,
		    "bpfopen: bpf%d kalloc_type bpf_d failed", minor(dev));
		bpf_dtab[minor(dev)] = NULL;
		lck_mtx_unlock(bpf_mlock);
		return ENOMEM;
	}

	/* Mark "in use" and do most initialization. */
	bpf_acquire_d(d);
	d->bd_bufsize = bpf_bufsize;
	d->bd_sig = SIGIO;
	d->bd_seesent = 1;
	d->bd_oflags = flags;
	d->bd_state = BPF_IDLE;
	d->bd_traffic_class = SO_TC_BE;
	/* Not attached to an interface yet. */
	d->bd_flags |= BPF_DETACHED;
	if (bpf_wantpktap) {
		d->bd_flags |= BPF_WANT_PKTAP;
	} else {
		d->bd_flags &= ~BPF_WANT_PKTAP;
	}

	/* Thread call used for the BIOCSRTIMEOUT read-timeout machinery. */
	d->bd_thread_call = thread_call_allocate(bpf_timed_out, d);
	if (d->bd_thread_call == NULL) {
		os_log_error(OS_LOG_DEFAULT, "bpfopen: bpf%d malloc thread call failed",
		    minor(dev));
		bpf_dtab[minor(dev)] = NULL;
		bpf_release_d(d);
		lck_mtx_unlock(bpf_mlock);

		return ENOMEM;
	}
	d->bd_opened_by = p;
	uuid_generate(d->bd_uuid);
	d->bd_pid = proc_pid(p);

	d->bd_dev_minor = minor(dev);
	bpf_dtab[minor(dev)] = d; /* Mark opened */
	lck_mtx_unlock(bpf_mlock);

	if (bpf_debug) {
		os_log(OS_LOG_DEFAULT, "bpf%u opened by %s.%u",
		    d->bd_dev_minor, proc_name_address(p), d->bd_pid);
	}
	return 0;
}
873 
874 /*
875  * Close the descriptor by detaching it from its interface,
876  * deallocating its buffers, and marking it free.
877  */
878 /* ARGSUSED */
/*
 * Close the descriptor by detaching it from its interface,
 * deallocating its buffers, and marking it free.
 *
 * Handles any in-flight read-timeout thread call: a pending call is
 * cancelled, and one that has already fired is drained (BPF_DRAINING)
 * before the descriptor is torn down.
 */
/* ARGSUSED */
int
bpfclose(dev_t dev, __unused int flags, __unused int fmt,
    __unused struct proc *p)
{
	struct bpf_d *d;

	/* Take BPF lock to ensure no other thread is using the device */
	lck_mtx_lock(bpf_mlock);

	d = bpf_dtab[minor(dev)];
	if (d == NULL || d == BPF_DEV_RESERVED) {
		lck_mtx_unlock(bpf_mlock);
		return ENXIO;
	}

	/*
	 * Other threads may call bpf_detachd() if we drop the bpf_mlock
	 */
	d->bd_flags |= BPF_CLOSING;

	if (bpf_debug != 0) {
		os_log(OS_LOG_DEFAULT, "%s: bpf%d",
		    __func__, d->bd_dev_minor);
	}

	bpf_dtab[minor(dev)] = BPF_DEV_RESERVED; /* Reserve while closing */

	/*
	 * Deal with any in-progress timeouts.
	 */
	switch (d->bd_state) {
	case BPF_IDLE:
		/*
		 * Not waiting for a timeout, and no timeout happened.
		 */
		break;

	case BPF_WAITING:
		/*
		 * Waiting for a timeout.
		 * Cancel any timer that has yet to go off,
		 * and mark the state as "closing".
		 * Then drop the lock to allow any timers that
		 * *have* gone off to run to completion, and wait
		 * for them to finish.
		 */
		if (!bpf_stop_timer(d)) {
			/*
			 * There was no pending call, so the call must
			 * have been in progress. Wait for the call to
			 * complete; we have to drop the lock while
			 * waiting, to let the in-progress call complete
			 */
			d->bd_state = BPF_DRAINING;
			while (d->bd_state == BPF_DRAINING) {
				msleep((caddr_t)d, bpf_mlock, PRINET,
				    "bpfdraining", NULL);
			}
		}
		d->bd_state = BPF_IDLE;
		break;

	case BPF_TIMED_OUT:
		/*
		 * Timer went off, and the timeout routine finished.
		 */
		d->bd_state = BPF_IDLE;
		break;

	case BPF_DRAINING:
		/*
		 * Another thread is blocked on a close waiting for
		 * a timeout to finish.
		 * This "shouldn't happen", as the first thread to enter
		 * bpfclose() will set bpf_dtab[minor(dev)] to 1, and
		 * all subsequent threads should see that and fail with
		 * ENXIO.
		 */
		panic("Two threads blocked in a BPF close");
		break;
	}

	if (d->bd_bif) {
		bpf_detachd(d);
	}
	selthreadclear(&d->bd_sel);
	thread_call_free(d->bd_thread_call);

	/* Wait for any reader still holding the hold buffer to finish. */
	while (d->bd_hbuf_read != 0) {
		msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
	}

	if (bpf_debug) {
		os_log(OS_LOG_DEFAULT,
		    "bpf%u closed by %s.%u dcount %llu fcount %llu ccount %llu",
		    d->bd_dev_minor, proc_name_address(p), d->bd_pid,
		    d->bd_dcount, d->bd_fcount, d->bd_bcs.bcs_count_compressed_prefix);
	}

	bpf_freed(d);

	/* Mark free in same context as bpfopen comes to check */
	bpf_dtab[minor(dev)] = NULL;                    /* Mark closed */

	/* Drop the open reference; frees d once all references are gone. */
	bpf_release_d(d);

	lck_mtx_unlock(bpf_mlock);

	return 0;
}
989 
#define BPF_SLEEP bpf_sleep

/*
 * Sleep on descriptor d (with bpf_mlock held and released during the
 * sleep) for at most timo ticks; a timo of 0 sleeps with no timeout.
 * Returns the msleep1() result (0, EINTR/ERESTART, or EWOULDBLOCK).
 */
static int
bpf_sleep(struct bpf_d *d, int pri, const char *wmesg, int timo)
{
	u_int64_t abstime = 0;

	if (timo != 0) {
		/* Convert ticks to an absolute deadline (NSEC_PER_SEC/hz ns per tick). */
		clock_interval_to_deadline(timo, NSEC_PER_SEC / hz, &abstime);
	}

	return msleep1((caddr_t)d, bpf_mlock, pri, wmesg, abstime);
}
1003 
/*
 * Finalize a captured PKTAP record just before it is delivered to the
 * reader: fill in delayed process information and, for v1 headers that
 * carry their own timestamp, propagate it into the bpf header.
 *
 * Parameters:
 *	hp	bpf header of the captured packet (timestamp may be updated)
 *	pktaphdr	PKTAP metadata header (v1 or v2 layout, per pth_flags)
 */
static void
bpf_finalize_pktap(struct bpf_hdr *hp, struct pktap_header *pktaphdr)
{
	if (pktaphdr->pth_flags & PTH_FLAG_V2_HDR) {
		struct pktap_v2_hdr *pktap_v2_hdr;

		pktap_v2_hdr = (struct pktap_v2_hdr *)pktaphdr;

		/* Process info lookup was deferred at capture time. */
		if (pktap_v2_hdr->pth_flags & PTH_FLAG_DELAY_PKTAP) {
			pktap_v2_finalize_proc_info(pktap_v2_hdr);
		}
	} else {
		if (pktaphdr->pth_flags & PTH_FLAG_DELAY_PKTAP) {
			pktap_finalize_proc_info(pktaphdr);
		}

		/* v1 headers may carry a more precise capture timestamp. */
		if (pktaphdr->pth_flags & PTH_FLAG_TSTAMP) {
			hp->bh_tstamp.tv_sec = pktaphdr->pth_tstamp.tv_sec;
			hp->bh_tstamp.tv_usec = pktaphdr->pth_tstamp.tv_usec;
		}
	}
}
1026 
1027 /*
1028  * Rotate the packet buffers in descriptor d.  Move the store buffer
1029  * into the hold slot, and the free buffer into the store slot.
1030  * Zero the length of the new store buffer.
1031  *
 * Note: in head drop mode, the hold buffer can be dropped so the first packet of the
 * store buffer cannot be compressed as it otherwise would refer to deleted data
 * in a dropped hold buffer that the reader process does not know about
1035  */
#define ROTATE_BUFFERS(d) do { \
	if (d->bd_hbuf_read != 0) /* never rotate while a read owns the hold buffer */ \
	        panic("rotating bpf buffers during read"); \
	(d)->bd_hbuf = (d)->bd_sbuf; /* store buffer becomes the hold buffer */ \
	(d)->bd_hlen = (d)->bd_slen; \
	(d)->bd_hcnt = (d)->bd_scnt; \
	(d)->bd_sbuf = (d)->bd_fbuf; /* free buffer becomes the (empty) store buffer */ \
	(d)->bd_slen = 0; \
	(d)->bd_scnt = 0; \
	(d)->bd_fbuf = NULL; \
	if ((d)->bd_headdrop != 0) /* see head-drop note above: disable compression against old data */ \
	        (d)->bd_prev_slen = 0; \
} while(false)
1049 
1050 /*
1051  *  bpfread - read next chunk of packets from buffers
1052  */
1053 int
bpfread(dev_t dev,struct uio * uio,int ioflag)1054 bpfread(dev_t dev, struct uio *uio, int ioflag)
1055 {
1056 	struct bpf_d *d;
1057 	caddr_t hbuf;
1058 	int timed_out, hbuf_len;
1059 	int error;
1060 	int flags;
1061 
1062 	lck_mtx_lock(bpf_mlock);
1063 
1064 	d = bpf_dtab[minor(dev)];
1065 	if (d == NULL || d == BPF_DEV_RESERVED ||
1066 	    (d->bd_flags & BPF_CLOSING) != 0) {
1067 		lck_mtx_unlock(bpf_mlock);
1068 		return ENXIO;
1069 	}
1070 
1071 	bpf_acquire_d(d);
1072 
1073 	/*
1074 	 * Restrict application to use a buffer the same size as
1075 	 * as kernel buffers.
1076 	 */
1077 	if (uio_resid(uio) != d->bd_bufsize) {
1078 		bpf_release_d(d);
1079 		lck_mtx_unlock(bpf_mlock);
1080 		return EINVAL;
1081 	}
1082 
1083 	if (d->bd_state == BPF_WAITING) {
1084 		bpf_stop_timer(d);
1085 	}
1086 
1087 	timed_out = (d->bd_state == BPF_TIMED_OUT);
1088 	d->bd_state = BPF_IDLE;
1089 
1090 	while (d->bd_hbuf_read != 0) {
1091 		msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
1092 	}
1093 
1094 	if ((d->bd_flags & BPF_CLOSING) != 0) {
1095 		bpf_release_d(d);
1096 		lck_mtx_unlock(bpf_mlock);
1097 		return ENXIO;
1098 	}
1099 	/*
1100 	 * If the hold buffer is empty, then do a timed sleep, which
1101 	 * ends when the timeout expires or when enough packets
1102 	 * have arrived to fill the store buffer.
1103 	 */
1104 	while (d->bd_hbuf == 0) {
1105 		if ((d->bd_immediate || timed_out || (ioflag & IO_NDELAY)) &&
1106 		    d->bd_slen != 0) {
1107 			/*
1108 			 * We're in immediate mode, or are reading
1109 			 * in non-blocking mode, or a timer was
1110 			 * started before the read (e.g., by select()
1111 			 * or poll()) and has expired and a packet(s)
1112 			 * either arrived since the previous
1113 			 * read or arrived while we were asleep.
1114 			 * Rotate the buffers and return what's here.
1115 			 */
1116 			ROTATE_BUFFERS(d);
1117 			break;
1118 		}
1119 
1120 		/*
1121 		 * No data is available, check to see if the bpf device
1122 		 * is still pointed at a real interface.  If not, return
1123 		 * ENXIO so that the userland process knows to rebind
1124 		 * it before using it again.
1125 		 */
1126 		if (d->bd_bif == NULL) {
1127 			bpf_release_d(d);
1128 			lck_mtx_unlock(bpf_mlock);
1129 			return ENXIO;
1130 		}
1131 		if (ioflag & IO_NDELAY) {
1132 			bpf_release_d(d);
1133 			lck_mtx_unlock(bpf_mlock);
1134 			return EWOULDBLOCK;
1135 		}
1136 		error = BPF_SLEEP(d, PRINET | PCATCH, "bpf", d->bd_rtout);
1137 		/*
1138 		 * Make sure device is still opened
1139 		 */
1140 		if ((d->bd_flags & BPF_CLOSING) != 0) {
1141 			bpf_release_d(d);
1142 			lck_mtx_unlock(bpf_mlock);
1143 			return ENXIO;
1144 		}
1145 
1146 		while (d->bd_hbuf_read != 0) {
1147 			msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading",
1148 			    NULL);
1149 		}
1150 
1151 		if ((d->bd_flags & BPF_CLOSING) != 0) {
1152 			bpf_release_d(d);
1153 			lck_mtx_unlock(bpf_mlock);
1154 			return ENXIO;
1155 		}
1156 
1157 		if (error == EINTR || error == ERESTART) {
1158 			if (d->bd_hbuf != NULL) {
1159 				/*
1160 				 * Because we msleep, the hold buffer might
1161 				 * be filled when we wake up.  Avoid rotating
1162 				 * in this case.
1163 				 */
1164 				break;
1165 			}
1166 			if (d->bd_slen != 0) {
1167 				/*
1168 				 * Sometimes we may be interrupted often and
1169 				 * the sleep above will not timeout.
1170 				 * Regardless, we should rotate the buffers
1171 				 * if there's any new data pending and
1172 				 * return it.
1173 				 */
1174 				ROTATE_BUFFERS(d);
1175 				break;
1176 			}
1177 			bpf_release_d(d);
1178 			lck_mtx_unlock(bpf_mlock);
1179 			if (error == ERESTART) {
1180 				os_log(OS_LOG_DEFAULT, "%s: bpf%d ERESTART to EINTR",
1181 				    __func__, d->bd_dev_minor);
1182 				error = EINTR;
1183 			}
1184 			return error;
1185 		}
1186 		if (error == EWOULDBLOCK) {
1187 			/*
1188 			 * On a timeout, return what's in the buffer,
1189 			 * which may be nothing.  If there is something
1190 			 * in the store buffer, we can rotate the buffers.
1191 			 */
1192 			if (d->bd_hbuf) {
1193 				/*
1194 				 * We filled up the buffer in between
1195 				 * getting the timeout and arriving
1196 				 * here, so we don't need to rotate.
1197 				 */
1198 				break;
1199 			}
1200 
1201 			if (d->bd_slen == 0) {
1202 				bpf_release_d(d);
1203 				lck_mtx_unlock(bpf_mlock);
1204 				return 0;
1205 			}
1206 			ROTATE_BUFFERS(d);
1207 			break;
1208 		}
1209 	}
1210 	/*
1211 	 * At this point, we know we have something in the hold slot.
1212 	 */
1213 
1214 	/*
1215 	 * Set the hold buffer read. So we do not
1216 	 * rotate the buffers until the hold buffer
1217 	 * read is complete. Also to avoid issues resulting
1218 	 * from page faults during disk sleep (<rdar://problem/13436396>).
1219 	 */
1220 	d->bd_hbuf_read = 1;
1221 	hbuf = d->bd_hbuf;
1222 	hbuf_len = d->bd_hlen;
1223 	flags = d->bd_flags;
1224 	d->bd_bcs.bcs_total_read += d->bd_hcnt;
1225 	lck_mtx_unlock(bpf_mlock);
1226 
1227 	/*
1228 	 * Before we move data to userland, we fill out the extended
1229 	 * header fields.
1230 	 */
1231 	if (flags & BPF_EXTENDED_HDR) {
1232 		char *p;
1233 
1234 		p = hbuf;
1235 		while (p < hbuf + hbuf_len) {
1236 			struct bpf_hdr_ext *ehp;
1237 			uint32_t flowid;
1238 			struct so_procinfo soprocinfo;
1239 			int found = 0;
1240 
1241 			ehp = (struct bpf_hdr_ext *)(void *)p;
1242 			if ((flowid = ehp->bh_flowid) != 0) {
1243 				if (ehp->bh_flags & BPF_HDR_EXT_FLAGS_TCP) {
1244 					ehp->bh_flags &= ~BPF_HDR_EXT_FLAGS_TCP;
1245 					found = inp_findinpcb_procinfo(&tcbinfo,
1246 					    flowid, &soprocinfo);
1247 				} else if (ehp->bh_flags == BPF_HDR_EXT_FLAGS_UDP) {
1248 					ehp->bh_flags &= ~BPF_HDR_EXT_FLAGS_UDP;
1249 					found = inp_findinpcb_procinfo(&udbinfo,
1250 					    flowid, &soprocinfo);
1251 				}
1252 				if (found == 1) {
1253 					ehp->bh_pid = soprocinfo.spi_pid;
1254 					strlcpy(&ehp->bh_comm[0], &soprocinfo.spi_proc_name[0], sizeof(ehp->bh_comm));
1255 				}
1256 				ehp->bh_flowid = 0;
1257 			}
1258 
1259 			if ((flags & BPF_FINALIZE_PKTAP) != 0 && ehp->bh_complen == 0) {
1260 				struct pktap_header *pktaphdr;
1261 
1262 				pktaphdr = (struct pktap_header *)(void *)
1263 				    (p + BPF_WORDALIGN(ehp->bh_hdrlen));
1264 
1265 				bpf_finalize_pktap((struct bpf_hdr *) ehp,
1266 				    pktaphdr);
1267 			}
1268 			p += BPF_WORDALIGN(ehp->bh_hdrlen + ehp->bh_caplen);
1269 		}
1270 	} else if (flags & BPF_FINALIZE_PKTAP) {
1271 		char *p;
1272 
1273 		p = hbuf;
1274 
1275 		while (p < hbuf + hbuf_len) {
1276 			struct bpf_hdr *hp;
1277 			struct pktap_header *pktaphdr;
1278 
1279 			hp = (struct bpf_hdr *)(void *)p;
1280 
1281 			/*
1282 			 * Cannot finalize a compressed pktap header as we may not have
1283 			 * all the fields present
1284 			 */
1285 			if (d->bd_flags & BPF_COMP_ENABLED) {
1286 				struct bpf_comp_hdr *hcp;
1287 
1288 				hcp = (struct bpf_comp_hdr *)(void *)p;
1289 
1290 				if (hcp->bh_complen != 0) {
1291 					p += BPF_WORDALIGN(hcp->bh_hdrlen + hcp->bh_caplen);
1292 					continue;
1293 				}
1294 			}
1295 
1296 			pktaphdr = (struct pktap_header *)(void *)
1297 			    (p + BPF_WORDALIGN(hp->bh_hdrlen));
1298 
1299 			bpf_finalize_pktap(hp, pktaphdr);
1300 
1301 			p += BPF_WORDALIGN(hp->bh_hdrlen + hp->bh_caplen);
1302 		}
1303 	}
1304 
1305 	/*
1306 	 * Move data from hold buffer into user space.
1307 	 * We know the entire buffer is transferred since
1308 	 * we checked above that the read buffer is bpf_bufsize bytes.
1309 	 */
1310 	error = UIOMOVE(hbuf, hbuf_len, UIO_READ, uio);
1311 
1312 	lck_mtx_lock(bpf_mlock);
1313 	/*
1314 	 * Make sure device is still opened
1315 	 */
1316 	if ((d->bd_flags & BPF_CLOSING) != 0) {
1317 		bpf_release_d(d);
1318 		lck_mtx_unlock(bpf_mlock);
1319 		return ENXIO;
1320 	}
1321 
1322 	d->bd_hbuf_read = 0;
1323 	d->bd_fbuf = d->bd_hbuf;
1324 	d->bd_hbuf = NULL;
1325 	d->bd_hlen = 0;
1326 	d->bd_hcnt = 0;
1327 	wakeup((caddr_t)d);
1328 
1329 	bpf_release_d(d);
1330 	lck_mtx_unlock(bpf_mlock);
1331 	return error;
1332 }
1333 
1334 /*
1335  * If there are processes sleeping on this descriptor, wake them up.
1336  */
1337 static void
bpf_wakeup(struct bpf_d * d)1338 bpf_wakeup(struct bpf_d *d)
1339 {
1340 	if (d->bd_state == BPF_WAITING) {
1341 		bpf_stop_timer(d);
1342 		d->bd_state = BPF_IDLE;
1343 	}
1344 	wakeup((caddr_t)d);
1345 	if (d->bd_async && d->bd_sig && d->bd_sigio) {
1346 		pgsigio(d->bd_sigio, d->bd_sig);
1347 	}
1348 
1349 	selwakeup(&d->bd_sel);
1350 	if ((d->bd_flags & BPF_KNOTE)) {
1351 		KNOTE(&d->bd_sel.si_note, 1);
1352 	}
1353 }
1354 
1355 static void
bpf_timed_out(void * arg,__unused void * dummy)1356 bpf_timed_out(void *arg, __unused void *dummy)
1357 {
1358 	struct bpf_d *d = (struct bpf_d *)arg;
1359 
1360 	lck_mtx_lock(bpf_mlock);
1361 	if (d->bd_state == BPF_WAITING) {
1362 		/*
1363 		 * There's a select or kqueue waiting for this; if there's
1364 		 * now stuff to read, wake it up.
1365 		 */
1366 		d->bd_state = BPF_TIMED_OUT;
1367 		if (d->bd_slen != 0) {
1368 			bpf_wakeup(d);
1369 		}
1370 	} else if (d->bd_state == BPF_DRAINING) {
1371 		/*
1372 		 * A close is waiting for this to finish.
1373 		 * Mark it as finished, and wake the close up.
1374 		 */
1375 		d->bd_state = BPF_IDLE;
1376 		bpf_wakeup(d);
1377 	}
1378 	lck_mtx_unlock(bpf_mlock);
1379 }
1380 
1381 /* keep in sync with bpf_movein above: */
1382 #define MAX_DATALINK_HDR_LEN    (sizeof(struct firewire_header))
1383 
1384 int
bpfwrite(dev_t dev,struct uio * uio,__unused int ioflag)1385 bpfwrite(dev_t dev, struct uio *uio, __unused int ioflag)
1386 {
1387 	struct bpf_d *d;
1388 	struct ifnet *ifp;
1389 	struct mbuf *m = NULL;
1390 	int error;
1391 	char              dst_buf[SOCKADDR_HDR_LEN + MAX_DATALINK_HDR_LEN];
1392 	int datlen = 0;
1393 	int bif_dlt;
1394 	int bd_hdrcmplt;
1395 
1396 	lck_mtx_lock(bpf_mlock);
1397 
1398 	d = bpf_dtab[minor(dev)];
1399 	if (d == NULL || d == BPF_DEV_RESERVED ||
1400 	    (d->bd_flags & BPF_CLOSING) != 0) {
1401 		lck_mtx_unlock(bpf_mlock);
1402 		return ENXIO;
1403 	}
1404 
1405 	bpf_acquire_d(d);
1406 
1407 	++d->bd_wcount;
1408 
1409 	if (d->bd_bif == 0) {
1410 		++d->bd_wdcount;
1411 		bpf_release_d(d);
1412 		lck_mtx_unlock(bpf_mlock);
1413 		return ENXIO;
1414 	}
1415 
1416 	ifp = d->bd_bif->bif_ifp;
1417 
1418 	if ((ifp->if_flags & IFF_UP) == 0) {
1419 		++d->bd_wdcount;
1420 		bpf_release_d(d);
1421 		lck_mtx_unlock(bpf_mlock);
1422 		return ENETDOWN;
1423 	}
1424 	if (uio_resid(uio) == 0) {
1425 		bpf_release_d(d);
1426 		lck_mtx_unlock(bpf_mlock);
1427 		return 0;
1428 	}
1429 	((struct sockaddr *)dst_buf)->sa_len = sizeof(dst_buf);
1430 
1431 	/*
1432 	 * fix for PR-6849527
1433 	 * geting variables onto stack before dropping lock for bpf_movein()
1434 	 */
1435 	bif_dlt = (int)d->bd_bif->bif_dlt;
1436 	bd_hdrcmplt  = d->bd_hdrcmplt;
1437 
1438 	/* bpf_movein allocating mbufs; drop lock */
1439 	lck_mtx_unlock(bpf_mlock);
1440 
1441 	error = bpf_movein(uio, bif_dlt, &m,
1442 	    bd_hdrcmplt ? NULL : (struct sockaddr *)dst_buf,
1443 	    &datlen);
1444 
1445 	/* take the lock again */
1446 	lck_mtx_lock(bpf_mlock);
1447 	if (error != 0) {
1448 		++d->bd_wdcount;
1449 		bpf_release_d(d);
1450 		lck_mtx_unlock(bpf_mlock);
1451 		return error;
1452 	}
1453 
1454 	/* verify the device is still open */
1455 	if ((d->bd_flags & BPF_CLOSING) != 0) {
1456 		++d->bd_wdcount;
1457 		bpf_release_d(d);
1458 		lck_mtx_unlock(bpf_mlock);
1459 		m_freem(m);
1460 		return ENXIO;
1461 	}
1462 
1463 	if (d->bd_bif == NULL) {
1464 		++d->bd_wdcount;
1465 		bpf_release_d(d);
1466 		lck_mtx_unlock(bpf_mlock);
1467 		m_free(m);
1468 		return ENXIO;
1469 	}
1470 
1471 	if ((unsigned)datlen > ifp->if_mtu) {
1472 		++d->bd_wdcount;
1473 		bpf_release_d(d);
1474 		lck_mtx_unlock(bpf_mlock);
1475 		m_freem(m);
1476 		return EMSGSIZE;
1477 	}
1478 
1479 	bpf_set_packet_service_class(m, d->bd_traffic_class);
1480 
1481 	lck_mtx_unlock(bpf_mlock);
1482 
1483 	/*
1484 	 * The driver frees the mbuf.
1485 	 */
1486 	if (d->bd_hdrcmplt) {
1487 		if (d->bd_bif->bif_send) {
1488 			error = d->bd_bif->bif_send(ifp, d->bd_bif->bif_dlt, m);
1489 		} else {
1490 			error = dlil_output(ifp, 0, m, NULL, NULL, 1, NULL);
1491 		}
1492 	} else {
1493 		error = dlil_output(ifp, PF_INET, m, NULL,
1494 		    (struct sockaddr *)dst_buf, 0, NULL);
1495 	}
1496 
1497 	lck_mtx_lock(bpf_mlock);
1498 	if (error != 0) {
1499 		++d->bd_wdcount;
1500 	}
1501 	bpf_release_d(d);
1502 	lck_mtx_unlock(bpf_mlock);
1503 
1504 	return error;
1505 }
1506 
1507 /*
1508  * Reset a descriptor by flushing its packet buffer and clearing the
1509  * receive and drop counts.
1510  */
1511 static void
reset_d(struct bpf_d * d)1512 reset_d(struct bpf_d *d)
1513 {
1514 	if (d->bd_hbuf_read != 0) {
1515 		panic("resetting buffers during read");
1516 	}
1517 
1518 	if (d->bd_hbuf) {
1519 		/* Free the hold buffer. */
1520 		d->bd_fbuf = d->bd_hbuf;
1521 		d->bd_hbuf = NULL;
1522 	}
1523 	d->bd_slen = 0;
1524 	d->bd_hlen = 0;
1525 	d->bd_scnt = 0;
1526 	d->bd_hcnt = 0;
1527 	d->bd_rcount = 0;
1528 	d->bd_dcount = 0;
1529 	d->bd_fcount = 0;
1530 	d->bd_wcount = 0;
1531 	d->bd_wdcount = 0;
1532 
1533 	d->bd_prev_slen = 0;
1534 }
1535 
1536 static struct bpf_d *
bpf_get_device_from_uuid(uuid_t uuid)1537 bpf_get_device_from_uuid(uuid_t uuid)
1538 {
1539 	unsigned int i;
1540 
1541 	for (i = 0; i < nbpfilter; i++) {
1542 		struct bpf_d *d = bpf_dtab[i];
1543 
1544 		if (d == NULL || d == BPF_DEV_RESERVED ||
1545 		    (d->bd_flags & BPF_CLOSING) != 0) {
1546 			continue;
1547 		}
1548 		if (uuid_compare(uuid, d->bd_uuid) == 0) {
1549 			return d;
1550 		}
1551 	}
1552 
1553 	return NULL;
1554 }
1555 
1556 /*
 * The BIOCSETUP command "atomically" attaches to the interface and
 * copies the buffer from another interface. This minimizes the risk
 * of missing packets because this is done while holding
 * the BPF global lock.
1561  */
/*
 * Attach d_to to ifp and copy the capture state from the descriptor
 * identified by uuid_from, all under bpf_mlock so no packets are lost
 * in between.  Both descriptors must have been opened by the same
 * process and share the same buffer size.
 * Returns 0 or an errno (ENOENT, EACCES, ENXIO, EINVAL, ...).
 */
static int
bpf_setup(struct bpf_d *d_to, uuid_t uuid_from, ifnet_t ifp)
{
	struct bpf_d *d_from;
	int error = 0;

	LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);

	/*
	 * Sanity checks
	 */
	d_from = bpf_get_device_from_uuid(uuid_from);
	if (d_from == NULL) {
		error = ENOENT;
		os_log_error(OS_LOG_DEFAULT,
		    "%s: uuids not found error %d",
		    __func__, error);
		return error;
	}
	/* Only the process that opened the source may copy from it. */
	if (d_from->bd_opened_by != d_to->bd_opened_by) {
		error = EACCES;
		os_log_error(OS_LOG_DEFAULT,
		    "%s: processes not matching error %d",
		    __func__, error);
		return error;
	}

	/*
	 * Prevent any read while copying
	 * (claim the hold buffer of both descriptors; the msleep()s
	 * below drop and retake bpf_mlock)
	 */
	while (d_to->bd_hbuf_read != 0) {
		msleep((caddr_t)d_to, bpf_mlock, PRINET, __func__, NULL);
	}
	d_to->bd_hbuf_read = 1;

	while (d_from->bd_hbuf_read != 0) {
		msleep((caddr_t)d_from, bpf_mlock, PRINET, __func__, NULL);
	}
	d_from->bd_hbuf_read = 1;

	/*
	 * Verify the devices have not been closed
	 * (a close may have started while we slept above)
	 */
	if (d_to->bd_flags & BPF_CLOSING) {
		error = ENXIO;
		os_log_error(OS_LOG_DEFAULT,
		    "%s: d_to is closing error %d",
		    __func__, error);
		goto done;
	}
	if (d_from->bd_flags & BPF_CLOSING) {
		error = ENXIO;
		os_log_error(OS_LOG_DEFAULT,
		    "%s: d_from is closing error %d",
		    __func__, error);
		goto done;
	}

	/*
	 * For now require the same buffer size
	 */
	if (d_from->bd_bufsize != d_to->bd_bufsize) {
		error = EINVAL;
		os_log_error(OS_LOG_DEFAULT,
		    "%s: bufsizes not matching error %d",
		    __func__, error);
		goto done;
	}

	/*
	 * Copy relevant options and flags
	 */
	d_to->bd_flags = d_from->bd_flags & (BPF_EXTENDED_HDR | BPF_WANT_PKTAP |
	    BPF_FINALIZE_PKTAP | BPF_TRUNCATE | BPF_PKTHDRV2 |
	    BPF_COMP_REQ | BPF_COMP_ENABLED);

	d_to->bd_headdrop = d_from->bd_headdrop;

	/*
	 * Allocate and copy the buffers
	 */
	error = bpf_allocbufs(d_to);
	if (error != 0) {
		goto done;
	}

	/*
	 * Make sure the buffers are setup as expected by bpf_setif()
	 */
	ASSERT(d_to->bd_hbuf == NULL);
	ASSERT(d_to->bd_sbuf != NULL);
	ASSERT(d_to->bd_fbuf != NULL);

	/*
	 * Copy the buffers and update the pointers and counts
	 */
	memcpy(d_to->bd_sbuf, d_from->bd_sbuf, d_from->bd_slen);
	d_to->bd_slen = d_from->bd_slen;
	d_to->bd_scnt = d_from->bd_scnt;

	if (d_from->bd_hbuf != NULL) {
		/* Source has a hold buffer: replicate it into d_to's free buffer. */
		d_to->bd_hbuf = d_to->bd_fbuf;
		d_to->bd_fbuf = NULL;
		memcpy(d_to->bd_hbuf, d_from->bd_hbuf, d_from->bd_hlen);
	}
	d_to->bd_hlen = d_from->bd_hlen;
	d_to->bd_hcnt = d_from->bd_hcnt;

	if (d_to->bd_flags & BPF_COMP_REQ) {
		/* Replicate the header-compression state as well. */
		ASSERT(d_to->bd_prev_sbuf != NULL);
		ASSERT(d_to->bd_prev_fbuf != NULL);

		d_to->bd_prev_slen = d_from->bd_prev_slen;
		ASSERT(d_to->bd_prev_slen <= BPF_HDR_COMP_LEN_MAX);
		memcpy(d_to->bd_prev_sbuf, d_from->bd_prev_sbuf, BPF_HDR_COMP_LEN_MAX);
	}

	d_to->bd_bcs = d_from->bd_bcs;

	/*
	 * Attach to the interface:
	 * - don't reset the buffers
	 * - we already prevent reads
	 * - the buffers are already allocated
	 */
	error = bpf_setif(d_to, ifp, false, true, true);
	if (error != 0) {
		os_log_error(OS_LOG_DEFAULT,
		    "%s: bpf_setif() failed error %d",
		    __func__, error);
		goto done;
	}
done:
	/* Release both hold-buffer claims and wake any waiting readers. */
	d_from->bd_hbuf_read = 0;
	wakeup((caddr_t)d_from);

	d_to->bd_hbuf_read = 0;
	wakeup((caddr_t)d_to);

	return error;
}
1703 
1704 #if DEVELOPMENT || DEBUG
1705 #define BPF_IOC_LIST \
1706 	X(FIONREAD) \
1707 	X(SIOCGIFADDR) \
1708 	X(BIOCGBLEN) \
1709 	X(BIOCSBLEN) \
1710 	X(BIOCSETF32) \
1711 	X(BIOCSETFNR32) \
1712 	X(BIOCSETF64) \
1713 	X(BIOCSETFNR64) \
1714 	X(BIOCFLUSH) \
1715 	X(BIOCPROMISC) \
1716 	X(BIOCGDLT) \
1717 	X(BIOCGDLTLIST) \
1718 	X(BIOCSDLT) \
1719 	X(BIOCGETIF) \
1720 	X(BIOCSETIF) \
1721 	X(BIOCSRTIMEOUT32) \
1722 	X(BIOCSRTIMEOUT64) \
1723 	X(BIOCGRTIMEOUT32) \
1724 	X(BIOCGRTIMEOUT64) \
1725 	X(BIOCGSTATS) \
1726 	X(BIOCIMMEDIATE) \
1727 	X(BIOCVERSION) \
1728 	X(BIOCGHDRCMPLT) \
1729 	X(BIOCSHDRCMPLT) \
1730 	X(BIOCGSEESENT) \
1731 	X(BIOCSSEESENT) \
1732 	X(BIOCSETTC) \
1733 	X(BIOCGETTC) \
1734 	X(FIONBIO) \
1735 	X(FIOASYNC) \
1736 	X(BIOCSRSIG) \
1737 	X(BIOCGRSIG) \
1738 	X(BIOCSEXTHDR) \
1739 	X(BIOCGIFATTACHCOUNT) \
1740 	X(BIOCGWANTPKTAP) \
1741 	X(BIOCSWANTPKTAP) \
1742 	X(BIOCSHEADDROP) \
1743 	X(BIOCGHEADDROP) \
1744 	X(BIOCSTRUNCATE) \
1745 	X(BIOCGETUUID) \
1746 	X(BIOCSETUP) \
1747 	X(BIOCSPKTHDRV2) \
1748 	X(BIOCGHDRCOMP) \
1749 	X(BIOCSHDRCOMP) \
1750 	X(BIOCGHDRCOMPSTATS) \
1751 	X(BIOCGHDRCOMPON)
1752 
/*
 * Log the symbolic name of a BPF ioctl command (DEVELOPMENT/DEBUG only).
 * Commands listed in BPF_IOC_LIST are resolved to their macro name via
 * the X-macro expansion; anything else is logged as a hex value.
 */
static void
log_bpf_ioctl_str(struct bpf_d *d, u_long cmd)
{
	const char *p = NULL;
	char str[32];

/* Expand each listed ioctl into: case x: p = "x"; ... */
#define X(x) case x: { p = #x ; printf("%s\n", p); break; }
	switch (cmd) {
		BPF_IOC_LIST
	}
#undef X
	if (p == NULL) {
		/* Command not in BPF_IOC_LIST: format the raw value. */
		snprintf(str, sizeof(str), "0x%08x", (unsigned int)cmd);
		p = str;
	}
	os_log(OS_LOG_DEFAULT, "bpfioctl bpf%u %s",
	    d->bd_dev_minor, p);
}
1771 #endif /* DEVELOPMENT || DEBUG */
1772 
1773 /*
1774  *  FIONREAD		Check for read packet available.
1775  *  SIOCGIFADDR		Get interface address - convenient hook to driver.
1776  *  BIOCGBLEN		Get buffer len [for read()].
1777  *  BIOCSETF		Set ethernet read filter.
1778  *  BIOCFLUSH		Flush read packet buffer.
1779  *  BIOCPROMISC		Put interface into promiscuous mode.
1780  *  BIOCGDLT		Get link layer type.
1781  *  BIOCGETIF		Get interface name.
1782  *  BIOCSETIF		Set interface.
1783  *  BIOCSRTIMEOUT	Set read timeout.
1784  *  BIOCGRTIMEOUT	Get read timeout.
1785  *  BIOCGSTATS		Get packet stats.
1786  *  BIOCIMMEDIATE	Set immediate mode.
1787  *  BIOCVERSION		Get filter language version.
1788  *  BIOCGHDRCMPLT	Get "header already complete" flag
1789  *  BIOCSHDRCMPLT	Set "header already complete" flag
1790  *  BIOCGSEESENT	Get "see packets sent" flag
1791  *  BIOCSSEESENT	Set "see packets sent" flag
1792  *  BIOCSETTC		Set traffic class.
1793  *  BIOCGETTC		Get traffic class.
1794  *  BIOCSEXTHDR		Set "extended header" flag
1795  *  BIOCSHEADDROP	Drop head of the buffer if user is not reading
1796  *  BIOCGHEADDROP	Get "head-drop" flag
1797  */
1798 /* ARGSUSED */
1799 int
bpfioctl(dev_t dev,u_long cmd,caddr_t addr,__unused int flags,struct proc * p)1800 bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags,
1801     struct proc *p)
1802 {
1803 	struct bpf_d *d;
1804 	int error = 0;
1805 	u_int int_arg;
1806 	struct ifreq ifr = {};
1807 
1808 	lck_mtx_lock(bpf_mlock);
1809 
1810 	d = bpf_dtab[minor(dev)];
1811 	if (d == NULL || d == BPF_DEV_RESERVED ||
1812 	    (d->bd_flags & BPF_CLOSING) != 0) {
1813 		lck_mtx_unlock(bpf_mlock);
1814 		return ENXIO;
1815 	}
1816 
1817 	bpf_acquire_d(d);
1818 
1819 	if (d->bd_state == BPF_WAITING) {
1820 		bpf_stop_timer(d);
1821 	}
1822 	d->bd_state = BPF_IDLE;
1823 
1824 #if DEVELOPMENT || DEBUG
1825 	if (bpf_debug > 0) {
1826 		log_bpf_ioctl_str(d, cmd);
1827 	}
1828 #endif /* DEVELOPMENT || DEBUG */
1829 
1830 	switch (cmd) {
1831 	default:
1832 		error = EINVAL;
1833 		break;
1834 
1835 	/*
1836 	 * Check for read packet available.
1837 	 */
1838 	case FIONREAD:                  /* int */
1839 	{
1840 		int n;
1841 
1842 		n = d->bd_slen;
1843 		if (d->bd_hbuf && d->bd_hbuf_read == 0) {
1844 			n += d->bd_hlen;
1845 		}
1846 
1847 		bcopy(&n, addr, sizeof(n));
1848 		break;
1849 	}
1850 
1851 	case SIOCGIFADDR:               /* struct ifreq */
1852 	{
1853 		struct ifnet *ifp;
1854 
1855 		if (d->bd_bif == 0) {
1856 			error = EINVAL;
1857 		} else {
1858 			ifp = d->bd_bif->bif_ifp;
1859 			error = ifnet_ioctl(ifp, 0, cmd, addr);
1860 		}
1861 		break;
1862 	}
1863 
1864 	/*
1865 	 * Get buffer len [for read()].
1866 	 */
1867 	case BIOCGBLEN:                 /* u_int */
1868 		bcopy(&d->bd_bufsize, addr, sizeof(u_int));
1869 		break;
1870 
1871 	/*
1872 	 * Set buffer length.
1873 	 */
1874 	case BIOCSBLEN: {               /* u_int */
1875 		u_int size;
1876 
1877 		if (d->bd_bif != 0 || (d->bd_flags & BPF_DETACHING)) {
1878 			/*
1879 			 * Interface already attached, unable to change buffers
1880 			 */
1881 			error = EINVAL;
1882 			break;
1883 		}
1884 		bcopy(addr, &size, sizeof(size));
1885 
1886 		if (size > BPF_BUFSIZE_CAP) {
1887 			d->bd_bufsize = BPF_BUFSIZE_CAP;
1888 
1889 			os_log_info(OS_LOG_DEFAULT,
1890 			    "bpf%d BIOCSBLEN capped to %u from %u",
1891 			    minor(dev), d->bd_bufsize, size);
1892 		} else if (size < BPF_MINBUFSIZE) {
1893 			d->bd_bufsize = BPF_MINBUFSIZE;
1894 
1895 			os_log_info(OS_LOG_DEFAULT,
1896 			    "bpf%d BIOCSBLEN bumped to %u from %u",
1897 			    minor(dev), d->bd_bufsize, size);
1898 		} else {
1899 			d->bd_bufsize = size;
1900 
1901 			os_log_info(OS_LOG_DEFAULT,
1902 			    "bpf%d BIOCSBLEN %u",
1903 			    minor(dev), d->bd_bufsize);
1904 		}
1905 
1906 		/* It's a read/write ioctl */
1907 		bcopy(&d->bd_bufsize, addr, sizeof(u_int));
1908 		break;
1909 	}
1910 	/*
1911 	 * Set link layer read filter.
1912 	 */
1913 	case BIOCSETF32:
1914 	case BIOCSETFNR32: {            /* struct bpf_program32 */
1915 		struct bpf_program32 prg32;
1916 
1917 		bcopy(addr, &prg32, sizeof(prg32));
1918 		error = bpf_setf(d, prg32.bf_len,
1919 		    CAST_USER_ADDR_T(prg32.bf_insns), cmd);
1920 		break;
1921 	}
1922 
1923 	case BIOCSETF64:
1924 	case BIOCSETFNR64: {            /* struct bpf_program64 */
1925 		struct bpf_program64 prg64;
1926 
1927 		bcopy(addr, &prg64, sizeof(prg64));
1928 		error = bpf_setf(d, prg64.bf_len, CAST_USER_ADDR_T(prg64.bf_insns), cmd);
1929 		break;
1930 	}
1931 
1932 	/*
1933 	 * Flush read packet buffer.
1934 	 */
1935 	case BIOCFLUSH:
1936 		while (d->bd_hbuf_read != 0) {
1937 			msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading",
1938 			    NULL);
1939 		}
1940 		if ((d->bd_flags & BPF_CLOSING) != 0) {
1941 			error = ENXIO;
1942 			break;
1943 		}
1944 		reset_d(d);
1945 		break;
1946 
1947 	/*
1948 	 * Put interface into promiscuous mode.
1949 	 */
1950 	case BIOCPROMISC:
1951 		if (d->bd_bif == 0) {
1952 			/*
1953 			 * No interface attached yet.
1954 			 */
1955 			error = EINVAL;
1956 			break;
1957 		}
1958 		if (d->bd_promisc == 0) {
1959 			lck_mtx_unlock(bpf_mlock);
1960 			error = ifnet_set_promiscuous(d->bd_bif->bif_ifp, 1);
1961 			lck_mtx_lock(bpf_mlock);
1962 			if (error == 0) {
1963 				d->bd_promisc = 1;
1964 			}
1965 		}
1966 		break;
1967 
1968 	/*
1969 	 * Get device parameters.
1970 	 */
1971 	case BIOCGDLT:                  /* u_int */
1972 		if (d->bd_bif == 0) {
1973 			error = EINVAL;
1974 		} else {
1975 			bcopy(&d->bd_bif->bif_dlt, addr, sizeof(u_int));
1976 		}
1977 		break;
1978 
1979 	/*
1980 	 * Get a list of supported data link types.
1981 	 */
1982 	case BIOCGDLTLIST:              /* struct bpf_dltlist */
1983 		if (d->bd_bif == NULL) {
1984 			error = EINVAL;
1985 		} else {
1986 			error = bpf_getdltlist(d, addr, p);
1987 		}
1988 		break;
1989 
1990 	/*
1991 	 * Set data link type.
1992 	 */
1993 	case BIOCSDLT:                  /* u_int */
1994 		if (d->bd_bif == NULL) {
1995 			error = EINVAL;
1996 		} else {
1997 			u_int dlt;
1998 
1999 			bcopy(addr, &dlt, sizeof(dlt));
2000 
2001 			if (dlt == DLT_PKTAP &&
2002 			    !(d->bd_flags & BPF_WANT_PKTAP)) {
2003 				dlt = DLT_RAW;
2004 			}
2005 			error = bpf_setdlt(d, dlt);
2006 		}
2007 		break;
2008 
2009 	/*
2010 	 * Get interface name.
2011 	 */
2012 	case BIOCGETIF:                 /* struct ifreq */
2013 		if (d->bd_bif == 0) {
2014 			error = EINVAL;
2015 		} else {
2016 			struct ifnet *const ifp = d->bd_bif->bif_ifp;
2017 
2018 			snprintf(((struct ifreq *)(void *)addr)->ifr_name,
2019 			    sizeof(ifr.ifr_name), "%s", if_name(ifp));
2020 		}
2021 		break;
2022 
2023 	/*
2024 	 * Set interface.
2025 	 */
2026 	case BIOCSETIF: {               /* struct ifreq */
2027 		ifnet_t ifp;
2028 
2029 		bcopy(addr, &ifr, sizeof(ifr));
2030 		ifr.ifr_name[IFNAMSIZ - 1] = '\0';
2031 		ifp = ifunit(ifr.ifr_name);
2032 		if (ifp == NULL) {
2033 			error = ENXIO;
2034 		} else {
2035 			error = bpf_setif(d, ifp, true, false, false);
2036 		}
2037 		break;
2038 	}
2039 
2040 	/*
2041 	 * Set read timeout.
2042 	 */
2043 	case BIOCSRTIMEOUT32: {         /* struct user32_timeval */
2044 		struct user32_timeval _tv;
2045 		struct timeval tv;
2046 
2047 		bcopy(addr, &_tv, sizeof(_tv));
2048 		tv.tv_sec  = _tv.tv_sec;
2049 		tv.tv_usec = _tv.tv_usec;
2050 
2051 		/*
2052 		 * Subtract 1 tick from tvtohz() since this isn't
2053 		 * a one-shot timer.
2054 		 */
2055 		if ((error = itimerfix(&tv)) == 0) {
2056 			d->bd_rtout = tvtohz(&tv) - 1;
2057 		}
2058 		break;
2059 	}
2060 
2061 	case BIOCSRTIMEOUT64: {         /* struct user64_timeval */
2062 		struct user64_timeval _tv;
2063 		struct timeval tv;
2064 
2065 		bcopy(addr, &_tv, sizeof(_tv));
2066 		tv.tv_sec  = (__darwin_time_t)_tv.tv_sec;
2067 		tv.tv_usec = _tv.tv_usec;
2068 
2069 		/*
2070 		 * Subtract 1 tick from tvtohz() since this isn't
2071 		 * a one-shot timer.
2072 		 */
2073 		if ((error = itimerfix(&tv)) == 0) {
2074 			d->bd_rtout = tvtohz(&tv) - 1;
2075 		}
2076 		break;
2077 	}
2078 
2079 	/*
2080 	 * Get read timeout.
2081 	 */
2082 	case BIOCGRTIMEOUT32: {         /* struct user32_timeval */
2083 		struct user32_timeval tv;
2084 
2085 		bzero(&tv, sizeof(tv));
2086 		tv.tv_sec = d->bd_rtout / hz;
2087 		tv.tv_usec = (d->bd_rtout % hz) * tick;
2088 		bcopy(&tv, addr, sizeof(tv));
2089 		break;
2090 	}
2091 
2092 	case BIOCGRTIMEOUT64: {         /* struct user64_timeval */
2093 		struct user64_timeval tv;
2094 
2095 		bzero(&tv, sizeof(tv));
2096 		tv.tv_sec = d->bd_rtout / hz;
2097 		tv.tv_usec = (d->bd_rtout % hz) * tick;
2098 		bcopy(&tv, addr, sizeof(tv));
2099 		break;
2100 	}
2101 
2102 	/*
2103 	 * Get packet stats.
2104 	 */
2105 	case BIOCGSTATS: {              /* struct bpf_stat */
2106 		struct bpf_stat bs;
2107 
2108 		bzero(&bs, sizeof(bs));
2109 		bs.bs_recv = (u_int)d->bd_rcount;
2110 		bs.bs_drop = (u_int)d->bd_dcount;
2111 		bcopy(&bs, addr, sizeof(bs));
2112 		break;
2113 	}
2114 
2115 	/*
2116 	 * Set immediate mode.
2117 	 */
2118 	case BIOCIMMEDIATE:             /* u_int */
2119 		d->bd_immediate = *(u_char *)(void *)addr;
2120 		break;
2121 
2122 	case BIOCVERSION: {             /* struct bpf_version */
2123 		struct bpf_version bv;
2124 
2125 		bzero(&bv, sizeof(bv));
2126 		bv.bv_major = BPF_MAJOR_VERSION;
2127 		bv.bv_minor = BPF_MINOR_VERSION;
2128 		bcopy(&bv, addr, sizeof(bv));
2129 		break;
2130 	}
2131 
2132 	/*
2133 	 * Get "header already complete" flag
2134 	 */
2135 	case BIOCGHDRCMPLT:             /* u_int */
2136 		bcopy(&d->bd_hdrcmplt, addr, sizeof(u_int));
2137 		break;
2138 
2139 	/*
2140 	 * Set "header already complete" flag
2141 	 */
2142 	case BIOCSHDRCMPLT:             /* u_int */
2143 		bcopy(addr, &int_arg, sizeof(int_arg));
2144 		d->bd_hdrcmplt = int_arg ? 1 : 0;
2145 		break;
2146 
2147 	/*
2148 	 * Get "see sent packets" flag
2149 	 */
2150 	case BIOCGSEESENT:              /* u_int */
2151 		bcopy(&d->bd_seesent, addr, sizeof(u_int));
2152 		break;
2153 
2154 	/*
2155 	 * Set "see sent packets" flag
2156 	 */
2157 	case BIOCSSEESENT:              /* u_int */
2158 		bcopy(addr, &d->bd_seesent, sizeof(u_int));
2159 		break;
2160 
2161 	/*
2162 	 * Set traffic service class
2163 	 */
2164 	case BIOCSETTC: {               /* int */
2165 		int tc;
2166 
2167 		bcopy(addr, &tc, sizeof(int));
2168 		error = bpf_set_traffic_class(d, tc);
2169 		break;
2170 	}
2171 
2172 	/*
2173 	 * Get traffic service class
2174 	 */
2175 	case BIOCGETTC:                 /* int */
2176 		bcopy(&d->bd_traffic_class, addr, sizeof(int));
2177 		break;
2178 
2179 	case FIONBIO:           /* Non-blocking I/O; int */
2180 		break;
2181 
2182 	case FIOASYNC:          /* Send signal on receive packets; int */
2183 		bcopy(addr, &d->bd_async, sizeof(int));
2184 		break;
2185 
2186 	case BIOCSRSIG: {       /* Set receive signal; u_int */
2187 		u_int sig;
2188 
2189 		bcopy(addr, &sig, sizeof(u_int));
2190 
2191 		if (sig >= NSIG) {
2192 			error = EINVAL;
2193 		} else {
2194 			d->bd_sig = sig;
2195 		}
2196 		break;
2197 	}
2198 	case BIOCGRSIG:                 /* u_int */
2199 		bcopy(&d->bd_sig, addr, sizeof(u_int));
2200 		break;
2201 
2202 	case BIOCSEXTHDR:               /* u_int */
2203 		bcopy(addr, &int_arg, sizeof(int_arg));
2204 		if (int_arg) {
2205 			d->bd_flags |= BPF_EXTENDED_HDR;
2206 		} else {
2207 			d->bd_flags &= ~BPF_EXTENDED_HDR;
2208 		}
2209 		break;
2210 
2211 	case BIOCGIFATTACHCOUNT: {              /* struct ifreq */
2212 		ifnet_t ifp;
2213 		struct bpf_if *bp;
2214 
2215 		bcopy(addr, &ifr, sizeof(ifr));
2216 		ifr.ifr_name[IFNAMSIZ - 1] = '\0';
2217 		ifp = ifunit(ifr.ifr_name);
2218 		if (ifp == NULL) {
2219 			error = ENXIO;
2220 			break;
2221 		}
2222 		ifr.ifr_intval = 0;
2223 		for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
2224 			struct bpf_d *bpf_d;
2225 
2226 			if (bp->bif_ifp == NULL || bp->bif_ifp != ifp) {
2227 				continue;
2228 			}
2229 			for (bpf_d = bp->bif_dlist; bpf_d;
2230 			    bpf_d = bpf_d->bd_next) {
2231 				ifr.ifr_intval += 1;
2232 			}
2233 		}
2234 		bcopy(&ifr, addr, sizeof(ifr));
2235 		break;
2236 	}
2237 	case BIOCGWANTPKTAP:                    /* u_int */
2238 		int_arg = d->bd_flags & BPF_WANT_PKTAP ? 1 : 0;
2239 		bcopy(&int_arg, addr, sizeof(int_arg));
2240 		break;
2241 
2242 	case BIOCSWANTPKTAP:                    /* u_int */
2243 		bcopy(addr, &int_arg, sizeof(int_arg));
2244 		if (int_arg) {
2245 			d->bd_flags |= BPF_WANT_PKTAP;
2246 		} else {
2247 			d->bd_flags &= ~BPF_WANT_PKTAP;
2248 		}
2249 		break;
2250 
2251 	case BIOCSHEADDROP:
2252 		bcopy(addr, &int_arg, sizeof(int_arg));
2253 		d->bd_headdrop = int_arg ? 1 : 0;
2254 		break;
2255 
2256 	case BIOCGHEADDROP:
2257 		bcopy(&d->bd_headdrop, addr, sizeof(int));
2258 		break;
2259 
2260 	case BIOCSTRUNCATE:
2261 		bcopy(addr, &int_arg, sizeof(int_arg));
2262 		if (int_arg) {
2263 			d->bd_flags |=  BPF_TRUNCATE;
2264 		} else {
2265 			d->bd_flags &= ~BPF_TRUNCATE;
2266 		}
2267 		break;
2268 
2269 	case BIOCGETUUID:
2270 		bcopy(&d->bd_uuid, addr, sizeof(uuid_t));
2271 		break;
2272 
2273 	case BIOCSETUP: {
2274 		struct bpf_setup_args bsa;
2275 		ifnet_t ifp;
2276 
2277 		bcopy(addr, &bsa, sizeof(struct bpf_setup_args));
2278 		bsa.bsa_ifname[IFNAMSIZ - 1] = 0;
2279 		ifp = ifunit(bsa.bsa_ifname);
2280 		if (ifp == NULL) {
2281 			error = ENXIO;
2282 			os_log_error(OS_LOG_DEFAULT,
2283 			    "%s: ifnet not found for %s error %d",
2284 			    __func__, bsa.bsa_ifname, error);
2285 			break;
2286 		}
2287 
2288 		error = bpf_setup(d, bsa.bsa_uuid, ifp);
2289 		break;
2290 	}
2291 	case BIOCSPKTHDRV2:
2292 		bcopy(addr, &int_arg, sizeof(int_arg));
2293 		if (int_arg != 0) {
2294 			d->bd_flags |= BPF_PKTHDRV2;
2295 		} else {
2296 			d->bd_flags &= ~BPF_PKTHDRV2;
2297 		}
2298 		break;
2299 
2300 	case BIOCGPKTHDRV2:
2301 		int_arg = d->bd_flags & BPF_PKTHDRV2 ? 1 : 0;
2302 		bcopy(&int_arg, addr, sizeof(int_arg));
2303 		break;
2304 
2305 	case BIOCGHDRCOMP:
2306 		int_arg = d->bd_flags & BPF_COMP_REQ ? 1 : 0;
2307 		bcopy(&int_arg, addr, sizeof(int_arg));
2308 		break;
2309 
2310 	case BIOCSHDRCOMP:
2311 		bcopy(addr, &int_arg, sizeof(int_arg));
2312 		if (int_arg != 0 && int_arg != 1) {
2313 			return EINVAL;
2314 		}
2315 		if (d->bd_bif != 0 || (d->bd_flags & BPF_DETACHING)) {
2316 			/*
2317 			 * Interface already attached, unable to change buffers
2318 			 */
2319 			error = EINVAL;
2320 			break;
2321 		}
2322 		if (int_arg != 0) {
2323 			d->bd_flags |= BPF_COMP_REQ;
2324 			if (bpf_hdr_comp_enable != 0) {
2325 				d->bd_flags |= BPF_COMP_ENABLED;
2326 			}
2327 		} else {
2328 			d->bd_flags &= ~(BPF_COMP_REQ | BPF_COMP_ENABLED);
2329 		}
2330 		break;
2331 
2332 	case BIOCGHDRCOMPON:
2333 		int_arg = d->bd_flags & BPF_COMP_ENABLED ? 1 : 0;
2334 		bcopy(&int_arg, addr, sizeof(int_arg));
2335 		break;
2336 
2337 	case BIOCGHDRCOMPSTATS: {
2338 		struct bpf_comp_stats bcs = {};
2339 
2340 		bcs = d->bd_bcs;
2341 
2342 		bcopy(&bcs, addr, sizeof(bcs));
2343 		break;
2344 	}
2345 	}
2346 
2347 	bpf_release_d(d);
2348 	lck_mtx_unlock(bpf_mlock);
2349 
2350 	return error;
2351 }
2352 
2353 /*
2354  * Set d's packet filter program to fp.  If this file already has a filter,
2355  * free it and replace it.  Returns EINVAL for bogus requests.
2356  */
static int
bpf_setf(struct bpf_d *d, u_int bf_len, user_addr_t bf_insns,
    u_long cmd)
{
	struct bpf_insn *fcode, *old;
	u_int flen, size;

	/*
	 * Wait for any in-flight hold-buffer read to finish before
	 * swapping the filter (bpf_mlock is held by the caller and is
	 * dropped/reacquired by msleep).
	 */
	while (d->bd_hbuf_read != 0) {
		msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
	}

	/* The descriptor may have started closing while we slept. */
	if ((d->bd_flags & BPF_CLOSING) != 0) {
		return ENXIO;
	}

	old = d->bd_filter;
	if (bf_insns == USER_ADDR_NULL) {
		/* NULL program clears the filter; length must then be 0. */
		if (bf_len != 0) {
			return EINVAL;
		}
		d->bd_filter = NULL;
		reset_d(d);
		if (old != 0) {
			kfree_data_addr(old);
		}
		return 0;
	}
	flen = bf_len;
	if (flen > BPF_MAXINSNS) {
		return EINVAL;
	}

	/* flen is bounded by BPF_MAXINSNS above, keeping 'size' sane */
	size = flen * sizeof(struct bpf_insn);
	fcode = (struct bpf_insn *) kalloc_data(size, Z_WAITOK | Z_ZERO);
	if (fcode == NULL) {
		return ENOMEM;
	}
	/* Install the new program only if it copies in and validates. */
	if (copyin(bf_insns, (caddr_t)fcode, size) == 0 &&
	    bpf_validate(fcode, (int)flen)) {
		d->bd_filter = fcode;

		/*
		 * Only the plain SETF commands flush captured data;
		 * presumably other callers want to keep the buffers
		 * intact — TODO confirm against the ioctl dispatch.
		 */
		if (cmd == BIOCSETF32 || cmd == BIOCSETF64) {
			reset_d(d);
		}

		/* Free the replaced program after the new one is in place. */
		if (old != 0) {
			kfree_data_addr(old);
		}

		return 0;
	}
	kfree_data(fcode, size);
	return EINVAL;
}
2411 
2412 /*
2413  * Detach a file from its current interface (if attached at all) and attach
2414  * to the interface indicated by the name stored in ifr.
2415  * Return an errno or 0.
2416  */
static int
bpf_setif(struct bpf_d *d, ifnet_t theywant, bool do_reset, bool has_hbuf_read,
    bool has_bufs_allocated)
{
	struct bpf_if *bp;
	int error;

	/*
	 * Unless the caller already owns the hold-buffer read state,
	 * wait out any in-flight read before changing interfaces
	 * (bpf_mlock is held by the caller).
	 */
	while (d->bd_hbuf_read != 0 && !has_hbuf_read) {
		msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
	}

	/* The descriptor may have started closing while we slept. */
	if ((d->bd_flags & BPF_CLOSING) != 0) {
		return ENXIO;
	}

	/*
	 * Look through attached interfaces for the named one.
	 */
	for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
		struct ifnet *ifp = bp->bif_ifp;

		if (ifp == 0 || ifp != theywant) {
			continue;
		}
		/*
		 * Do not use DLT_PKTAP, unless requested explicitly
		 */
		if (bp->bif_dlt == DLT_PKTAP && !(d->bd_flags & BPF_WANT_PKTAP)) {
			continue;
		}
		/*
		 * Skip the coprocessor interface
		 */
		if (!intcoproc_unrestricted && IFNET_IS_INTCOPROC(ifp)) {
			continue;
		}
		/*
		 * We found the requested interface.
		 * Allocate the packet buffers.
		 */
		if (has_bufs_allocated == false) {
			error = bpf_allocbufs(d);
			if (error != 0) {
				return error;
			}
		}
		/*
		 * Detach if attached to something else.
		 */
		if (bp != d->bd_bif) {
			if (d->bd_bif != NULL) {
				if (bpf_detachd(d) != 0) {
					return ENXIO;
				}
			}
			if (bpf_attachd(d, bp) != 0) {
				return ENXIO;
			}
		}
		if (do_reset) {
			reset_d(d);
		}
		os_log(OS_LOG_DEFAULT, "bpf%u attached to %s",
		    d->bd_dev_minor, if_name(theywant));
		return 0;
	}
	/* Not found. */
	return ENXIO;
}
2486 
2487 /*
2488  * Get a list of available data link type of the interface.
2489  */
static int
bpf_getdltlist(struct bpf_d *d, caddr_t addr, struct proc *p)
{
	u_int           n;
	int             error;
	struct ifnet    *ifp;
	struct bpf_if   *bp;
	user_addr_t     dlist;
	struct bpf_dltlist bfl;

	bcopy(addr, &bfl, sizeof(bfl));
	/* The user list pointer is laid out differently for 32/64-bit callers. */
	if (proc_is64bit(p)) {
		dlist = (user_addr_t)bfl.bfl_u.bflu_pad;
	} else {
		dlist = CAST_USER_ADDR_T(bfl.bfl_u.bflu_list);
	}

	ifp = d->bd_bif->bif_ifp;
	n = 0;
	error = 0;

	/* Count (and, if a buffer was supplied, copy out) each DLT for ifp. */
	for (bp = bpf_iflist; bp; bp = bp->bif_next) {
		if (bp->bif_ifp != ifp) {
			continue;
		}
		/*
		 * Do not use DLT_PKTAP, unless requested explicitly
		 */
		if (bp->bif_dlt == DLT_PKTAP && !(d->bd_flags & BPF_WANT_PKTAP)) {
			continue;
		}
		if (dlist != USER_ADDR_NULL) {
			/* user-supplied array too small for another entry */
			if (n >= bfl.bfl_len) {
				return ENOMEM;
			}
			error = copyout(&bp->bif_dlt, dlist,
			    sizeof(bp->bif_dlt));
			if (error != 0) {
				break;
			}
			dlist += sizeof(bp->bif_dlt);
		}
		n++;
	}
	/* Report the number of entries back to the caller. */
	bfl.bfl_len = n;
	bcopy(&bfl, addr, sizeof(bfl));

	return error;
}
2539 
2540 /*
2541  * Set the data link type of a BPF instance.
2542  */
static int
bpf_setdlt(struct bpf_d *d, uint32_t dlt)
{
	int error, opromisc;
	struct ifnet *ifp;
	struct bpf_if *bp;

	/* Nothing to do if the descriptor already uses this DLT. */
	if (d->bd_bif->bif_dlt == dlt) {
		return 0;
	}

	/* Wait out any in-flight hold-buffer read (bpf_mlock held). */
	while (d->bd_hbuf_read != 0) {
		msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
	}

	/* The descriptor may have started closing while we slept. */
	if ((d->bd_flags & BPF_CLOSING) != 0) {
		return ENXIO;
	}

	/* Find an attachment point on the same interface with the new DLT. */
	ifp = d->bd_bif->bif_ifp;
	for (bp = bpf_iflist; bp; bp = bp->bif_next) {
		if (bp->bif_ifp == ifp && bp->bif_dlt == dlt) {
			/*
			 * Do not use DLT_PKTAP, unless requested explicitly
			 */
			if (bp->bif_dlt == DLT_PKTAP &&
			    !(d->bd_flags & BPF_WANT_PKTAP)) {
				continue;
			}
			break;
		}
	}
	if (bp != NULL) {
		opromisc = d->bd_promisc;
		if (bpf_detachd(d) != 0) {
			return ENXIO;
		}
		error = bpf_attachd(d, bp);
		if (error != 0) {
			os_log_error(OS_LOG_DEFAULT,
			    "bpf_setdlt: bpf%d bpf_attachd %s error %d",
			    d->bd_dev_minor, if_name(bp->bif_ifp),
			    error);
			return error;
		}
		reset_d(d);
		/*
		 * Restore the previous promiscuous setting; presumably
		 * bpf_detachd dropped it.  bpf_mlock is released around
		 * ifnet_set_promiscuous to respect the lock ordering
		 * documented in bpf_tap_imp.
		 */
		if (opromisc) {
			lck_mtx_unlock(bpf_mlock);
			error = ifnet_set_promiscuous(bp->bif_ifp, 1);
			lck_mtx_lock(bpf_mlock);
			if (error != 0) {
				os_log_error(OS_LOG_DEFAULT,
				    "bpf_setdlt: bpf%d ifpromisc %s error %d",
				    d->bd_dev_minor, if_name(bp->bif_ifp), error);
			} else {
				d->bd_promisc = 1;
			}
		}
	}
	return bp == NULL ? EINVAL : 0;
}
2604 
2605 static int
bpf_set_traffic_class(struct bpf_d * d,int tc)2606 bpf_set_traffic_class(struct bpf_d *d, int tc)
2607 {
2608 	int error = 0;
2609 
2610 	if (!SO_VALID_TC(tc)) {
2611 		error = EINVAL;
2612 	} else {
2613 		d->bd_traffic_class = tc;
2614 	}
2615 
2616 	return error;
2617 }
2618 
2619 static void
bpf_set_packet_service_class(struct mbuf * m,int tc)2620 bpf_set_packet_service_class(struct mbuf *m, int tc)
2621 {
2622 	if (!(m->m_flags & M_PKTHDR)) {
2623 		return;
2624 	}
2625 
2626 	VERIFY(SO_VALID_TC(tc));
2627 	(void) m_set_service_class(m, so_tc2msc(tc));
2628 }
2629 
2630 /*
2631  * Support for select()
2632  *
2633  * Return true iff the specific operation will not block indefinitely.
2634  * Otherwise, return false but make a note that a selwakeup() must be done.
2635  */
2636 int
bpfselect(dev_t dev,int which,void * wql,struct proc * p)2637 bpfselect(dev_t dev, int which, void * wql, struct proc *p)
2638 {
2639 	struct bpf_d *d;
2640 	int ret = 0;
2641 
2642 	lck_mtx_lock(bpf_mlock);
2643 
2644 	d = bpf_dtab[minor(dev)];
2645 	if (d == NULL || d == BPF_DEV_RESERVED ||
2646 	    (d->bd_flags & BPF_CLOSING) != 0) {
2647 		lck_mtx_unlock(bpf_mlock);
2648 		return ENXIO;
2649 	}
2650 
2651 	bpf_acquire_d(d);
2652 
2653 	if (d->bd_bif == NULL) {
2654 		bpf_release_d(d);
2655 		lck_mtx_unlock(bpf_mlock);
2656 		return ENXIO;
2657 	}
2658 
2659 	while (d->bd_hbuf_read != 0) {
2660 		msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
2661 	}
2662 
2663 	if ((d->bd_flags & BPF_CLOSING) != 0) {
2664 		bpf_release_d(d);
2665 		lck_mtx_unlock(bpf_mlock);
2666 		return ENXIO;
2667 	}
2668 
2669 	switch (which) {
2670 	case FREAD:
2671 		if (d->bd_hlen != 0 ||
2672 		    ((d->bd_immediate ||
2673 		    d->bd_state == BPF_TIMED_OUT) && d->bd_slen != 0)) {
2674 			ret = 1;         /* read has data to return */
2675 		} else {
2676 			/*
2677 			 * Read has no data to return.
2678 			 * Make the select wait, and start a timer if
2679 			 * necessary.
2680 			 */
2681 			selrecord(p, &d->bd_sel, wql);
2682 			bpf_start_timer(d);
2683 		}
2684 		break;
2685 
2686 	case FWRITE:
2687 		/* can't determine whether a write would block */
2688 		ret = 1;
2689 		break;
2690 	}
2691 
2692 	bpf_release_d(d);
2693 	lck_mtx_unlock(bpf_mlock);
2694 
2695 	return ret;
2696 }
2697 
2698 /*
2699  * Support for kevent() system call.  Register EVFILT_READ filters and
2700  * reject all others.
2701  */
/* EVFILT_READ knote operations for bpf devices. */
int bpfkqfilter(dev_t dev, struct knote *kn);
static void filt_bpfdetach(struct knote *);
static int filt_bpfread(struct knote *, long);
static int filt_bpftouch(struct knote *kn, struct kevent_qos_s *kev);
static int filt_bpfprocess(struct knote *kn, struct kevent_qos_s *kev);

/* Filter ops table handed to the kevent subsystem for bpf read events. */
SECURITY_READ_ONLY_EARLY(struct filterops) bpfread_filtops = {
	.f_isfd = 1,
	.f_detach = filt_bpfdetach,
	.f_event = filt_bpfread,
	.f_touch = filt_bpftouch,
	.f_process = filt_bpfprocess,
};
2715 
2716 static int
filt_bpfread_common(struct knote * kn,struct kevent_qos_s * kev,struct bpf_d * d)2717 filt_bpfread_common(struct knote *kn, struct kevent_qos_s *kev, struct bpf_d *d)
2718 {
2719 	int ready = 0;
2720 	int64_t data = 0;
2721 
2722 	if (d->bd_immediate) {
2723 		/*
2724 		 * If there's data in the hold buffer, it's the
2725 		 * amount of data a read will return.
2726 		 *
2727 		 * If there's no data in the hold buffer, but
2728 		 * there's data in the store buffer, a read will
2729 		 * immediately rotate the store buffer to the
2730 		 * hold buffer, the amount of data in the store
2731 		 * buffer is the amount of data a read will
2732 		 * return.
2733 		 *
2734 		 * If there's no data in either buffer, we're not
2735 		 * ready to read.
2736 		 */
2737 		data = (d->bd_hlen == 0 || d->bd_hbuf_read != 0 ?
2738 		    d->bd_slen : d->bd_hlen);
2739 		int64_t lowwat = knote_low_watermark(kn);
2740 		if (lowwat > d->bd_bufsize) {
2741 			lowwat = d->bd_bufsize;
2742 		}
2743 		ready = (data >= lowwat);
2744 	} else {
2745 		/*
2746 		 * If there's data in the hold buffer, it's the
2747 		 * amount of data a read will return.
2748 		 *
2749 		 * If there's no data in the hold buffer, but
2750 		 * there's data in the store buffer, if the
2751 		 * timer has expired a read will immediately
2752 		 * rotate the store buffer to the hold buffer,
2753 		 * so the amount of data in the store buffer is
2754 		 * the amount of data a read will return.
2755 		 *
2756 		 * If there's no data in either buffer, or there's
2757 		 * no data in the hold buffer and the timer hasn't
2758 		 * expired, we're not ready to read.
2759 		 */
2760 		data = ((d->bd_hlen == 0 || d->bd_hbuf_read != 0) &&
2761 		    d->bd_state == BPF_TIMED_OUT ? d->bd_slen : d->bd_hlen);
2762 		ready = (data > 0);
2763 	}
2764 	if (!ready) {
2765 		bpf_start_timer(d);
2766 	} else if (kev) {
2767 		knote_fill_kevent(kn, kev, data);
2768 	}
2769 
2770 	return ready;
2771 }
2772 
int
bpfkqfilter(dev_t dev, struct knote *kn)
{
	struct bpf_d *d;
	int res;

	/*
	 * Is this device a bpf?
	 */
	if (major(dev) != CDEV_MAJOR || kn->kn_filter != EVFILT_READ) {
		knote_set_error(kn, EINVAL);
		return 0;
	}

	lck_mtx_lock(bpf_mlock);

	d = bpf_dtab[minor(dev)];

	/* Reject unopened, reserved, closing, or unattached descriptors. */
	if (d == NULL || d == BPF_DEV_RESERVED ||
	    (d->bd_flags & BPF_CLOSING) != 0 ||
	    d->bd_bif == NULL) {
		lck_mtx_unlock(bpf_mlock);
		knote_set_error(kn, ENXIO);
		return 0;
	}

	/* Link the knote to the descriptor's selinfo under the lock. */
	kn->kn_hook = d;
	kn->kn_filtid = EVFILTID_BPFREAD;
	KNOTE_ATTACH(&d->bd_sel.si_note, kn);
	d->bd_flags |= BPF_KNOTE;

	/* capture the current state */
	res = filt_bpfread_common(kn, NULL, d);

	lck_mtx_unlock(bpf_mlock);

	return res;
}
2811 
2812 static void
filt_bpfdetach(struct knote * kn)2813 filt_bpfdetach(struct knote *kn)
2814 {
2815 	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2816 
2817 	lck_mtx_lock(bpf_mlock);
2818 	if (d->bd_flags & BPF_KNOTE) {
2819 		KNOTE_DETACH(&d->bd_sel.si_note, kn);
2820 		d->bd_flags &= ~BPF_KNOTE;
2821 	}
2822 	lck_mtx_unlock(bpf_mlock);
2823 }
2824 
2825 static int
filt_bpfread(struct knote * kn,long hint)2826 filt_bpfread(struct knote *kn, long hint)
2827 {
2828 #pragma unused(hint)
2829 	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2830 
2831 	return filt_bpfread_common(kn, NULL, d);
2832 }
2833 
2834 static int
filt_bpftouch(struct knote * kn,struct kevent_qos_s * kev)2835 filt_bpftouch(struct knote *kn, struct kevent_qos_s *kev)
2836 {
2837 	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2838 	int res;
2839 
2840 	lck_mtx_lock(bpf_mlock);
2841 
2842 	/* save off the lowat threshold and flag */
2843 	kn->kn_sdata = kev->data;
2844 	kn->kn_sfflags = kev->fflags;
2845 
2846 	/* output data will be re-generated here */
2847 	res = filt_bpfread_common(kn, NULL, d);
2848 
2849 	lck_mtx_unlock(bpf_mlock);
2850 
2851 	return res;
2852 }
2853 
2854 static int
filt_bpfprocess(struct knote * kn,struct kevent_qos_s * kev)2855 filt_bpfprocess(struct knote *kn, struct kevent_qos_s *kev)
2856 {
2857 	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2858 	int res;
2859 
2860 	lck_mtx_lock(bpf_mlock);
2861 	res = filt_bpfread_common(kn, kev, d);
2862 	lck_mtx_unlock(bpf_mlock);
2863 
2864 	return res;
2865 }
2866 
2867 /*
2868  * Copy data from an mbuf chain into a buffer.	This code is derived
2869  * from m_copydata in kern/uipc_mbuf.c.
2870  */
2871 static void
bpf_mcopy(struct mbuf * m,void * dst_arg,size_t len,size_t offset)2872 bpf_mcopy(struct mbuf *m, void *dst_arg, size_t len, size_t offset)
2873 {
2874 	u_int count;
2875 	u_char *dst;
2876 
2877 	dst = dst_arg;
2878 
2879 	while (offset >= m->m_len) {
2880 		offset -= m->m_len;
2881 		m = m->m_next;
2882 		if (m == NULL) {
2883 			panic("bpf_mcopy");
2884 		}
2885 		continue;
2886 	}
2887 
2888 	while (len > 0) {
2889 		if (m == NULL) {
2890 			panic("bpf_mcopy");
2891 		}
2892 		count = MIN(m->m_len - (u_int)offset, (u_int)len);
2893 		bcopy((u_char *)mbuf_data(m) + offset, dst, count);
2894 		m = m->m_next;
2895 		dst += count;
2896 		len -= count;
2897 		offset = 0;
2898 	}
2899 }
2900 
static inline void
bpf_tap_imp(
	ifnet_t         ifp,
	u_int32_t       dlt,
	struct bpf_packet *bpf_pkt,
	int             outbound)
{
	struct bpf_d    *d;
	u_int slen;
	struct bpf_if *bp;

	/*
	 * It's possible that we get here after the bpf descriptor has been
	 * detached from the interface; in such a case we simply return.
	 * Lock ordering is important since we can be called asynchronously
	 * (from IOKit) to process an inbound packet; when that happens
	 * we would have been holding its "gateLock" and will be acquiring
	 * "bpf_mlock" upon entering this routine.  Due to that, we release
	 * "bpf_mlock" prior to calling ifnet_set_promiscuous (which will
	 * acquire "gateLock" in the IOKit), in order to avoid a deadlock
	 * when a ifnet_set_promiscuous request simultaneously collides with
	 * an inbound packet being passed into the tap callback.
	 */
	lck_mtx_lock(bpf_mlock);
	if (ifp->if_bpf == NULL) {
		lck_mtx_unlock(bpf_mlock);
		return;
	}
	/* Find the attachment for this interface matching the DLT. */
	for (bp = ifp->if_bpf; bp != NULL; bp = bp->bif_next) {
		if (bp->bif_ifp != ifp) {
			/* wrong interface */
			bp = NULL;
			break;
		}
		if (dlt == 0 || bp->bif_dlt == dlt) {
			/* tapping default DLT or DLT matches */
			break;
		}
	}
	if (bp == NULL) {
		goto done;
	}
	/* Run the packet through every descriptor on this attachment. */
	for (d = bp->bif_dlist; d != NULL; d = d->bd_next) {
		struct bpf_packet *bpf_pkt_saved = bpf_pkt;
		struct bpf_packet bpf_pkt_tmp = {};
		struct pktap_header_buffer bpfp_header_tmp = {};

		/* skip outbound packets unless this descriptor wants them */
		if (outbound && !d->bd_seesent) {
			continue;
		}

		++d->bd_rcount;
		slen = bpf_filter(d->bd_filter, (u_char *)bpf_pkt,
		    (u_int)bpf_pkt->bpfp_total_length, 0);

		/* slen != 0 means the filter accepted (capture) length */
		if (slen != 0) {
			if (bp->bif_ifp->if_type == IFT_PKTAP &&
			    bp->bif_dlt == DLT_PKTAP) {
				if (d->bd_flags & BPF_TRUNCATE) {
					slen = min(slen, get_pkt_trunc_len(bpf_pkt));
				}
				/*
				 * Need to copy the bpf_pkt because the conversion
				 * to v2 pktap header modifies the content of the
				 * bpfp_header
				 */
				if ((d->bd_flags & BPF_PKTHDRV2) &&
				    bpf_pkt->bpfp_header_length <= sizeof(bpfp_header_tmp)) {
					bpf_pkt_tmp = *bpf_pkt;

					bpf_pkt = &bpf_pkt_tmp;

					memcpy(&bpfp_header_tmp, bpf_pkt->bpfp_header,
					    bpf_pkt->bpfp_header_length);

					bpf_pkt->bpfp_header = &bpfp_header_tmp;

					convert_to_pktap_header_to_v2(bpf_pkt,
					    !!(d->bd_flags & BPF_TRUNCATE));
				}
			}
			++d->bd_fcount;
			catchpacket(d, bpf_pkt, slen, outbound);
		}
		/* restore the shared packet for the next descriptor */
		bpf_pkt = bpf_pkt_saved;
	}

done:
	lck_mtx_unlock(bpf_mlock);
}
2991 
2992 static inline void
bpf_tap_mbuf(ifnet_t ifp,u_int32_t dlt,mbuf_t m,void * hdr,size_t hlen,int outbound)2993 bpf_tap_mbuf(
2994 	ifnet_t         ifp,
2995 	u_int32_t       dlt,
2996 	mbuf_t          m,
2997 	void*           hdr,
2998 	size_t          hlen,
2999 	int             outbound)
3000 {
3001 	struct bpf_packet bpf_pkt;
3002 	struct mbuf *m0;
3003 
3004 	if (ifp->if_bpf == NULL) {
3005 		/* quickly check without taking lock */
3006 		return;
3007 	}
3008 	bpf_pkt.bpfp_type = BPF_PACKET_TYPE_MBUF;
3009 	bpf_pkt.bpfp_mbuf = m;
3010 	bpf_pkt.bpfp_total_length = 0;
3011 	for (m0 = m; m0 != NULL; m0 = m0->m_next) {
3012 		bpf_pkt.bpfp_total_length += m0->m_len;
3013 	}
3014 	bpf_pkt.bpfp_header = hdr;
3015 	if (hdr != NULL) {
3016 		bpf_pkt.bpfp_total_length += hlen;
3017 		bpf_pkt.bpfp_header_length = hlen;
3018 	} else {
3019 		bpf_pkt.bpfp_header_length = 0;
3020 	}
3021 	bpf_tap_imp(ifp, dlt, &bpf_pkt, outbound);
3022 }
3023 
3024 void
bpf_tap_out(ifnet_t ifp,u_int32_t dlt,mbuf_t m,void * hdr,size_t hlen)3025 bpf_tap_out(
3026 	ifnet_t         ifp,
3027 	u_int32_t       dlt,
3028 	mbuf_t          m,
3029 	void*           hdr,
3030 	size_t          hlen)
3031 {
3032 	bpf_tap_mbuf(ifp, dlt, m, hdr, hlen, 1);
3033 }
3034 
3035 void
bpf_tap_in(ifnet_t ifp,u_int32_t dlt,mbuf_t m,void * hdr,size_t hlen)3036 bpf_tap_in(
3037 	ifnet_t         ifp,
3038 	u_int32_t       dlt,
3039 	mbuf_t          m,
3040 	void*           hdr,
3041 	size_t          hlen)
3042 {
3043 	bpf_tap_mbuf(ifp, dlt, m, hdr, hlen, 0);
3044 }
3045 
/* Callback registered with Ethernet driver. */
static int
bpf_tap_callback(struct ifnet *ifp, struct mbuf *m)
{
	/*
	 * A NULL pkthdr receive interface is taken to mean the packet is
	 * being transmitted (outbound); presumably drivers only set
	 * rcvif on input — NOTE(review): confirm with the driver contract.
	 */
	bpf_tap_mbuf(ifp, 0, m, NULL, 0, mbuf_pkthdr_rcvif(m) == NULL);

	return 0;
}
3054 
3055 #if SKYWALK
3056 #include <skywalk/os_skywalk_private.h>
3057 
3058 static void
bpf_pktcopy(kern_packet_t pkt,void * dst_arg,size_t len,size_t offset)3059 bpf_pktcopy(kern_packet_t pkt, void *dst_arg, size_t len, size_t offset)
3060 {
3061 	kern_buflet_t   buflet = NULL;
3062 	size_t count;
3063 	u_char *dst;
3064 
3065 	dst = dst_arg;
3066 	while (len > 0) {
3067 		uint8_t         *addr;
3068 
3069 		u_int32_t       buflet_length;
3070 
3071 		buflet = kern_packet_get_next_buflet(pkt, buflet);
3072 		VERIFY(buflet != NULL);
3073 		addr = kern_buflet_get_data_address(buflet);
3074 		VERIFY(addr != NULL);
3075 		addr += kern_buflet_get_data_offset(buflet);
3076 		buflet_length = kern_buflet_get_data_length(buflet);
3077 		if (offset >= buflet_length) {
3078 			offset -= buflet_length;
3079 			continue;
3080 		}
3081 		count = MIN(buflet_length - offset, len);
3082 		bcopy((void *)(addr + offset), (void *)dst, count);
3083 		dst += count;
3084 		len -= count;
3085 		offset = 0;
3086 	}
3087 }
3088 
3089 static inline void
bpf_tap_packet(ifnet_t ifp,u_int32_t dlt,kern_packet_t pkt,void * hdr,size_t hlen,int outbound)3090 bpf_tap_packet(
3091 	ifnet_t         ifp,
3092 	u_int32_t       dlt,
3093 	kern_packet_t   pkt,
3094 	void*           hdr,
3095 	size_t          hlen,
3096 	int             outbound)
3097 {
3098 	struct bpf_packet       bpf_pkt;
3099 	struct mbuf *           m;
3100 
3101 	if (ifp->if_bpf == NULL) {
3102 		/* quickly check without taking lock */
3103 		return;
3104 	}
3105 	m = kern_packet_get_mbuf(pkt);
3106 	if (m != NULL) {
3107 		bpf_pkt.bpfp_type = BPF_PACKET_TYPE_MBUF;
3108 		bpf_pkt.bpfp_mbuf = m;
3109 		bpf_pkt.bpfp_total_length = m_length(m);
3110 	} else {
3111 		bpf_pkt.bpfp_type = BPF_PACKET_TYPE_PKT;
3112 		bpf_pkt.bpfp_pkt = pkt;
3113 		bpf_pkt.bpfp_total_length = kern_packet_get_data_length(pkt);
3114 	}
3115 	bpf_pkt.bpfp_header = hdr;
3116 	bpf_pkt.bpfp_header_length = hlen;
3117 	if (hlen != 0) {
3118 		bpf_pkt.bpfp_total_length += hlen;
3119 	}
3120 	bpf_tap_imp(ifp, dlt, &bpf_pkt, outbound);
3121 }
3122 
3123 void
bpf_tap_packet_out(ifnet_t ifp,u_int32_t dlt,kern_packet_t pkt,void * hdr,size_t hlen)3124 bpf_tap_packet_out(
3125 	ifnet_t         ifp,
3126 	u_int32_t       dlt,
3127 	kern_packet_t   pkt,
3128 	void*           hdr,
3129 	size_t          hlen)
3130 {
3131 	bpf_tap_packet(ifp, dlt, pkt, hdr, hlen, 1);
3132 }
3133 
3134 void
bpf_tap_packet_in(ifnet_t ifp,u_int32_t dlt,kern_packet_t pkt,void * hdr,size_t hlen)3135 bpf_tap_packet_in(
3136 	ifnet_t         ifp,
3137 	u_int32_t       dlt,
3138 	kern_packet_t   pkt,
3139 	void*           hdr,
3140 	size_t          hlen)
3141 {
3142 	bpf_tap_packet(ifp, dlt, pkt, hdr, hlen, 0);
3143 }
3144 
3145 #endif /* SKYWALK */
3146 
3147 static errno_t
bpf_copydata(struct bpf_packet * pkt,size_t off,size_t len,void * out_data)3148 bpf_copydata(struct bpf_packet *pkt, size_t off, size_t len, void* out_data)
3149 {
3150 	errno_t err = 0;
3151 	if (pkt->bpfp_type == BPF_PACKET_TYPE_MBUF) {
3152 		err = mbuf_copydata(pkt->bpfp_mbuf, off, len, out_data);
3153 #if SKYWALK
3154 	} else if (pkt->bpfp_type == BPF_PACKET_TYPE_PKT) {
3155 		err = kern_packet_copy_bytes(pkt->bpfp_pkt, off, len, out_data);
3156 #endif /* SKYWALK */
3157 	} else {
3158 		err = EINVAL;
3159 	}
3160 
3161 	return err;
3162 }
3163 
static void
copy_bpf_packet_offset(struct bpf_packet * pkt, void * dst, size_t len, size_t offset)
{
	/* copy the optional header */
	if (offset < pkt->bpfp_header_length) {
		size_t  count = MIN(len, pkt->bpfp_header_length - offset);
		caddr_t src = (caddr_t)pkt->bpfp_header;
		bcopy(src + offset, dst, count);
		len -= count;
		dst = (void *)((uintptr_t)dst + count);
		offset = 0;
	} else {
		/* offset starts beyond the header; rebase it to the payload */
		offset -= pkt->bpfp_header_length;
	}

	if (len == 0) {
		/* nothing past the header */
		return;
	}
	/* copy the packet */
	switch (pkt->bpfp_type) {
	case BPF_PACKET_TYPE_MBUF:
		bpf_mcopy(pkt->bpfp_mbuf, dst, len, offset);
		break;
#if SKYWALK
	case BPF_PACKET_TYPE_PKT:
		bpf_pktcopy(pkt->bpfp_pkt, dst, len, offset);
		break;
#endif /* SKYWALK */
	default:
		/* unknown representation: silently copy nothing */
		break;
	}
}
3197 
/* Convenience wrapper: copy 'len' bytes from the start of the packet. */
static void
copy_bpf_packet(struct bpf_packet * pkt, void * dst, size_t len)
{
	copy_bpf_packet_offset(pkt, dst, len, 0);
}
3203 
3204 static uint32_t
get_esp_trunc_len(__unused struct bpf_packet * pkt,__unused uint32_t off,const uint32_t remaining_caplen)3205 get_esp_trunc_len(__unused struct bpf_packet *pkt, __unused uint32_t off,
3206     const uint32_t remaining_caplen)
3207 {
3208 	/*
3209 	 * For some reason tcpdump expects to have one byte beyond the ESP header
3210 	 */
3211 	uint32_t trunc_len = ESP_HDR_SIZE + 1;
3212 
3213 	if (trunc_len > remaining_caplen) {
3214 		return remaining_caplen;
3215 	}
3216 
3217 	return trunc_len;
3218 }
3219 
3220 static uint32_t
get_isakmp_trunc_len(__unused struct bpf_packet * pkt,__unused uint32_t off,const uint32_t remaining_caplen)3221 get_isakmp_trunc_len(__unused struct bpf_packet *pkt, __unused uint32_t off,
3222     const uint32_t remaining_caplen)
3223 {
3224 	/*
3225 	 * Include the payload generic header
3226 	 */
3227 	uint32_t trunc_len = ISAKMP_HDR_SIZE;
3228 
3229 	if (trunc_len > remaining_caplen) {
3230 		return remaining_caplen;
3231 	}
3232 
3233 	return trunc_len;
3234 }
3235 
3236 static uint32_t
get_isakmp_natt_trunc_len(struct bpf_packet * pkt,uint32_t off,const uint32_t remaining_caplen)3237 get_isakmp_natt_trunc_len(struct bpf_packet *pkt, uint32_t off,
3238     const uint32_t remaining_caplen)
3239 {
3240 	int err = 0;
3241 	uint32_t trunc_len = 0;
3242 	char payload[remaining_caplen];
3243 
3244 	err = bpf_copydata(pkt, off, remaining_caplen, payload);
3245 	if (err != 0) {
3246 		return remaining_caplen;
3247 	}
3248 	/*
3249 	 * They are three cases:
3250 	 * - IKE: payload start with 4 bytes header set to zero before ISAKMP header
3251 	 * - keep alive: 1 byte payload
3252 	 * - otherwise it's ESP
3253 	 */
3254 	if (remaining_caplen >= 4 &&
3255 	    payload[0] == 0 && payload[1] == 0 &&
3256 	    payload[2] == 0 && payload[3] == 0) {
3257 		trunc_len = 4 + get_isakmp_trunc_len(pkt, off + 4, remaining_caplen - 4);
3258 	} else if (remaining_caplen == 1) {
3259 		trunc_len = 1;
3260 	} else {
3261 		trunc_len = get_esp_trunc_len(pkt, off, remaining_caplen);
3262 	}
3263 
3264 	if (trunc_len > remaining_caplen) {
3265 		return remaining_caplen;
3266 	}
3267 
3268 	return trunc_len;
3269 }
3270 
static uint32_t
get_udp_trunc_len(struct bpf_packet *pkt, uint32_t off, const uint32_t remaining_caplen)
{
	int err = 0;
	uint32_t trunc_len = sizeof(struct udphdr); /* By default no UDP payload */

	if (trunc_len >= remaining_caplen) {
		return remaining_caplen;
	}

	/* Peek at the UDP header to classify the payload by port. */
	struct udphdr udphdr;
	err = bpf_copydata(pkt, off, sizeof(struct udphdr), &udphdr);
	if (err != 0) {
		return remaining_caplen;
	}

	u_short sport, dport;

	/* ports are in network byte order in the header */
	sport = EXTRACT_SHORT(&udphdr.uh_sport);
	dport = EXTRACT_SHORT(&udphdr.uh_dport);

	if (dport == PORT_DNS || sport == PORT_DNS) {
		/*
		 * Full UDP payload for DNS
		 */
		trunc_len = remaining_caplen;
	} else if ((sport == PORT_BOOTPS && dport == PORT_BOOTPC) ||
	    (sport == PORT_BOOTPC && dport == PORT_BOOTPS)) {
		/*
		 * Full UDP payload for BOOTP and DHCP
		 */
		trunc_len = remaining_caplen;
	} else if (dport == PORT_ISAKMP && sport == PORT_ISAKMP) {
		/*
		 * Return the ISAKMP header
		 */
		trunc_len += get_isakmp_trunc_len(pkt, off + sizeof(struct udphdr),
		    remaining_caplen - sizeof(struct udphdr));
	} else if (dport == PORT_ISAKMP_NATT && sport == PORT_ISAKMP_NATT) {
		/* NAT-T encapsulation: classify what follows the UDP header */
		trunc_len += get_isakmp_natt_trunc_len(pkt, off + sizeof(struct udphdr),
		    remaining_caplen - sizeof(struct udphdr));
	}
	/* never report more than what was actually captured */
	if (trunc_len >= remaining_caplen) {
		return remaining_caplen;
	}

	return trunc_len;
}
3319 
3320 static uint32_t
get_tcp_trunc_len(struct bpf_packet * pkt,uint32_t off,const uint32_t remaining_caplen)3321 get_tcp_trunc_len(struct bpf_packet *pkt, uint32_t off, const uint32_t remaining_caplen)
3322 {
3323 	int err = 0;
3324 	uint32_t trunc_len = sizeof(struct tcphdr); /* By default no TCP payload */
3325 	if (trunc_len >= remaining_caplen) {
3326 		return remaining_caplen;
3327 	}
3328 
3329 	struct tcphdr tcphdr;
3330 	err = bpf_copydata(pkt, off, sizeof(struct tcphdr), &tcphdr);
3331 	if (err != 0) {
3332 		return remaining_caplen;
3333 	}
3334 
3335 	u_short sport, dport;
3336 	sport = EXTRACT_SHORT(&tcphdr.th_sport);
3337 	dport = EXTRACT_SHORT(&tcphdr.th_dport);
3338 
3339 	if (dport == PORT_DNS || sport == PORT_DNS) {
3340 		/*
3341 		 * Full TCP payload  for DNS
3342 		 */
3343 		trunc_len = remaining_caplen;
3344 	} else {
3345 		trunc_len = (uint16_t)(tcphdr.th_off << 2);
3346 	}
3347 	if (trunc_len >= remaining_caplen) {
3348 		return remaining_caplen;
3349 	}
3350 
3351 	return trunc_len;
3352 }
3353 
3354 static uint32_t
get_proto_trunc_len(uint8_t proto,struct bpf_packet * pkt,uint32_t off,const uint32_t remaining_caplen)3355 get_proto_trunc_len(uint8_t proto, struct bpf_packet *pkt, uint32_t off, const uint32_t remaining_caplen)
3356 {
3357 	uint32_t trunc_len;
3358 
3359 	switch (proto) {
3360 	case IPPROTO_ICMP: {
3361 		/*
3362 		 * Full IMCP payload
3363 		 */
3364 		trunc_len = remaining_caplen;
3365 		break;
3366 	}
3367 	case IPPROTO_ICMPV6: {
3368 		/*
3369 		 * Full IMCPV6 payload
3370 		 */
3371 		trunc_len = remaining_caplen;
3372 		break;
3373 	}
3374 	case IPPROTO_IGMP: {
3375 		/*
3376 		 * Full IGMP payload
3377 		 */
3378 		trunc_len = remaining_caplen;
3379 		break;
3380 	}
3381 	case IPPROTO_UDP: {
3382 		trunc_len = get_udp_trunc_len(pkt, off, remaining_caplen);
3383 		break;
3384 	}
3385 	case IPPROTO_TCP: {
3386 		trunc_len = get_tcp_trunc_len(pkt, off, remaining_caplen);
3387 		break;
3388 	}
3389 	case IPPROTO_ESP: {
3390 		trunc_len = get_esp_trunc_len(pkt, off, remaining_caplen);
3391 		break;
3392 	}
3393 	default: {
3394 		/*
3395 		 * By default we only include the IP header
3396 		 */
3397 		trunc_len = 0;
3398 		break;
3399 	}
3400 	}
3401 	if (trunc_len >= remaining_caplen) {
3402 		return remaining_caplen;
3403 	}
3404 
3405 	return trunc_len;
3406 }
3407 
3408 static uint32_t
get_ip_trunc_len(struct bpf_packet * pkt,uint32_t off,const uint32_t remaining_caplen)3409 get_ip_trunc_len(struct bpf_packet *pkt, uint32_t off, const uint32_t remaining_caplen)
3410 {
3411 	int err = 0;
3412 	uint32_t iplen = sizeof(struct ip);
3413 	if (iplen >= remaining_caplen) {
3414 		return remaining_caplen;
3415 	}
3416 
3417 	struct ip iphdr;
3418 	err =  bpf_copydata(pkt, off, sizeof(struct ip), &iphdr);
3419 	if (err != 0) {
3420 		return remaining_caplen;
3421 	}
3422 
3423 	uint8_t proto = 0;
3424 
3425 	iplen = (uint16_t)(iphdr.ip_hl << 2);
3426 	if (iplen >= remaining_caplen) {
3427 		return remaining_caplen;
3428 	}
3429 
3430 	proto = iphdr.ip_p;
3431 	iplen += get_proto_trunc_len(proto, pkt, off + iplen, remaining_caplen - iplen);
3432 
3433 	if (iplen >= remaining_caplen) {
3434 		return remaining_caplen;
3435 	}
3436 
3437 	return iplen;
3438 }
3439 
3440 static uint32_t
get_ip6_trunc_len(struct bpf_packet * pkt,uint32_t off,const uint32_t remaining_caplen)3441 get_ip6_trunc_len(struct bpf_packet *pkt, uint32_t off, const uint32_t remaining_caplen)
3442 {
3443 	int err = 0;
3444 	uint32_t iplen = sizeof(struct ip6_hdr);
3445 	if (iplen >= remaining_caplen) {
3446 		return remaining_caplen;
3447 	}
3448 
3449 	struct ip6_hdr ip6hdr;
3450 	err = bpf_copydata(pkt, off, sizeof(struct ip6_hdr), &ip6hdr);
3451 	if (err != 0) {
3452 		return remaining_caplen;
3453 	}
3454 
3455 	uint8_t proto = 0;
3456 
3457 	/*
3458 	 * TBD: process the extension headers
3459 	 */
3460 	proto = ip6hdr.ip6_nxt;
3461 	iplen += get_proto_trunc_len(proto, pkt, off + iplen, remaining_caplen - iplen);
3462 
3463 	if (iplen >= remaining_caplen) {
3464 		return remaining_caplen;
3465 	}
3466 
3467 	return iplen;
3468 }
3469 
3470 static uint32_t
get_ether_trunc_len(struct bpf_packet * pkt,uint32_t off,const uint32_t remaining_caplen)3471 get_ether_trunc_len(struct bpf_packet *pkt, uint32_t off, const uint32_t remaining_caplen)
3472 {
3473 	int err = 0;
3474 	uint32_t ethlen = sizeof(struct ether_header);
3475 	if (ethlen >= remaining_caplen) {
3476 		return remaining_caplen;
3477 	}
3478 
3479 	struct ether_header eh = {};
3480 	err = bpf_copydata(pkt, off, sizeof(struct ether_header), &eh);
3481 	if (err != 0) {
3482 		return remaining_caplen;
3483 	}
3484 
3485 	u_short type = EXTRACT_SHORT(&eh.ether_type);
3486 	/* Include full ARP */
3487 	if (type == ETHERTYPE_ARP) {
3488 		ethlen = remaining_caplen;
3489 	} else if (type == ETHERTYPE_IP) {
3490 		ethlen += get_ip_trunc_len(pkt, off + sizeof(struct ether_header),
3491 		    remaining_caplen - ethlen);
3492 	} else if (type == ETHERTYPE_IPV6) {
3493 		ethlen += get_ip6_trunc_len(pkt, off + sizeof(struct ether_header),
3494 		    remaining_caplen - ethlen);
3495 	} else {
3496 		ethlen = MIN(BPF_MIN_PKT_SIZE, remaining_caplen);
3497 	}
3498 	return ethlen;
3499 }
3500 
3501 
/*
 * Compute the truncated capture length of a packet that carries a pktap
 * header: the pktap header and any pseudo L2 header are always kept in
 * full, then the packet payload is truncated according to its
 * link/network/transport headers.  The result is clamped to
 * bpfp_total_length (with diagnostics if the computation ever exceeds it).
 */
static uint32_t
get_pkt_trunc_len(struct bpf_packet *pkt)
{
	struct pktap_header *pktap = (struct pktap_header *) (pkt->bpfp_header);
	uint32_t in_pkt_len = 0;
	uint32_t out_pkt_len = 0;
	uint32_t tlen = 0;
	uint32_t pre_adjust;    // L2 header not in mbuf or kern_packet

	// bpfp_total_length must contain the BPF packet header
	assert3u(pkt->bpfp_total_length, >=, pkt->bpfp_header_length);

	// The BPF packet header must contain the pktap header
	assert3u(pkt->bpfp_header_length, >=, pktap->pth_length);

	// The pre frame length (L2 header) must be contained in the packet
	assert3u(pkt->bpfp_total_length, >=, pktap->pth_length + pktap->pth_frame_pre_length);

	/*
	 * pktap->pth_frame_pre_length is the L2 header length and accounts
	 * for both L2 header in the packet payload and pre_adjust.
	 *
	 * pre_adjust represents an adjustment for a pseudo L2 header that is not
	 * part of packet payload -- not in the mbuf or kern_packet -- and comes
	 * just after the pktap header.
	 *
	 * pktap->pth_length is the size of the pktap header (exclude pre_adjust)
	 *
	 * pkt->bpfp_header_length is (pktap->pth_length + pre_adjust)
	 */
	pre_adjust = (uint32_t)(pkt->bpfp_header_length - pktap->pth_length);

	if (pktap->pth_iftype == IFT_ETHER) {
		/*
		 * We need to parse the Ethernet header to find the network layer
		 * protocol
		 */
		in_pkt_len = (uint32_t)(pkt->bpfp_total_length - pktap->pth_length - pre_adjust);

		out_pkt_len = get_ether_trunc_len(pkt, 0, in_pkt_len);

		tlen = pktap->pth_length + pre_adjust + out_pkt_len;
	} else {
		/*
		 * For other interface types, we only know to parse IPv4 and IPv6.
		 *
		 * To get to the beginning of the IPv4 or IPv6 packet, we need to to skip
		 * over the L2 header that is the actual packet payload (mbuf or kern_packet)
		 */
		uint32_t off;   // offset past the L2 header in the actual packet payload

		off = pktap->pth_frame_pre_length - pre_adjust;

		in_pkt_len = (uint32_t)(pkt->bpfp_total_length - pktap->pth_length - pktap->pth_frame_pre_length);

		if (pktap->pth_protocol_family == AF_INET) {
			out_pkt_len = get_ip_trunc_len(pkt, off, in_pkt_len);
		} else if (pktap->pth_protocol_family == AF_INET6) {
			out_pkt_len = get_ip6_trunc_len(pkt, off, in_pkt_len);
		} else {
			/* Unknown protocol family: keep a minimal amount of payload */
			out_pkt_len = MIN(BPF_MIN_PKT_SIZE, in_pkt_len);
		}
		tlen = pktap->pth_length + pktap->pth_frame_pre_length + out_pkt_len;
	}

	// Verify we do not overflow the buffer
	if (__improbable(tlen > pkt->bpfp_total_length)) {
		/* Panic in debug kernels (or with bpf_debug set), otherwise log and clamp */
		bool do_panic = bpf_debug != 0 ? true : false;

#if DEBUG
		do_panic = true;
#endif /* DEBUG */
		if (do_panic) {
			panic("%s:%d tlen %u > bpfp_total_length %lu bpfp_header_length %lu pth_frame_pre_length %u pre_adjust %u in_pkt_len %u out_pkt_len %u",
			    __func__, __LINE__,
			    tlen, pkt->bpfp_total_length, pkt->bpfp_header_length, pktap->pth_frame_pre_length, pre_adjust, in_pkt_len, out_pkt_len);
		} else {
			os_log(OS_LOG_DEFAULT,
			    "%s:%d tlen %u > bpfp_total_length %lu bpfp_header_length %lu pth_frame_pre_length %u pre_adjust %u in_pkt_len %u out_pkt_len %u",
			    __func__, __LINE__,
			    tlen, pkt->bpfp_total_length, pkt->bpfp_header_length, pktap->pth_frame_pre_length, pre_adjust, in_pkt_len, out_pkt_len);
		}
		bpf_trunc_overflow += 1;
		tlen = (uint32_t)pkt->bpfp_total_length;
	}

	return tlen;
}
3590 
/*
 * Return the length in bytes of the common prefix of buffers `a' and `b',
 * comparing whole 32-bit words only.  max_bytes is rounded down to a
 * multiple of 4; both buffers must be at least that large and suitably
 * aligned for 32-bit access.
 */
static uint8_t
get_common_prefix_size(const void *a, const void *b, uint8_t max_bytes)
{
	const uint32_t *wa = (const uint32_t *)a;
	const uint32_t *wb = (const uint32_t *)b;
	uint8_t nwords = max_bytes >> 2;
	uint8_t matched = 0;

	while (matched < nwords && wa[matched] == wb[matched]) {
		matched++;
	}
	return (uint8_t)(matched << 2);
}
3606 
/*
 * Move the packet data from interface memory (pkt) into the
 * store buffer, and wake up any pending reads when the buffer
 * fills up or the descriptor is in immediate mode.
 */
/*
 * Append one captured packet (BPF header + truncated payload) to the
 * store buffer of descriptor `d', rotating the store/hold buffers when
 * the packet does not fit.  `snaplen' bounds how much payload is kept;
 * `outbound' selects the direction flag in the extended header.
 * Calls bpf_wakeup() when a reader should be notified.
 */
static void
catchpacket(struct bpf_d *d, struct bpf_packet * pkt,
    u_int snaplen, int outbound)
{
	struct bpf_hdr *hp;
	struct bpf_hdr_ext *ehp;
	uint32_t totlen, curlen;
	uint32_t hdrlen, caplen;
	int do_wakeup = 0;
	u_char *payload;
	struct timeval tv;

	/* Per-packet header size depends on the header format this descriptor uses */
	hdrlen = (d->bd_flags & BPF_EXTENDED_HDR) ? d->bd_bif->bif_exthdrlen :
	    (d->bd_flags & BPF_COMP_REQ) ? d->bd_bif->bif_comphdrlen:
	    d->bd_bif->bif_hdrlen;
	/*
	 * Figure out how many bytes to move.  If the packet is
	 * greater or equal to the snapshot length, transfer that
	 * much.  Otherwise, transfer the whole packet (unless
	 * we hit the buffer size limit).
	 */
	totlen = hdrlen + MIN(snaplen, (int)pkt->bpfp_total_length);
	if (totlen > d->bd_bufsize) {
		totlen = d->bd_bufsize;
	}

	if (hdrlen > totlen) {
		return;
	}

	/*
	 * Round up the end of the previous packet to the next longword.
	 */
	curlen = BPF_WORDALIGN(d->bd_slen);
	if (curlen + totlen > d->bd_bufsize) {
		/*
		 * This packet will overflow the storage buffer.
		 * Rotate the buffers if we can, then wakeup any
		 * pending reads.
		 *
		 * We cannot rotate buffers if a read is in progress
		 * so drop the packet
		 */
		if (d->bd_hbuf_read != 0) {
			++d->bd_dcount;
			return;
		}

		if (d->bd_fbuf == NULL) {
			if (d->bd_headdrop == 0) {
				/*
				 * We haven't completed the previous read yet,
				 * so drop the packet.
				 */
				++d->bd_dcount;
				return;
			}
			/*
			 * Drop the hold buffer as it contains older packets
			 */
			d->bd_dcount += d->bd_hcnt;
			d->bd_fbuf = d->bd_hbuf;
			ROTATE_BUFFERS(d);
		} else {
			ROTATE_BUFFERS(d);
		}
		do_wakeup = 1;
		curlen = 0;
	} else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT) {
		/*
		 * Immediate mode is set, or the read timeout has
		 * already expired during a select call. A packet
		 * arrived, so the reader should be woken up.
		 */
		do_wakeup = 1;
	}

	/*
	 * Append the bpf header.
	 */
	microtime(&tv);
	if (d->bd_flags & BPF_EXTENDED_HDR) {
		/* Extended header: carries direction, flow, service class, etc. */
		ehp = (struct bpf_hdr_ext *)(void *)(d->bd_sbuf + curlen);
		memset(ehp, 0, sizeof(*ehp));
		ehp->bh_tstamp.tv_sec = (int)tv.tv_sec;
		ehp->bh_tstamp.tv_usec = tv.tv_usec;

		ehp->bh_datalen = (bpf_u_int32)pkt->bpfp_total_length;
		ehp->bh_hdrlen = (u_short)hdrlen;
		caplen = ehp->bh_caplen = totlen - hdrlen;
		payload = (u_char *)ehp + hdrlen;

		if (outbound) {
			ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_OUT;
		} else {
			ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_IN;
		}

		if (pkt->bpfp_type == BPF_PACKET_TYPE_MBUF) {
			/* Metadata comes from the mbuf packet header */
			struct mbuf *m = pkt->bpfp_mbuf;

			if (outbound) {
				/* only do lookups on non-raw INPCB */
				if ((m->m_pkthdr.pkt_flags & (PKTF_FLOW_ID |
				    PKTF_FLOW_LOCALSRC | PKTF_FLOW_RAWSOCK)) ==
				    (PKTF_FLOW_ID | PKTF_FLOW_LOCALSRC) &&
				    m->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
					ehp->bh_flowid = m->m_pkthdr.pkt_flowid;
					if (m->m_pkthdr.pkt_proto == IPPROTO_TCP) {
						ehp->bh_flags |= BPF_HDR_EXT_FLAGS_TCP;
					} else if (m->m_pkthdr.pkt_proto == IPPROTO_UDP) {
						ehp->bh_flags |= BPF_HDR_EXT_FLAGS_UDP;
					}
				}
				ehp->bh_svc = so_svc2tc(m->m_pkthdr.pkt_svc);
				if (m->m_pkthdr.pkt_flags & PKTF_TCP_REXMT) {
					ehp->bh_pktflags |= BPF_PKTFLAGS_TCP_REXMT;
				}
				if (m->m_pkthdr.pkt_flags & PKTF_START_SEQ) {
					ehp->bh_pktflags |= BPF_PKTFLAGS_START_SEQ;
				}
				if (m->m_pkthdr.pkt_flags & PKTF_LAST_PKT) {
					ehp->bh_pktflags |= BPF_PKTFLAGS_LAST_PKT;
				}
				if (m->m_pkthdr.pkt_flags & PKTF_VALID_UNSENT_DATA) {
					ehp->bh_unsent_bytes =
					    m->m_pkthdr.bufstatus_if;
					ehp->bh_unsent_snd =
					    m->m_pkthdr.bufstatus_sndbuf;
				}
			} else {
				if (m->m_pkthdr.pkt_flags & PKTF_WAKE_PKT) {
					ehp->bh_pktflags |= BPF_PKTFLAGS_WAKE_PKT;
				}
			}
#if SKYWALK
		} else {
			/* Metadata comes from the Skywalk kern_packet */
			kern_packet_t kern_pkt = pkt->bpfp_pkt;
			packet_flowid_t flowid = 0;

			if (outbound) {
				/*
				 * Note: pp_init() asserts that kern_packet_svc_class_t is equivalent
				 * to mbuf_svc_class_t
				 */
				ehp->bh_svc = so_svc2tc((mbuf_svc_class_t)kern_packet_get_service_class(kern_pkt));
				if (kern_packet_get_transport_retransmit(kern_pkt)) {
					ehp->bh_pktflags |= BPF_PKTFLAGS_TCP_REXMT;
				}
				if (kern_packet_get_transport_last_packet(kern_pkt)) {
					ehp->bh_pktflags |= BPF_PKTFLAGS_LAST_PKT;
				}
			} else {
				if (kern_packet_get_wake_flag(kern_pkt)) {
					ehp->bh_pktflags |= BPF_PKTFLAGS_WAKE_PKT;
				}
			}
			ehp->bh_trace_tag = kern_packet_get_trace_tag(kern_pkt);
			if (kern_packet_get_flowid(kern_pkt, &flowid) == 0) {
				ehp->bh_flowid = flowid;
			}
#endif /* SKYWALK */
		}
	} else {
		/* Classic (or compressed) header: timestamp and lengths only */
		hp = (struct bpf_hdr *)(void *)(d->bd_sbuf + curlen);
		memset(hp, 0, BPF_WORDALIGN(sizeof(*hp)));
		hp->bh_tstamp.tv_sec = (int)tv.tv_sec;
		hp->bh_tstamp.tv_usec = tv.tv_usec;
		hp->bh_datalen = (bpf_u_int32)pkt->bpfp_total_length;
		hp->bh_hdrlen = (u_short)hdrlen;
		caplen = hp->bh_caplen = totlen - hdrlen;
		payload = (u_char *)hp + hdrlen;
	}
	if (d->bd_flags & BPF_COMP_REQ) {
		/*
		 * Header compression: compare the leading bytes of this packet
		 * against the previous one and, when enabled, skip copying the
		 * common prefix into the store buffer.
		 */
		uint8_t common_prefix_size = 0;
		uint8_t copy_len = MIN((uint8_t)caplen, BPF_HDR_COMP_LEN_MAX);

		copy_bpf_packet(pkt, d->bd_prev_fbuf, copy_len);

		if (d->bd_prev_slen != 0) {
			common_prefix_size = get_common_prefix_size(d->bd_prev_fbuf,
			    d->bd_prev_sbuf, MIN(copy_len, d->bd_prev_slen));
		}

		if (d->bd_flags & BPF_COMP_ENABLED) {
			assert3u(caplen, >=, common_prefix_size);
			copy_bpf_packet_offset(pkt, payload, caplen - common_prefix_size,
			    common_prefix_size);
			d->bd_slen = curlen + totlen - common_prefix_size;
		} else {
			copy_bpf_packet(pkt, payload, caplen);
			d->bd_slen = curlen + totlen;
		}

		/*
		 * Update the caplen only if compression is enabled -- the caller
		 * must pay attention to bpf_hdr_comp_enable
		 */
		if (d->bd_flags & BPF_EXTENDED_HDR) {
			ehp->bh_complen = common_prefix_size;
			if (d->bd_flags & BPF_COMP_ENABLED) {
				ehp->bh_caplen -= common_prefix_size;
			}
		} else {
			struct bpf_comp_hdr *hcp;

			hcp = (struct bpf_comp_hdr *)(void *)(d->bd_sbuf + curlen);
			hcp->bh_complen = common_prefix_size;
			if (d->bd_flags & BPF_COMP_ENABLED) {
				hcp->bh_caplen -= common_prefix_size;
			}
		}

		/* Compression statistics */
		if (common_prefix_size > 0) {
			d->bd_bcs.bcs_total_compressed_prefix_size += common_prefix_size;
			if (common_prefix_size > d->bd_bcs.bcs_max_compressed_prefix_size) {
				d->bd_bcs.bcs_max_compressed_prefix_size = common_prefix_size;
			}
			d->bd_bcs.bcs_count_compressed_prefix += 1;
		} else {
			d->bd_bcs.bcs_count_no_common_prefix += 1;
		}

		/* The current compression buffer becomes the previous one */
		caddr_t tmp = d->bd_prev_sbuf;
		d->bd_prev_sbuf = d->bd_prev_fbuf;
		d->bd_prev_slen = copy_len;
		d->bd_prev_fbuf = tmp;
	} else {
		/*
		 * Copy the packet data into the store buffer and update its length.
		 */
		copy_bpf_packet(pkt, payload, caplen);
		d->bd_slen = curlen + totlen;
	}
	d->bd_scnt += 1;
	d->bd_bcs.bcs_total_hdr_size += pkt->bpfp_header_length;
	d->bd_bcs.bcs_total_size += caplen;

	if (do_wakeup) {
		bpf_wakeup(d);
	}
}
3855 
3856 
/*
 * Free any of the descriptor's capture buffers (store, hold, free) and
 * compression scratch buffers that are currently allocated.
 *
 * NOTE(review): bpf_allocbufs() calls this both on entry and on its
 * error path, which presumes kfree_data_addr() clears the freed pointer
 * so repeated calls do not double free -- confirm against the kalloc
 * macro definitions.
 */
static void
bpf_freebufs(struct bpf_d *d)
{
	if (d->bd_sbuf != NULL) {
		kfree_data_addr(d->bd_sbuf);
	}
	if (d->bd_hbuf != NULL) {
		kfree_data_addr(d->bd_hbuf);
	}
	if (d->bd_fbuf != NULL) {
		kfree_data_addr(d->bd_fbuf);
	}

	if (d->bd_prev_sbuf != NULL) {
		kfree_data_addr(d->bd_prev_sbuf);
	}
	if (d->bd_prev_fbuf != NULL) {
		kfree_data_addr(d->bd_prev_fbuf);
	}
}
/*
 * Allocate the free and store buffers (and, when header compression is
 * requested, the compression scratch buffers) for a descriptor, and
 * reset its buffer lengths and packet counts.
 */
static int
bpf_allocbufs(struct bpf_d *d)
{
	/* Release any buffers left over from a previous configuration */
	bpf_freebufs(d);

	d->bd_fbuf = (caddr_t) kalloc_data(d->bd_bufsize, Z_WAITOK | Z_ZERO);
	if (d->bd_fbuf == NULL) {
		goto nobufs;
	}

	d->bd_sbuf = (caddr_t) kalloc_data(d->bd_bufsize, Z_WAITOK | Z_ZERO);
	if (d->bd_sbuf == NULL) {
		goto nobufs;
	}
	/* Fresh buffers: nothing stored or held yet */
	d->bd_slen = 0;
	d->bd_hlen = 0;
	d->bd_scnt = 0;
	d->bd_hcnt = 0;

	d->bd_prev_slen = 0;
	if (d->bd_flags & BPF_COMP_REQ) {
		/* Scratch buffers used to find the common prefix between consecutive packets */
		d->bd_prev_sbuf = (caddr_t) kalloc_data(BPF_HDR_COMP_LEN_MAX, Z_WAITOK | Z_ZERO);
		if (d->bd_prev_sbuf == NULL) {
			goto nobufs;
		}
		d->bd_prev_fbuf = (caddr_t) kalloc_data(BPF_HDR_COMP_LEN_MAX, Z_WAITOK | Z_ZERO);
		if (d->bd_prev_fbuf == NULL) {
			goto nobufs;
		}
	}
	return 0;
nobufs:
	/* Unwind: release whatever was allocated before the failure */
	bpf_freebufs(d);
	return ENOMEM;
}
3915 
3916 /*
3917  * Free buffers currently in use by a descriptor.
3918  * Called on close.
3919  */
3920 static void
bpf_freed(struct bpf_d * d)3921 bpf_freed(struct bpf_d *d)
3922 {
3923 	/*
3924 	 * We don't need to lock out interrupts since this descriptor has
3925 	 * been detached from its interface and it yet hasn't been marked
3926 	 * free.
3927 	 */
3928 	if (d->bd_hbuf_read != 0) {
3929 		panic("bpf buffer freed during read");
3930 	}
3931 
3932 	bpf_freebufs(d);
3933 
3934 	if (d->bd_filter) {
3935 		kfree_data_addr(d->bd_filter);
3936 	}
3937 }
3938 
3939 /*
3940  * Attach an interface to bpf.	driverp is a pointer to a (struct bpf_if *)
3941  * in the driver's softc; dlt is the link layer type; hdrlen is the fixed
3942  * size of the link header (variable length headers not yet supported).
3943  */
void
bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
{
	/* Legacy entry point: attach with no send or tap callbacks */
	bpf_attach(ifp, dlt, hdrlen, NULL, NULL);
}
3949 
/*
 * Register a (interface, dlt) pair with BPF, with optional send and tap
 * callbacks.  Entries for the same interface are kept contiguous in
 * bpf_iflist; for Ethernet interfaces DLT_EN10MB is kept first so it is
 * the default DLT.  Returns EEXIST if the pair is already attached.
 */
errno_t
bpf_attach(
	ifnet_t ifp,
	u_int32_t dlt,
	u_int32_t hdrlen,
	bpf_send_func send,
	bpf_tap_func tap)
{
	struct bpf_if *bp;
	struct bpf_if *bp_new;
	struct bpf_if *bp_before_first = NULL;
	struct bpf_if *bp_first = NULL;
	struct bpf_if *bp_last = NULL;
	boolean_t found;

	/*
	 * Z_NOFAIL will cause a panic if the allocation fails
	 */
	bp_new = kalloc_type(struct bpf_if, Z_WAITOK | Z_NOFAIL | Z_ZERO);

	lck_mtx_lock(bpf_mlock);

	/*
	 * Check if this interface/dlt is already attached. Remember the
	 * first and last attachment for this interface, as well as the
	 * element before the first attachment.
	 */
	found = FALSE;
	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		if (bp->bif_ifp != ifp) {
			if (bp_first != NULL) {
				/* no more elements for this interface */
				break;
			}
			bp_before_first = bp;
		} else {
			if (bp->bif_dlt == dlt) {
				found = TRUE;
				break;
			}
			if (bp_first == NULL) {
				bp_first = bp;
			}
			bp_last = bp;
		}
	}
	if (found) {
		/* Already attached: release the lock and the unused allocation */
		lck_mtx_unlock(bpf_mlock);
		os_log_error(OS_LOG_DEFAULT,
		    "bpfattach - %s with dlt %d is already attached",
		    if_name(ifp), dlt);
		kfree_type(struct bpf_if, bp_new);
		return EEXIST;
	}

	bp_new->bif_ifp = ifp;
	bp_new->bif_dlt = dlt;
	bp_new->bif_send = send;
	bp_new->bif_tap = tap;

	if (bp_first == NULL) {
		/* No other entries for this ifp */
		bp_new->bif_next = bpf_iflist;
		bpf_iflist = bp_new;
	} else {
		if (ifnet_type(ifp) == IFT_ETHER && dlt == DLT_EN10MB) {
			/* Make this the first entry for this interface */
			if (bp_before_first != NULL) {
				/*  point the previous to us */
				bp_before_first->bif_next = bp_new;
			} else {
				/* we're the new head */
				bpf_iflist = bp_new;
			}
			bp_new->bif_next = bp_first;
		} else {
			/* Add this after the last entry for this interface */
			bp_new->bif_next = bp_last->bif_next;
			bp_last->bif_next = bp_new;
		}
	}

	/*
	 * Compute the length of the bpf header.  This is not necessarily
	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
	 * that the network layer header begins on a longword boundary (for
	 * performance reasons and to alleviate alignment restrictions).
	 */
	bp_new->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;
	bp_new->bif_exthdrlen = BPF_WORDALIGN(hdrlen +
	    sizeof(struct bpf_hdr_ext)) - hdrlen;
	bp_new->bif_comphdrlen = BPF_WORDALIGN(hdrlen +
	    sizeof(struct bpf_comp_hdr)) - hdrlen;

	/* Take a reference on the interface */
	ifnet_reference(ifp);

	lck_mtx_unlock(bpf_mlock);

	return 0;
}
4051 
4052 /*
4053  * Detach bpf from an interface.  This involves detaching each descriptor
4054  * associated with the interface, and leaving bd_bif NULL.  Notify each
4055  * descriptor as it's detached so that any sleepers wake up and get
4056  * ENXIO.
4057  */
void
bpfdetach(struct ifnet *ifp)
{
	struct bpf_if   *bp, *bp_prev, *bp_next;
	struct bpf_d    *d;

	if (bpf_debug != 0) {
		os_log(OS_LOG_DEFAULT, "%s: %s", __func__, if_name(ifp));
	}

	lck_mtx_lock(bpf_mlock);

	/*
	 * Build the list of devices attached to that interface
	 * that we need to free while keeping the lock to maintain
	 * the integrity of the interface list
	 */
	bp_prev = NULL;
	for (bp = bpf_iflist; bp != NULL; bp = bp_next) {
		/* Save the successor: bp may be unlinked below */
		bp_next = bp->bif_next;

		if (ifp != bp->bif_ifp) {
			bp_prev = bp;
			continue;
		}
		/* Unlink from the interface list */
		if (bp_prev) {
			bp_prev->bif_next = bp->bif_next;
		} else {
			bpf_iflist = bp->bif_next;
		}

		/* Detach the devices attached to the interface */
		while ((d = bp->bif_dlist) != NULL) {
			/*
			 * Take an extra reference to prevent the device
			 * from being freed when bpf_detachd() releases
			 * the reference for the interface list
			 */
			bpf_acquire_d(d);
			bpf_detachd(d);
			bpf_wakeup(d);
			bpf_release_d(d);
		}
		/* Drop the reference taken by bpf_attach() */
		ifnet_release(ifp);
	}

	lck_mtx_unlock(bpf_mlock);
}
4107 
4108 void
bpf_init(__unused void * unused)4109 bpf_init(__unused void *unused)
4110 {
4111 	int     maj;
4112 
4113 	/* bpf_comp_hdr is an overlay of bpf_hdr */
4114 	_CASSERT(BPF_WORDALIGN(sizeof(struct bpf_hdr)) ==
4115 	    BPF_WORDALIGN(sizeof(struct bpf_comp_hdr)));
4116 
4117 	/* compression length must fits in a byte */
4118 	_CASSERT(BPF_HDR_COMP_LEN_MAX <= UCHAR_MAX );
4119 
4120 	(void) PE_parse_boot_argn("bpf_hdr_comp", &bpf_hdr_comp_enable,
4121 	    sizeof(bpf_hdr_comp_enable));
4122 
4123 	if (bpf_devsw_installed == 0) {
4124 		bpf_devsw_installed = 1;
4125 		maj = cdevsw_add(CDEV_MAJOR, &bpf_cdevsw);
4126 		if (maj == -1) {
4127 			bpf_devsw_installed = 0;
4128 			os_log_error(OS_LOG_DEFAULT,
4129 			    "bpf_init: failed to allocate a major number");
4130 			return;
4131 		}
4132 
4133 		for (int i = 0; i < NBPFILTER; i++) {
4134 			bpf_make_dev_t(maj);
4135 		}
4136 	}
4137 }
4138 
4139 static int
4140 sysctl_bpf_maxbufsize SYSCTL_HANDLER_ARGS
4141 {
4142 #pragma unused(arg1, arg2)
4143 	int i, err;
4144 
4145 	i = bpf_maxbufsize;
4146 
4147 	err = sysctl_handle_int(oidp, &i, 0, req);
4148 	if (err != 0 || req->newptr == USER_ADDR_NULL) {
4149 		return err;
4150 	}
4151 
4152 	if (i < 0 || i > BPF_BUFSIZE_CAP) {
4153 		i = BPF_BUFSIZE_CAP;
4154 	}
4155 
4156 	bpf_maxbufsize = i;
4157 	return err;
4158 }
4159 
4160 static int
4161 sysctl_bpf_bufsize_cap SYSCTL_HANDLER_ARGS
4162 {
4163 #pragma unused(arg1, arg2)
4164 	int i, err;
4165 
4166 	i = BPF_BUFSIZE_CAP;
4167 
4168 	err = sysctl_handle_int(oidp, &i, 0, req);
4169 	if (err != 0 || req->newptr == USER_ADDR_NULL) {
4170 		return err;
4171 	}
4172 
4173 	return err;
4174 }
4175 
4176 /*
4177  * Fill filter statistics
4178  */
4179 static void
bpfstats_fill_xbpf(struct xbpf_d * d,struct bpf_d * bd)4180 bpfstats_fill_xbpf(struct xbpf_d *d, struct bpf_d *bd)
4181 {
4182 	LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);
4183 
4184 	d->bd_structsize = sizeof(struct xbpf_d);
4185 	d->bd_promisc = bd->bd_promisc != 0 ? 1 : 0;
4186 	d->bd_immediate = d->bd_immediate != 0 ? 1 : 0;
4187 	d->bd_hdrcmplt = bd->bd_hdrcmplt != 0 ? 1 : 0;
4188 	d->bd_async = bd->bd_async != 0 ? 1 : 0;
4189 	d->bd_headdrop = bd->bd_headdrop != 0 ? 1 : 0;
4190 	d->bd_seesent = bd->bd_seesent != 0 ? 1 : 0;
4191 	d->bh_compreq = bd->bd_flags & BPF_COMP_REQ ? 1 : 0;
4192 	d->bh_compenabled = bd->bd_flags & BPF_COMP_ENABLED ? 1 : 0;
4193 	d->bd_exthdr = bd->bd_flags & BPF_EXTENDED_HDR ? 1 : 0;
4194 	d->bd_trunc = bd->bd_flags & BPF_TRUNCATE ? 1 : 0;
4195 	d->bd_pkthdrv2 = bd->bd_flags & BPF_PKTHDRV2 ? 1 : 0;
4196 
4197 	d->bd_dev_minor = (uint8_t)bd->bd_dev_minor;
4198 
4199 	d->bd_sig = bd->bd_sig;
4200 
4201 	d->bd_rcount = bd->bd_rcount;
4202 	d->bd_dcount = bd->bd_dcount;
4203 	d->bd_fcount = bd->bd_fcount;
4204 	d->bd_slen = bd->bd_slen;
4205 	d->bd_hlen = bd->bd_hlen;
4206 	d->bd_bufsize = bd->bd_bufsize;
4207 	d->bd_pid = bd->bd_pid;
4208 	if (bd->bd_bif != NULL && bd->bd_bif->bif_ifp != NULL) {
4209 		strlcpy(d->bd_ifname,
4210 		    bd->bd_bif->bif_ifp->if_xname, IFNAMSIZ);
4211 	}
4212 
4213 	d->bd_comp_count = bd->bd_bcs.bcs_count_compressed_prefix;
4214 	d->bd_comp_size = bd->bd_bcs.bcs_total_compressed_prefix_size;
4215 
4216 	d->bd_scnt = bd->bd_scnt;
4217 	d->bd_hcnt = bd->bd_hcnt;
4218 
4219 	d->bd_read_count = bd->bd_bcs.bcs_total_read;
4220 	d->bd_fsize = bd->bd_bcs.bcs_total_size;
4221 }
4222 
4223 /*
4224  * Handle `netstat -B' stats request
4225  */
4226 static int
4227 sysctl_bpf_stats SYSCTL_HANDLER_ARGS
4228 {
4229 	int error;
4230 	struct xbpf_d *xbdbuf;
4231 	unsigned int x_cnt;
4232 	vm_size_t buf_size;
4233 
4234 	if (req->oldptr == USER_ADDR_NULL) {
4235 		return SYSCTL_OUT(req, 0, nbpfilter * sizeof(struct xbpf_d));
4236 	}
4237 	if (nbpfilter == 0) {
4238 		return SYSCTL_OUT(req, 0, 0);
4239 	}
4240 	buf_size = req->oldlen;
4241 	xbdbuf = kalloc_data(buf_size, Z_WAITOK | Z_ZERO);
4242 
4243 	lck_mtx_lock(bpf_mlock);
4244 	if (buf_size < (nbpfilter * sizeof(struct xbpf_d))) {
4245 		lck_mtx_unlock(bpf_mlock);
4246 		kfree_data(xbdbuf, buf_size);
4247 		return ENOMEM;
4248 	}
4249 	x_cnt = 0;
4250 	unsigned int i;
4251 
4252 	for (i = 0; i < nbpfilter; i++) {
4253 		struct bpf_d *bd = bpf_dtab[i];
4254 		struct xbpf_d *xbd;
4255 
4256 		if (bd == NULL || bd == BPF_DEV_RESERVED ||
4257 		    (bd->bd_flags & BPF_CLOSING) != 0) {
4258 			continue;
4259 		}
4260 		VERIFY(x_cnt < nbpfilter);
4261 
4262 		xbd = &xbdbuf[x_cnt++];
4263 		bpfstats_fill_xbpf(xbd, bd);
4264 	}
4265 	lck_mtx_unlock(bpf_mlock);
4266 
4267 	error = SYSCTL_OUT(req, xbdbuf, x_cnt * sizeof(struct xbpf_d));
4268 	kfree_data(xbdbuf, buf_size);
4269 	return error;
4270 }
4271