xref: /xnu-10002.1.13/bsd/net/bpf.c (revision 1031c584a5e37aff177559b9f69dbd3c8c3fd30a)
1 /*
2  * Copyright (c) 2000-2022 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * Copyright (c) 1990, 1991, 1993
30  *	The Regents of the University of California.  All rights reserved.
31  *
32  * This code is derived from the Stanford/CMU enet packet filter,
33  * (net/enet.c) distributed as part of 4.3BSD, and code contributed
34  * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
35  * Berkeley Laboratory.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. All advertising materials mentioning features or use of this software
46  *    must display the following acknowledgement:
47  *	This product includes software developed by the University of
48  *	California, Berkeley and its contributors.
49  * 4. Neither the name of the University nor the names of its contributors
50  *    may be used to endorse or promote products derived from this software
51  *    without specific prior written permission.
52  *
53  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63  * SUCH DAMAGE.
64  *
65  *	@(#)bpf.c	8.2 (Berkeley) 3/28/94
66  *
67  * $FreeBSD: src/sys/net/bpf.c,v 1.59.2.5 2001/01/05 04:49:09 jdp Exp $
68  */
69 /*
70  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
71  * support for mandatory and extensible security protections.  This notice
72  * is included in support of clause 2.2 (b) of the Apple Public License,
73  * Version 2.0.
74  */
75 
76 #include "bpf.h"
77 
78 #ifndef __GNUC__
79 #define inline
80 #else
81 #define inline __inline
82 #endif
83 
84 #include <sys/param.h>
85 #include <sys/systm.h>
86 #include <sys/conf.h>
87 #include <sys/malloc.h>
88 #include <sys/mbuf.h>
89 #include <sys/time.h>
90 #include <sys/proc.h>
91 #include <sys/signalvar.h>
92 #include <sys/filio.h>
93 #include <sys/sockio.h>
94 #include <sys/ttycom.h>
95 #include <sys/filedesc.h>
96 #include <sys/uio_internal.h>
97 #include <sys/file_internal.h>
98 #include <sys/event.h>
99 
100 #include <sys/poll.h>
101 
102 #include <sys/socket.h>
103 #include <sys/socketvar.h>
104 #include <sys/vnode.h>
105 
106 #include <net/if.h>
107 #include <net/bpf.h>
108 #include <net/bpfdesc.h>
109 
110 #include <netinet/in.h>
111 #include <netinet/ip.h>
112 #include <netinet/ip6.h>
113 #include <netinet/in_pcb.h>
114 #include <netinet/in_var.h>
115 #include <netinet/ip_var.h>
116 #include <netinet/tcp.h>
117 #include <netinet/tcp_var.h>
118 #include <netinet/udp.h>
119 #include <netinet/udp_var.h>
120 #include <netinet/if_ether.h>
121 #include <netinet/isakmp.h>
122 #include <netinet6/esp.h>
123 #include <sys/kernel.h>
124 #include <sys/sysctl.h>
125 #include <net/firewire.h>
126 
127 #include <miscfs/devfs/devfs.h>
128 #include <net/dlil.h>
129 #include <net/pktap.h>
130 
131 #include <kern/assert.h>
132 #include <kern/locks.h>
133 #include <kern/thread_call.h>
134 #include <libkern/section_keywords.h>
135 
136 #include <os/log.h>
137 
138 #include <IOKit/IOBSD.h>
139 
140 #define BPF_WRITE_MAX 65535
141 
142 extern int tvtohz(struct timeval *);
143 extern char *proc_name_address(void *p);
144 
145 #define BPF_BUFSIZE 4096
146 #define UIOMOVE(cp, len, code, uio) uiomove(cp, len, uio)
147 
148 #define PRINET  26                      /* interruptible */
149 
150 #define ISAKMP_HDR_SIZE (sizeof(struct isakmp) + sizeof(struct isakmp_gen))
151 #define ESP_HDR_SIZE sizeof(struct newesp)
152 
153 #define BPF_WRITE_LEEWAY 18
154 
155 typedef void (*pktcopyfunc_t)(const void *, void *, size_t);
156 
157 /*
158  * The default read buffer size is patchable.
159  */
160 static unsigned int bpf_bufsize = BPF_BUFSIZE;
161 SYSCTL_INT(_debug, OID_AUTO, bpf_bufsize, CTLFLAG_RW | CTLFLAG_LOCKED,
162     &bpf_bufsize, 0, "");
163 
164 __private_extern__ unsigned int bpf_maxbufsize = BPF_MAXBUFSIZE;
165 static int sysctl_bpf_maxbufsize SYSCTL_HANDLER_ARGS;
166 SYSCTL_PROC(_debug, OID_AUTO, bpf_maxbufsize, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
167     &bpf_maxbufsize, 0,
168     sysctl_bpf_maxbufsize, "I", "Default BPF max buffer size");
169 
170 extern const int copysize_limit_panic;
171 #define BPF_BUFSIZE_CAP (copysize_limit_panic >> 1)
172 static int sysctl_bpf_bufsize_cap SYSCTL_HANDLER_ARGS;
173 SYSCTL_PROC(_debug, OID_AUTO, bpf_bufsize_cap, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
174     0, 0,
175     sysctl_bpf_bufsize_cap, "I", "Upper limit on BPF max buffer size");
176 
177 #define BPF_MAX_DEVICES 256
178 static unsigned int bpf_maxdevices = BPF_MAX_DEVICES;
179 SYSCTL_UINT(_debug, OID_AUTO, bpf_maxdevices, CTLFLAG_RD | CTLFLAG_LOCKED,
180     &bpf_maxdevices, 0, "");
181 
182 /*
183  * bpf_wantpktap controls the default visibility of DLT_PKTAP.
184  * On OS X it is off by default, so a process needs to use the ioctl BPF_WANT_PKTAP
185  * explicitly to be able to use DLT_PKTAP.
186  */
187 #if !XNU_TARGET_OS_OSX
188 static unsigned int bpf_wantpktap = 1;
189 #else /* XNU_TARGET_OS_OSX */
190 static unsigned int bpf_wantpktap = 0;
191 #endif /* XNU_TARGET_OS_OSX */
192 SYSCTL_UINT(_debug, OID_AUTO, bpf_wantpktap, CTLFLAG_RW | CTLFLAG_LOCKED,
193     &bpf_wantpktap, 0, "");
194 
195 static int bpf_debug = 0;
196 SYSCTL_INT(_debug, OID_AUTO, bpf_debug, CTLFLAG_RW | CTLFLAG_LOCKED,
197     &bpf_debug, 0, "");
198 
199 static unsigned long bpf_trunc_overflow = 0;
200 SYSCTL_ULONG(_debug, OID_AUTO, bpf_trunc_overflow, CTLFLAG_RD | CTLFLAG_LOCKED,
201     &bpf_trunc_overflow, "");
202 
203 static int bpf_hdr_comp_enable = 1;
204 SYSCTL_INT(_debug, OID_AUTO, bpf_hdr_comp_enable, CTLFLAG_RW | CTLFLAG_LOCKED,
205     &bpf_hdr_comp_enable, 1, "");
206 
207 static int sysctl_bpf_stats SYSCTL_HANDLER_ARGS;
208 SYSCTL_PROC(_debug, OID_AUTO, bpf_stats, CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
209     0, 0,
210     sysctl_bpf_stats, "S", "BPF statistics");
211 
212 /*
213  *  bpf_iflist is the list of interfaces; each corresponds to an ifnet
214  *  bpf_dtab holds pointer to the descriptors, indexed by minor device #
215  */
216 static struct bpf_if    *bpf_iflist;
217 /*
218  * BSD now stores the bpf_d in the dev_t which is a struct
219  * on their system. Our dev_t is an int, so we still store
220  * the bpf_d in a separate table indexed by minor device #.
221  *
222  *  The value stored in bpf_dtab[n] represents one of three states:
223  *  NULL: device not opened
224  *  BPF_DEV_RESERVED: device opening or closing
225  *  other: device <n> opened with pointer to storage
226  */
227 #define BPF_DEV_RESERVED ((struct bpf_d *)(uintptr_t)1)
228 static struct bpf_d **bpf_dtab = NULL;
229 static unsigned int bpf_dtab_size = 0;
230 static unsigned int nbpfilter = 0;
231 static unsigned bpf_bpfd_cnt = 0;
232 
233 static LCK_GRP_DECLARE(bpf_mlock_grp, "bpf");
234 static LCK_MTX_DECLARE(bpf_mlock_data, &bpf_mlock_grp);
235 static lck_mtx_t *const bpf_mlock = &bpf_mlock_data;
236 
237 static int      bpf_allocbufs(struct bpf_d *);
238 static errno_t  bpf_attachd(struct bpf_d *d, struct bpf_if *bp);
239 static int      bpf_detachd(struct bpf_d *d);
240 static void     bpf_freed(struct bpf_d *);
241 static int      bpf_setif(struct bpf_d *, ifnet_t ifp, bool, bool, bool);
242 static void     bpf_timed_out(void *, void *);
243 static void     bpf_wakeup(struct bpf_d *);
244 static uint32_t get_pkt_trunc_len(struct bpf_packet *);
245 static void     catchpacket(struct bpf_d *, struct bpf_packet *, u_int, int);
246 static void     reset_d(struct bpf_d *);
247 static int      bpf_setf(struct bpf_d *, u_int, user_addr_t, u_long);
248 static int      bpf_getdltlist(struct bpf_d *, caddr_t, struct proc *);
249 static int      bpf_setdlt(struct bpf_d *, u_int);
250 static int      bpf_set_traffic_class(struct bpf_d *, int);
251 static void     bpf_set_packet_service_class(struct mbuf *, int);
252 
253 static void     bpf_acquire_d(struct bpf_d *);
254 static void     bpf_release_d(struct bpf_d *);
255 
256 static  int bpf_devsw_installed;
257 
258 void bpf_init(void *unused);
259 static int bpf_tap_callback(struct ifnet *ifp, struct mbuf *m);
260 
261 /*
262  * Darwin differs from BSD here, the following are static
263  * on BSD and not static on Darwin.
264  */
265 d_open_t            bpfopen;
266 d_close_t           bpfclose;
267 d_read_t            bpfread;
268 d_write_t           bpfwrite;
269 ioctl_fcn_t         bpfioctl;
270 select_fcn_t        bpfselect;
271 
272 /* Darwin's cdevsw struct differs slightly from BSDs */
273 #define CDEV_MAJOR 23
274 static const struct cdevsw bpf_cdevsw = {
275 	.d_open       = bpfopen,
276 	.d_close      = bpfclose,
277 	.d_read       = bpfread,
278 	.d_write      = bpfwrite,
279 	.d_ioctl      = bpfioctl,
280 	.d_stop       = eno_stop,
281 	.d_reset      = eno_reset,
282 	.d_ttys       = NULL,
283 	.d_select     = bpfselect,
284 	.d_mmap       = eno_mmap,
285 	.d_strategy   = eno_strat,
286 	.d_reserved_1 = eno_getc,
287 	.d_reserved_2 = eno_putc,
288 	.d_type       = 0
289 };
290 
291 #define SOCKADDR_HDR_LEN           offsetof(struct sockaddr, sa_data)
292 
293 static int
copy_uio_to_mbuf_packet(struct uio * auio,struct mbuf * top)294 copy_uio_to_mbuf_packet(struct uio *auio, struct mbuf *top)
295 {
296 	int error = 0;
297 
298 	for (struct mbuf *m = top; m != NULL; m = m->m_next) {
299 		int bytes_to_copy = (int)uio_resid(auio);
300 		int mlen;
301 
302 		if (m->m_flags & M_EXT) {
303 			mlen = m->m_ext.ext_size - (int)M_LEADINGSPACE(m);
304 		} else if (m->m_flags & M_PKTHDR) {
305 			mlen = MHLEN - (int)M_LEADINGSPACE(m);
306 		} else {
307 			mlen = MLEN - (int)M_LEADINGSPACE(m);
308 		}
309 		int copy_len = imin((int)mlen, bytes_to_copy);
310 
311 		error = uiomove(mtod(m, caddr_t), (int)copy_len, auio);
312 		if (error != 0) {
313 			os_log(OS_LOG_DEFAULT, "mbuf_packet_from_uio: len %d error %d",
314 			    copy_len, error);
315 			goto done;
316 		}
317 		m->m_len = copy_len;
318 		top->m_pkthdr.len += copy_len;
319 	}
320 done:
321 	return error;
322 }
323 
324 static int
bpf_movein(struct uio * uio,struct ifnet * ifp,int linktype,struct mbuf ** mp,struct sockaddr * sockp,int * datlen)325 bpf_movein(struct uio *uio, struct ifnet *ifp, int linktype, struct mbuf **mp,
326     struct sockaddr *sockp, int *datlen)
327 {
328 	struct mbuf *m;
329 	int error;
330 	int len;
331 	uint8_t sa_family;
332 	int hlen = 0;
333 
334 	switch (linktype) {
335 #if SLIP
336 	case DLT_SLIP:
337 		sa_family = AF_INET;
338 		hlen = 0;
339 		break;
340 #endif /* SLIP */
341 
342 	case DLT_EN10MB:
343 		sa_family = AF_UNSPEC;
344 		/* XXX Would MAXLINKHDR be better? */
345 		hlen = sizeof(struct ether_header);
346 		break;
347 
348 #if FDDI
349 	case DLT_FDDI:
350 #if defined(__FreeBSD__) || defined(__bsdi__)
351 		sa_family = AF_IMPLINK;
352 		hlen = 0;
353 #else
354 		sa_family = AF_UNSPEC;
355 		/* XXX 4(FORMAC)+6(dst)+6(src)+3(LLC)+5(SNAP) */
356 		hlen = 24;
357 #endif
358 		break;
359 #endif /* FDDI */
360 
361 	case DLT_RAW:
362 	case DLT_NULL:
363 		sa_family = AF_UNSPEC;
364 		hlen = 0;
365 		break;
366 
367 #ifdef __FreeBSD__
368 	case DLT_ATM_RFC1483:
369 		/*
370 		 * en atm driver requires 4-byte atm pseudo header.
371 		 * though it isn't standard, vpi:vci needs to be
372 		 * specified anyway.
373 		 */
374 		sa_family = AF_UNSPEC;
375 		hlen = 12;      /* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
376 		break;
377 #endif
378 
379 	case DLT_PPP:
380 		sa_family = AF_UNSPEC;
381 		hlen = 4;       /* This should match PPP_HDRLEN */
382 		break;
383 
384 	case DLT_APPLE_IP_OVER_IEEE1394:
385 		sa_family = AF_UNSPEC;
386 		hlen = sizeof(struct firewire_header);
387 		break;
388 
389 	case DLT_IEEE802_11:            /* IEEE 802.11 wireless */
390 		sa_family = AF_IEEE80211;
391 		hlen = 0;
392 		break;
393 
394 	case DLT_IEEE802_11_RADIO:
395 		sa_family = AF_IEEE80211;
396 		hlen = 0;
397 		break;
398 
399 	default:
400 		return EIO;
401 	}
402 
403 	if (sockp) {
404 		/*
405 		 * Build a sockaddr based on the data link layer type.
406 		 * We do this at this level because the ethernet header
407 		 * is copied directly into the data field of the sockaddr.
408 		 * In the case of SLIP, there is no header and the packet
409 		 * is forwarded as is.
410 		 * Also, we are careful to leave room at the front of the mbuf
411 		 * for the link level header.
412 		 */
413 		if ((hlen + SOCKADDR_HDR_LEN) > sockp->sa_len) {
414 			return EIO;
415 		}
416 		sockp->sa_family = sa_family;
417 	} else {
418 		/*
419 		 * We're directly sending the packet data supplied by
420 		 * the user; we don't need to make room for the link
421 		 * header, and don't need the header length value any
422 		 * more, so set it to 0.
423 		 */
424 		hlen = 0;
425 	}
426 
427 	len = (int)uio_resid(uio);
428 	if (len < hlen || (unsigned)len > BPF_WRITE_MAX) {
429 		os_log(OS_LOG_DEFAULT, "bpfwrite: bad len %d if %s",
430 		    (unsigned)len, ifp->if_xname);
431 		return EMSGSIZE;
432 	}
433 	if ((len - hlen) > (ifp->if_mtu + BPF_WRITE_LEEWAY)) {
434 		os_log(OS_LOG_DEFAULT, "bpfwrite: len %u - hlen %u too big if %s mtu %u",
435 		    (unsigned)len, (unsigned)hlen, ifp->if_xname, ifp->if_mtu);
436 		return EMSGSIZE;
437 	}
438 
439 	*datlen = len - hlen;
440 
441 	error = mbuf_allocpacket(MBUF_WAITOK, len, NULL, &m);
442 	if (error != 0) {
443 		os_log(OS_LOG_DEFAULT,
444 		    "bpfwrite mbuf_allocpacket len %d error %d", len, error);
445 		return error;
446 	}
447 	/*
448 	 * Make room for link header -- the packet length is 0 at this stage
449 	 */
450 	if (hlen != 0) {
451 		m->m_data += hlen; /* leading space */
452 		error = UIOMOVE((caddr_t)sockp->sa_data, hlen, UIO_WRITE, uio);
453 		if (error) {
454 			os_log(OS_LOG_DEFAULT,
455 			    "bpfwrite UIOMOVE hlen %d error %d", hlen, error);
456 			goto bad;
457 		}
458 	}
459 	/*
460 	 * copy_uio_to_mbuf_packet() does set the length of each mbuf and adds it to
461 	 * the total packet length
462 	 */
463 	error = copy_uio_to_mbuf_packet(uio, m);
464 	if (error != 0) {
465 		os_log(OS_LOG_DEFAULT,
466 		    "bpfwrite copy_uio_to_mbuf_packet error %d", error);
467 		goto bad;
468 	}
469 
470 	/* Check for multicast destination */
471 	switch (linktype) {
472 	case DLT_EN10MB: {
473 		struct ether_header *eh;
474 
475 		eh = mtod(m, struct ether_header *);
476 		if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
477 			if (_ether_cmp(etherbroadcastaddr,
478 			    eh->ether_dhost) == 0) {
479 				m->m_flags |= M_BCAST;
480 			} else {
481 				m->m_flags |= M_MCAST;
482 			}
483 		}
484 		break;
485 	}
486 	}
487 	*mp = m;
488 
489 	return 0;
490 bad:
491 	m_freem(m);
492 	return error;
493 }
494 
495 /*
496  * The dynamic addition of a new device node must block all processes that
497  * are opening the last device so that no process will get an unexpected
498  * ENOENT
499  */
/*
 * Create the next /dev/bpfN node on demand, growing bpf_dtab as needed.
 *
 * Concurrent callers serialize on the file-scope bpf_growing flag via
 * tsleep/wakeup so only one thread grows the table at a time; a waiter
 * that wakes up after another thread already grew the table returns
 * without creating a duplicate node.  Capped at BPF_MAX_DEVICES.
 */
static void
bpf_make_dev_t(int maj)
{
	static int              bpf_growing = 0;
	unsigned int    cur_size = nbpfilter, i;

	if (nbpfilter >= BPF_MAX_DEVICES) {
		return;
	}

	while (bpf_growing) {
		/* Wait until new device has been created */
		(void) tsleep((caddr_t)&bpf_growing, PZERO, "bpf_growing", 0);
	}
	if (nbpfilter > cur_size) {
		/* other thread grew it already */
		return;
	}
	bpf_growing = 1;

	/* need to grow bpf_dtab first */
	if (nbpfilter == bpf_dtab_size) {
		unsigned int new_dtab_size;
		struct bpf_d **new_dtab = NULL;

		/* Grow in chunks of NBPFILTER; krealloc zero-fills the new tail */
		new_dtab_size = bpf_dtab_size + NBPFILTER;
		new_dtab = krealloc_type(struct bpf_d *,
		    bpf_dtab_size, new_dtab_size, bpf_dtab, Z_WAITOK | Z_ZERO);
		if (new_dtab == 0) {
			os_log_error(OS_LOG_DEFAULT, "bpf_make_dev_t: malloc bpf_dtab failed");
			goto done;
		}
		bpf_dtab = new_dtab;
		bpf_dtab_size = new_dtab_size;
	}
	i = nbpfilter++;
	(void) devfs_make_node(makedev(maj, i),
	    DEVFS_CHAR, UID_ROOT, GID_WHEEL, 0600,
	    "bpf%d", i);
done:
	/* Release the growth lock and wake any waiters */
	bpf_growing = 0;
	wakeup((caddr_t)&bpf_growing);
}
543 
544 /*
545  * Attach file to the bpf interface, i.e. make d listen on bp.
546  */
/*
 * Attach file to the bpf interface, i.e. make d listen on bp.
 *
 * Called with bpf_mlock held (bpf_acquire_d asserts it).  Links d onto
 * bp's listener list, takes a reference on d, and -- when d is the first
 * listener for this DLT -- arranges for the driver to divert packets
 * to bpf.
 *
 * Returns 0 or the error from the interface's bif_tap callback.
 */
static errno_t
bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
{
	int first = bp->bif_dlist == NULL; /* first listener on this DLT? */
	int     error = 0;

	/*
	 * Point d at bp, and add d to the interface's list of listeners.
	 * Finally, point the driver's bpf cookie at the interface so
	 * it will divert packets to bpf.
	 */
	d->bd_bif = bp;
	d->bd_next = bp->bif_dlist;
	bp->bif_dlist = d;
	bpf_bpfd_cnt++;

	/*
	 * Take a reference on the device even if an error is returned
	 * because we keep the device in the interface's list of listeners
	 */
	bpf_acquire_d(d);

	if (first) {
		/* Find the default bpf entry for this ifp */
		if (bp->bif_ifp->if_bpf == NULL) {
			struct bpf_if   *tmp, *primary = NULL;

			/* First bpf_if on bpf_iflist for this ifp is the primary */
			for (tmp = bpf_iflist; tmp; tmp = tmp->bif_next) {
				if (tmp->bif_ifp == bp->bif_ifp) {
					primary = tmp;
					break;
				}
			}
			bp->bif_ifp->if_bpf = primary;
		}
		/* Only call dlil_set_bpf_tap for primary dlt */
		if (bp->bif_ifp->if_bpf == bp) {
			dlil_set_bpf_tap(bp->bif_ifp, BPF_TAP_INPUT_OUTPUT,
			    bpf_tap_callback);
		}

		/* Notify the DLT-specific tap callback, if any */
		if (bp->bif_tap != NULL) {
			error = bp->bif_tap(bp->bif_ifp, bp->bif_dlt,
			    BPF_TAP_INPUT_OUTPUT);
		}
	}

	/*
	 * Reset the detach flags in case we previously detached an interface
	 */
	d->bd_flags &= ~(BPF_DETACHING | BPF_DETACHED);

	/*
	 * Mark descriptors on DLT_PKTAP so their pktap headers get
	 * finalized later (see bpf_finalize_pktap)
	 */
	if (bp->bif_dlt == DLT_PKTAP) {
		d->bd_flags |= BPF_FINALIZE_PKTAP;
	} else {
		d->bd_flags &= ~BPF_FINALIZE_PKTAP;
	}
	return error;
}
606 
607 /*
608  * Detach a file from its interface.
609  *
610  * Return 1 if was closed by some thread, 0 otherwise
611  */
/*
 * Detach a file from its interface.
 *
 * Called with bpf_mlock held; the lock is temporarily dropped while
 * turning off promiscuous mode and disabling the tap, so callers must
 * not rely on state that another thread could change in that window.
 *
 * Return 1 if was closed by some thread, 0 otherwise
 */
static int
bpf_detachd(struct bpf_d *d)
{
	struct bpf_d **p;
	struct bpf_if *bp;
	struct ifnet  *ifp;
	uint32_t dlt;                   /* only valid when disable_tap != NULL */
	bpf_tap_func disable_tap;
	uint8_t bd_promisc;

	int bpf_closed = d->bd_flags & BPF_CLOSING;
	/*
	 * Some other thread already detached
	 */
	if ((d->bd_flags & (BPF_DETACHED | BPF_DETACHING)) != 0) {
		goto done;
	}
	/*
	 * This thread is doing the detach
	 */
	d->bd_flags |= BPF_DETACHING;

	ifp = d->bd_bif->bif_ifp;
	bp = d->bd_bif;

	/* Remove d from the interface's descriptor list. */
	p = &bp->bif_dlist;
	while (*p != d) {
		p = &(*p)->bd_next;
		if (*p == 0) {
			panic("bpf_detachd: descriptor not in list");
		}
	}
	*p = (*p)->bd_next;
	bpf_bpfd_cnt--;
	disable_tap = NULL;
	if (bp->bif_dlist == 0) {
		/*
		 * Let the driver know that there are no more listeners.
		 */
		/* Only call dlil_set_bpf_tap for primary dlt */
		if (bp->bif_ifp->if_bpf == bp) {
			dlil_set_bpf_tap(ifp, BPF_TAP_DISABLE, NULL);
		}

		/* Defer the DLT-specific disable until after the lock is dropped */
		disable_tap = bp->bif_tap;
		if (disable_tap) {
			dlt = bp->bif_dlt;
		}

		/* If no other DLT on this ifp has listeners, clear if_bpf */
		for (bp = bpf_iflist; bp; bp = bp->bif_next) {
			if (bp->bif_ifp == ifp && bp->bif_dlist != 0) {
				break;
			}
		}
		if (bp == NULL) {
			ifp->if_bpf = NULL;
		}
	}
	d->bd_bif = NULL;
	/*
	 * Check if this descriptor had requested promiscuous mode.
	 * If so, turn it off.
	 */
	bd_promisc = d->bd_promisc;
	d->bd_promisc = 0;

	/* Drop the lock: the calls below may block or call back into bpf */
	lck_mtx_unlock(bpf_mlock);
	if (bd_promisc) {
		if (ifnet_set_promiscuous(ifp, 0)) {
			/*
			 * Something is really wrong if we were able to put
			 * the driver into promiscuous mode, but can't
			 * take it out.
			 * Most likely the network interface is gone.
			 */
			os_log_error(OS_LOG_DEFAULT,
			    "%s: bpf%d ifnet_set_promiscuous %s failed",
			    __func__, d->bd_dev_minor, if_name(ifp));
		}
	}

	if (disable_tap) {
		disable_tap(ifp, dlt, BPF_TAP_DISABLE);
	}
	lck_mtx_lock(bpf_mlock);

	/*
	 * Wake up other threads that are waiting for this thread to finish
	 * detaching
	 */
	d->bd_flags &= ~BPF_DETACHING;
	d->bd_flags |= BPF_DETACHED;

	/* Refresh the local variable as d could have been modified */
	bpf_closed = d->bd_flags & BPF_CLOSING;

	os_log(OS_LOG_DEFAULT, "bpf%d%s detached from %s fcount %llu dcount %llu",
	    d->bd_dev_minor, bpf_closed ? " closed and" : "", if_name(ifp),
	    d->bd_fcount, d->bd_dcount);

	/*
	 * Note that we've kept the reference because we may have dropped
	 * the lock when turning off promiscuous mode
	 */
	bpf_release_d(d);
done:
	/*
	 * Let the caller know the bpf_d is closed
	 */
	if (bpf_closed) {
		return 1;
	} else {
		return 0;
	}
}
728 
729 /*
730  * Start asynchronous timer, if necessary.
731  * Must be called with bpf_mlock held.
732  */
733 static void
bpf_start_timer(struct bpf_d * d)734 bpf_start_timer(struct bpf_d *d)
735 {
736 	uint64_t deadline;
737 	struct timeval tv;
738 
739 	if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
740 		tv.tv_sec = d->bd_rtout / hz;
741 		tv.tv_usec = (d->bd_rtout % hz) * tick;
742 
743 		clock_interval_to_deadline(
744 			(uint32_t)tv.tv_sec * USEC_PER_SEC + tv.tv_usec,
745 			NSEC_PER_USEC, &deadline);
746 		/*
747 		 * The state is BPF_IDLE, so the timer hasn't
748 		 * been started yet, and hasn't gone off yet;
749 		 * there is no thread call scheduled, so this
750 		 * won't change the schedule.
751 		 *
752 		 * XXX - what if, by the time it gets entered,
753 		 * the deadline has already passed?
754 		 */
755 		thread_call_enter_delayed(d->bd_thread_call, deadline);
756 		d->bd_state = BPF_WAITING;
757 	}
758 }
759 
760 /*
761  * Cancel asynchronous timer.
762  * Must be called with bpf_mlock held.
763  */
764 static boolean_t
bpf_stop_timer(struct bpf_d * d)765 bpf_stop_timer(struct bpf_d *d)
766 {
767 	/*
768 	 * If the timer has already gone off, this does nothing.
769 	 * Our caller is expected to set d->bd_state to BPF_IDLE,
770 	 * with the bpf_mlock, after we are called. bpf_timed_out()
771 	 * also grabs bpf_mlock, so, if the timer has gone off and
772 	 * bpf_timed_out() hasn't finished, it's waiting for the
773 	 * lock; when this thread releases the lock, it will
774 	 * find the state is BPF_IDLE, and just release the
775 	 * lock and return.
776 	 */
777 	return thread_call_cancel(d->bd_thread_call);
778 }
779 
780 void
bpf_acquire_d(struct bpf_d * d)781 bpf_acquire_d(struct bpf_d *d)
782 {
783 	void *lr_saved =  __builtin_return_address(0);
784 
785 	LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);
786 
787 	d->bd_refcnt += 1;
788 
789 	d->bd_ref_lr[d->bd_next_ref_lr] = lr_saved;
790 	d->bd_next_ref_lr = (d->bd_next_ref_lr + 1) % BPF_REF_HIST;
791 }
792 
793 void
bpf_release_d(struct bpf_d * d)794 bpf_release_d(struct bpf_d *d)
795 {
796 	void *lr_saved =  __builtin_return_address(0);
797 
798 	LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);
799 
800 	if (d->bd_refcnt <= 0) {
801 		panic("%s: %p refcnt <= 0", __func__, d);
802 	}
803 
804 	d->bd_refcnt -= 1;
805 
806 	d->bd_unref_lr[d->bd_next_unref_lr] = lr_saved;
807 	d->bd_next_unref_lr = (d->bd_next_unref_lr + 1) % BPF_REF_HIST;
808 
809 	if (d->bd_refcnt == 0) {
810 		/* Assert the device is detached */
811 		if ((d->bd_flags & BPF_DETACHED) == 0) {
812 			panic("%s: %p BPF_DETACHED not set", __func__, d);
813 		}
814 
815 		kfree_type(struct bpf_d, d);
816 	}
817 }
818 
819 /*
820  * Open ethernet device.  Returns ENXIO for illegal minor device number,
821  * EBUSY if file is open by another process.
822  */
823 /* ARGSUSED */
/*
 * Open ethernet device.  Returns ENXIO for illegal minor device number,
 * EBUSY if file is open by another process.
 *
 * Allocates and initializes the per-descriptor bpf_d, reserves the
 * minor's slot in bpf_dtab under bpf_mlock, and creates the next device
 * node on demand when the last one is opened.
 */
/* ARGSUSED */
int
bpfopen(dev_t dev, int flags, __unused int fmt,
    struct proc *p)
{
	struct bpf_d *d;

	lck_mtx_lock(bpf_mlock);
	if ((unsigned int) minor(dev) >= nbpfilter) {
		lck_mtx_unlock(bpf_mlock);
		return ENXIO;
	}
	/*
	 * New device nodes are created on demand when opening the last one.
	 * The programming model is for processes to loop on the minor starting
	 * at 0 as long as EBUSY is returned. The loop stops when either the
	 * open succeeds or an error other than EBUSY is returned. That means
	 * that bpf_make_dev_t() must block all processes that are opening the
	 * last  node. If not all processes are blocked, they could unexpectedly
	 * get ENOENT and abort their opening loop.
	 */
	if ((unsigned int) minor(dev) == (nbpfilter - 1)) {
		bpf_make_dev_t(major(dev));
	}

	/*
	 * Each minor can be opened by only one process.  If the requested
	 * minor is in use, return EBUSY.
	 *
	 * Important: bpfopen() and bpfclose() have to check and set the status
	 * of a device in the same locking context otherwise the device may be
	 * leaked because the vnode use count will be unexpectedly greater than 1
	 * when close() is called.
	 */
	if (bpf_dtab[minor(dev)] == NULL) {
		/* Reserve while opening */
		bpf_dtab[minor(dev)] = BPF_DEV_RESERVED;
	} else {
		lck_mtx_unlock(bpf_mlock);
		return EBUSY;
	}
	d = kalloc_type(struct bpf_d, Z_WAITOK | Z_ZERO);
	if (d == NULL) {
		/* this really is a catastrophic failure */
		os_log_error(OS_LOG_DEFAULT,
		    "bpfopen: bpf%d kalloc_type bpf_d failed", minor(dev));
		bpf_dtab[minor(dev)] = NULL;
		lck_mtx_unlock(bpf_mlock);
		return ENOMEM;
	}

	/* Mark "in use" and do most initialization. */
	bpf_acquire_d(d);
	d->bd_bufsize = bpf_bufsize;
	d->bd_sig = SIGIO;
	d->bd_direction = BPF_D_INOUT;
	d->bd_oflags = flags;
	d->bd_state = BPF_IDLE;
	d->bd_traffic_class = SO_TC_BE;
	/* Starts detached; bpf_attachd() clears this when an interface is set */
	d->bd_flags |= BPF_DETACHED;
	if (bpf_wantpktap) {
		d->bd_flags |= BPF_WANT_PKTAP;
	} else {
		d->bd_flags &= ~BPF_WANT_PKTAP;
	}

	d->bd_thread_call = thread_call_allocate(bpf_timed_out, d);
	if (d->bd_thread_call == NULL) {
		os_log_error(OS_LOG_DEFAULT, "bpfopen: bpf%d malloc thread call failed",
		    minor(dev));
		/* Undo the reservation and drop our reference (frees d) */
		bpf_dtab[minor(dev)] = NULL;
		bpf_release_d(d);
		lck_mtx_unlock(bpf_mlock);

		return ENOMEM;
	}
	d->bd_opened_by = p;
	uuid_generate(d->bd_uuid);
	d->bd_pid = proc_pid(p);

	d->bd_dev_minor = minor(dev);
	bpf_dtab[minor(dev)] = d;         /* Mark opened */
	lck_mtx_unlock(bpf_mlock);

	if (bpf_debug) {
		os_log(OS_LOG_DEFAULT, "bpf%u opened by %s.%u",
		    d->bd_dev_minor, proc_name_address(p), d->bd_pid);
	}
	return 0;
}
913 
914 /*
915  * Close the descriptor by detaching it from its interface,
916  * deallocating its buffers, and marking it free.
917  */
918 /* ARGSUSED */
/*
 * Close the descriptor by detaching it from its interface,
 * deallocating its buffers, and marking it free.
 *
 * Runs entirely under bpf_mlock (detach may drop it briefly); drains
 * any in-flight timeout and any pending hold-buffer reads/writes
 * before freeing the descriptor's storage.
 */
/* ARGSUSED */
int
bpfclose(dev_t dev, __unused int flags, __unused int fmt,
    __unused struct proc *p)
{
	struct bpf_d *d;

	/* Take BPF lock to ensure no other thread is using the device */
	lck_mtx_lock(bpf_mlock);

	d = bpf_dtab[minor(dev)];
	if (d == NULL || d == BPF_DEV_RESERVED) {
		lck_mtx_unlock(bpf_mlock);
		return ENXIO;
	}

	/*
	 * Other threads may call bpd_detachd() if we drop the bpf_mlock
	 */
	d->bd_flags |= BPF_CLOSING;

	if (bpf_debug != 0) {
		os_log(OS_LOG_DEFAULT, "%s: bpf%d",
		    __func__, d->bd_dev_minor);
	}

	bpf_dtab[minor(dev)] = BPF_DEV_RESERVED;         /* Reserve while closing */

	/*
	 * Deal with any in-progress timeouts.
	 */
	switch (d->bd_state) {
	case BPF_IDLE:
		/*
		 * Not waiting for a timeout, and no timeout happened.
		 */
		break;

	case BPF_WAITING:
		/*
		 * Waiting for a timeout.
		 * Cancel any timer that has yet to go off,
		 * and mark the state as "closing".
		 * Then drop the lock to allow any timers that
		 * *have* gone off to run to completion, and wait
		 * for them to finish.
		 */
		if (!bpf_stop_timer(d)) {
			/*
			 * There was no pending call, so the call must
			 * have been in progress. Wait for the call to
			 * complete; we have to drop the lock while
			 * waiting. to let the in-progress call complete
			 */
			d->bd_state = BPF_DRAINING;
			while (d->bd_state == BPF_DRAINING) {
				msleep((caddr_t)d, bpf_mlock, PRINET,
				    "bpfdraining", NULL);
			}
		}
		d->bd_state = BPF_IDLE;
		break;

	case BPF_TIMED_OUT:
		/*
		 * Timer went off, and the timeout routine finished.
		 */
		d->bd_state = BPF_IDLE;
		break;

	case BPF_DRAINING:
		/*
		 * Another thread is blocked on a close waiting for
		 * a timeout to finish.
		 * This "shouldn't happen", as the first thread to enter
		 * bpfclose() will set bpf_dtab[minor(dev)] to 1, and
		 * all subsequent threads should see that and fail with
		 * ENXIO.
		 */
		panic("Two threads blocked in a BPF close");
		break;
	}

	if (d->bd_bif) {
		bpf_detachd(d);
	}
	selthreadclear(&d->bd_sel);
	thread_call_free(d->bd_thread_call);

	/* Wait for any pending hold-buffer read or write to finish */
	while (d->bd_hbuf_read || d->bd_hbuf_write) {
		msleep((caddr_t)d, bpf_mlock, PRINET, "bpfclose", NULL);
	}

	if (bpf_debug) {
		os_log(OS_LOG_DEFAULT,
		    "bpf%u closed by %s.%u dcount %llu fcount %llu ccount %llu",
		    d->bd_dev_minor, proc_name_address(p), d->bd_pid,
		    d->bd_dcount, d->bd_fcount, d->bd_bcs.bcs_count_compressed_prefix);
	}

	bpf_freed(d);

	/* Mark free in same context as bpfopen comes to check */
	bpf_dtab[minor(dev)] = NULL;                    /* Mark closed */

	/* Drop the reference taken at open; frees d when it is the last one */
	bpf_release_d(d);

	lck_mtx_unlock(bpf_mlock);

	return 0;
}
1029 
1030 #define BPF_SLEEP bpf_sleep
1031 
1032 static int
bpf_sleep(struct bpf_d * d,int pri,const char * wmesg,int timo)1033 bpf_sleep(struct bpf_d *d, int pri, const char *wmesg, int timo)
1034 {
1035 	u_int64_t abstime = 0;
1036 
1037 	if (timo != 0) {
1038 		clock_interval_to_deadline(timo, NSEC_PER_SEC / hz, &abstime);
1039 	}
1040 
1041 	return msleep1((caddr_t)d, bpf_mlock, pri, wmesg, abstime);
1042 }
1043 
1044 static void
bpf_finalize_pktap(struct bpf_hdr * hp,struct pktap_header * pktaphdr)1045 bpf_finalize_pktap(struct bpf_hdr *hp, struct pktap_header *pktaphdr)
1046 {
1047 	if (pktaphdr->pth_flags & PTH_FLAG_V2_HDR) {
1048 		struct pktap_v2_hdr *pktap_v2_hdr;
1049 
1050 		pktap_v2_hdr = (struct pktap_v2_hdr *)pktaphdr;
1051 
1052 		if (pktap_v2_hdr->pth_flags & PTH_FLAG_DELAY_PKTAP) {
1053 			pktap_v2_finalize_proc_info(pktap_v2_hdr);
1054 		}
1055 	} else {
1056 		if (pktaphdr->pth_flags & PTH_FLAG_DELAY_PKTAP) {
1057 			pktap_finalize_proc_info(pktaphdr);
1058 		}
1059 
1060 		if (pktaphdr->pth_flags & PTH_FLAG_TSTAMP) {
1061 			hp->bh_tstamp.tv_sec = pktaphdr->pth_tstamp.tv_sec;
1062 			hp->bh_tstamp.tv_usec = pktaphdr->pth_tstamp.tv_usec;
1063 		}
1064 	}
1065 }
1066 
/*
 * Rotate the packet buffers in descriptor d.  Move the store buffer
 * into the hold slot, and the free buffer into the store slot.
 * Zero the length of the new store buffer.
 *
 * Note: in head drop mode, the hold buffer can be dropped so the first packet of the
 * store buffer cannot be compressed as it otherwise would refer to deleted data
 * in a dropped hold buffer that the reader process does not know about; clearing
 * bd_prev_slen below disables compression for that first packet.
 */
#define ROTATE_BUFFERS(d) do { \
	if (d->bd_hbuf_read) \
	        panic("rotating bpf buffers during read"); \
	(d)->bd_hbuf = (d)->bd_sbuf; \
	(d)->bd_hlen = (d)->bd_slen; \
	(d)->bd_hcnt = (d)->bd_scnt; \
	(d)->bd_sbuf = (d)->bd_fbuf; \
	(d)->bd_slen = 0; \
	(d)->bd_scnt = 0; \
	(d)->bd_fbuf = NULL; \
	if ((d)->bd_headdrop != 0) \
	        (d)->bd_prev_slen = 0; \
} while(false)
1089 
1090 /*
1091  *  bpfread - read next chunk of packets from buffers
1092  */
1093 int
bpfread(dev_t dev,struct uio * uio,int ioflag)1094 bpfread(dev_t dev, struct uio *uio, int ioflag)
1095 {
1096 	struct bpf_d *d;
1097 	caddr_t hbuf;
1098 	int timed_out, hbuf_len;
1099 	int error;
1100 	int flags;
1101 
1102 	lck_mtx_lock(bpf_mlock);
1103 
1104 	d = bpf_dtab[minor(dev)];
1105 	if (d == NULL || d == BPF_DEV_RESERVED ||
1106 	    (d->bd_flags & BPF_CLOSING) != 0) {
1107 		lck_mtx_unlock(bpf_mlock);
1108 		return ENXIO;
1109 	}
1110 
1111 	bpf_acquire_d(d);
1112 
1113 	/*
1114 	 * Restrict application to use a buffer the same size as
1115 	 * as kernel buffers.
1116 	 */
1117 	if (uio_resid(uio) != d->bd_bufsize) {
1118 		bpf_release_d(d);
1119 		lck_mtx_unlock(bpf_mlock);
1120 		return EINVAL;
1121 	}
1122 
1123 	if (d->bd_state == BPF_WAITING) {
1124 		bpf_stop_timer(d);
1125 	}
1126 
1127 	timed_out = (d->bd_state == BPF_TIMED_OUT);
1128 	d->bd_state = BPF_IDLE;
1129 
1130 	while (d->bd_hbuf_read) {
1131 		msleep((caddr_t)d, bpf_mlock, PRINET, "bpfread", NULL);
1132 	}
1133 
1134 	if ((d->bd_flags & BPF_CLOSING) != 0) {
1135 		bpf_release_d(d);
1136 		lck_mtx_unlock(bpf_mlock);
1137 		return ENXIO;
1138 	}
1139 	/*
1140 	 * If the hold buffer is empty, then do a timed sleep, which
1141 	 * ends when the timeout expires or when enough packets
1142 	 * have arrived to fill the store buffer.
1143 	 */
1144 	while (d->bd_hbuf == 0) {
1145 		if ((d->bd_immediate || timed_out || (ioflag & IO_NDELAY)) &&
1146 		    d->bd_slen != 0) {
1147 			/*
1148 			 * We're in immediate mode, or are reading
1149 			 * in non-blocking mode, or a timer was
1150 			 * started before the read (e.g., by select()
1151 			 * or poll()) and has expired and a packet(s)
1152 			 * either arrived since the previous
1153 			 * read or arrived while we were asleep.
1154 			 * Rotate the buffers and return what's here.
1155 			 */
1156 			ROTATE_BUFFERS(d);
1157 			break;
1158 		}
1159 
1160 		/*
1161 		 * No data is available, check to see if the bpf device
1162 		 * is still pointed at a real interface.  If not, return
1163 		 * ENXIO so that the userland process knows to rebind
1164 		 * it before using it again.
1165 		 */
1166 		if (d->bd_bif == NULL) {
1167 			bpf_release_d(d);
1168 			lck_mtx_unlock(bpf_mlock);
1169 			return ENXIO;
1170 		}
1171 		if (ioflag & IO_NDELAY) {
1172 			bpf_release_d(d);
1173 			lck_mtx_unlock(bpf_mlock);
1174 			return EWOULDBLOCK;
1175 		}
1176 		error = BPF_SLEEP(d, PRINET | PCATCH, "bpf", d->bd_rtout);
1177 		/*
1178 		 * Make sure device is still opened
1179 		 */
1180 		if ((d->bd_flags & BPF_CLOSING) != 0) {
1181 			bpf_release_d(d);
1182 			lck_mtx_unlock(bpf_mlock);
1183 			return ENXIO;
1184 		}
1185 
1186 		while (d->bd_hbuf_read) {
1187 			msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_read",
1188 			    NULL);
1189 		}
1190 
1191 		if ((d->bd_flags & BPF_CLOSING) != 0) {
1192 			bpf_release_d(d);
1193 			lck_mtx_unlock(bpf_mlock);
1194 			return ENXIO;
1195 		}
1196 
1197 		if (error == EINTR || error == ERESTART) {
1198 			if (d->bd_hbuf != NULL) {
1199 				/*
1200 				 * Because we msleep, the hold buffer might
1201 				 * be filled when we wake up.  Avoid rotating
1202 				 * in this case.
1203 				 */
1204 				break;
1205 			}
1206 			if (d->bd_slen != 0) {
1207 				/*
1208 				 * Sometimes we may be interrupted often and
1209 				 * the sleep above will not timeout.
1210 				 * Regardless, we should rotate the buffers
1211 				 * if there's any new data pending and
1212 				 * return it.
1213 				 */
1214 				ROTATE_BUFFERS(d);
1215 				break;
1216 			}
1217 			bpf_release_d(d);
1218 			lck_mtx_unlock(bpf_mlock);
1219 			if (error == ERESTART) {
1220 				os_log(OS_LOG_DEFAULT, "%s: bpf%d ERESTART to EINTR",
1221 				    __func__, d->bd_dev_minor);
1222 				error = EINTR;
1223 			}
1224 			return error;
1225 		}
1226 		if (error == EWOULDBLOCK) {
1227 			/*
1228 			 * On a timeout, return what's in the buffer,
1229 			 * which may be nothing.  If there is something
1230 			 * in the store buffer, we can rotate the buffers.
1231 			 */
1232 			if (d->bd_hbuf) {
1233 				/*
1234 				 * We filled up the buffer in between
1235 				 * getting the timeout and arriving
1236 				 * here, so we don't need to rotate.
1237 				 */
1238 				break;
1239 			}
1240 
1241 			if (d->bd_slen == 0) {
1242 				bpf_release_d(d);
1243 				lck_mtx_unlock(bpf_mlock);
1244 				return 0;
1245 			}
1246 			ROTATE_BUFFERS(d);
1247 			break;
1248 		}
1249 	}
1250 	/*
1251 	 * At this point, we know we have something in the hold slot.
1252 	 */
1253 
1254 	/*
1255 	 * Set the hold buffer read. So we do not
1256 	 * rotate the buffers until the hold buffer
1257 	 * read is complete. Also to avoid issues resulting
1258 	 * from page faults during disk sleep (<rdar://problem/13436396>).
1259 	 */
1260 	d->bd_hbuf_read = true;
1261 	hbuf = d->bd_hbuf;
1262 	hbuf_len = d->bd_hlen;
1263 	flags = d->bd_flags;
1264 	d->bd_bcs.bcs_total_read += d->bd_hcnt;
1265 	lck_mtx_unlock(bpf_mlock);
1266 
1267 	/*
1268 	 * Before we move data to userland, we fill out the extended
1269 	 * header fields.
1270 	 */
1271 	if (flags & BPF_EXTENDED_HDR) {
1272 		char *p;
1273 
1274 		p = hbuf;
1275 		while (p < hbuf + hbuf_len) {
1276 			struct bpf_hdr_ext *ehp;
1277 			uint32_t flowid;
1278 			struct so_procinfo soprocinfo;
1279 			int found = 0;
1280 
1281 			ehp = (struct bpf_hdr_ext *)(void *)p;
1282 			if ((flowid = ehp->bh_flowid) != 0) {
1283 				if (ehp->bh_flags & BPF_HDR_EXT_FLAGS_TCP) {
1284 					ehp->bh_flags &= ~BPF_HDR_EXT_FLAGS_TCP;
1285 					found = inp_findinpcb_procinfo(&tcbinfo,
1286 					    flowid, &soprocinfo);
1287 				} else if (ehp->bh_flags == BPF_HDR_EXT_FLAGS_UDP) {
1288 					ehp->bh_flags &= ~BPF_HDR_EXT_FLAGS_UDP;
1289 					found = inp_findinpcb_procinfo(&udbinfo,
1290 					    flowid, &soprocinfo);
1291 				}
1292 				if (found == 1) {
1293 					ehp->bh_pid = soprocinfo.spi_pid;
1294 					strlcpy(&ehp->bh_comm[0], &soprocinfo.spi_proc_name[0], sizeof(ehp->bh_comm));
1295 				}
1296 				ehp->bh_flowid = 0;
1297 			}
1298 
1299 			if ((flags & BPF_FINALIZE_PKTAP) != 0 && ehp->bh_complen == 0) {
1300 				struct pktap_header *pktaphdr;
1301 
1302 				pktaphdr = (struct pktap_header *)(void *)
1303 				    (p + BPF_WORDALIGN(ehp->bh_hdrlen));
1304 
1305 				bpf_finalize_pktap((struct bpf_hdr *) ehp,
1306 				    pktaphdr);
1307 			}
1308 			p += BPF_WORDALIGN(ehp->bh_hdrlen + ehp->bh_caplen);
1309 		}
1310 	} else if (flags & BPF_FINALIZE_PKTAP) {
1311 		char *p;
1312 
1313 		p = hbuf;
1314 
1315 		while (p < hbuf + hbuf_len) {
1316 			struct bpf_hdr *hp;
1317 			struct pktap_header *pktaphdr;
1318 
1319 			hp = (struct bpf_hdr *)(void *)p;
1320 
1321 			/*
1322 			 * Cannot finalize a compressed pktap header as we may not have
1323 			 * all the fields present
1324 			 */
1325 			if (d->bd_flags & BPF_COMP_ENABLED) {
1326 				struct bpf_comp_hdr *hcp;
1327 
1328 				hcp = (struct bpf_comp_hdr *)(void *)p;
1329 
1330 				if (hcp->bh_complen != 0) {
1331 					p += BPF_WORDALIGN(hcp->bh_hdrlen + hcp->bh_caplen);
1332 					continue;
1333 				}
1334 			}
1335 
1336 			pktaphdr = (struct pktap_header *)(void *)
1337 			    (p + BPF_WORDALIGN(hp->bh_hdrlen));
1338 
1339 			bpf_finalize_pktap(hp, pktaphdr);
1340 
1341 			p += BPF_WORDALIGN(hp->bh_hdrlen + hp->bh_caplen);
1342 		}
1343 	}
1344 
1345 	/*
1346 	 * Move data from hold buffer into user space.
1347 	 * We know the entire buffer is transferred since
1348 	 * we checked above that the read buffer is bpf_bufsize bytes.
1349 	 */
1350 	error = UIOMOVE(hbuf, hbuf_len, UIO_READ, uio);
1351 
1352 	lck_mtx_lock(bpf_mlock);
1353 	/*
1354 	 * Make sure device is still opened
1355 	 */
1356 	if ((d->bd_flags & BPF_CLOSING) != 0) {
1357 		bpf_release_d(d);
1358 		lck_mtx_unlock(bpf_mlock);
1359 		return ENXIO;
1360 	}
1361 
1362 	d->bd_hbuf_read = false;
1363 	d->bd_fbuf = d->bd_hbuf;
1364 	d->bd_hbuf = NULL;
1365 	d->bd_hlen = 0;
1366 	d->bd_hcnt = 0;
1367 	wakeup((caddr_t)d);
1368 
1369 	bpf_release_d(d);
1370 	lck_mtx_unlock(bpf_mlock);
1371 	return error;
1372 }
1373 
1374 /*
1375  * If there are processes sleeping on this descriptor, wake them up.
1376  */
1377 static void
bpf_wakeup(struct bpf_d * d)1378 bpf_wakeup(struct bpf_d *d)
1379 {
1380 	if (d->bd_state == BPF_WAITING) {
1381 		bpf_stop_timer(d);
1382 		d->bd_state = BPF_IDLE;
1383 	}
1384 	wakeup((caddr_t)d);
1385 	if (d->bd_async && d->bd_sig && d->bd_sigio) {
1386 		pgsigio(d->bd_sigio, d->bd_sig);
1387 	}
1388 
1389 	selwakeup(&d->bd_sel);
1390 	if ((d->bd_flags & BPF_KNOTE)) {
1391 		KNOTE(&d->bd_sel.si_note, 1);
1392 	}
1393 }
1394 
1395 static void
bpf_timed_out(void * arg,__unused void * dummy)1396 bpf_timed_out(void *arg, __unused void *dummy)
1397 {
1398 	struct bpf_d *d = (struct bpf_d *)arg;
1399 
1400 	lck_mtx_lock(bpf_mlock);
1401 	if (d->bd_state == BPF_WAITING) {
1402 		/*
1403 		 * There's a select or kqueue waiting for this; if there's
1404 		 * now stuff to read, wake it up.
1405 		 */
1406 		d->bd_state = BPF_TIMED_OUT;
1407 		if (d->bd_slen != 0) {
1408 			bpf_wakeup(d);
1409 		}
1410 	} else if (d->bd_state == BPF_DRAINING) {
1411 		/*
1412 		 * A close is waiting for this to finish.
1413 		 * Mark it as finished, and wake the close up.
1414 		 */
1415 		d->bd_state = BPF_IDLE;
1416 		bpf_wakeup(d);
1417 	}
1418 	lck_mtx_unlock(bpf_mlock);
1419 }
1420 
1421 /* keep in sync with bpf_movein above: */
1422 #define MAX_DATALINK_HDR_LEN    (sizeof(struct firewire_header))
1423 
1424 int
bpfwrite(dev_t dev,struct uio * uio,__unused int ioflag)1425 bpfwrite(dev_t dev, struct uio *uio, __unused int ioflag)
1426 {
1427 	struct bpf_d *d;
1428 	struct ifnet *ifp;
1429 	struct mbuf *m = NULL;
1430 	int error = 0;
1431 	char              dst_buf[SOCKADDR_HDR_LEN + MAX_DATALINK_HDR_LEN];
1432 	int datlen = 0;
1433 	int bif_dlt;
1434 	int bd_hdrcmplt;
1435 
1436 	lck_mtx_lock(bpf_mlock);
1437 
1438 	while (true) {
1439 		d = bpf_dtab[minor(dev)];
1440 		if (d == NULL || d == BPF_DEV_RESERVED ||
1441 		    (d->bd_flags & BPF_CLOSING) != 0) {
1442 			lck_mtx_unlock(bpf_mlock);
1443 			return ENXIO;
1444 		}
1445 
1446 		if (d->bd_hbuf_write) {
1447 			msleep((caddr_t)d, bpf_mlock, PRINET, "bpfwrite",
1448 			    NULL);
1449 		} else {
1450 			break;
1451 		}
1452 	}
1453 	d->bd_hbuf_write = true;
1454 
1455 	bpf_acquire_d(d);
1456 
1457 	++d->bd_wcount;
1458 
1459 	if (d->bd_bif == NULL) {
1460 		error = ENXIO;
1461 		goto done;
1462 	}
1463 
1464 	ifp = d->bd_bif->bif_ifp;
1465 
1466 	if (IFNET_IS_MANAGEMENT(ifp) &&
1467 	    IOCurrentTaskHasEntitlement(MANAGEMENT_DATA_ENTITLEMENT) == false) {
1468 		++d->bd_wdcount;
1469 		bpf_release_d(d);
1470 		lck_mtx_unlock(bpf_mlock);
1471 		return ENETDOWN;
1472 	}
1473 
1474 	if ((ifp->if_flags & IFF_UP) == 0) {
1475 		error = ENETDOWN;
1476 		goto done;
1477 	}
1478 	if (uio_resid(uio) == 0) {
1479 		error = 0;
1480 		goto done;
1481 	}
1482 	((struct sockaddr *)dst_buf)->sa_len = sizeof(dst_buf);
1483 
1484 	/*
1485 	 * fix for PR-6849527
1486 	 * geting variables onto stack before dropping lock for bpf_movein()
1487 	 */
1488 	bif_dlt = (int)d->bd_bif->bif_dlt;
1489 	bd_hdrcmplt  = d->bd_hdrcmplt;
1490 
1491 	/* bpf_movein allocating mbufs; drop lock */
1492 	lck_mtx_unlock(bpf_mlock);
1493 
1494 	error = bpf_movein(uio, ifp, bif_dlt, &m,
1495 	    bd_hdrcmplt ? NULL : (struct sockaddr *)dst_buf,
1496 	    &datlen);
1497 
1498 	/* take the lock again */
1499 	lck_mtx_lock(bpf_mlock);
1500 	if (error != 0) {
1501 		goto done;
1502 	}
1503 
1504 	/* verify the device is still open */
1505 	if ((d->bd_flags & BPF_CLOSING) != 0) {
1506 		error = ENXIO;
1507 		goto done;
1508 	}
1509 
1510 	if (d->bd_bif == NULL || d->bd_bif->bif_ifp != ifp) {
1511 		error = ENXIO;
1512 		goto done;
1513 	}
1514 
1515 	bpf_set_packet_service_class(m, d->bd_traffic_class);
1516 
1517 	lck_mtx_unlock(bpf_mlock);
1518 
1519 	/*
1520 	 * The driver frees the mbuf.
1521 	 */
1522 	if (d->bd_hdrcmplt) {
1523 		if (d->bd_bif->bif_send) {
1524 			error = d->bd_bif->bif_send(ifp, d->bd_bif->bif_dlt, m);
1525 		} else {
1526 			error = dlil_output(ifp, 0, m, NULL, NULL, 1, NULL);
1527 		}
1528 	} else {
1529 		error = dlil_output(ifp, PF_INET, m, NULL,
1530 		    (struct sockaddr *)dst_buf, 0, NULL);
1531 	}
1532 	/* Make sure we do not double free */
1533 	m = NULL;
1534 
1535 	lck_mtx_lock(bpf_mlock);
1536 done:
1537 	if (error != 0) {
1538 		++d->bd_wdcount;
1539 	}
1540 	if (m != NULL) {
1541 		m_freem(m);
1542 	}
1543 	d->bd_hbuf_write = false;
1544 	wakeup((caddr_t)d);
1545 	bpf_release_d(d);
1546 	lck_mtx_unlock(bpf_mlock);
1547 
1548 	return error;
1549 }
1550 
1551 /*
1552  * Reset a descriptor by flushing its packet buffer and clearing the
1553  * receive and drop counts.
1554  */
1555 static void
reset_d(struct bpf_d * d)1556 reset_d(struct bpf_d *d)
1557 {
1558 	if (d->bd_hbuf_read) {
1559 		panic("resetting buffers during read");
1560 	}
1561 
1562 	if (d->bd_hbuf) {
1563 		/* Free the hold buffer. */
1564 		d->bd_fbuf = d->bd_hbuf;
1565 		d->bd_hbuf = NULL;
1566 	}
1567 	d->bd_slen = 0;
1568 	d->bd_hlen = 0;
1569 	d->bd_scnt = 0;
1570 	d->bd_hcnt = 0;
1571 	d->bd_rcount = 0;
1572 	d->bd_dcount = 0;
1573 	d->bd_fcount = 0;
1574 	d->bd_wcount = 0;
1575 	d->bd_wdcount = 0;
1576 
1577 	d->bd_prev_slen = 0;
1578 }
1579 
1580 static struct bpf_d *
bpf_get_device_from_uuid(uuid_t uuid)1581 bpf_get_device_from_uuid(uuid_t uuid)
1582 {
1583 	unsigned int i;
1584 
1585 	for (i = 0; i < nbpfilter; i++) {
1586 		struct bpf_d *d = bpf_dtab[i];
1587 
1588 		if (d == NULL || d == BPF_DEV_RESERVED ||
1589 		    (d->bd_flags & BPF_CLOSING) != 0) {
1590 			continue;
1591 		}
1592 		if (uuid_compare(uuid, d->bd_uuid) == 0) {
1593 			return d;
1594 		}
1595 	}
1596 
1597 	return NULL;
1598 }
1599 
1600 /*
1601  * The BIOCSETUP command "atomically" attach to the interface and
1602  * copy the buffer from another interface. This minimizes the risk
1603  * of missing packet because this is done while holding
1604  * the BPF global lock
1605  */
static int
bpf_setup(struct bpf_d *d_to, uuid_t uuid_from, ifnet_t ifp)
{
	struct bpf_d *d_from;
	int error = 0;

	/* Caller must hold the global BPF lock for the whole operation */
	LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);

	/*
	 * Sanity checks
	 */
	d_from = bpf_get_device_from_uuid(uuid_from);
	if (d_from == NULL) {
		error = ENOENT;
		os_log_error(OS_LOG_DEFAULT,
		    "%s: uuids not found error %d",
		    __func__, error);
		return error;
	}
	/* Both descriptors must have been opened by the same process */
	if (d_from->bd_opened_by != d_to->bd_opened_by) {
		error = EACCES;
		os_log_error(OS_LOG_DEFAULT,
		    "%s: processes not matching error %d",
		    __func__, error);
		return error;
	}

	/*
	 * Prevent any read or write while copying by claiming both the
	 * read and write slots of each descriptor (msleep drops the lock)
	 */
	while (d_to->bd_hbuf_read || d_to->bd_hbuf_write) {
		msleep((caddr_t)d_to, bpf_mlock, PRINET, __func__, NULL);
	}
	d_to->bd_hbuf_read = true;
	d_to->bd_hbuf_write = true;

	while (d_from->bd_hbuf_read || d_from->bd_hbuf_write) {
		msleep((caddr_t)d_from, bpf_mlock, PRINET, __func__, NULL);
	}
	d_from->bd_hbuf_read = true;
	d_from->bd_hbuf_write = true;

	/*
	 * Verify the devices have not been closed
	 * (the msleep calls above may have dropped the lock)
	 */
	if (d_to->bd_flags & BPF_CLOSING) {
		error = ENXIO;
		os_log_error(OS_LOG_DEFAULT,
		    "%s: d_to is closing error %d",
		    __func__, error);
		goto done;
	}
	if (d_from->bd_flags & BPF_CLOSING) {
		error = ENXIO;
		os_log_error(OS_LOG_DEFAULT,
		    "%s: d_from is closing error %d",
		    __func__, error);
		goto done;
	}

	/*
	 * For now require the same buffer size
	 */
	if (d_from->bd_bufsize != d_to->bd_bufsize) {
		error = EINVAL;
		os_log_error(OS_LOG_DEFAULT,
		    "%s: bufsizes not matching error %d",
		    __func__, error);
		goto done;
	}

	/*
	 * Copy relevant options and flags
	 */
	d_to->bd_flags = d_from->bd_flags & (BPF_EXTENDED_HDR | BPF_WANT_PKTAP |
	    BPF_FINALIZE_PKTAP | BPF_TRUNCATE | BPF_PKTHDRV2 |
	    BPF_COMP_REQ | BPF_COMP_ENABLED);

	d_to->bd_headdrop = d_from->bd_headdrop;

	/*
	 * Allocate and copy the buffers
	 */
	error = bpf_allocbufs(d_to);
	if (error != 0) {
		goto done;
	}

	/*
	 * Make sure the buffers are setup as expected by bpf_setif()
	 */
	ASSERT(d_to->bd_hbuf == NULL);
	ASSERT(d_to->bd_sbuf != NULL);
	ASSERT(d_to->bd_fbuf != NULL);

	/*
	 * Copy the buffers and update the pointers and counts
	 */
	memcpy(d_to->bd_sbuf, d_from->bd_sbuf, d_from->bd_slen);
	d_to->bd_slen = d_from->bd_slen;
	d_to->bd_scnt = d_from->bd_scnt;

	/* A hold buffer on d_from is cloned into d_to's free buffer */
	if (d_from->bd_hbuf != NULL) {
		d_to->bd_hbuf = d_to->bd_fbuf;
		d_to->bd_fbuf = NULL;
		memcpy(d_to->bd_hbuf, d_from->bd_hbuf, d_from->bd_hlen);
	}
	d_to->bd_hlen = d_from->bd_hlen;
	d_to->bd_hcnt = d_from->bd_hcnt;

	/* Carry over the header-compression reference packet */
	if (d_to->bd_flags & BPF_COMP_REQ) {
		ASSERT(d_to->bd_prev_sbuf != NULL);
		ASSERT(d_to->bd_prev_fbuf != NULL);

		d_to->bd_prev_slen = d_from->bd_prev_slen;
		ASSERT(d_to->bd_prev_slen <= BPF_HDR_COMP_LEN_MAX);
		memcpy(d_to->bd_prev_sbuf, d_from->bd_prev_sbuf, BPF_HDR_COMP_LEN_MAX);
	}

	/* Copy the compression statistics as well */
	d_to->bd_bcs = d_from->bd_bcs;

	/*
	 * Attach to the interface:
	 * - don't reset the buffers
	 * - we already prevent reads and writes
	 * - the buffers are already allocated
	 */
	error = bpf_setif(d_to, ifp, false, true, true);
	if (error != 0) {
		os_log_error(OS_LOG_DEFAULT,
		    "%s: bpf_setif() failed error %d",
		    __func__, error);
		goto done;
	}
done:
	/* Release both descriptors and wake any waiting readers/writers */
	d_from->bd_hbuf_read = false;
	d_from->bd_hbuf_write = false;
	wakeup((caddr_t)d_from);

	d_to->bd_hbuf_read = false;
	d_to->bd_hbuf_write = false;
	wakeup((caddr_t)d_to);

	return error;
}
1751 
#if DEVELOPMENT || DEBUG
/*
 * X-macro list of ioctl commands understood by bpfioctl(), used by
 * log_bpf_ioctl_str() below to map a command value to its name.
 *
 * NOTE(review): BIOCGDIRECTION/BIOCSDIRECTION are handled by
 * bpfioctl() but absent from this list, so they log as a hex value --
 * verify whether they should be added.
 */
#define BPF_IOC_LIST \
	X(FIONREAD) \
	X(SIOCGIFADDR) \
	X(BIOCGBLEN) \
	X(BIOCSBLEN) \
	X(BIOCSETF32) \
	X(BIOCSETFNR32) \
	X(BIOCSETF64) \
	X(BIOCSETFNR64) \
	X(BIOCFLUSH) \
	X(BIOCPROMISC) \
	X(BIOCGDLT) \
	X(BIOCGDLTLIST) \
	X(BIOCSDLT) \
	X(BIOCGETIF) \
	X(BIOCSETIF) \
	X(BIOCSRTIMEOUT32) \
	X(BIOCSRTIMEOUT64) \
	X(BIOCGRTIMEOUT32) \
	X(BIOCGRTIMEOUT64) \
	X(BIOCGSTATS) \
	X(BIOCIMMEDIATE) \
	X(BIOCVERSION) \
	X(BIOCGHDRCMPLT) \
	X(BIOCSHDRCMPLT) \
	X(BIOCGSEESENT) \
	X(BIOCSSEESENT) \
	X(BIOCSETTC) \
	X(BIOCGETTC) \
	X(FIONBIO) \
	X(FIOASYNC) \
	X(BIOCSRSIG) \
	X(BIOCGRSIG) \
	X(BIOCSEXTHDR) \
	X(BIOCGIFATTACHCOUNT) \
	X(BIOCGWANTPKTAP) \
	X(BIOCSWANTPKTAP) \
	X(BIOCSHEADDROP) \
	X(BIOCGHEADDROP) \
	X(BIOCSTRUNCATE) \
	X(BIOCGETUUID) \
	X(BIOCSETUP) \
	X(BIOCSPKTHDRV2) \
	X(BIOCGHDRCOMP) \
	X(BIOCSHDRCOMP) \
	X(BIOCGHDRCOMPSTATS) \
	X(BIOCGHDRCOMPON)

/*
 * Log the symbolic name of a BPF ioctl command (debug builds only,
 * gated by the bpf_debug sysctl in bpfioctl()); unknown commands are
 * logged as their hex value.
 */
static void
log_bpf_ioctl_str(struct bpf_d *d, u_long cmd)
{
	const char *p = NULL;
	char str[32];

/* Expand each known command into a case that records its name */
#define X(x) case x: { p = #x ; printf("%s\n", p); break; }
	switch (cmd) {
		BPF_IOC_LIST
	}
#undef X
	if (p == NULL) {
		snprintf(str, sizeof(str), "0x%08x", (unsigned int)cmd);
		p = str;
	}
	os_log(OS_LOG_DEFAULT, "bpfioctl bpf%u %s",
	    d->bd_dev_minor, p);
}
#endif /* DEVELOPMENT || DEBUG */
1820 
1821 /*
1822  *  FIONREAD		Check for read packet available.
1823  *  SIOCGIFADDR		Get interface address - convenient hook to driver.
1824  *  BIOCGBLEN		Get buffer len [for read()].
1825  *  BIOCSETF		Set ethernet read filter.
1826  *  BIOCFLUSH		Flush read packet buffer.
1827  *  BIOCPROMISC		Put interface into promiscuous mode.
1828  *  BIOCGDLT		Get link layer type.
1829  *  BIOCGETIF		Get interface name.
1830  *  BIOCSETIF		Set interface.
1831  *  BIOCSRTIMEOUT	Set read timeout.
1832  *  BIOCGRTIMEOUT	Get read timeout.
1833  *  BIOCGSTATS		Get packet stats.
1834  *  BIOCIMMEDIATE	Set immediate mode.
1835  *  BIOCVERSION		Get filter language version.
1836  *  BIOCGHDRCMPLT	Get "header already complete" flag
1837  *  BIOCSHDRCMPLT	Set "header already complete" flag
1838  *  BIOCGSEESENT	Get "see packets sent" flag
1839  *  BIOCSSEESENT	Set "see packets sent" flag
1840  *  BIOCSETTC		Set traffic class.
1841  *  BIOCGETTC		Get traffic class.
1842  *  BIOCSEXTHDR		Set "extended header" flag
1843  *  BIOCSHEADDROP	Drop head of the buffer if user is not reading
1844  *  BIOCGHEADDROP	Get "head-drop" flag
1845  */
1846 /* ARGSUSED */
1847 int
bpfioctl(dev_t dev,u_long cmd,caddr_t addr,__unused int flags,struct proc * p)1848 bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags,
1849     struct proc *p)
1850 {
1851 	struct bpf_d *d;
1852 	int error = 0;
1853 	u_int int_arg;
1854 	struct ifreq ifr = {};
1855 
1856 	lck_mtx_lock(bpf_mlock);
1857 
1858 	d = bpf_dtab[minor(dev)];
1859 	if (d == NULL || d == BPF_DEV_RESERVED ||
1860 	    (d->bd_flags & BPF_CLOSING) != 0) {
1861 		lck_mtx_unlock(bpf_mlock);
1862 		return ENXIO;
1863 	}
1864 
1865 	bpf_acquire_d(d);
1866 
1867 	if (d->bd_state == BPF_WAITING) {
1868 		bpf_stop_timer(d);
1869 	}
1870 	d->bd_state = BPF_IDLE;
1871 
1872 #if DEVELOPMENT || DEBUG
1873 	if (bpf_debug > 0) {
1874 		log_bpf_ioctl_str(d, cmd);
1875 	}
1876 #endif /* DEVELOPMENT || DEBUG */
1877 
1878 	switch (cmd) {
1879 	default:
1880 		error = EINVAL;
1881 		break;
1882 
1883 	/*
1884 	 * Check for read packet available.
1885 	 */
1886 	case FIONREAD:                  /* int */
1887 	{
1888 		int n;
1889 
1890 		n = d->bd_slen;
1891 		if (d->bd_hbuf && d->bd_hbuf_read) {
1892 			n += d->bd_hlen;
1893 		}
1894 
1895 		bcopy(&n, addr, sizeof(n));
1896 		break;
1897 	}
1898 
1899 	case SIOCGIFADDR:               /* struct ifreq */
1900 	{
1901 		struct ifnet *ifp;
1902 
1903 		if (d->bd_bif == 0) {
1904 			error = EINVAL;
1905 		} else {
1906 			ifp = d->bd_bif->bif_ifp;
1907 			error = ifnet_ioctl(ifp, 0, cmd, addr);
1908 		}
1909 		break;
1910 	}
1911 
1912 	/*
1913 	 * Get buffer len [for read()].
1914 	 */
1915 	case BIOCGBLEN:                 /* u_int */
1916 		bcopy(&d->bd_bufsize, addr, sizeof(u_int));
1917 		break;
1918 
1919 	/*
1920 	 * Set buffer length.
1921 	 */
1922 	case BIOCSBLEN: {               /* u_int */
1923 		u_int size;
1924 
1925 		if (d->bd_bif != 0 || (d->bd_flags & BPF_DETACHING)) {
1926 			/*
1927 			 * Interface already attached, unable to change buffers
1928 			 */
1929 			error = EINVAL;
1930 			break;
1931 		}
1932 		bcopy(addr, &size, sizeof(size));
1933 
1934 		if (size > BPF_BUFSIZE_CAP) {
1935 			d->bd_bufsize = BPF_BUFSIZE_CAP;
1936 
1937 			os_log_info(OS_LOG_DEFAULT,
1938 			    "bpf%d BIOCSBLEN capped to %u from %u",
1939 			    minor(dev), d->bd_bufsize, size);
1940 		} else if (size < BPF_MINBUFSIZE) {
1941 			d->bd_bufsize = BPF_MINBUFSIZE;
1942 
1943 			os_log_info(OS_LOG_DEFAULT,
1944 			    "bpf%d BIOCSBLEN bumped to %u from %u",
1945 			    minor(dev), d->bd_bufsize, size);
1946 		} else {
1947 			d->bd_bufsize = size;
1948 
1949 			os_log_info(OS_LOG_DEFAULT,
1950 			    "bpf%d BIOCSBLEN %u",
1951 			    minor(dev), d->bd_bufsize);
1952 		}
1953 
1954 		/* It's a read/write ioctl */
1955 		bcopy(&d->bd_bufsize, addr, sizeof(u_int));
1956 		break;
1957 	}
1958 	/*
1959 	 * Set link layer read filter.
1960 	 */
1961 	case BIOCSETF32:
1962 	case BIOCSETFNR32: {            /* struct bpf_program32 */
1963 		struct bpf_program32 prg32;
1964 
1965 		bcopy(addr, &prg32, sizeof(prg32));
1966 		error = bpf_setf(d, prg32.bf_len,
1967 		    CAST_USER_ADDR_T(prg32.bf_insns), cmd);
1968 		break;
1969 	}
1970 
1971 	case BIOCSETF64:
1972 	case BIOCSETFNR64: {            /* struct bpf_program64 */
1973 		struct bpf_program64 prg64;
1974 
1975 		bcopy(addr, &prg64, sizeof(prg64));
1976 		error = bpf_setf(d, prg64.bf_len, CAST_USER_ADDR_T(prg64.bf_insns), cmd);
1977 		break;
1978 	}
1979 
1980 	/*
1981 	 * Flush read packet buffer.
1982 	 */
1983 	case BIOCFLUSH:
1984 		while (d->bd_hbuf_read) {
1985 			msleep((caddr_t)d, bpf_mlock, PRINET, "BIOCFLUSH",
1986 			    NULL);
1987 		}
1988 		if ((d->bd_flags & BPF_CLOSING) != 0) {
1989 			error = ENXIO;
1990 			break;
1991 		}
1992 		reset_d(d);
1993 		break;
1994 
1995 	/*
1996 	 * Put interface into promiscuous mode.
1997 	 */
1998 	case BIOCPROMISC:
1999 		if (d->bd_bif == 0) {
2000 			/*
2001 			 * No interface attached yet.
2002 			 */
2003 			error = EINVAL;
2004 			break;
2005 		}
2006 		if (d->bd_promisc == 0) {
2007 			lck_mtx_unlock(bpf_mlock);
2008 			error = ifnet_set_promiscuous(d->bd_bif->bif_ifp, 1);
2009 			lck_mtx_lock(bpf_mlock);
2010 			if (error == 0) {
2011 				d->bd_promisc = 1;
2012 			}
2013 		}
2014 		break;
2015 
2016 	/*
2017 	 * Get device parameters.
2018 	 */
2019 	case BIOCGDLT:                  /* u_int */
2020 		if (d->bd_bif == 0) {
2021 			error = EINVAL;
2022 		} else {
2023 			bcopy(&d->bd_bif->bif_dlt, addr, sizeof(u_int));
2024 		}
2025 		break;
2026 
2027 	/*
2028 	 * Get a list of supported data link types.
2029 	 */
2030 	case BIOCGDLTLIST:              /* struct bpf_dltlist */
2031 		if (d->bd_bif == NULL) {
2032 			error = EINVAL;
2033 		} else {
2034 			error = bpf_getdltlist(d, addr, p);
2035 		}
2036 		break;
2037 
2038 	/*
2039 	 * Set data link type.
2040 	 */
2041 	case BIOCSDLT:                  /* u_int */
2042 		if (d->bd_bif == NULL) {
2043 			error = EINVAL;
2044 		} else {
2045 			u_int dlt;
2046 
2047 			bcopy(addr, &dlt, sizeof(dlt));
2048 
2049 			if (dlt == DLT_PKTAP &&
2050 			    !(d->bd_flags & BPF_WANT_PKTAP)) {
2051 				dlt = DLT_RAW;
2052 			}
2053 			error = bpf_setdlt(d, dlt);
2054 		}
2055 		break;
2056 
2057 	/*
2058 	 * Get interface name.
2059 	 */
2060 	case BIOCGETIF:                 /* struct ifreq */
2061 		if (d->bd_bif == 0) {
2062 			error = EINVAL;
2063 		} else {
2064 			struct ifnet *const ifp = d->bd_bif->bif_ifp;
2065 
2066 			snprintf(((struct ifreq *)(void *)addr)->ifr_name,
2067 			    sizeof(ifr.ifr_name), "%s", if_name(ifp));
2068 		}
2069 		break;
2070 
2071 	/*
2072 	 * Set interface.
2073 	 */
2074 	case BIOCSETIF: {               /* struct ifreq */
2075 		ifnet_t ifp;
2076 
2077 		bcopy(addr, &ifr, sizeof(ifr));
2078 		ifr.ifr_name[IFNAMSIZ - 1] = '\0';
2079 		ifp = ifunit(ifr.ifr_name);
2080 		if (ifp == NULL) {
2081 			error = ENXIO;
2082 		} else {
2083 			error = bpf_setif(d, ifp, true, false, false);
2084 		}
2085 		break;
2086 	}
2087 
2088 	/*
2089 	 * Set read timeout.
2090 	 */
2091 	case BIOCSRTIMEOUT32: {         /* struct user32_timeval */
2092 		struct user32_timeval _tv;
2093 		struct timeval tv;
2094 
2095 		bcopy(addr, &_tv, sizeof(_tv));
2096 		tv.tv_sec  = _tv.tv_sec;
2097 		tv.tv_usec = _tv.tv_usec;
2098 
2099 		/*
2100 		 * Subtract 1 tick from tvtohz() since this isn't
2101 		 * a one-shot timer.
2102 		 */
2103 		if ((error = itimerfix(&tv)) == 0) {
2104 			d->bd_rtout = tvtohz(&tv) - 1;
2105 		}
2106 		break;
2107 	}
2108 
2109 	case BIOCSRTIMEOUT64: {         /* struct user64_timeval */
2110 		struct user64_timeval _tv;
2111 		struct timeval tv;
2112 
2113 		bcopy(addr, &_tv, sizeof(_tv));
2114 		tv.tv_sec  = (__darwin_time_t)_tv.tv_sec;
2115 		tv.tv_usec = _tv.tv_usec;
2116 
2117 		/*
2118 		 * Subtract 1 tick from tvtohz() since this isn't
2119 		 * a one-shot timer.
2120 		 */
2121 		if ((error = itimerfix(&tv)) == 0) {
2122 			d->bd_rtout = tvtohz(&tv) - 1;
2123 		}
2124 		break;
2125 	}
2126 
2127 	/*
2128 	 * Get read timeout.
2129 	 */
2130 	case BIOCGRTIMEOUT32: {         /* struct user32_timeval */
2131 		struct user32_timeval tv;
2132 
2133 		bzero(&tv, sizeof(tv));
2134 		tv.tv_sec = d->bd_rtout / hz;
2135 		tv.tv_usec = (d->bd_rtout % hz) * tick;
2136 		bcopy(&tv, addr, sizeof(tv));
2137 		break;
2138 	}
2139 
2140 	case BIOCGRTIMEOUT64: {         /* struct user64_timeval */
2141 		struct user64_timeval tv;
2142 
2143 		bzero(&tv, sizeof(tv));
2144 		tv.tv_sec = d->bd_rtout / hz;
2145 		tv.tv_usec = (d->bd_rtout % hz) * tick;
2146 		bcopy(&tv, addr, sizeof(tv));
2147 		break;
2148 	}
2149 
2150 	/*
2151 	 * Get packet stats.
2152 	 */
2153 	case BIOCGSTATS: {              /* struct bpf_stat */
2154 		struct bpf_stat bs;
2155 
2156 		bzero(&bs, sizeof(bs));
2157 		bs.bs_recv = (u_int)d->bd_rcount;
2158 		bs.bs_drop = (u_int)d->bd_dcount;
2159 		bcopy(&bs, addr, sizeof(bs));
2160 		break;
2161 	}
2162 
2163 	/*
2164 	 * Set immediate mode.
2165 	 */
2166 	case BIOCIMMEDIATE:             /* u_int */
2167 		d->bd_immediate = *(u_char *)(void *)addr;
2168 		break;
2169 
2170 	case BIOCVERSION: {             /* struct bpf_version */
2171 		struct bpf_version bv;
2172 
2173 		bzero(&bv, sizeof(bv));
2174 		bv.bv_major = BPF_MAJOR_VERSION;
2175 		bv.bv_minor = BPF_MINOR_VERSION;
2176 		bcopy(&bv, addr, sizeof(bv));
2177 		break;
2178 	}
2179 
2180 	/*
2181 	 * Get "header already complete" flag
2182 	 */
2183 	case BIOCGHDRCMPLT:             /* u_int */
2184 		bcopy(&d->bd_hdrcmplt, addr, sizeof(u_int));
2185 		break;
2186 
2187 	/*
2188 	 * Set "header already complete" flag
2189 	 */
2190 	case BIOCSHDRCMPLT:             /* u_int */
2191 		bcopy(addr, &int_arg, sizeof(int_arg));
2192 		d->bd_hdrcmplt = int_arg ? 1 : 0;
2193 		break;
2194 
2195 	/*
2196 	 * Get "see sent packets" flag
2197 	 */
2198 	case BIOCGSEESENT: {            /* u_int */
2199 		int_arg = 0;
2200 
2201 		if (d->bd_direction & BPF_D_OUT) {
2202 			int_arg = 1;
2203 		}
2204 		bcopy(&int_arg, addr, sizeof(u_int));
2205 		break;
2206 	}
2207 	/*
2208 	 * Set "see sent packets" flag
2209 	 */
2210 	case BIOCSSEESENT: {            /* u_int */
2211 		bcopy(addr, &int_arg, sizeof(u_int));
2212 
2213 		if (int_arg == 0) {
2214 			d->bd_direction = BPF_D_IN;
2215 		} else {
2216 			d->bd_direction = BPF_D_INOUT;
2217 		}
2218 		break;
2219 	}
2220 	/*
2221 	 * Get direction of tapped packets that can be seen for reading
2222 	 */
2223 	case BIOCGDIRECTION: {          /* u_int */
2224 		int_arg = d->bd_direction;
2225 
2226 		bcopy(&int_arg, addr, sizeof(u_int));
2227 		break;
2228 	}
2229 	/*
2230 	 * Set direction of tapped packets that can be seen for reading
2231 	 */
2232 	case BIOCSDIRECTION: {          /* u_int */
2233 		bcopy(addr, &int_arg, sizeof(u_int));
2234 
2235 		switch (int_arg) {
2236 		case BPF_D_NONE:
2237 		case BPF_D_IN:
2238 		case BPF_D_OUT:
2239 		case BPF_D_INOUT:
2240 			d->bd_direction = int_arg;
2241 			break;
2242 		default:
2243 			error = EINVAL;
2244 			break;
2245 		}
2246 		break;
2247 	}
2248 	/*
2249 	 * Set traffic service class
2250 	 */
2251 	case BIOCSETTC: {               /* int */
2252 		int tc;
2253 
2254 		bcopy(addr, &tc, sizeof(int));
2255 		error = bpf_set_traffic_class(d, tc);
2256 		break;
2257 	}
2258 
2259 	/*
2260 	 * Get traffic service class
2261 	 */
2262 	case BIOCGETTC:                 /* int */
2263 		bcopy(&d->bd_traffic_class, addr, sizeof(int));
2264 		break;
2265 
2266 	case FIONBIO:           /* Non-blocking I/O; int */
2267 		break;
2268 
2269 	case FIOASYNC:          /* Send signal on receive packets; int */
2270 		bcopy(addr, &d->bd_async, sizeof(int));
2271 		break;
2272 
2273 	case BIOCSRSIG: {         /* Set receive signal; u_int */
2274 		u_int sig;
2275 
2276 		bcopy(addr, &sig, sizeof(u_int));
2277 
2278 		if (sig >= NSIG) {
2279 			error = EINVAL;
2280 		} else {
2281 			d->bd_sig = sig;
2282 		}
2283 		break;
2284 	}
2285 	case BIOCGRSIG:                 /* u_int */
2286 		bcopy(&d->bd_sig, addr, sizeof(u_int));
2287 		break;
2288 
2289 	case BIOCSEXTHDR:               /* u_int */
2290 		bcopy(addr, &int_arg, sizeof(int_arg));
2291 		if (int_arg) {
2292 			d->bd_flags |= BPF_EXTENDED_HDR;
2293 		} else {
2294 			d->bd_flags &= ~BPF_EXTENDED_HDR;
2295 		}
2296 		break;
2297 
2298 	case BIOCGIFATTACHCOUNT: {              /* struct ifreq */
2299 		ifnet_t ifp;
2300 		struct bpf_if *bp;
2301 
2302 		bcopy(addr, &ifr, sizeof(ifr));
2303 		ifr.ifr_name[IFNAMSIZ - 1] = '\0';
2304 		ifp = ifunit(ifr.ifr_name);
2305 		if (ifp == NULL) {
2306 			error = ENXIO;
2307 			break;
2308 		}
2309 		ifr.ifr_intval = 0;
2310 		for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
2311 			struct bpf_d *bpf_d;
2312 
2313 			if (bp->bif_ifp == NULL || bp->bif_ifp != ifp) {
2314 				continue;
2315 			}
2316 			for (bpf_d = bp->bif_dlist; bpf_d;
2317 			    bpf_d = bpf_d->bd_next) {
2318 				ifr.ifr_intval += 1;
2319 			}
2320 		}
2321 		bcopy(&ifr, addr, sizeof(ifr));
2322 		break;
2323 	}
2324 	case BIOCGWANTPKTAP:                    /* u_int */
2325 		int_arg = d->bd_flags & BPF_WANT_PKTAP ? 1 : 0;
2326 		bcopy(&int_arg, addr, sizeof(int_arg));
2327 		break;
2328 
2329 	case BIOCSWANTPKTAP:                    /* u_int */
2330 		bcopy(addr, &int_arg, sizeof(int_arg));
2331 		if (int_arg) {
2332 			d->bd_flags |= BPF_WANT_PKTAP;
2333 		} else {
2334 			d->bd_flags &= ~BPF_WANT_PKTAP;
2335 		}
2336 		break;
2337 
2338 	case BIOCSHEADDROP:
2339 		bcopy(addr, &int_arg, sizeof(int_arg));
2340 		d->bd_headdrop = int_arg ? 1 : 0;
2341 		break;
2342 
2343 	case BIOCGHEADDROP:
2344 		bcopy(&d->bd_headdrop, addr, sizeof(int));
2345 		break;
2346 
2347 	case BIOCSTRUNCATE:
2348 		bcopy(addr, &int_arg, sizeof(int_arg));
2349 		if (int_arg) {
2350 			d->bd_flags |=  BPF_TRUNCATE;
2351 		} else {
2352 			d->bd_flags &= ~BPF_TRUNCATE;
2353 		}
2354 		break;
2355 
2356 	case BIOCGETUUID:
2357 		bcopy(&d->bd_uuid, addr, sizeof(uuid_t));
2358 		break;
2359 
2360 	case BIOCSETUP: {
2361 		struct bpf_setup_args bsa;
2362 		ifnet_t ifp;
2363 
2364 		bcopy(addr, &bsa, sizeof(struct bpf_setup_args));
2365 		bsa.bsa_ifname[IFNAMSIZ - 1] = 0;
2366 		ifp = ifunit(bsa.bsa_ifname);
2367 		if (ifp == NULL) {
2368 			error = ENXIO;
2369 			os_log_error(OS_LOG_DEFAULT,
2370 			    "%s: ifnet not found for %s error %d",
2371 			    __func__, bsa.bsa_ifname, error);
2372 			break;
2373 		}
2374 
2375 		error = bpf_setup(d, bsa.bsa_uuid, ifp);
2376 		break;
2377 	}
2378 	case BIOCSPKTHDRV2:
2379 		bcopy(addr, &int_arg, sizeof(int_arg));
2380 		if (int_arg != 0) {
2381 			d->bd_flags |= BPF_PKTHDRV2;
2382 		} else {
2383 			d->bd_flags &= ~BPF_PKTHDRV2;
2384 		}
2385 		break;
2386 
2387 	case BIOCGPKTHDRV2:
2388 		int_arg = d->bd_flags & BPF_PKTHDRV2 ? 1 : 0;
2389 		bcopy(&int_arg, addr, sizeof(int_arg));
2390 		break;
2391 
2392 	case BIOCGHDRCOMP:
2393 		int_arg = d->bd_flags & BPF_COMP_REQ ? 1 : 0;
2394 		bcopy(&int_arg, addr, sizeof(int_arg));
2395 		break;
2396 
2397 	case BIOCSHDRCOMP:
2398 		bcopy(addr, &int_arg, sizeof(int_arg));
2399 		if (int_arg != 0 && int_arg != 1) {
2400 			return EINVAL;
2401 		}
2402 		if (d->bd_bif != 0 || (d->bd_flags & BPF_DETACHING)) {
2403 			/*
2404 			 * Interface already attached, unable to change buffers
2405 			 */
2406 			error = EINVAL;
2407 			break;
2408 		}
2409 		if (int_arg != 0) {
2410 			d->bd_flags |= BPF_COMP_REQ;
2411 			if (bpf_hdr_comp_enable != 0) {
2412 				d->bd_flags |= BPF_COMP_ENABLED;
2413 			}
2414 		} else {
2415 			d->bd_flags &= ~(BPF_COMP_REQ | BPF_COMP_ENABLED);
2416 		}
2417 		break;
2418 
2419 	case BIOCGHDRCOMPON:
2420 		int_arg = d->bd_flags & BPF_COMP_ENABLED ? 1 : 0;
2421 		bcopy(&int_arg, addr, sizeof(int_arg));
2422 		break;
2423 
2424 	case BIOCGHDRCOMPSTATS: {
2425 		struct bpf_comp_stats bcs = {};
2426 
2427 		bcs = d->bd_bcs;
2428 
2429 		bcopy(&bcs, addr, sizeof(bcs));
2430 		break;
2431 	}
2432 	}
2433 
2434 	bpf_release_d(d);
2435 	lck_mtx_unlock(bpf_mlock);
2436 
2437 	return error;
2438 }
2439 
2440 /*
2441  * Set d's packet filter program to fp.  If this file already has a filter,
2442  * free it and replace it.  Returns EINVAL for bogus requests.
2443  */
static int
bpf_setf(struct bpf_d *d, u_int bf_len, user_addr_t bf_insns,
    u_long cmd)
{
	struct bpf_insn *fcode, *old;
	u_int flen, size;

	/* Wait for any in-flight hold-buffer read to drain. */
	while (d->bd_hbuf_read) {
		msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_setf", NULL);
	}

	/* The descriptor may have started closing while we slept. */
	if ((d->bd_flags & BPF_CLOSING) != 0) {
		return ENXIO;
	}

	old = d->bd_filter;
	if (bf_insns == USER_ADDR_NULL) {
		/* A NULL program with a non-zero length is a bogus request. */
		if (bf_len != 0) {
			return EINVAL;
		}
		/* Clearing the filter: flush buffers, free the old program. */
		d->bd_filter = NULL;
		reset_d(d);
		if (old != 0) {
			kfree_data_addr(old);
		}
		return 0;
	}
	flen = bf_len;
	if (flen > BPF_MAXINSNS) {
		return EINVAL;
	}

	/* Size in bytes; flen was bounded by BPF_MAXINSNS above. */
	size = flen * sizeof(struct bpf_insn);
	fcode = (struct bpf_insn *) kalloc_data(size, Z_WAITOK | Z_ZERO);
	if (fcode == NULL) {
		return ENOMEM;
	}
	/* Copy the program in from userland and verify it is well-formed. */
	if (copyin(bf_insns, (caddr_t)fcode, size) == 0 &&
	    bpf_validate(fcode, (int)flen)) {
		d->bd_filter = fcode;

		/* BIOCSETF32/BIOCSETF64 requests also flush buffered packets. */
		if (cmd == BIOCSETF32 || cmd == BIOCSETF64) {
			reset_d(d);
		}

		if (old != 0) {
			kfree_data_addr(old);
		}

		return 0;
	}
	kfree_data(fcode, size);
	return EINVAL;
}
2498 
2499 /*
2500  * Detach a file from its current interface (if attached at all) and attach
2501  * to the interface indicated by the name stored in ifr.
2502  * Return an errno or 0.
2503  */
static int
bpf_setif(struct bpf_d *d, ifnet_t theywant, bool do_reset, bool has_hbuf_read_write,
    bool has_bufs_allocated)
{
	struct bpf_if *bp;
	int error;

	/*
	 * Unless the caller already owns the hold-buffer state, wait for
	 * any in-flight hold-buffer read/write to drain.
	 */
	while (!has_hbuf_read_write && (d->bd_hbuf_read || d->bd_hbuf_write)) {
		msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_setif", NULL);
	}

	/* The descriptor may have started closing while we slept. */
	if ((d->bd_flags & BPF_CLOSING) != 0) {
		return ENXIO;
	}

	/*
	 * Look through attached interfaces for the named one.
	 */
	for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
		struct ifnet *ifp = bp->bif_ifp;

		if (ifp == 0 || ifp != theywant) {
			continue;
		}
		/*
		 * Do not use DLT_PKTAP, unless requested explicitly
		 */
		if (bp->bif_dlt == DLT_PKTAP && !(d->bd_flags & BPF_WANT_PKTAP)) {
			continue;
		}
		/*
		 * Skip the coprocessor interface
		 */
		if (!intcoproc_unrestricted && IFNET_IS_INTCOPROC(ifp)) {
			continue;
		}
		/*
		 * We found the requested interface.
		 * Allocate the packet buffers.
		 */
		if (has_bufs_allocated == false) {
			error = bpf_allocbufs(d);
			if (error != 0) {
				return error;
			}
		}
		/*
		 * Detach if attached to something else.
		 */
		if (bp != d->bd_bif) {
			if (d->bd_bif != NULL) {
				if (bpf_detachd(d) != 0) {
					return ENXIO;
				}
			}
			if (bpf_attachd(d, bp) != 0) {
				return ENXIO;
			}
		}
		if (do_reset) {
			reset_d(d);
		}
		os_log(OS_LOG_DEFAULT, "bpf%u attached to %s",
		    d->bd_dev_minor, if_name(theywant));
		return 0;
	}
	/* Not found. */
	return ENXIO;
}
2573 
2574 /*
2575  * Get a list of available data link type of the interface.
2576  */
static int
bpf_getdltlist(struct bpf_d *d, caddr_t addr, struct proc *p)
{
	u_int           n;
	int             error;
	struct ifnet    *ifp;
	struct bpf_if   *bp;
	user_addr_t     dlist;
	struct bpf_dltlist bfl;

	bcopy(addr, &bfl, sizeof(bfl));
	/* The user list pointer is laid out differently for 32/64-bit callers. */
	if (proc_is64bit(p)) {
		dlist = (user_addr_t)bfl.bfl_u.bflu_pad;
	} else {
		dlist = CAST_USER_ADDR_T(bfl.bfl_u.bflu_list);
	}

	ifp = d->bd_bif->bif_ifp;
	n = 0;
	error = 0;

	/* Count (and, if a buffer was supplied, copy out) each DLT for ifp. */
	for (bp = bpf_iflist; bp; bp = bp->bif_next) {
		if (bp->bif_ifp != ifp) {
			continue;
		}
		/*
		 * Do not use DLT_PKTAP, unless requested explicitly
		 */
		if (bp->bif_dlt == DLT_PKTAP && !(d->bd_flags & BPF_WANT_PKTAP)) {
			continue;
		}
		if (dlist != USER_ADDR_NULL) {
			/* User buffer has no room for another entry. */
			if (n >= bfl.bfl_len) {
				return ENOMEM;
			}
			error = copyout(&bp->bif_dlt, dlist,
			    sizeof(bp->bif_dlt));
			if (error != 0) {
				break;
			}
			dlist += sizeof(bp->bif_dlt);
		}
		n++;
	}
	/* Report back how many entries were found/written. */
	bfl.bfl_len = n;
	bcopy(&bfl, addr, sizeof(bfl));

	return error;
}
2626 
2627 /*
2628  * Set the data link type of a BPF instance.
2629  */
static int
bpf_setdlt(struct bpf_d *d, uint32_t dlt)
{
	int error, opromisc;
	struct ifnet *ifp;
	struct bpf_if *bp;

	/* Nothing to do if the requested DLT is already selected. */
	if (d->bd_bif->bif_dlt == dlt) {
		return 0;
	}

	/* Wait for any in-flight hold-buffer read to drain. */
	while (d->bd_hbuf_read) {
		msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_setdlt", NULL);
	}

	/* The descriptor may have started closing while we slept. */
	if ((d->bd_flags & BPF_CLOSING) != 0) {
		return ENXIO;
	}

	/* Find an attachment point on the same interface with this DLT. */
	ifp = d->bd_bif->bif_ifp;
	for (bp = bpf_iflist; bp; bp = bp->bif_next) {
		if (bp->bif_ifp == ifp && bp->bif_dlt == dlt) {
			/*
			 * Do not use DLT_PKTAP, unless requested explicitly
			 */
			if (bp->bif_dlt == DLT_PKTAP &&
			    !(d->bd_flags & BPF_WANT_PKTAP)) {
				continue;
			}
			break;
		}
	}
	if (bp != NULL) {
		/* Re-attach to the new bpf_if, restoring promiscuous mode. */
		opromisc = d->bd_promisc;
		if (bpf_detachd(d) != 0) {
			return ENXIO;
		}
		error = bpf_attachd(d, bp);
		if (error != 0) {
			os_log_error(OS_LOG_DEFAULT,
			    "bpf_setdlt: bpf%d bpf_attachd %s error %d",
			    d->bd_dev_minor, if_name(bp->bif_ifp),
			    error);
			return error;
		}
		reset_d(d);
		if (opromisc) {
			/*
			 * Drop bpf_mlock around ifnet_set_promiscuous to
			 * respect the lock ordering described above
			 * bpf_tap_imp (it may take IOKit's gateLock).
			 */
			lck_mtx_unlock(bpf_mlock);
			error = ifnet_set_promiscuous(bp->bif_ifp, 1);
			lck_mtx_lock(bpf_mlock);
			if (error != 0) {
				os_log_error(OS_LOG_DEFAULT,
				    "bpf_setdlt: bpf%d ifpromisc %s error %d",
				    d->bd_dev_minor, if_name(bp->bif_ifp), error);
			} else {
				d->bd_promisc = 1;
			}
		}
	}
	return bp == NULL ? EINVAL : 0;
}
2691 
2692 static int
bpf_set_traffic_class(struct bpf_d * d,int tc)2693 bpf_set_traffic_class(struct bpf_d *d, int tc)
2694 {
2695 	int error = 0;
2696 
2697 	if (!SO_VALID_TC(tc)) {
2698 		error = EINVAL;
2699 	} else {
2700 		d->bd_traffic_class = tc;
2701 	}
2702 
2703 	return error;
2704 }
2705 
2706 static void
bpf_set_packet_service_class(struct mbuf * m,int tc)2707 bpf_set_packet_service_class(struct mbuf *m, int tc)
2708 {
2709 	if (!(m->m_flags & M_PKTHDR)) {
2710 		return;
2711 	}
2712 
2713 	VERIFY(SO_VALID_TC(tc));
2714 	(void) m_set_service_class(m, so_tc2msc(tc));
2715 }
2716 
2717 /*
2718  * Support for select()
2719  *
2720  * Return true iff the specific operation will not block indefinitely.
2721  * Otherwise, return false but make a note that a selwakeup() must be done.
2722  */
int
bpfselect(dev_t dev, int which, void * wql, struct proc *p)
{
	struct bpf_d *d;
	int ret = 0;

	lck_mtx_lock(bpf_mlock);

	d = bpf_dtab[minor(dev)];
	if (d == NULL || d == BPF_DEV_RESERVED ||
	    (d->bd_flags & BPF_CLOSING) != 0) {
		lck_mtx_unlock(bpf_mlock);
		return ENXIO;
	}

	/* Hold a reference so the descriptor cannot go away while we sleep. */
	bpf_acquire_d(d);

	if (d->bd_bif == NULL) {
		bpf_release_d(d);
		lck_mtx_unlock(bpf_mlock);
		return ENXIO;
	}

	/* Wait for any in-flight hold-buffer read to drain. */
	while (d->bd_hbuf_read) {
		msleep((caddr_t)d, bpf_mlock, PRINET, "bpfselect", NULL);
	}

	/* The descriptor may have started closing while we slept. */
	if ((d->bd_flags & BPF_CLOSING) != 0) {
		bpf_release_d(d);
		lck_mtx_unlock(bpf_mlock);
		return ENXIO;
	}

	switch (which) {
	case FREAD:
		/* Ready if the hold buffer has data, or the store buffer
		 * does and immediate mode / a timeout makes it readable. */
		if (d->bd_hlen != 0 ||
		    ((d->bd_immediate ||
		    d->bd_state == BPF_TIMED_OUT) && d->bd_slen != 0)) {
			ret = 1;         /* read has data to return */
		} else {
			/*
			 * Read has no data to return.
			 * Make the select wait, and start a timer if
			 * necessary.
			 */
			selrecord(p, &d->bd_sel, wql);
			bpf_start_timer(d);
		}
		break;

	case FWRITE:
		/* can't determine whether a write would block */
		ret = 1;
		break;
	}

	bpf_release_d(d);
	lck_mtx_unlock(bpf_mlock);

	return ret;
}
2784 
2785 /*
2786  * Support for kevent() system call.  Register EVFILT_READ filters and
2787  * reject all others.
2788  */
int bpfkqfilter(dev_t dev, struct knote *kn);
static void filt_bpfdetach(struct knote *);
static int filt_bpfread(struct knote *, long);
static int filt_bpftouch(struct knote *kn, struct kevent_qos_s *kev);
static int filt_bpfprocess(struct knote *kn, struct kevent_qos_s *kev);

/* Filter operations for EVFILT_READ knotes attached to a bpf device. */
SECURITY_READ_ONLY_EARLY(struct filterops) bpfread_filtops = {
	.f_isfd = 1,
	.f_detach = filt_bpfdetach,
	.f_event = filt_bpfread,
	.f_touch = filt_bpftouch,
	.f_process = filt_bpfprocess,
};
2802 
/*
 * Shared EVFILT_READ evaluation for a bpf descriptor: decide whether a
 * read would return data and how much.  If `kev' is non-NULL and the
 * knote fires, fill in the kevent with the byte count.
 */
static int
filt_bpfread_common(struct knote *kn, struct kevent_qos_s *kev, struct bpf_d *d)
{
	int ready = 0;
	int64_t data = 0;

	if (d->bd_immediate) {
		/*
		 * If there's data in the hold buffer, it's the
		 * amount of data a read will return.
		 *
		 * If there's no data in the hold buffer, but
		 * there's data in the store buffer, a read will
		 * immediately rotate the store buffer to the
		 * hold buffer, the amount of data in the store
		 * buffer is the amount of data a read will
		 * return.
		 *
		 * If there's no data in either buffer, we're not
		 * ready to read.
		 */
		data = (d->bd_hlen == 0 || d->bd_hbuf_read ?
		    d->bd_slen : d->bd_hlen);
		/* Clamp the requested low-watermark to the buffer size. */
		int64_t lowwat = knote_low_watermark(kn);
		if (lowwat > d->bd_bufsize) {
			lowwat = d->bd_bufsize;
		}
		ready = (data >= lowwat);
	} else {
		/*
		 * If there's data in the hold buffer, it's the
		 * amount of data a read will return.
		 *
		 * If there's no data in the hold buffer, but
		 * there's data in the store buffer, if the
		 * timer has expired a read will immediately
		 * rotate the store buffer to the hold buffer,
		 * so the amount of data in the store buffer is
		 * the amount of data a read will return.
		 *
		 * If there's no data in either buffer, or there's
		 * no data in the hold buffer and the timer hasn't
		 * expired, we're not ready to read.
		 */
		data = ((d->bd_hlen == 0 || d->bd_hbuf_read) &&
		    d->bd_state == BPF_TIMED_OUT ? d->bd_slen : d->bd_hlen);
		ready = (data > 0);
	}
	if (!ready) {
		/* Not readable yet: arm the read timeout, if configured. */
		bpf_start_timer(d);
	} else if (kev) {
		knote_fill_kevent(kn, kev, data);
	}

	return ready;
}
2859 
int
bpfkqfilter(dev_t dev, struct knote *kn)
{
	struct bpf_d *d;
	int res;

	/*
	 * Is this device a bpf?
	 */
	if (major(dev) != CDEV_MAJOR || kn->kn_filter != EVFILT_READ) {
		knote_set_error(kn, EINVAL);
		return 0;
	}

	lck_mtx_lock(bpf_mlock);

	d = bpf_dtab[minor(dev)];

	/* Reject reserved slots and closing or detached descriptors. */
	if (d == NULL || d == BPF_DEV_RESERVED ||
	    (d->bd_flags & BPF_CLOSING) != 0 ||
	    d->bd_bif == NULL) {
		lck_mtx_unlock(bpf_mlock);
		knote_set_error(kn, ENXIO);
		return 0;
	}

	/* Link the knote to this descriptor's selinfo. */
	kn->kn_filtid = EVFILTID_BPFREAD;
	knote_kn_hook_set_raw(kn, d);
	KNOTE_ATTACH(&d->bd_sel.si_note, kn);
	d->bd_flags |= BPF_KNOTE;

	/* capture the current state */
	res = filt_bpfread_common(kn, NULL, d);

	lck_mtx_unlock(bpf_mlock);

	return res;
}
2898 
2899 static void
filt_bpfdetach(struct knote * kn)2900 filt_bpfdetach(struct knote *kn)
2901 {
2902 	struct bpf_d *d = (struct bpf_d *)knote_kn_hook_get_raw(kn);
2903 
2904 	lck_mtx_lock(bpf_mlock);
2905 	if (d->bd_flags & BPF_KNOTE) {
2906 		KNOTE_DETACH(&d->bd_sel.si_note, kn);
2907 		d->bd_flags &= ~BPF_KNOTE;
2908 	}
2909 	lck_mtx_unlock(bpf_mlock);
2910 }
2911 
2912 static int
filt_bpfread(struct knote * kn,long hint)2913 filt_bpfread(struct knote *kn, long hint)
2914 {
2915 #pragma unused(hint)
2916 	struct bpf_d *d = (struct bpf_d *)knote_kn_hook_get_raw(kn);
2917 
2918 	return filt_bpfread_common(kn, NULL, d);
2919 }
2920 
2921 static int
filt_bpftouch(struct knote * kn,struct kevent_qos_s * kev)2922 filt_bpftouch(struct knote *kn, struct kevent_qos_s *kev)
2923 {
2924 	struct bpf_d *d = (struct bpf_d *)knote_kn_hook_get_raw(kn);
2925 	int res;
2926 
2927 	lck_mtx_lock(bpf_mlock);
2928 
2929 	/* save off the lowat threshold and flag */
2930 	kn->kn_sdata = kev->data;
2931 	kn->kn_sfflags = kev->fflags;
2932 
2933 	/* output data will be re-generated here */
2934 	res = filt_bpfread_common(kn, NULL, d);
2935 
2936 	lck_mtx_unlock(bpf_mlock);
2937 
2938 	return res;
2939 }
2940 
2941 static int
filt_bpfprocess(struct knote * kn,struct kevent_qos_s * kev)2942 filt_bpfprocess(struct knote *kn, struct kevent_qos_s *kev)
2943 {
2944 	struct bpf_d *d = (struct bpf_d *)knote_kn_hook_get_raw(kn);
2945 	int res;
2946 
2947 	lck_mtx_lock(bpf_mlock);
2948 	res = filt_bpfread_common(kn, kev, d);
2949 	lck_mtx_unlock(bpf_mlock);
2950 
2951 	return res;
2952 }
2953 
2954 /*
2955  * Copy data from an mbuf chain into a buffer.	This code is derived
2956  * from m_copydata in kern/uipc_mbuf.c.
2957  */
2958 static void
bpf_mcopy(struct mbuf * m,void * dst_arg,size_t len,size_t offset)2959 bpf_mcopy(struct mbuf *m, void *dst_arg, size_t len, size_t offset)
2960 {
2961 	u_int count;
2962 	u_char *dst;
2963 
2964 	dst = dst_arg;
2965 
2966 	while (offset >= m->m_len) {
2967 		offset -= m->m_len;
2968 		m = m->m_next;
2969 		if (m == NULL) {
2970 			panic("bpf_mcopy");
2971 		}
2972 		continue;
2973 	}
2974 
2975 	while (len > 0) {
2976 		if (m == NULL) {
2977 			panic("bpf_mcopy");
2978 		}
2979 		count = MIN(m->m_len - (u_int)offset, (u_int)len);
2980 		bcopy((u_char *)mbuf_data(m) + offset, dst, count);
2981 		m = m->m_next;
2982 		dst += count;
2983 		len -= count;
2984 		offset = 0;
2985 	}
2986 }
2987 
/*
 * Deliver `bpf_pkt' to every descriptor attached to `ifp' for the given
 * DLT (or the interface's default attachment when dlt == 0).
 */
static inline void
bpf_tap_imp(
	ifnet_t         ifp,
	u_int32_t       dlt,
	struct bpf_packet *bpf_pkt,
	int             outbound)
{
	struct bpf_d    *d;
	u_int slen;
	struct bpf_if *bp;

	/*
	 * It's possible that we get here after the bpf descriptor has been
	 * detached from the interface; in such a case we simply return.
	 * Lock ordering is important since we can be called asynchronously
	 * (from IOKit) to process an inbound packet; when that happens
	 * we would have been holding its "gateLock" and will be acquiring
	 * "bpf_mlock" upon entering this routine.  Due to that, we release
	 * "bpf_mlock" prior to calling ifnet_set_promiscuous (which will
	 * acquire "gateLock" in the IOKit), in order to avoid a deadlock
	 * when a ifnet_set_promiscuous request simultaneously collides with
	 * an inbound packet being passed into the tap callback.
	 */
	lck_mtx_lock(bpf_mlock);
	if (ifp->if_bpf == NULL) {
		lck_mtx_unlock(bpf_mlock);
		return;
	}
	/* Find the bpf_if for this interface matching the requested DLT. */
	for (bp = ifp->if_bpf; bp != NULL; bp = bp->bif_next) {
		if (bp->bif_ifp != ifp) {
			/* wrong interface */
			bp = NULL;
			break;
		}
		if (dlt == 0 || bp->bif_dlt == dlt) {
			/* tapping default DLT or DLT matches */
			break;
		}
	}
	if (bp == NULL) {
		goto done;
	}
	for (d = bp->bif_dlist; d != NULL; d = d->bd_next) {
		struct bpf_packet *bpf_pkt_saved = bpf_pkt;
		struct bpf_packet bpf_pkt_tmp = {};
		struct pktap_header_buffer bpfp_header_tmp = {};

		/* Honor the descriptor's direction filter. */
		if (outbound && (d->bd_direction & BPF_D_OUT) == 0) {
			continue;
		}
		if (!outbound && (d->bd_direction & BPF_D_IN) == 0) {
			continue;
		}

		++d->bd_rcount;
		slen = bpf_filter(d->bd_filter, (u_char *)bpf_pkt,
		    (u_int)bpf_pkt->bpfp_total_length, 0);

		if (slen != 0) {
			if (bp->bif_ifp->if_type == IFT_PKTAP &&
			    bp->bif_dlt == DLT_PKTAP) {
				/* Optionally truncate to the protocol headers. */
				if (d->bd_flags & BPF_TRUNCATE) {
					slen = min(slen, get_pkt_trunc_len(bpf_pkt));
				}
				/*
				 * Need to copy the bpf_pkt because the conversion
				 * to v2 pktap header modifies the content of the
				 * bpfp_header
				 */
				if ((d->bd_flags & BPF_PKTHDRV2) &&
				    bpf_pkt->bpfp_header_length <= sizeof(bpfp_header_tmp)) {
					bpf_pkt_tmp = *bpf_pkt;

					bpf_pkt = &bpf_pkt_tmp;

					memcpy(&bpfp_header_tmp, bpf_pkt->bpfp_header,
					    bpf_pkt->bpfp_header_length);

					bpf_pkt->bpfp_header = &bpfp_header_tmp;

					convert_to_pktap_header_to_v2(bpf_pkt,
					    !!(d->bd_flags & BPF_TRUNCATE));
				}
			}
			++d->bd_fcount;
			catchpacket(d, bpf_pkt, slen, outbound);
		}
		/* Undo any per-descriptor substitution of the packet. */
		bpf_pkt = bpf_pkt_saved;
	}

done:
	lck_mtx_unlock(bpf_mlock);
}
3081 
3082 static inline void
bpf_tap_mbuf(ifnet_t ifp,u_int32_t dlt,mbuf_t m,void * hdr,size_t hlen,int outbound)3083 bpf_tap_mbuf(
3084 	ifnet_t         ifp,
3085 	u_int32_t       dlt,
3086 	mbuf_t          m,
3087 	void*           hdr,
3088 	size_t          hlen,
3089 	int             outbound)
3090 {
3091 	struct bpf_packet bpf_pkt;
3092 	struct mbuf *m0;
3093 
3094 	if (ifp->if_bpf == NULL) {
3095 		/* quickly check without taking lock */
3096 		return;
3097 	}
3098 	bpf_pkt.bpfp_type = BPF_PACKET_TYPE_MBUF;
3099 	bpf_pkt.bpfp_mbuf = m;
3100 	bpf_pkt.bpfp_total_length = 0;
3101 	for (m0 = m; m0 != NULL; m0 = m0->m_next) {
3102 		bpf_pkt.bpfp_total_length += m0->m_len;
3103 	}
3104 	bpf_pkt.bpfp_header = hdr;
3105 	if (hdr != NULL) {
3106 		bpf_pkt.bpfp_total_length += hlen;
3107 		bpf_pkt.bpfp_header_length = hlen;
3108 	} else {
3109 		bpf_pkt.bpfp_header_length = 0;
3110 	}
3111 	bpf_tap_imp(ifp, dlt, &bpf_pkt, outbound);
3112 }
3113 
/* Tap an outbound mbuf chain (direction flag 1). */
void
bpf_tap_out(
	ifnet_t         ifp,
	u_int32_t       dlt,
	mbuf_t          m,
	void*           hdr,
	size_t          hlen)
{
	bpf_tap_mbuf(ifp, dlt, m, hdr, hlen, 1);
}
3124 
/* Tap an inbound mbuf chain (direction flag 0). */
void
bpf_tap_in(
	ifnet_t         ifp,
	u_int32_t       dlt,
	mbuf_t          m,
	void*           hdr,
	size_t          hlen)
{
	bpf_tap_mbuf(ifp, dlt, m, hdr, hlen, 0);
}
3135 
3136 /* Callback registered with Ethernet driver. */
3137 static int
bpf_tap_callback(struct ifnet * ifp,struct mbuf * m)3138 bpf_tap_callback(struct ifnet *ifp, struct mbuf *m)
3139 {
3140 	bpf_tap_mbuf(ifp, 0, m, NULL, 0, mbuf_pkthdr_rcvif(m) == NULL);
3141 
3142 	return 0;
3143 }
3144 
3145 #if SKYWALK
3146 #include <skywalk/os_skywalk_private.h>
3147 
3148 static void
bpf_pktcopy(kern_packet_t pkt,void * dst_arg,size_t len,size_t offset)3149 bpf_pktcopy(kern_packet_t pkt, void *dst_arg, size_t len, size_t offset)
3150 {
3151 	kern_buflet_t   buflet = NULL;
3152 	size_t count;
3153 	u_char *dst;
3154 
3155 	dst = dst_arg;
3156 	while (len > 0) {
3157 		uint8_t         *addr;
3158 
3159 		u_int32_t       buflet_length;
3160 
3161 		buflet = kern_packet_get_next_buflet(pkt, buflet);
3162 		VERIFY(buflet != NULL);
3163 		addr = kern_buflet_get_data_address(buflet);
3164 		VERIFY(addr != NULL);
3165 		addr += kern_buflet_get_data_offset(buflet);
3166 		buflet_length = kern_buflet_get_data_length(buflet);
3167 		if (offset >= buflet_length) {
3168 			offset -= buflet_length;
3169 			continue;
3170 		}
3171 		count = MIN(buflet_length - offset, len);
3172 		bcopy((void *)(addr + offset), (void *)dst, count);
3173 		dst += count;
3174 		len -= count;
3175 		offset = 0;
3176 	}
3177 }
3178 
3179 static inline void
bpf_tap_packet(ifnet_t ifp,u_int32_t dlt,kern_packet_t pkt,void * hdr,size_t hlen,int outbound)3180 bpf_tap_packet(
3181 	ifnet_t         ifp,
3182 	u_int32_t       dlt,
3183 	kern_packet_t   pkt,
3184 	void*           hdr,
3185 	size_t          hlen,
3186 	int             outbound)
3187 {
3188 	struct bpf_packet       bpf_pkt;
3189 	struct mbuf *           m;
3190 
3191 	if (ifp->if_bpf == NULL) {
3192 		/* quickly check without taking lock */
3193 		return;
3194 	}
3195 	m = kern_packet_get_mbuf(pkt);
3196 	if (m != NULL) {
3197 		bpf_pkt.bpfp_type = BPF_PACKET_TYPE_MBUF;
3198 		bpf_pkt.bpfp_mbuf = m;
3199 		bpf_pkt.bpfp_total_length = m_length(m);
3200 	} else {
3201 		bpf_pkt.bpfp_type = BPF_PACKET_TYPE_PKT;
3202 		bpf_pkt.bpfp_pkt = pkt;
3203 		bpf_pkt.bpfp_total_length = kern_packet_get_data_length(pkt);
3204 	}
3205 	bpf_pkt.bpfp_header = hdr;
3206 	bpf_pkt.bpfp_header_length = hlen;
3207 	if (hlen != 0) {
3208 		bpf_pkt.bpfp_total_length += hlen;
3209 	}
3210 	bpf_tap_imp(ifp, dlt, &bpf_pkt, outbound);
3211 }
3212 
/* Tap an outbound Skywalk packet (direction flag 1). */
void
bpf_tap_packet_out(
	ifnet_t         ifp,
	u_int32_t       dlt,
	kern_packet_t   pkt,
	void*           hdr,
	size_t          hlen)
{
	bpf_tap_packet(ifp, dlt, pkt, hdr, hlen, 1);
}
3223 
/* Tap an inbound Skywalk packet (direction flag 0). */
void
bpf_tap_packet_in(
	ifnet_t         ifp,
	u_int32_t       dlt,
	kern_packet_t   pkt,
	void*           hdr,
	size_t          hlen)
{
	bpf_tap_packet(ifp, dlt, pkt, hdr, hlen, 0);
}
3234 
3235 #endif /* SKYWALK */
3236 
3237 static errno_t
bpf_copydata(struct bpf_packet * pkt,size_t off,size_t len,void * out_data)3238 bpf_copydata(struct bpf_packet *pkt, size_t off, size_t len, void* out_data)
3239 {
3240 	errno_t err = 0;
3241 	if (pkt->bpfp_type == BPF_PACKET_TYPE_MBUF) {
3242 		err = mbuf_copydata(pkt->bpfp_mbuf, off, len, out_data);
3243 #if SKYWALK
3244 	} else if (pkt->bpfp_type == BPF_PACKET_TYPE_PKT) {
3245 		err = kern_packet_copy_bytes(pkt->bpfp_pkt, off, len, out_data);
3246 #endif /* SKYWALK */
3247 	} else {
3248 		err = EINVAL;
3249 	}
3250 
3251 	return err;
3252 }
3253 
3254 static void
copy_bpf_packet_offset(struct bpf_packet * pkt,void * dst,size_t len,size_t offset)3255 copy_bpf_packet_offset(struct bpf_packet * pkt, void * dst, size_t len, size_t offset)
3256 {
3257 	/* copy the optional header */
3258 	if (offset < pkt->bpfp_header_length) {
3259 		size_t  count = MIN(len, pkt->bpfp_header_length - offset);
3260 		caddr_t src = (caddr_t)pkt->bpfp_header;
3261 		bcopy(src + offset, dst, count);
3262 		len -= count;
3263 		dst = (void *)((uintptr_t)dst + count);
3264 		offset = 0;
3265 	} else {
3266 		offset -= pkt->bpfp_header_length;
3267 	}
3268 
3269 	if (len == 0) {
3270 		/* nothing past the header */
3271 		return;
3272 	}
3273 	/* copy the packet */
3274 	switch (pkt->bpfp_type) {
3275 	case BPF_PACKET_TYPE_MBUF:
3276 		bpf_mcopy(pkt->bpfp_mbuf, dst, len, offset);
3277 		break;
3278 #if SKYWALK
3279 	case BPF_PACKET_TYPE_PKT:
3280 		bpf_pktcopy(pkt->bpfp_pkt, dst, len, offset);
3281 		break;
3282 #endif /* SKYWALK */
3283 	default:
3284 		break;
3285 	}
3286 }
3287 
/* Copy the first `len' bytes of a bpf packet (header plus payload). */
static void
copy_bpf_packet(struct bpf_packet * pkt, void * dst, size_t len)
{
	copy_bpf_packet_offset(pkt, dst, len, 0);
}
3293 
3294 static uint32_t
get_esp_trunc_len(__unused struct bpf_packet * pkt,__unused uint32_t off,const uint32_t remaining_caplen)3295 get_esp_trunc_len(__unused struct bpf_packet *pkt, __unused uint32_t off,
3296     const uint32_t remaining_caplen)
3297 {
3298 	/*
3299 	 * For some reason tcpdump expects to have one byte beyond the ESP header
3300 	 */
3301 	uint32_t trunc_len = ESP_HDR_SIZE + 1;
3302 
3303 	if (trunc_len > remaining_caplen) {
3304 		return remaining_caplen;
3305 	}
3306 
3307 	return trunc_len;
3308 }
3309 
3310 static uint32_t
get_isakmp_trunc_len(__unused struct bpf_packet * pkt,__unused uint32_t off,const uint32_t remaining_caplen)3311 get_isakmp_trunc_len(__unused struct bpf_packet *pkt, __unused uint32_t off,
3312     const uint32_t remaining_caplen)
3313 {
3314 	/*
3315 	 * Include the payload generic header
3316 	 */
3317 	uint32_t trunc_len = ISAKMP_HDR_SIZE;
3318 
3319 	if (trunc_len > remaining_caplen) {
3320 		return remaining_caplen;
3321 	}
3322 
3323 	return trunc_len;
3324 }
3325 
3326 static uint32_t
get_isakmp_natt_trunc_len(struct bpf_packet * pkt,uint32_t off,const uint32_t remaining_caplen)3327 get_isakmp_natt_trunc_len(struct bpf_packet *pkt, uint32_t off,
3328     const uint32_t remaining_caplen)
3329 {
3330 	int err = 0;
3331 	uint32_t trunc_len = 0;
3332 	char payload[remaining_caplen];
3333 
3334 	err = bpf_copydata(pkt, off, remaining_caplen, payload);
3335 	if (err != 0) {
3336 		return remaining_caplen;
3337 	}
3338 	/*
3339 	 * They are three cases:
3340 	 * - IKE: payload start with 4 bytes header set to zero before ISAKMP header
3341 	 * - keep alive: 1 byte payload
3342 	 * - otherwise it's ESP
3343 	 */
3344 	if (remaining_caplen >= 4 &&
3345 	    payload[0] == 0 && payload[1] == 0 &&
3346 	    payload[2] == 0 && payload[3] == 0) {
3347 		trunc_len = 4 + get_isakmp_trunc_len(pkt, off + 4, remaining_caplen - 4);
3348 	} else if (remaining_caplen == 1) {
3349 		trunc_len = 1;
3350 	} else {
3351 		trunc_len = get_esp_trunc_len(pkt, off, remaining_caplen);
3352 	}
3353 
3354 	if (trunc_len > remaining_caplen) {
3355 		return remaining_caplen;
3356 	}
3357 
3358 	return trunc_len;
3359 }
3360 
/*
 * Compute the truncated (BPF_TRUNCATE) capture length of a UDP datagram:
 * the UDP header plus, for selected well-known ports (DNS, BOOTP/DHCP,
 * ISAKMP, ISAKMP NAT-T), some or all of the payload.
 * Never returns more than remaining_caplen.
 */
static uint32_t
get_udp_trunc_len(struct bpf_packet *pkt, uint32_t off, const uint32_t remaining_caplen)
{
	int err = 0;
	uint32_t trunc_len = sizeof(struct udphdr);         /* By default no UDP payload */

	if (trunc_len >= remaining_caplen) {
		return remaining_caplen;
	}

	struct udphdr udphdr;
	err = bpf_copydata(pkt, off, sizeof(struct udphdr), &udphdr);
	if (err != 0) {
		/* cannot read the UDP header: keep everything we captured */
		return remaining_caplen;
	}

	u_short sport, dport;

	/* ports are in network byte order in the copied header */
	sport = EXTRACT_SHORT(&udphdr.uh_sport);
	dport = EXTRACT_SHORT(&udphdr.uh_dport);

	if (dport == PORT_DNS || sport == PORT_DNS) {
		/*
		 * Full UDP payload for DNS
		 */
		trunc_len = remaining_caplen;
	} else if ((sport == PORT_BOOTPS && dport == PORT_BOOTPC) ||
	    (sport == PORT_BOOTPC && dport == PORT_BOOTPS)) {
		/*
		 * Full UDP payload for BOOTP and DHCP
		 */
		trunc_len = remaining_caplen;
	} else if (dport == PORT_ISAKMP && sport == PORT_ISAKMP) {
		/*
		 * Return the ISAKMP header
		 */
		trunc_len += get_isakmp_trunc_len(pkt, off + sizeof(struct udphdr),
		    remaining_caplen - sizeof(struct udphdr));
	} else if (dport == PORT_ISAKMP_NATT && sport == PORT_ISAKMP_NATT) {
		/* NAT traversal: payload may be IKE, a keep-alive, or ESP */
		trunc_len += get_isakmp_natt_trunc_len(pkt, off + sizeof(struct udphdr),
		    remaining_caplen - sizeof(struct udphdr));
	}
	if (trunc_len >= remaining_caplen) {
		return remaining_caplen;
	}

	return trunc_len;
}
3409 
3410 static uint32_t
get_tcp_trunc_len(struct bpf_packet * pkt,uint32_t off,const uint32_t remaining_caplen)3411 get_tcp_trunc_len(struct bpf_packet *pkt, uint32_t off, const uint32_t remaining_caplen)
3412 {
3413 	int err = 0;
3414 	uint32_t trunc_len = sizeof(struct tcphdr);         /* By default no TCP payload */
3415 	if (trunc_len >= remaining_caplen) {
3416 		return remaining_caplen;
3417 	}
3418 
3419 	struct tcphdr tcphdr;
3420 	err = bpf_copydata(pkt, off, sizeof(struct tcphdr), &tcphdr);
3421 	if (err != 0) {
3422 		return remaining_caplen;
3423 	}
3424 
3425 	u_short sport, dport;
3426 	sport = EXTRACT_SHORT(&tcphdr.th_sport);
3427 	dport = EXTRACT_SHORT(&tcphdr.th_dport);
3428 
3429 	if (dport == PORT_DNS || sport == PORT_DNS) {
3430 		/*
3431 		 * Full TCP payload  for DNS
3432 		 */
3433 		trunc_len = remaining_caplen;
3434 	} else {
3435 		trunc_len = (uint16_t)(tcphdr.th_off << 2);
3436 	}
3437 	if (trunc_len >= remaining_caplen) {
3438 		return remaining_caplen;
3439 	}
3440 
3441 	return trunc_len;
3442 }
3443 
3444 static uint32_t
get_proto_trunc_len(uint8_t proto,struct bpf_packet * pkt,uint32_t off,const uint32_t remaining_caplen)3445 get_proto_trunc_len(uint8_t proto, struct bpf_packet *pkt, uint32_t off, const uint32_t remaining_caplen)
3446 {
3447 	uint32_t trunc_len;
3448 
3449 	switch (proto) {
3450 	case IPPROTO_ICMP: {
3451 		/*
3452 		 * Full IMCP payload
3453 		 */
3454 		trunc_len = remaining_caplen;
3455 		break;
3456 	}
3457 	case IPPROTO_ICMPV6: {
3458 		/*
3459 		 * Full IMCPV6 payload
3460 		 */
3461 		trunc_len = remaining_caplen;
3462 		break;
3463 	}
3464 	case IPPROTO_IGMP: {
3465 		/*
3466 		 * Full IGMP payload
3467 		 */
3468 		trunc_len = remaining_caplen;
3469 		break;
3470 	}
3471 	case IPPROTO_UDP: {
3472 		trunc_len = get_udp_trunc_len(pkt, off, remaining_caplen);
3473 		break;
3474 	}
3475 	case IPPROTO_TCP: {
3476 		trunc_len = get_tcp_trunc_len(pkt, off, remaining_caplen);
3477 		break;
3478 	}
3479 	case IPPROTO_ESP: {
3480 		trunc_len = get_esp_trunc_len(pkt, off, remaining_caplen);
3481 		break;
3482 	}
3483 	default: {
3484 		/*
3485 		 * By default we only include the IP header
3486 		 */
3487 		trunc_len = 0;
3488 		break;
3489 	}
3490 	}
3491 	if (trunc_len >= remaining_caplen) {
3492 		return remaining_caplen;
3493 	}
3494 
3495 	return trunc_len;
3496 }
3497 
/*
 * Compute the truncated (BPF_TRUNCATE) capture length of an IPv4 packet:
 * the IPv4 header (including options) plus a protocol-dependent amount of
 * payload.  Never returns more than remaining_caplen.
 */
static uint32_t
get_ip_trunc_len(struct bpf_packet *pkt, uint32_t off, const uint32_t remaining_caplen)
{
	int err = 0;
	uint32_t iplen = sizeof(struct ip);
	if (iplen >= remaining_caplen) {
		return remaining_caplen;
	}

	struct ip iphdr;
	err =  bpf_copydata(pkt, off, sizeof(struct ip), &iphdr);
	if (err != 0) {
		/* cannot read the IP header: keep everything we captured */
		return remaining_caplen;
	}

	uint8_t proto = 0;

	/* actual header length in bytes, accounting for IP options */
	iplen = (uint16_t)(iphdr.ip_hl << 2);
	if (iplen >= remaining_caplen) {
		return remaining_caplen;
	}

	proto = iphdr.ip_p;
	iplen += get_proto_trunc_len(proto, pkt, off + iplen, remaining_caplen - iplen);

	if (iplen >= remaining_caplen) {
		return remaining_caplen;
	}

	return iplen;
}
3529 
3530 static uint32_t
get_ip6_trunc_len(struct bpf_packet * pkt,uint32_t off,const uint32_t remaining_caplen)3531 get_ip6_trunc_len(struct bpf_packet *pkt, uint32_t off, const uint32_t remaining_caplen)
3532 {
3533 	int err = 0;
3534 	uint32_t iplen = sizeof(struct ip6_hdr);
3535 	if (iplen >= remaining_caplen) {
3536 		return remaining_caplen;
3537 	}
3538 
3539 	struct ip6_hdr ip6hdr;
3540 	err = bpf_copydata(pkt, off, sizeof(struct ip6_hdr), &ip6hdr);
3541 	if (err != 0) {
3542 		return remaining_caplen;
3543 	}
3544 
3545 	uint8_t proto = 0;
3546 
3547 	/*
3548 	 * TBD: process the extension headers
3549 	 */
3550 	proto = ip6hdr.ip6_nxt;
3551 	iplen += get_proto_trunc_len(proto, pkt, off + iplen, remaining_caplen - iplen);
3552 
3553 	if (iplen >= remaining_caplen) {
3554 		return remaining_caplen;
3555 	}
3556 
3557 	return iplen;
3558 }
3559 
/*
 * Compute the truncated (BPF_TRUNCATE) capture length of an Ethernet
 * frame: the Ethernet header plus a protocol-dependent amount of the
 * encapsulated packet.  Never returns more than remaining_caplen.
 */
static uint32_t
get_ether_trunc_len(struct bpf_packet *pkt, uint32_t off, const uint32_t remaining_caplen)
{
	int err = 0;
	uint32_t ethlen = sizeof(struct ether_header);
	if (ethlen >= remaining_caplen) {
		return remaining_caplen;
	}

	struct ether_header eh = {};
	err = bpf_copydata(pkt, off, sizeof(struct ether_header), &eh);
	if (err != 0) {
		/* cannot read the Ethernet header: keep everything we captured */
		return remaining_caplen;
	}

	u_short type = EXTRACT_SHORT(&eh.ether_type);
	/* Include full ARP */
	if (type == ETHERTYPE_ARP) {
		ethlen = remaining_caplen;
	} else if (type == ETHERTYPE_IP) {
		ethlen += get_ip_trunc_len(pkt, off + sizeof(struct ether_header),
		    remaining_caplen - ethlen);
	} else if (type == ETHERTYPE_IPV6) {
		ethlen += get_ip6_trunc_len(pkt, off + sizeof(struct ether_header),
		    remaining_caplen - ethlen);
	} else {
		/* unknown ethertype: keep only a minimal amount of the frame */
		ethlen = MIN(BPF_MIN_PKT_SIZE, remaining_caplen);
	}
	return ethlen;
}
3590 
/*
 * Compute the truncated (BPF_TRUNCATE) length of a pktap-encapsulated
 * packet: the pktap header, the L2 header, and a protocol-dependent
 * amount of the network-layer payload.  The result is clamped to
 * bpfp_total_length; an overflow is counted (and panics when bpf_debug
 * or DEBUG builds request it).
 */
static uint32_t
get_pkt_trunc_len(struct bpf_packet *pkt)
{
	struct pktap_header *pktap = (struct pktap_header *) (pkt->bpfp_header);
	uint32_t in_pkt_len = 0;
	uint32_t out_pkt_len = 0;
	uint32_t tlen = 0;
	uint32_t pre_adjust;         // L2 header not in mbuf or kern_packet

	// bpfp_total_length must contain the BPF packet header
	assert3u(pkt->bpfp_total_length, >=, pkt->bpfp_header_length);

	// The BPF packet header must contain the pktap header
	assert3u(pkt->bpfp_header_length, >=, pktap->pth_length);

	// The pre frame length (L2 header) must be contained in the packet
	assert3u(pkt->bpfp_total_length, >=, pktap->pth_length + pktap->pth_frame_pre_length);

	/*
	 * pktap->pth_frame_pre_length is the L2 header length and accounts
	 * for both L2 header in the packet payload and pre_adjust.
	 *
	 * pre_adjust represents an adjustment for a pseudo L2 header that is not
	 * part of packet payload -- not in the mbuf or kern_packet -- and comes
	 * just after the pktap header.
	 *
	 * pktap->pth_length is the size of the pktap header (exclude pre_adjust)
	 *
	 * pkt->bpfp_header_length is (pktap->pth_length + pre_adjust)
	 */
	pre_adjust = (uint32_t)(pkt->bpfp_header_length - pktap->pth_length);

	if (pktap->pth_iftype == IFT_ETHER) {
		/*
		 * We need to parse the Ethernet header to find the network layer
		 * protocol
		 */
		in_pkt_len = (uint32_t)(pkt->bpfp_total_length - pktap->pth_length - pre_adjust);

		out_pkt_len = get_ether_trunc_len(pkt, 0, in_pkt_len);

		tlen = pktap->pth_length + pre_adjust + out_pkt_len;
	} else {
		/*
		 * For other interface types, we only know to parse IPv4 and IPv6.
		 *
		 * To get to the beginning of the IPv4 or IPv6 packet, we need to to skip
		 * over the L2 header that is the actual packet payload (mbuf or kern_packet)
		 */
		uint32_t off;         // offset past the L2 header in the actual packet payload

		off = pktap->pth_frame_pre_length - pre_adjust;

		in_pkt_len = (uint32_t)(pkt->bpfp_total_length - pktap->pth_length - pktap->pth_frame_pre_length);

		if (pktap->pth_protocol_family == AF_INET) {
			out_pkt_len = get_ip_trunc_len(pkt, off, in_pkt_len);
		} else if (pktap->pth_protocol_family == AF_INET6) {
			out_pkt_len = get_ip6_trunc_len(pkt, off, in_pkt_len);
		} else {
			/* unknown protocol family: keep a minimal amount */
			out_pkt_len = MIN(BPF_MIN_PKT_SIZE, in_pkt_len);
		}
		tlen = pktap->pth_length + pktap->pth_frame_pre_length + out_pkt_len;
	}

	// Verify we do not overflow the buffer
	if (__improbable(tlen > pkt->bpfp_total_length)) {
		bool do_panic = bpf_debug != 0 ? true : false;

#if DEBUG
		do_panic = true;
#endif /* DEBUG */
		if (do_panic) {
			panic("%s:%d tlen %u > bpfp_total_length %lu bpfp_header_length %lu pth_frame_pre_length %u pre_adjust %u in_pkt_len %u out_pkt_len %u",
			    __func__, __LINE__,
			    tlen, pkt->bpfp_total_length, pkt->bpfp_header_length, pktap->pth_frame_pre_length, pre_adjust, in_pkt_len, out_pkt_len);
		} else {
			os_log(OS_LOG_DEFAULT,
			    "%s:%d tlen %u > bpfp_total_length %lu bpfp_header_length %lu pth_frame_pre_length %u pre_adjust %u in_pkt_len %u out_pkt_len %u",
			    __func__, __LINE__,
			    tlen, pkt->bpfp_total_length, pkt->bpfp_header_length, pktap->pth_frame_pre_length, pre_adjust, in_pkt_len, out_pkt_len);
		}
		bpf_trunc_overflow += 1;
		tlen = (uint32_t)pkt->bpfp_total_length;
	}

	return tlen;
}
3679 
/*
 * Return the length, in bytes, of the common prefix of 'a' and 'b',
 * compared in whole 32-bit words (the trailing max_bytes % 4 bytes are
 * intentionally never compared).
 */
static uint8_t
get_common_prefix_size(const void *a, const void *b, uint8_t max_bytes)
{
	const uint32_t *wa = (const uint32_t *)a;
	const uint32_t *wb = (const uint32_t *)b;
	uint8_t nwords = max_bytes >> 2;
	uint8_t matched = 0;

	while (matched < nwords && wa[matched] == wb[matched]) {
		matched++;
	}
	return (uint8_t)(matched << 2);
}
3695 
3696 /*
3697  * Move the packet data from interface memory (pkt) into the
3698  * store buffer.  Return 1 if it's time to wakeup a listener (buffer full),
3699  * otherwise 0.
3700  */
3701 static void
catchpacket(struct bpf_d * d,struct bpf_packet * pkt,u_int snaplen,int outbound)3702 catchpacket(struct bpf_d *d, struct bpf_packet * pkt,
3703     u_int snaplen, int outbound)
3704 {
3705 	struct bpf_hdr *hp;
3706 	struct bpf_hdr_ext *ehp;
3707 	uint32_t totlen, curlen;
3708 	uint32_t hdrlen, caplen;
3709 	int do_wakeup = 0;
3710 	u_char *payload;
3711 	struct timeval tv;
3712 
3713 	hdrlen = (d->bd_flags & BPF_EXTENDED_HDR) ? d->bd_bif->bif_exthdrlen :
3714 	    (d->bd_flags & BPF_COMP_REQ) ? d->bd_bif->bif_comphdrlen:
3715 	    d->bd_bif->bif_hdrlen;
3716 	/*
3717 	 * Figure out how many bytes to move.  If the packet is
3718 	 * greater or equal to the snapshot length, transfer that
3719 	 * much.  Otherwise, transfer the whole packet (unless
3720 	 * we hit the buffer size limit).
3721 	 */
3722 	totlen = hdrlen + MIN(snaplen, (int)pkt->bpfp_total_length);
3723 	if (totlen > d->bd_bufsize) {
3724 		totlen = d->bd_bufsize;
3725 	}
3726 
3727 	if (hdrlen > totlen) {
3728 		return;
3729 	}
3730 
3731 	/*
3732 	 * Round up the end of the previous packet to the next longword.
3733 	 */
3734 	curlen = BPF_WORDALIGN(d->bd_slen);
3735 	if (curlen + totlen > d->bd_bufsize) {
3736 		/*
3737 		 * This packet will overflow the storage buffer.
3738 		 * Rotate the buffers if we can, then wakeup any
3739 		 * pending reads.
3740 		 *
3741 		 * We cannot rotate buffers if a read is in progress
3742 		 * so drop the packet
3743 		 */
3744 		if (d->bd_hbuf_read) {
3745 			++d->bd_dcount;
3746 			return;
3747 		}
3748 
3749 		if (d->bd_fbuf == NULL) {
3750 			if (d->bd_headdrop == 0) {
3751 				/*
3752 				 * We haven't completed the previous read yet,
3753 				 * so drop the packet.
3754 				 */
3755 				++d->bd_dcount;
3756 				return;
3757 			}
3758 			/*
3759 			 * Drop the hold buffer as it contains older packets
3760 			 */
3761 			d->bd_dcount += d->bd_hcnt;
3762 			d->bd_fbuf = d->bd_hbuf;
3763 			ROTATE_BUFFERS(d);
3764 		} else {
3765 			ROTATE_BUFFERS(d);
3766 		}
3767 		do_wakeup = 1;
3768 		curlen = 0;
3769 	} else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT) {
3770 		/*
3771 		 * Immediate mode is set, or the read timeout has
3772 		 * already expired during a select call. A packet
3773 		 * arrived, so the reader should be woken up.
3774 		 */
3775 		do_wakeup = 1;
3776 	}
3777 
3778 	/*
3779 	 * Append the bpf header.
3780 	 */
3781 	microtime(&tv);
3782 	if (d->bd_flags & BPF_EXTENDED_HDR) {
3783 		ehp = (struct bpf_hdr_ext *)(void *)(d->bd_sbuf + curlen);
3784 		memset(ehp, 0, sizeof(*ehp));
3785 		ehp->bh_tstamp.tv_sec = (int)tv.tv_sec;
3786 		ehp->bh_tstamp.tv_usec = tv.tv_usec;
3787 
3788 		ehp->bh_datalen = (bpf_u_int32)pkt->bpfp_total_length;
3789 		ehp->bh_hdrlen = (u_short)hdrlen;
3790 		caplen = ehp->bh_caplen = totlen - hdrlen;
3791 		payload = (u_char *)ehp + hdrlen;
3792 
3793 		if (outbound) {
3794 			ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_OUT;
3795 		} else {
3796 			ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_IN;
3797 		}
3798 
3799 		if (pkt->bpfp_type == BPF_PACKET_TYPE_MBUF) {
3800 			struct mbuf *m = pkt->bpfp_mbuf;
3801 
3802 			if (outbound) {
3803 				/* only do lookups on non-raw INPCB */
3804 				if ((m->m_pkthdr.pkt_flags & (PKTF_FLOW_ID |
3805 				    PKTF_FLOW_LOCALSRC | PKTF_FLOW_RAWSOCK)) ==
3806 				    (PKTF_FLOW_ID | PKTF_FLOW_LOCALSRC) &&
3807 				    m->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
3808 					ehp->bh_flowid = m->m_pkthdr.pkt_flowid;
3809 					if (m->m_pkthdr.pkt_proto == IPPROTO_TCP) {
3810 						ehp->bh_flags |= BPF_HDR_EXT_FLAGS_TCP;
3811 					} else if (m->m_pkthdr.pkt_proto == IPPROTO_UDP) {
3812 						ehp->bh_flags |= BPF_HDR_EXT_FLAGS_UDP;
3813 					}
3814 				}
3815 				ehp->bh_svc = so_svc2tc(m->m_pkthdr.pkt_svc);
3816 				if (m->m_pkthdr.pkt_flags & PKTF_TCP_REXMT) {
3817 					ehp->bh_pktflags |= BPF_PKTFLAGS_TCP_REXMT;
3818 				}
3819 				if (m->m_pkthdr.pkt_flags & PKTF_START_SEQ) {
3820 					ehp->bh_pktflags |= BPF_PKTFLAGS_START_SEQ;
3821 				}
3822 				if (m->m_pkthdr.pkt_flags & PKTF_LAST_PKT) {
3823 					ehp->bh_pktflags |= BPF_PKTFLAGS_LAST_PKT;
3824 				}
3825 				if (m->m_pkthdr.pkt_flags & PKTF_VALID_UNSENT_DATA) {
3826 					ehp->bh_unsent_bytes =
3827 					    m->m_pkthdr.bufstatus_if;
3828 					ehp->bh_unsent_snd =
3829 					    m->m_pkthdr.bufstatus_sndbuf;
3830 				}
3831 			} else {
3832 				if (m->m_pkthdr.pkt_flags & PKTF_WAKE_PKT) {
3833 					ehp->bh_pktflags |= BPF_PKTFLAGS_WAKE_PKT;
3834 				}
3835 			}
3836 #if SKYWALK
3837 		} else {
3838 			kern_packet_t kern_pkt = pkt->bpfp_pkt;
3839 			packet_flowid_t flowid = 0;
3840 
3841 			if (outbound) {
3842 				/*
3843 				 * Note: pp_init() asserts that kern_packet_svc_class_t is equivalent
3844 				 * to mbuf_svc_class_t
3845 				 */
3846 				ehp->bh_svc = so_svc2tc((mbuf_svc_class_t)kern_packet_get_service_class(kern_pkt));
3847 				if (kern_packet_get_transport_retransmit(kern_pkt)) {
3848 					ehp->bh_pktflags |= BPF_PKTFLAGS_TCP_REXMT;
3849 				}
3850 				if (kern_packet_get_transport_last_packet(kern_pkt)) {
3851 					ehp->bh_pktflags |= BPF_PKTFLAGS_LAST_PKT;
3852 				}
3853 			} else {
3854 				if (kern_packet_get_wake_flag(kern_pkt)) {
3855 					ehp->bh_pktflags |= BPF_PKTFLAGS_WAKE_PKT;
3856 				}
3857 			}
3858 			ehp->bh_trace_tag = kern_packet_get_trace_tag(kern_pkt);
3859 			if (kern_packet_get_flowid(kern_pkt, &flowid) == 0) {
3860 				ehp->bh_flowid = flowid;
3861 			}
3862 #endif /* SKYWALK */
3863 		}
3864 	} else {
3865 		hp = (struct bpf_hdr *)(void *)(d->bd_sbuf + curlen);
3866 		memset(hp, 0, BPF_WORDALIGN(sizeof(*hp)));
3867 		hp->bh_tstamp.tv_sec = (int)tv.tv_sec;
3868 		hp->bh_tstamp.tv_usec = tv.tv_usec;
3869 		hp->bh_datalen = (bpf_u_int32)pkt->bpfp_total_length;
3870 		hp->bh_hdrlen = (u_short)hdrlen;
3871 		caplen = hp->bh_caplen = totlen - hdrlen;
3872 		payload = (u_char *)hp + hdrlen;
3873 	}
3874 	if (d->bd_flags & BPF_COMP_REQ) {
3875 		uint8_t common_prefix_size = 0;
3876 		uint8_t copy_len = MIN((uint8_t)caplen, BPF_HDR_COMP_LEN_MAX);
3877 
3878 		copy_bpf_packet(pkt, d->bd_prev_fbuf, copy_len);
3879 
3880 		if (d->bd_prev_slen != 0) {
3881 			common_prefix_size = get_common_prefix_size(d->bd_prev_fbuf,
3882 			    d->bd_prev_sbuf, MIN(copy_len, d->bd_prev_slen));
3883 		}
3884 
3885 		if (d->bd_flags & BPF_COMP_ENABLED) {
3886 			assert3u(caplen, >=, common_prefix_size);
3887 			copy_bpf_packet_offset(pkt, payload, caplen - common_prefix_size,
3888 			    common_prefix_size);
3889 			d->bd_slen = curlen + totlen - common_prefix_size;
3890 		} else {
3891 			copy_bpf_packet(pkt, payload, caplen);
3892 			d->bd_slen = curlen + totlen;
3893 		}
3894 
3895 		/*
3896 		 * Update the caplen only if compression is enabled -- the caller
3897 		 * must pay attention to bpf_hdr_comp_enable
3898 		 */
3899 		if (d->bd_flags & BPF_EXTENDED_HDR) {
3900 			ehp->bh_complen = common_prefix_size;
3901 			if (d->bd_flags & BPF_COMP_ENABLED) {
3902 				ehp->bh_caplen -= common_prefix_size;
3903 			}
3904 		} else {
3905 			struct bpf_comp_hdr *hcp;
3906 
3907 			hcp = (struct bpf_comp_hdr *)(void *)(d->bd_sbuf + curlen);
3908 			hcp->bh_complen = common_prefix_size;
3909 			if (d->bd_flags & BPF_COMP_ENABLED) {
3910 				hcp->bh_caplen -= common_prefix_size;
3911 			}
3912 		}
3913 
3914 		if (common_prefix_size > 0) {
3915 			d->bd_bcs.bcs_total_compressed_prefix_size += common_prefix_size;
3916 			if (common_prefix_size > d->bd_bcs.bcs_max_compressed_prefix_size) {
3917 				d->bd_bcs.bcs_max_compressed_prefix_size = common_prefix_size;
3918 			}
3919 			d->bd_bcs.bcs_count_compressed_prefix += 1;
3920 		} else {
3921 			d->bd_bcs.bcs_count_no_common_prefix += 1;
3922 		}
3923 
3924 		/* The current compression buffer becomes the previous one */
3925 		caddr_t tmp = d->bd_prev_sbuf;
3926 		d->bd_prev_sbuf = d->bd_prev_fbuf;
3927 		d->bd_prev_slen = copy_len;
3928 		d->bd_prev_fbuf = tmp;
3929 	} else {
3930 		/*
3931 		 * Copy the packet data into the store buffer and update its length.
3932 		 */
3933 		copy_bpf_packet(pkt, payload, caplen);
3934 		d->bd_slen = curlen + totlen;
3935 	}
3936 	d->bd_scnt += 1;
3937 	d->bd_bcs.bcs_total_hdr_size += pkt->bpfp_header_length;
3938 	d->bd_bcs.bcs_total_size += caplen;
3939 
3940 	if (do_wakeup) {
3941 		bpf_wakeup(d);
3942 	}
3943 }
3944 
3945 static void
bpf_freebufs(struct bpf_d * d)3946 bpf_freebufs(struct bpf_d *d)
3947 {
3948 	if (d->bd_sbuf != NULL) {
3949 		kfree_data_addr(d->bd_sbuf);
3950 	}
3951 	if (d->bd_hbuf != NULL) {
3952 		kfree_data_addr(d->bd_hbuf);
3953 	}
3954 	if (d->bd_fbuf != NULL) {
3955 		kfree_data_addr(d->bd_fbuf);
3956 	}
3957 
3958 	if (d->bd_prev_sbuf != NULL) {
3959 		kfree_data_addr(d->bd_prev_sbuf);
3960 	}
3961 	if (d->bd_prev_fbuf != NULL) {
3962 		kfree_data_addr(d->bd_prev_fbuf);
3963 	}
3964 }
3965 /*
3966  * Initialize all nonzero fields of a descriptor.
3967  */
static int
bpf_allocbufs(struct bpf_d *d)
{
	/* release any buffers left over from a previous allocation */
	bpf_freebufs(d);

	d->bd_fbuf = (caddr_t) kalloc_data(d->bd_bufsize, Z_WAITOK | Z_ZERO);
	if (d->bd_fbuf == NULL) {
		goto nobufs;
	}

	d->bd_sbuf = (caddr_t) kalloc_data(d->bd_bufsize, Z_WAITOK | Z_ZERO);
	if (d->bd_sbuf == NULL) {
		goto nobufs;
	}
	d->bd_slen = 0;
	d->bd_hlen = 0;
	d->bd_scnt = 0;
	d->bd_hcnt = 0;

	d->bd_prev_slen = 0;
	if (d->bd_flags & BPF_COMP_REQ) {
		/* scratch buffers used for header-compression prefix matching */
		d->bd_prev_sbuf = (caddr_t) kalloc_data(BPF_HDR_COMP_LEN_MAX, Z_WAITOK | Z_ZERO);
		if (d->bd_prev_sbuf == NULL) {
			goto nobufs;
		}
		d->bd_prev_fbuf = (caddr_t) kalloc_data(BPF_HDR_COMP_LEN_MAX, Z_WAITOK | Z_ZERO);
		if (d->bd_prev_fbuf == NULL) {
			goto nobufs;
		}
	}
	return 0;
nobufs:
	/* free whatever subset was successfully allocated */
	bpf_freebufs(d);
	return ENOMEM;
}
4003 
4004 /*
4005  * Free buffers currently in use by a descriptor.
4006  * Called on close.
4007  */
static void
bpf_freed(struct bpf_d *d)
{
	/*
	 * We don't need to lock out interrupts since this descriptor has
	 * been detached from its interface and it yet hasn't been marked
	 * free.
	 */
	if (d->bd_hbuf_read || d->bd_hbuf_write) {
		panic("bpf buffer freed during read/write");
	}

	bpf_freebufs(d);

	/* release the installed filter program, if any */
	if (d->bd_filter) {
		kfree_data_addr(d->bd_filter);
	}
}
4026 
4027 /*
4028  * Attach an interface to bpf.	driverp is a pointer to a (struct bpf_if *)
4029  * in the driver's softc; dlt is the link layer type; hdrlen is the fixed
4030  * size of the link header (variable length headers not yet supported).
4031  */
void
bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
{
	/* legacy attach: no send or tap callbacks */
	bpf_attach(ifp, dlt, hdrlen, NULL, NULL);
}
4037 
/*
 * Register a (interface, dlt) pair with BPF.  Entries for the same
 * interface are kept adjacent in bpf_iflist; returns EEXIST if the
 * pair is already attached.
 */
errno_t
bpf_attach(
	ifnet_t ifp,
	u_int32_t dlt,
	u_int32_t hdrlen,
	bpf_send_func send,
	bpf_tap_func tap)
{
	struct bpf_if *bp;
	struct bpf_if *bp_new;
	struct bpf_if *bp_before_first = NULL;
	struct bpf_if *bp_first = NULL;
	struct bpf_if *bp_last = NULL;
	boolean_t found;

	/*
	 * Z_NOFAIL will cause a panic if the allocation fails
	 */
	bp_new = kalloc_type(struct bpf_if, Z_WAITOK | Z_NOFAIL | Z_ZERO);

	lck_mtx_lock(bpf_mlock);

	/*
	 * Check if this interface/dlt is already attached. Remember the
	 * first and last attachment for this interface, as well as the
	 * element before the first attachment.
	 */
	found = FALSE;
	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		if (bp->bif_ifp != ifp) {
			if (bp_first != NULL) {
				/* no more elements for this interface */
				break;
			}
			bp_before_first = bp;
		} else {
			if (bp->bif_dlt == dlt) {
				found = TRUE;
				break;
			}
			if (bp_first == NULL) {
				bp_first = bp;
			}
			bp_last = bp;
		}
	}
	if (found) {
		/* duplicate attachment: leave the list untouched */
		lck_mtx_unlock(bpf_mlock);
		os_log_error(OS_LOG_DEFAULT,
		    "bpfattach - %s with dlt %d is already attached",
		    if_name(ifp), dlt);
		kfree_type(struct bpf_if, bp_new);
		return EEXIST;
	}

	bp_new->bif_ifp = ifp;
	bp_new->bif_dlt = dlt;
	bp_new->bif_send = send;
	bp_new->bif_tap = tap;

	if (bp_first == NULL) {
		/* No other entries for this ifp */
		bp_new->bif_next = bpf_iflist;
		bpf_iflist = bp_new;
	} else {
		if (ifnet_type(ifp) == IFT_ETHER && dlt == DLT_EN10MB) {
			/* Make this the first entry for this interface */
			if (bp_before_first != NULL) {
				/*  point the previous to us */
				bp_before_first->bif_next = bp_new;
			} else {
				/* we're the new head */
				bpf_iflist = bp_new;
			}
			bp_new->bif_next = bp_first;
		} else {
			/* Add this after the last entry for this interface */
			bp_new->bif_next = bp_last->bif_next;
			bp_last->bif_next = bp_new;
		}
	}

	/*
	 * Compute the length of the bpf header.  This is not necessarily
	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
	 * that the network layer header begins on a longword boundary (for
	 * performance reasons and to alleviate alignment restrictions).
	 */
	bp_new->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;
	bp_new->bif_exthdrlen = BPF_WORDALIGN(hdrlen +
	    sizeof(struct bpf_hdr_ext)) - hdrlen;
	bp_new->bif_comphdrlen = BPF_WORDALIGN(hdrlen +
	    sizeof(struct bpf_comp_hdr)) - hdrlen;

	/* Take a reference on the interface */
	ifnet_reference(ifp);

	lck_mtx_unlock(bpf_mlock);

	return 0;
}
4139 
4140 /*
4141  * Detach bpf from an interface.  This involves detaching each descriptor
4142  * associated with the interface, and leaving bd_bif NULL.  Notify each
4143  * descriptor as it's detached so that any sleepers wake up and get
4144  * ENXIO.
4145  */
void
bpfdetach(struct ifnet *ifp)
{
	struct bpf_if   *bp, *bp_prev, *bp_next;
	struct bpf_d    *d;

	if (bpf_debug != 0) {
		os_log(OS_LOG_DEFAULT, "%s: %s", __func__, if_name(ifp));
	}

	lck_mtx_lock(bpf_mlock);

	/*
	 * Build the list of devices attached to that interface
	 * that we need to free while keeping the lock to maintain
	 * the integrity of the interface list
	 */
	bp_prev = NULL;
	for (bp = bpf_iflist; bp != NULL; bp = bp_next) {
		bp_next = bp->bif_next;

		if (ifp != bp->bif_ifp) {
			bp_prev = bp;
			continue;
		}
		/* Unlink from the interface list */
		if (bp_prev) {
			bp_prev->bif_next = bp->bif_next;
		} else {
			bpf_iflist = bp->bif_next;
		}

		/* Detach the devices attached to the interface */
		while ((d = bp->bif_dlist) != NULL) {
			/*
			 * Take an extra reference to prevent the device
			 * from being freed when bpf_detachd() releases
			 * the reference for the interface list
			 */
			bpf_acquire_d(d);

			/*
			 * Wait for active read and writes to complete
			 */
			while (d->bd_hbuf_read || d->bd_hbuf_write) {
				msleep((caddr_t)d, bpf_mlock, PRINET, "bpfdetach", NULL);
			}

			bpf_detachd(d);
			bpf_wakeup(d);
			bpf_release_d(d);
		}
		/* drop the reference taken by bpf_attach() */
		ifnet_release(ifp);
	}

	lck_mtx_unlock(bpf_mlock);
}
4203 
/*
 * One-time BPF subsystem initialization: sanity-check header layouts,
 * read boot-args and register the character device entries.
 */
void
bpf_init(__unused void *unused)
{
	int     maj;

	/* bpf_comp_hdr is an overlay of bpf_hdr */
	_CASSERT(BPF_WORDALIGN(sizeof(struct bpf_hdr)) ==
	    BPF_WORDALIGN(sizeof(struct bpf_comp_hdr)));

	/* compression length must fit in a byte */
	_CASSERT(BPF_HDR_COMP_LEN_MAX <= UCHAR_MAX );

	/* allow the header compression default to be overridden at boot */
	(void) PE_parse_boot_argn("bpf_hdr_comp", &bpf_hdr_comp_enable,
	    sizeof(bpf_hdr_comp_enable));

	if (bpf_devsw_installed == 0) {
		bpf_devsw_installed = 1;
		maj = cdevsw_add(CDEV_MAJOR, &bpf_cdevsw);
		if (maj == -1) {
			bpf_devsw_installed = 0;
			os_log_error(OS_LOG_DEFAULT,
			    "bpf_init: failed to allocate a major number");
			return;
		}

		for (int i = 0; i < NBPFILTER; i++) {
			bpf_make_dev_t(maj);
		}
	}
}
4234 
/*
 * sysctl handler for the bpf maxbufsize tunable.  Out-of-range new
 * values (negative or above BPF_BUFSIZE_CAP) are clamped to
 * BPF_BUFSIZE_CAP before being stored.
 */
static int
sysctl_bpf_maxbufsize SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	int i, err;

	i = bpf_maxbufsize;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL) {
		/* error, or a read-only request: nothing to store */
		return err;
	}

	if (i < 0 || i > BPF_BUFSIZE_CAP) {
		i = BPF_BUFSIZE_CAP;
	}

	bpf_maxbufsize = i;
	return err;
}
4255 
/*
 * sysctl handler reporting the compile-time buffer size cap.
 * Effectively read-only: a written value is parsed but discarded.
 */
static int
sysctl_bpf_bufsize_cap SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	int i, err;

	i = BPF_BUFSIZE_CAP;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL) {
		return err;
	}

	return err;
}
4271 
4272 /*
4273  * Fill filter statistics
4274  */
4275 static void
bpfstats_fill_xbpf(struct xbpf_d * d,struct bpf_d * bd)4276 bpfstats_fill_xbpf(struct xbpf_d *d, struct bpf_d *bd)
4277 {
4278 	LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);
4279 
4280 	d->bd_structsize = sizeof(struct xbpf_d);
4281 	d->bd_promisc = bd->bd_promisc != 0 ? 1 : 0;
4282 	d->bd_immediate = d->bd_immediate != 0 ? 1 : 0;
4283 	d->bd_hdrcmplt = bd->bd_hdrcmplt != 0 ? 1 : 0;
4284 	d->bd_async = bd->bd_async != 0 ? 1 : 0;
4285 	d->bd_headdrop = bd->bd_headdrop != 0 ? 1 : 0;
4286 	d->bd_direction = (uint8_t)bd->bd_direction;
4287 	d->bh_compreq = bd->bd_flags & BPF_COMP_REQ ? 1 : 0;
4288 	d->bh_compenabled = bd->bd_flags & BPF_COMP_ENABLED ? 1 : 0;
4289 	d->bd_exthdr = bd->bd_flags & BPF_EXTENDED_HDR ? 1 : 0;
4290 	d->bd_trunc = bd->bd_flags & BPF_TRUNCATE ? 1 : 0;
4291 	d->bd_pkthdrv2 = bd->bd_flags & BPF_PKTHDRV2 ? 1 : 0;
4292 
4293 	d->bd_dev_minor = (uint8_t)bd->bd_dev_minor;
4294 
4295 	d->bd_sig = bd->bd_sig;
4296 
4297 	d->bd_rcount = bd->bd_rcount;
4298 	d->bd_dcount = bd->bd_dcount;
4299 	d->bd_fcount = bd->bd_fcount;
4300 	d->bd_wcount = bd->bd_wcount;
4301 	d->bd_wdcount = bd->bd_wdcount;
4302 	d->bd_slen = bd->bd_slen;
4303 	d->bd_hlen = bd->bd_hlen;
4304 	d->bd_bufsize = bd->bd_bufsize;
4305 	d->bd_pid = bd->bd_pid;
4306 	if (bd->bd_bif != NULL && bd->bd_bif->bif_ifp != NULL) {
4307 		strlcpy(d->bd_ifname,
4308 		    bd->bd_bif->bif_ifp->if_xname, IFNAMSIZ);
4309 	}
4310 
4311 	d->bd_comp_count = bd->bd_bcs.bcs_count_compressed_prefix;
4312 	d->bd_comp_size = bd->bd_bcs.bcs_total_compressed_prefix_size;
4313 
4314 	d->bd_scnt = bd->bd_scnt;
4315 	d->bd_hcnt = bd->bd_hcnt;
4316 
4317 	d->bd_read_count = bd->bd_bcs.bcs_total_read;
4318 	d->bd_fsize = bd->bd_bcs.bcs_total_size;
4319 }
4320 
4321 /*
4322  * Handle `netstat -B' stats request
4323  */
4324 static int
4325 sysctl_bpf_stats SYSCTL_HANDLER_ARGS
4326 {
4327 	int error;
4328 	struct xbpf_d *xbdbuf;
4329 	unsigned int x_cnt;
4330 	vm_size_t buf_size;
4331 
4332 	if (req->oldptr == USER_ADDR_NULL) {
4333 		return SYSCTL_OUT(req, 0, nbpfilter * sizeof(struct xbpf_d));
4334 	}
4335 	if (nbpfilter == 0) {
4336 		return SYSCTL_OUT(req, 0, 0);
4337 	}
4338 	buf_size = req->oldlen;
4339 	if (buf_size > BPF_MAX_DEVICES * sizeof(struct xbpf_d)) {
4340 		buf_size = BPF_MAX_DEVICES * sizeof(struct xbpf_d);
4341 	}
4342 	xbdbuf = kalloc_data(buf_size, Z_WAITOK | Z_ZERO);
4343 
4344 	lck_mtx_lock(bpf_mlock);
4345 	if (buf_size < (nbpfilter * sizeof(struct xbpf_d))) {
4346 		lck_mtx_unlock(bpf_mlock);
4347 		kfree_data(xbdbuf, buf_size);
4348 		return ENOMEM;
4349 	}
4350 	x_cnt = 0;
4351 	unsigned int i;
4352 
4353 	for (i = 0; i < nbpfilter; i++) {
4354 		struct bpf_d *bd = bpf_dtab[i];
4355 		struct xbpf_d *xbd;
4356 
4357 		if (bd == NULL || bd == BPF_DEV_RESERVED ||
4358 		    (bd->bd_flags & BPF_CLOSING) != 0) {
4359 			continue;
4360 		}
4361 		VERIFY(x_cnt < nbpfilter);
4362 
4363 		xbd = &xbdbuf[x_cnt++];
4364 		bpfstats_fill_xbpf(xbd, bd);
4365 	}
4366 	lck_mtx_unlock(bpf_mlock);
4367 
4368 	error = SYSCTL_OUT(req, xbdbuf, x_cnt * sizeof(struct xbpf_d));
4369 	kfree_data(xbdbuf, buf_size);
4370 	return error;
4371 }
4372