xref: /xnu-8020.121.3/bsd/net/bpf.c (revision fdd8201d7b966f0c3ea610489d29bd841d358941)
1 /*
2  * Copyright (c) 2000-2021 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * Copyright (c) 1990, 1991, 1993
30  *	The Regents of the University of California.  All rights reserved.
31  *
32  * This code is derived from the Stanford/CMU enet packet filter,
33  * (net/enet.c) distributed as part of 4.3BSD, and code contributed
34  * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
35  * Berkeley Laboratory.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. All advertising materials mentioning features or use of this software
46  *    must display the following acknowledgement:
47  *	This product includes software developed by the University of
48  *	California, Berkeley and its contributors.
49  * 4. Neither the name of the University nor the names of its contributors
50  *    may be used to endorse or promote products derived from this software
51  *    without specific prior written permission.
52  *
53  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63  * SUCH DAMAGE.
64  *
65  *	@(#)bpf.c	8.2 (Berkeley) 3/28/94
66  *
67  * $FreeBSD: src/sys/net/bpf.c,v 1.59.2.5 2001/01/05 04:49:09 jdp Exp $
68  */
69 /*
70  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
71  * support for mandatory and extensible security protections.  This notice
72  * is included in support of clause 2.2 (b) of the Apple Public License,
73  * Version 2.0.
74  */
75 
76 #include "bpf.h"
77 
78 #ifndef __GNUC__
79 #define inline
80 #else
81 #define inline __inline
82 #endif
83 
84 #include <sys/param.h>
85 #include <sys/systm.h>
86 #include <sys/conf.h>
87 #include <sys/malloc.h>
88 #include <sys/mbuf.h>
89 #include <sys/time.h>
90 #include <sys/proc.h>
91 #include <sys/signalvar.h>
92 #include <sys/filio.h>
93 #include <sys/sockio.h>
94 #include <sys/ttycom.h>
95 #include <sys/filedesc.h>
96 #include <sys/uio_internal.h>
97 #include <sys/file_internal.h>
98 #include <sys/event.h>
99 
100 #include <sys/poll.h>
101 
102 #include <sys/socket.h>
103 #include <sys/socketvar.h>
104 #include <sys/vnode.h>
105 
106 #include <net/if.h>
107 #include <net/bpf.h>
108 #include <net/bpfdesc.h>
109 
110 #include <netinet/in.h>
111 #include <netinet/ip.h>
112 #include <netinet/ip6.h>
113 #include <netinet/in_pcb.h>
114 #include <netinet/in_var.h>
115 #include <netinet/ip_var.h>
116 #include <netinet/tcp.h>
117 #include <netinet/tcp_var.h>
118 #include <netinet/udp.h>
119 #include <netinet/udp_var.h>
120 #include <netinet/if_ether.h>
121 #include <netinet/isakmp.h>
122 #include <netinet6/esp.h>
123 #include <sys/kernel.h>
124 #include <sys/sysctl.h>
125 #include <net/firewire.h>
126 
127 #include <miscfs/devfs/devfs.h>
128 #include <net/dlil.h>
129 #include <net/pktap.h>
130 
131 #include <kern/locks.h>
132 #include <kern/thread_call.h>
133 #include <libkern/section_keywords.h>
134 
135 #include <os/log.h>
136 
extern int tvtohz(struct timeval *);

/* Default size of a descriptor's capture buffers (patchable via sysctl below) */
#define BPF_BUFSIZE 4096
/* Compatibility wrapper: the 'code' (direction) argument is unused on Darwin */
#define UIOMOVE(cp, len, code, uio) uiomove(cp, len, uio)

#define PRINET  26                      /* interruptible */

/* Header sizes used when computing truncated capture lengths for IKE/ESP */
#define ISAKMP_HDR_SIZE (sizeof(struct isakmp) + sizeof(struct isakmp_gen))
#define ESP_HDR_SIZE sizeof(struct newesp)

/* Signature of a (src, dst, len) packet-copy routine */
typedef void (*pktcopyfunc_t)(const void *, void *, size_t);

/*
 * The default read buffer size is patchable.
 */
static unsigned int bpf_bufsize = BPF_BUFSIZE;
SYSCTL_INT(_debug, OID_AUTO, bpf_bufsize, CTLFLAG_RW | CTLFLAG_LOCKED,
    &bpf_bufsize, 0, "");

/*
 * Upper bound on the buffer size a process may request; the sysctl handler
 * caps it at BPF_MAXSIZE_CAP, half the kernel's copy-size panic limit.
 */
static int sysctl_bpf_maxbufsize SYSCTL_HANDLER_ARGS;
extern const int copysize_limit_panic;
#define BPF_MAXSIZE_CAP (copysize_limit_panic >> 1)
__private_extern__ unsigned int bpf_maxbufsize = BPF_MAXBUFSIZE;
SYSCTL_PROC(_debug, OID_AUTO, bpf_maxbufsize, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &bpf_maxbufsize, 0,
    sysctl_bpf_maxbufsize, "I", "Default BPF max buffer size");

/* Maximum number of /dev/bpfN nodes bpf_make_dev_t() will ever create */
static unsigned int bpf_maxdevices = 256;
SYSCTL_UINT(_debug, OID_AUTO, bpf_maxdevices, CTLFLAG_RW | CTLFLAG_LOCKED,
    &bpf_maxdevices, 0, "");
/*
 * bpf_wantpktap controls the default visibility of DLT_PKTAP.
 * On OS X it is off by default, so a process needs to use the ioctl
 * BPF_WANT_PKTAP explicitly to be able to use DLT_PKTAP.
 */
#if !XNU_TARGET_OS_OSX
static unsigned int bpf_wantpktap = 1;
#else /* XNU_TARGET_OS_OSX */
static unsigned int bpf_wantpktap = 0;
#endif /* XNU_TARGET_OS_OSX */
SYSCTL_UINT(_debug, OID_AUTO, bpf_wantpktap, CTLFLAG_RW | CTLFLAG_LOCKED,
    &bpf_wantpktap, 0, "");

/* When non-zero, descriptor attach/detach/close events are logged */
static int bpf_debug = 0;
SYSCTL_INT(_debug, OID_AUTO, bpf_debug, CTLFLAG_RW | CTLFLAG_LOCKED,
    &bpf_debug, 0, "");

/* NOTE(review): appears to count truncation-length overflow events —
 * incremented elsewhere (see get_pkt_trunc_len); confirm before relying on it */
static unsigned long bpf_trunc_overflow = 0;
SYSCTL_ULONG(_debug, OID_AUTO, bpf_trunc_overflow, CTLFLAG_RD | CTLFLAG_LOCKED,
    &bpf_trunc_overflow, "");

/*
 *  bpf_iflist is the list of interfaces; each corresponds to an ifnet
 *  bpf_dtab holds pointer to the descriptors, indexed by minor device #
 */
static struct bpf_if    *bpf_iflist;
#ifdef __APPLE__
/*
 * BSD now stores the bpf_d in the dev_t which is a struct
 * on their system. Our dev_t is an int, so we still store
 * the bpf_d in a separate table indexed by minor device #.
 *
 * The value stored in bpf_dtab[n] represent three states:
 *  NULL: device not opened
 *  BPF_DEV_RESERVED: device opening or closing
 *  other: device <n> opened with pointer to storage
 */
#define BPF_DEV_RESERVED ((struct bpf_d *)(uintptr_t)1)
static struct bpf_d     **bpf_dtab = NULL;
static unsigned int bpf_dtab_size = 0;
static unsigned int     nbpfilter = 0;

/* Single mutex serializing all BPF state (bpf_dtab, descriptors, bpf_iflist) */
static LCK_GRP_DECLARE(bpf_mlock_grp, "bpf");
static LCK_MTX_DECLARE(bpf_mlock_data, &bpf_mlock_grp);
static lck_mtx_t *const bpf_mlock = &bpf_mlock_data;

#endif /* __APPLE__ */

/* Forward declarations for internal helpers */
static int      bpf_allocbufs(struct bpf_d *);
static errno_t  bpf_attachd(struct bpf_d *d, struct bpf_if *bp);
static int      bpf_detachd(struct bpf_d *d, int);
static void     bpf_freed(struct bpf_d *);
static int      bpf_movein(struct uio *, int,
    struct mbuf **, struct sockaddr *, int *);
static int      bpf_setif(struct bpf_d *, ifnet_t ifp, bool, bool);
static void     bpf_timed_out(void *, void *);
static void     bpf_wakeup(struct bpf_d *);
static uint32_t get_pkt_trunc_len(struct bpf_packet *);
static void     catchpacket(struct bpf_d *, struct bpf_packet *, u_int, int);
static void     reset_d(struct bpf_d *);
static int      bpf_setf(struct bpf_d *, u_int, user_addr_t, u_long);
static int      bpf_getdltlist(struct bpf_d *, caddr_t, struct proc *);
static int      bpf_setdlt(struct bpf_d *, u_int);
static int      bpf_set_traffic_class(struct bpf_d *, int);
static void     bpf_set_packet_service_class(struct mbuf *, int);

static void     bpf_acquire_d(struct bpf_d *);
static void     bpf_release_d(struct bpf_d *);

static  int bpf_devsw_installed;

void bpf_init(void *unused);
static int bpf_tap_callback(struct ifnet *ifp, struct mbuf *m);

/*
 * Darwin differs from BSD here, the following are static
 * on BSD and not static on Darwin.
 */
d_open_t            bpfopen;
d_close_t           bpfclose;
d_read_t            bpfread;
d_write_t           bpfwrite;
ioctl_fcn_t         bpfioctl;
select_fcn_t        bpfselect;

/* Darwin's cdevsw struct differs slightly from BSDs */
#define CDEV_MAJOR 23
static const struct cdevsw bpf_cdevsw = {
	.d_open       = bpfopen,
	.d_close      = bpfclose,
	.d_read       = bpfread,
	.d_write      = bpfwrite,
	.d_ioctl      = bpfioctl,
	.d_stop       = eno_stop,
	.d_reset      = eno_reset,
	.d_ttys       = NULL,
	.d_select     = bpfselect,
	.d_mmap       = eno_mmap,
	.d_strategy   = eno_strat,
	.d_reserved_1 = eno_getc,
	.d_reserved_2 = eno_putc,
	.d_type       = 0
};

/* Size of a sockaddr up to, but not including, sa_data */
#define SOCKADDR_HDR_LEN           offsetof(struct sockaddr, sa_data)
272 
/*
 * bpf_movein - copy a packet written to a BPF device from user space into a
 * newly allocated mbuf.
 *
 * uio      - source of the packet bytes
 * linktype - DLT_* of the attached interface; selects address family and
 *            link-level header length
 * mp       - out: the allocated mbuf on success
 * sockp    - if non-NULL, receives the link-level header in sa_data and has
 *            sa_family set from the DLT; if NULL the packet is sent as-is
 *            and no link header is split off
 * datlen   - out: payload length, excluding the link-level header
 *
 * Returns 0 on success, EIO for an unsupported DLT or bad length,
 * ENOBUFS if mbuf/cluster allocation fails.
 */
static int
bpf_movein(struct uio *uio, int linktype, struct mbuf **mp,
    struct sockaddr *sockp, int *datlen)
{
	struct mbuf *m;
	int error;
	int len;
	uint8_t sa_family;
	int hlen;       /* link-level header length for this DLT */

	switch (linktype) {
#if SLIP
	case DLT_SLIP:
		sa_family = AF_INET;
		hlen = 0;
		break;
#endif /* SLIP */

	case DLT_EN10MB:
		sa_family = AF_UNSPEC;
		/* XXX Would MAXLINKHDR be better? */
		hlen = sizeof(struct ether_header);
		break;

#if FDDI
	case DLT_FDDI:
#if defined(__FreeBSD__) || defined(__bsdi__)
		sa_family = AF_IMPLINK;
		hlen = 0;
#else
		sa_family = AF_UNSPEC;
		/* XXX 4(FORMAC)+6(dst)+6(src)+3(LLC)+5(SNAP) */
		hlen = 24;
#endif
		break;
#endif /* FDDI */

	case DLT_RAW:
	case DLT_NULL:
		sa_family = AF_UNSPEC;
		hlen = 0;
		break;

#ifdef __FreeBSD__
	case DLT_ATM_RFC1483:
		/*
		 * en atm driver requires 4-byte atm pseudo header.
		 * though it isn't standard, vpi:vci needs to be
		 * specified anyway.
		 */
		sa_family = AF_UNSPEC;
		hlen = 12;      /* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
		break;
#endif

	case DLT_PPP:
		sa_family = AF_UNSPEC;
		hlen = 4;       /* This should match PPP_HDRLEN */
		break;

	case DLT_APPLE_IP_OVER_IEEE1394:
		sa_family = AF_UNSPEC;
		hlen = sizeof(struct firewire_header);
		break;

	case DLT_IEEE802_11:            /* IEEE 802.11 wireless */
		sa_family = AF_IEEE80211;
		hlen = 0;
		break;

	case DLT_IEEE802_11_RADIO:
		sa_family = AF_IEEE80211;
		hlen = 0;
		break;

	default:
		return EIO;
	}

	// LP64todo - fix this!
	len = (int)uio_resid(uio);
	/* Must include at least the link header; whole write must fit a cluster */
	if (len < hlen || (unsigned)len > MCLBYTES || len - hlen > MCLBYTES) {
		return EIO;
	}

	*datlen = len - hlen;

	if (sockp) {
		/*
		 * Build a sockaddr based on the data link layer type.
		 * We do this at this level because the ethernet header
		 * is copied directly into the data field of the sockaddr.
		 * In the case of SLIP, there is no header and the packet
		 * is forwarded as is.
		 * Also, we are careful to leave room at the front of the mbuf
		 * for the link level header.
		 */
		if ((hlen + SOCKADDR_HDR_LEN) > sockp->sa_len) {
			return EIO;
		}
		sockp->sa_family = sa_family;
	} else {
		/*
		 * We're directly sending the packet data supplied by
		 * the user; we don't need to make room for the link
		 * header, and don't need the header length value any
		 * more, so set it to 0.
		 */
		hlen = 0;
	}

	MGETHDR(m, M_WAIT, MT_DATA);
	if (m == 0) {
		return ENOBUFS;
	}
	/* Attach a cluster if the data won't fit in the mbuf header itself */
	if ((unsigned)len > MHLEN) {
		MCLGET(m, M_WAIT);
		if ((m->m_flags & M_EXT) == 0) {
			error = ENOBUFS;
			goto bad;
		}
	}
	m->m_pkthdr.len = m->m_len = len;
	m->m_pkthdr.rcvif = NULL;
	*mp = m;

	/*
	 * Make room for link header.
	 */
	if (hlen != 0) {
		/* Split the link header off into sockp->sa_data */
		m->m_pkthdr.len -= hlen;
		m->m_len -= hlen;
		m->m_data += hlen; /* XXX */
		error = UIOMOVE((caddr_t)sockp->sa_data, hlen, UIO_WRITE, uio);
		if (error) {
			goto bad;
		}
	}
	/* Copy the payload (everything after the link header) into the mbuf */
	error = UIOMOVE(mtod(m, caddr_t), len - hlen, UIO_WRITE, uio);
	if (error) {
		goto bad;
	}

	/* Check for multicast destination */
	switch (linktype) {
	case DLT_EN10MB: {
		struct ether_header *eh;

		eh = mtod(m, struct ether_header *);
		if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
			if (_ether_cmp(etherbroadcastaddr,
			    eh->ether_dhost) == 0) {
				m->m_flags |= M_BCAST;
			} else {
				m->m_flags |= M_MCAST;
			}
		}
		break;
	}
	}

	return 0;
bad:
	m_freem(m);
	return error;
}
439 
440 #ifdef __APPLE__
441 
442 /*
443  * The dynamic addition of a new device node must block all processes that
444  * are opening the last device so that no process will get an unexpected
445  * ENOENT
446  */
static void
bpf_make_dev_t(int maj)
{
	/* Non-zero while some thread is creating a node; others sleep on it */
	static int              bpf_growing = 0;
	unsigned int    cur_size = nbpfilter, i;

	/* Hard cap on the number of device nodes */
	if (nbpfilter >= bpf_maxdevices) {
		return;
	}

	while (bpf_growing) {
		/* Wait until new device has been created */
		(void) tsleep((caddr_t)&bpf_growing, PZERO, "bpf_growing", 0);
	}
	if (nbpfilter > cur_size) {
		/* other thread grew it already */
		return;
	}
	bpf_growing = 1;

	/* need to grow bpf_dtab first */
	if (nbpfilter == bpf_dtab_size) {
		unsigned int new_dtab_size;
		struct bpf_d **new_dtab = NULL;

		/* Grow the table in NBPFILTER-sized increments */
		new_dtab_size = bpf_dtab_size + NBPFILTER;
		new_dtab = krealloc_type(struct bpf_d *,
		    bpf_dtab_size, new_dtab_size, bpf_dtab, Z_WAITOK | Z_ZERO);
		if (new_dtab == 0) {
			printf("bpf_make_dev_t: malloc bpf_dtab failed\n");
			goto done;
		}
		bpf_dtab = new_dtab;
		bpf_dtab_size = new_dtab_size;
	}
	i = nbpfilter++;
	(void) devfs_make_node(makedev(maj, i),
	    DEVFS_CHAR, UID_ROOT, GID_WHEEL, 0600,
	    "bpf%d", i);
done:
	/* Release the growth latch and wake any waiters */
	bpf_growing = 0;
	wakeup((caddr_t)&bpf_growing);
}
490 
491 #endif
492 
493 /*
494  * Attach file to the bpf interface, i.e. make d listen on bp.
495  */
static errno_t
bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
{
	int first = bp->bif_dlist == NULL;      /* first listener on this (ifp, dlt)? */
	int     error = 0;

	/*
	 * Point d at bp, and add d to the interface's list of listeners.
	 * Finally, point the driver's bpf cookie at the interface so
	 * it will divert packets to bpf.
	 */
	d->bd_bif = bp;
	d->bd_next = bp->bif_dlist;
	bp->bif_dlist = d;

	/*
	 * Take a reference on the device even if an error is returned
	 * because we keep the device in the interface's list of listeners
	 */
	bpf_acquire_d(d);

	if (first) {
		/* Find the default bpf entry for this ifp */
		if (bp->bif_ifp->if_bpf == NULL) {
			struct bpf_if   *tmp, *primary = NULL;

			/* First bpf_if on bpf_iflist for this ifp is the primary */
			for (tmp = bpf_iflist; tmp; tmp = tmp->bif_next) {
				if (tmp->bif_ifp == bp->bif_ifp) {
					primary = tmp;
					break;
				}
			}
			bp->bif_ifp->if_bpf = primary;
		}
		/* Only call dlil_set_bpf_tap for primary dlt */
		if (bp->bif_ifp->if_bpf == bp) {
			dlil_set_bpf_tap(bp->bif_ifp, BPF_TAP_INPUT_OUTPUT,
			    bpf_tap_callback);
		}

		/* Per-DLT tap callback, if the attachment registered one */
		if (bp->bif_tap != NULL) {
			error = bp->bif_tap(bp->bif_ifp, bp->bif_dlt,
			    BPF_TAP_INPUT_OUTPUT);
		}
	}

	/*
	 * Reset the detach flags in case we previously detached an interface
	 */
	d->bd_flags &= ~(BPF_DETACHING | BPF_DETACHED);

	/* PKTAP headers need per-process finalization at read time */
	if (bp->bif_dlt == DLT_PKTAP) {
		d->bd_flags |= BPF_FINALIZE_PKTAP;
	} else {
		d->bd_flags &= ~BPF_FINALIZE_PKTAP;
	}
	return error;
}
554 
555 /*
556  * Detach a file from its interface.
557  *
558  * Return 1 if was closed by some thread, 0 otherwise
559  */
static int
bpf_detachd(struct bpf_d *d, int closing)
{
	struct bpf_d **p;
	struct bpf_if *bp;
	struct ifnet  *ifp;

	int bpf_closed = d->bd_flags & BPF_CLOSING;
	/*
	 * Some other thread already detached
	 */
	if ((d->bd_flags & (BPF_DETACHED | BPF_DETACHING)) != 0) {
		goto done;
	}
	/*
	 * This thread is doing the detach
	 */
	d->bd_flags |= BPF_DETACHING;

	ifp = d->bd_bif->bif_ifp;
	bp = d->bd_bif;

	if (bpf_debug != 0) {
		printf("%s: %llx %s%s\n",
		    __func__, (uint64_t)VM_KERNEL_ADDRPERM(d),
		    if_name(ifp), closing ? " closing" : "");
	}

	/* Remove d from the interface's descriptor list. */
	p = &bp->bif_dlist;
	while (*p != d) {
		p = &(*p)->bd_next;
		if (*p == 0) {
			panic("bpf_detachd: descriptor not in list");
		}
	}
	*p = (*p)->bd_next;
	if (bp->bif_dlist == 0) {
		/*
		 * Let the driver know that there are no more listeners.
		 */
		/* Only call dlil_set_bpf_tap for primary dlt */
		if (bp->bif_ifp->if_bpf == bp) {
			dlil_set_bpf_tap(ifp, BPF_TAP_DISABLE, NULL);
		}
		if (bp->bif_tap) {
			bp->bif_tap(ifp, bp->bif_dlt, BPF_TAP_DISABLE);
		}

		/*
		 * If no other DLT on this ifp still has listeners,
		 * clear the interface's bpf pointer entirely.
		 */
		for (bp = bpf_iflist; bp; bp = bp->bif_next) {
			if (bp->bif_ifp == ifp && bp->bif_dlist != 0) {
				break;
			}
		}
		if (bp == NULL) {
			ifp->if_bpf = NULL;
		}
	}
	d->bd_bif = NULL;
	/*
	 * Check if this descriptor had requested promiscuous mode.
	 * If so, turn it off.
	 */
	if (d->bd_promisc) {
		d->bd_promisc = 0;
		/* Must drop bpf_mlock: ifnet_set_promiscuous may block */
		lck_mtx_unlock(bpf_mlock);
		if (ifnet_set_promiscuous(ifp, 0)) {
			/*
			 * Something is really wrong if we were able to put
			 * the driver into promiscuous mode, but can't
			 * take it out.
			 * Most likely the network interface is gone.
			 */
			printf("%s: ifnet_set_promiscuous failed\n", __func__);
		}
		lck_mtx_lock(bpf_mlock);
	}

	/*
	 * Wake up other threads that are waiting for this thread to finish
	 * detaching
	 */
	d->bd_flags &= ~BPF_DETACHING;
	d->bd_flags |= BPF_DETACHED;

	/* Refresh the local variable as d could have been modified */
	bpf_closed = d->bd_flags & BPF_CLOSING;
	/*
	 * Note that we've kept the reference because we may have dropped
	 * the lock when turning off promiscuous mode
	 */
	bpf_release_d(d);

done:
	/*
	 * When closing makes sure no other thread refer to the bpf_d
	 */
	if (bpf_debug != 0) {
		printf("%s: %llx done\n",
		    __func__, (uint64_t)VM_KERNEL_ADDRPERM(d));
	}
	/*
	 * Let the caller know the bpf_d is closed
	 */
	if (bpf_closed) {
		return 1;
	} else {
		return 0;
	}
}
670 
671 /*
672  * Start asynchronous timer, if necessary.
673  * Must be called with bpf_mlock held.
674  */
static void
bpf_start_timer(struct bpf_d *d)
{
	uint64_t deadline;
	struct timeval tv;

	/* Only arm the timer if a read timeout is set and none is pending */
	if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
		/* Convert the tick-based timeout (bd_rtout) into a timeval */
		tv.tv_sec = d->bd_rtout / hz;
		tv.tv_usec = (d->bd_rtout % hz) * tick;

		clock_interval_to_deadline(
			(uint32_t)tv.tv_sec * USEC_PER_SEC + tv.tv_usec,
			NSEC_PER_USEC, &deadline);
		/*
		 * The state is BPF_IDLE, so the timer hasn't
		 * been started yet, and hasn't gone off yet;
		 * there is no thread call scheduled, so this
		 * won't change the schedule.
		 *
		 * XXX - what if, by the time it gets entered,
		 * the deadline has already passed?
		 */
		thread_call_enter_delayed(d->bd_thread_call, deadline);
		d->bd_state = BPF_WAITING;
	}
}
701 
702 /*
703  * Cancel asynchronous timer.
704  * Must be called with bpf_mlock held.
705  */
/* Returns TRUE if a pending call was cancelled, FALSE if it already fired */
static boolean_t
bpf_stop_timer(struct bpf_d *d)
{
	/*
	 * If the timer has already gone off, this does nothing.
	 * Our caller is expected to set d->bd_state to BPF_IDLE,
	 * with the bpf_mlock, after we are called. bpf_timed_out()
	 * also grabs bpf_mlock, so, if the timer has gone off and
	 * bpf_timed_out() hasn't finished, it's waiting for the
	 * lock; when this thread releases the lock, it will
	 * find the state is BPF_IDLE, and just release the
	 * lock and return.
	 */
	return thread_call_cancel(d->bd_thread_call);
}
721 
/* Take a reference on a descriptor; caller must hold bpf_mlock */
void
bpf_acquire_d(struct bpf_d *d)
{
	void *lr_saved =  __builtin_return_address(0);

	LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);

	d->bd_refcnt += 1;

	/* Record the caller's return address in a small ring for debugging */
	d->bd_ref_lr[d->bd_next_ref_lr] = lr_saved;
	d->bd_next_ref_lr = (d->bd_next_ref_lr + 1) % BPF_REF_HIST;
}
734 
/*
 * Drop a reference on a descriptor; frees it when the count reaches zero.
 * Caller must hold bpf_mlock. The descriptor must already be detached
 * (BPF_DETACHED) before the final reference is released.
 */
void
bpf_release_d(struct bpf_d *d)
{
	void *lr_saved =  __builtin_return_address(0);

	LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);

	/* Over-release indicates a refcounting bug; fail loudly */
	if (d->bd_refcnt <= 0) {
		panic("%s: %p refcnt <= 0", __func__, d);
	}

	d->bd_refcnt -= 1;

	/* Record the caller's return address in a small ring for debugging */
	d->bd_unref_lr[d->bd_next_unref_lr] = lr_saved;
	d->bd_next_unref_lr = (d->bd_next_unref_lr + 1) % BPF_REF_HIST;

	if (d->bd_refcnt == 0) {
		/* Assert the device is detached */
		if ((d->bd_flags & BPF_DETACHED) == 0) {
			panic("%s: %p BPF_DETACHED not set", __func__, d);
		}

		kfree_type(struct bpf_d, d);
	}
}
760 
761 /*
762  * Open ethernet device.  Returns ENXIO for illegal minor device number,
763  * EBUSY if file is open by another process.
764  */
765 /* ARGSUSED */
int
bpfopen(dev_t dev, int flags, __unused int fmt,
    struct proc *p)
{
	struct bpf_d *d;

	lck_mtx_lock(bpf_mlock);
	if ((unsigned int) minor(dev) >= nbpfilter) {
		lck_mtx_unlock(bpf_mlock);
		return ENXIO;
	}
	/*
	 * New device nodes are created on demand when opening the last one.
	 * The programming model is for processes to loop on the minor starting
	 * at 0 as long as EBUSY is returned. The loop stops when either the
	 * open succeeds or an error other than EBUSY is returned. That means
	 * that bpf_make_dev_t() must block all processes that are opening the
	 * last  node. If not all processes are blocked, they could unexpectedly
	 * get ENOENT and abort their opening loop.
	 */
	if ((unsigned int) minor(dev) == (nbpfilter - 1)) {
		bpf_make_dev_t(major(dev));
	}

	/*
	 * Each minor can be opened by only one process.  If the requested
	 * minor is in use, return EBUSY.
	 *
	 * Important: bpfopen() and bpfclose() have to check and set the status
	 * of a device in the same locking context otherwise the device may be
	 * leaked because the vnode use count will be unexpectedly greater than 1
	 * when close() is called.
	 */
	if (bpf_dtab[minor(dev)] == NULL) {
		/* Reserve while opening */
		bpf_dtab[minor(dev)] = BPF_DEV_RESERVED;
	} else {
		lck_mtx_unlock(bpf_mlock);
		return EBUSY;
	}
	d = kalloc_type(struct bpf_d, M_WAIT | Z_ZERO);
	if (d == NULL) {
		/* this really is a catastrophic failure */
		printf("bpfopen: malloc bpf_d failed\n");
		bpf_dtab[minor(dev)] = NULL;
		lck_mtx_unlock(bpf_mlock);
		return ENOMEM;
	}

	/* Mark "in use" and do most initialization. */
	bpf_acquire_d(d);
	d->bd_bufsize = bpf_bufsize;
	d->bd_sig = SIGIO;
	d->bd_seesent = 1;              /* see locally-sent packets by default */
	d->bd_oflags = flags;
	d->bd_state = BPF_IDLE;
	d->bd_traffic_class = SO_TC_BE;
	d->bd_flags |= BPF_DETACHED;    /* not bound to an interface yet */
	if (bpf_wantpktap) {
		d->bd_flags |= BPF_WANT_PKTAP;
	} else {
		d->bd_flags &= ~BPF_WANT_PKTAP;
	}
	/* Per-descriptor thread call backing the read-timeout machinery */
	d->bd_thread_call = thread_call_allocate(bpf_timed_out, d);
	if (d->bd_thread_call == NULL) {
		printf("bpfopen: malloc thread call failed\n");
		bpf_dtab[minor(dev)] = NULL;
		bpf_release_d(d);
		lck_mtx_unlock(bpf_mlock);

		return ENOMEM;
	}
	d->bd_opened_by = p;
	uuid_generate(d->bd_uuid);

	bpf_dtab[minor(dev)] = d; /* Mark opened */
	lck_mtx_unlock(bpf_mlock);

	return 0;
}
846 
847 /*
848  * Close the descriptor by detaching it from its interface,
849  * deallocating its buffers, and marking it free.
850  */
851 /* ARGSUSED */
int
bpfclose(dev_t dev, __unused int flags, __unused int fmt,
    __unused struct proc *p)
{
	struct bpf_d *d;

	/* Take BPF lock to ensure no other thread is using the device */
	lck_mtx_lock(bpf_mlock);

	d = bpf_dtab[minor(dev)];
	if (d == NULL || d == BPF_DEV_RESERVED) {
		lck_mtx_unlock(bpf_mlock);
		return ENXIO;
	}

	/*
	 * Other threads may call bpf_detachd() if we drop the bpf_mlock
	 */
	d->bd_flags |= BPF_CLOSING;

	if (bpf_debug != 0) {
		printf("%s: %llx\n",
		    __func__, (uint64_t)VM_KERNEL_ADDRPERM(d));
	}

	bpf_dtab[minor(dev)] = BPF_DEV_RESERVED; /* Reserve while closing */

	/*
	 * Deal with any in-progress timeouts.
	 */
	switch (d->bd_state) {
	case BPF_IDLE:
		/*
		 * Not waiting for a timeout, and no timeout happened.
		 */
		break;

	case BPF_WAITING:
		/*
		 * Waiting for a timeout.
		 * Cancel any timer that has yet to go off,
		 * and mark the state as "closing".
		 * Then drop the lock to allow any timers that
		 * *have* gone off to run to completion, and wait
		 * for them to finish.
		 */
		if (!bpf_stop_timer(d)) {
			/*
			 * There was no pending call, so the call must
			 * have been in progress. Wait for the call to
			 * complete; we have to drop the lock while
			 * waiting, to let the in-progress call complete
			 */
			d->bd_state = BPF_DRAINING;
			while (d->bd_state == BPF_DRAINING) {
				msleep((caddr_t)d, bpf_mlock, PRINET,
				    "bpfdraining", NULL);
			}
		}
		d->bd_state = BPF_IDLE;
		break;

	case BPF_TIMED_OUT:
		/*
		 * Timer went off, and the timeout routine finished.
		 */
		d->bd_state = BPF_IDLE;
		break;

	case BPF_DRAINING:
		/*
		 * Another thread is blocked on a close waiting for
		 * a timeout to finish.
		 * This "shouldn't happen", as the first thread to enter
		 * bpfclose() will set bpf_dtab[minor(dev)] to
		 * BPF_DEV_RESERVED, and all subsequent threads should
		 * see that and fail with ENXIO.
		 */
		panic("Two threads blocked in a BPF close");
		break;
	}

	/* Unhook from the interface, if attached */
	if (d->bd_bif) {
		bpf_detachd(d, 1);
	}
	selthreadclear(&d->bd_sel);
	thread_call_free(d->bd_thread_call);

	/* Wait for any in-flight read to give up the hold buffer */
	while (d->bd_hbuf_read != 0) {
		msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
	}

	bpf_freed(d);

	/* Mark free in same context as bpfopen comes to check */
	bpf_dtab[minor(dev)] = NULL;                    /* Mark closed */

	bpf_release_d(d);

	lck_mtx_unlock(bpf_mlock);

	return 0;
}
955 
956 #define BPF_SLEEP bpf_sleep
957 
958 static int
bpf_sleep(struct bpf_d * d,int pri,const char * wmesg,int timo)959 bpf_sleep(struct bpf_d *d, int pri, const char *wmesg, int timo)
960 {
961 	u_int64_t abstime = 0;
962 
963 	if (timo != 0) {
964 		clock_interval_to_deadline(timo, NSEC_PER_SEC / hz, &abstime);
965 	}
966 
967 	return msleep1((caddr_t)d, bpf_mlock, pri, wmesg, abstime);
968 }
969 
970 static void
bpf_finalize_pktap(struct bpf_hdr * hp,struct pktap_header * pktaphdr)971 bpf_finalize_pktap(struct bpf_hdr *hp, struct pktap_header *pktaphdr)
972 {
973 	if (pktaphdr->pth_flags & PTH_FLAG_V2_HDR) {
974 		struct pktap_v2_hdr *pktap_v2_hdr;
975 
976 		pktap_v2_hdr = (struct pktap_v2_hdr *)pktaphdr;
977 
978 		if (pktap_v2_hdr->pth_flags & PTH_FLAG_DELAY_PKTAP) {
979 			pktap_v2_finalize_proc_info(pktap_v2_hdr);
980 		}
981 	} else {
982 		if (pktaphdr->pth_flags & PTH_FLAG_DELAY_PKTAP) {
983 			pktap_finalize_proc_info(pktaphdr);
984 		}
985 
986 		if (pktaphdr->pth_flags & PTH_FLAG_TSTAMP) {
987 			hp->bh_tstamp.tv_sec = pktaphdr->pth_tstamp.tv_sec;
988 			hp->bh_tstamp.tv_usec = pktaphdr->pth_tstamp.tv_usec;
989 		}
990 	}
991 }
992 
/*
 * Rotate the packet buffers in descriptor d.  Move the store buffer
 * into the hold slot, and the free buffer into the store slot.
 * Zero the length of the new store buffer.
 *
 * Wrapped in do { } while (0) so the macro expands to a single statement
 * and is safe in unbraced if/else bodies; the argument is parenthesized
 * at every use.
 */
#define ROTATE_BUFFERS(d) do { \
	if ((d)->bd_hbuf_read != 0) \
	        panic("rotating bpf buffers during read"); \
	(d)->bd_hbuf = (d)->bd_sbuf; \
	(d)->bd_hlen = (d)->bd_slen; \
	(d)->bd_hcnt = (d)->bd_scnt; \
	(d)->bd_sbuf = (d)->bd_fbuf; \
	(d)->bd_slen = 0; \
	(d)->bd_scnt = 0; \
	(d)->bd_fbuf = NULL; \
} while (0)
1008 /*
1009  *  bpfread - read next chunk of packets from buffers
1010  */
/*
 * bpfread - read the next chunk of captured packets.
 *
 * Blocks until a complete hold buffer is available (or the read timeout
 * set via BIOCSRTIMEOUT fires, or immediate/non-blocking mode applies),
 * then copies the whole hold buffer to userland in a single uiomove.
 * The caller's buffer must be exactly bd_bufsize bytes.
 * Returns 0 on success or an errno (ENXIO, EINVAL, EWOULDBLOCK, EINTR).
 *
 * Takes and releases bpf_mlock internally; the lock is dropped around
 * the uiomove while bd_hbuf_read pins the hold buffer.
 */
int
bpfread(dev_t dev, struct uio *uio, int ioflag)
{
	struct bpf_d *d;
	caddr_t hbuf;
	int timed_out, hbuf_len;
	int error;
	int flags;

	lck_mtx_lock(bpf_mlock);

	d = bpf_dtab[minor(dev)];
	if (d == NULL || d == BPF_DEV_RESERVED ||
	    (d->bd_flags & BPF_CLOSING) != 0) {
		lck_mtx_unlock(bpf_mlock);
		return ENXIO;
	}

	/* Hold a reference so the descriptor survives while we sleep. */
	bpf_acquire_d(d);

	/*
	 * Restrict application to use a buffer the same size as
	 * as kernel buffers.
	 */
	if (uio_resid(uio) != d->bd_bufsize) {
		bpf_release_d(d);
		lck_mtx_unlock(bpf_mlock);
		return EINVAL;
	}

	if (d->bd_state == BPF_WAITING) {
		bpf_stop_timer(d);
	}

	timed_out = (d->bd_state == BPF_TIMED_OUT);
	d->bd_state = BPF_IDLE;

	/* Only one read may be in flight per descriptor; wait our turn. */
	while (d->bd_hbuf_read != 0) {
		msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
	}

	/* msleep dropped the lock; the device may have started closing. */
	if ((d->bd_flags & BPF_CLOSING) != 0) {
		bpf_release_d(d);
		lck_mtx_unlock(bpf_mlock);
		return ENXIO;
	}
	/*
	 * If the hold buffer is empty, then do a timed sleep, which
	 * ends when the timeout expires or when enough packets
	 * have arrived to fill the store buffer.
	 */
	while (d->bd_hbuf == 0) {
		if ((d->bd_immediate || timed_out || (ioflag & IO_NDELAY)) &&
		    d->bd_slen != 0) {
			/*
			 * We're in immediate mode, or are reading
			 * in non-blocking mode, or a timer was
			 * started before the read (e.g., by select()
			 * or poll()) and has expired and a packet(s)
			 * either arrived since the previous
			 * read or arrived while we were asleep.
			 * Rotate the buffers and return what's here.
			 */
			ROTATE_BUFFERS(d);
			break;
		}

		/*
		 * No data is available, check to see if the bpf device
		 * is still pointed at a real interface.  If not, return
		 * ENXIO so that the userland process knows to rebind
		 * it before using it again.
		 */
		if (d->bd_bif == NULL) {
			bpf_release_d(d);
			lck_mtx_unlock(bpf_mlock);
			return ENXIO;
		}
		if (ioflag & IO_NDELAY) {
			bpf_release_d(d);
			lck_mtx_unlock(bpf_mlock);
			return EWOULDBLOCK;
		}
		/* Sleep until data arrives or bd_rtout expires. */
		error = BPF_SLEEP(d, PRINET | PCATCH, "bpf", d->bd_rtout);
		/*
		 * Make sure device is still opened
		 */
		if ((d->bd_flags & BPF_CLOSING) != 0) {
			bpf_release_d(d);
			lck_mtx_unlock(bpf_mlock);
			return ENXIO;
		}

		/* Another read may have started while we slept. */
		while (d->bd_hbuf_read != 0) {
			msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading",
			    NULL);
		}

		if ((d->bd_flags & BPF_CLOSING) != 0) {
			bpf_release_d(d);
			lck_mtx_unlock(bpf_mlock);
			return ENXIO;
		}

		if (error == EINTR || error == ERESTART) {
			if (d->bd_hbuf != NULL) {
				/*
				 * Because we msleep, the hold buffer might
				 * be filled when we wake up.  Avoid rotating
				 * in this case.
				 */
				break;
			}
			if (d->bd_slen != 0) {
				/*
				 * Sometimes we may be interrupted often and
				 * the sleep above will not timeout.
				 * Regardless, we should rotate the buffers
				 * if there's any new data pending and
				 * return it.
				 */
				ROTATE_BUFFERS(d);
				break;
			}
			bpf_release_d(d);
			lck_mtx_unlock(bpf_mlock);
			if (error == ERESTART) {
				printf("%s: %llx ERESTART to EINTR\n",
				    __func__, (uint64_t)VM_KERNEL_ADDRPERM(d));
				error = EINTR;
			}
			return error;
		}
		if (error == EWOULDBLOCK) {
			/*
			 * On a timeout, return what's in the buffer,
			 * which may be nothing.  If there is something
			 * in the store buffer, we can rotate the buffers.
			 */
			if (d->bd_hbuf) {
				/*
				 * We filled up the buffer in between
				 * getting the timeout and arriving
				 * here, so we don't need to rotate.
				 */
				break;
			}

			if (d->bd_slen == 0) {
				bpf_release_d(d);
				lck_mtx_unlock(bpf_mlock);
				return 0;
			}
			ROTATE_BUFFERS(d);
			break;
		}
	}
	/*
	 * At this point, we know we have something in the hold slot.
	 */

	/*
	 * Set the hold buffer read. So we do not
	 * rotate the buffers until the hold buffer
	 * read is complete. Also to avoid issues resulting
	 * from page faults during disk sleep (<rdar://problem/13436396>).
	 */
	d->bd_hbuf_read = 1;
	hbuf = d->bd_hbuf;
	hbuf_len = d->bd_hlen;
	flags = d->bd_flags;
	lck_mtx_unlock(bpf_mlock);

#ifdef __APPLE__
	/*
	 * Before we move data to userland, we fill out the extended
	 * header fields.
	 */
	if (flags & BPF_EXTENDED_HDR) {
		char *p;

		/* Walk each captured packet in the hold buffer. */
		p = hbuf;
		while (p < hbuf + hbuf_len) {
			struct bpf_hdr_ext *ehp;
			uint32_t flowid;
			struct so_procinfo soprocinfo;
			int found = 0;

			ehp = (struct bpf_hdr_ext *)(void *)p;
			/* Resolve a recorded flow id to pid/process name. */
			if ((flowid = ehp->bh_flowid) != 0) {
				if (ehp->bh_proto == IPPROTO_TCP) {
					found = inp_findinpcb_procinfo(&tcbinfo,
					    flowid, &soprocinfo);
				} else if (ehp->bh_proto == IPPROTO_UDP) {
					found = inp_findinpcb_procinfo(&udbinfo,
					    flowid, &soprocinfo);
				}
				if (found == 1) {
					ehp->bh_pid = soprocinfo.spi_pid;
					strlcpy(&ehp->bh_comm[0], &soprocinfo.spi_proc_name[0], sizeof(ehp->bh_comm));
				}
				ehp->bh_flowid = 0;
			}

			if (flags & BPF_FINALIZE_PKTAP) {
				struct pktap_header *pktaphdr;

				pktaphdr = (struct pktap_header *)(void *)
				    (p + BPF_WORDALIGN(ehp->bh_hdrlen));

				bpf_finalize_pktap((struct bpf_hdr *) ehp,
				    pktaphdr);
			}
			p += BPF_WORDALIGN(ehp->bh_hdrlen + ehp->bh_caplen);
		}
	} else if (flags & BPF_FINALIZE_PKTAP) {
		char *p;

		/* Same walk, standard (non-extended) bpf headers. */
		p = hbuf;
		while (p < hbuf + hbuf_len) {
			struct bpf_hdr *hp;
			struct pktap_header *pktaphdr;

			hp = (struct bpf_hdr *)(void *)p;
			pktaphdr = (struct pktap_header *)(void *)
			    (p + BPF_WORDALIGN(hp->bh_hdrlen));

			bpf_finalize_pktap(hp, pktaphdr);

			p += BPF_WORDALIGN(hp->bh_hdrlen + hp->bh_caplen);
		}
	}
#endif

	/*
	 * Move data from hold buffer into user space.
	 * We know the entire buffer is transferred since
	 * we checked above that the read buffer is bpf_bufsize bytes.
	 */
	error = UIOMOVE(hbuf, hbuf_len, UIO_READ, uio);

	lck_mtx_lock(bpf_mlock);
	/*
	 * Make sure device is still opened
	 */
	if ((d->bd_flags & BPF_CLOSING) != 0) {
		bpf_release_d(d);
		lck_mtx_unlock(bpf_mlock);
		return ENXIO;
	}

	/*
	 * Recycle the hold buffer as the new free buffer and wake any
	 * thread waiting on bd_hbuf_read.
	 */
	d->bd_hbuf_read = 0;
	d->bd_fbuf = d->bd_hbuf;
	d->bd_hbuf = NULL;
	d->bd_hlen = 0;
	d->bd_hcnt = 0;
	wakeup((caddr_t)d);

	bpf_release_d(d);
	lck_mtx_unlock(bpf_mlock);
	return error;
}
1273 
1274 /*
1275  * If there are processes sleeping on this descriptor, wake them up.
1276  */
1277 static void
bpf_wakeup(struct bpf_d * d)1278 bpf_wakeup(struct bpf_d *d)
1279 {
1280 	if (d->bd_state == BPF_WAITING) {
1281 		bpf_stop_timer(d);
1282 		d->bd_state = BPF_IDLE;
1283 	}
1284 	wakeup((caddr_t)d);
1285 	if (d->bd_async && d->bd_sig && d->bd_sigio) {
1286 		pgsigio(d->bd_sigio, d->bd_sig);
1287 	}
1288 
1289 	selwakeup(&d->bd_sel);
1290 	if ((d->bd_flags & BPF_KNOTE)) {
1291 		KNOTE(&d->bd_sel.si_note, 1);
1292 	}
1293 }
1294 
1295 static void
bpf_timed_out(void * arg,__unused void * dummy)1296 bpf_timed_out(void *arg, __unused void *dummy)
1297 {
1298 	struct bpf_d *d = (struct bpf_d *)arg;
1299 
1300 	lck_mtx_lock(bpf_mlock);
1301 	if (d->bd_state == BPF_WAITING) {
1302 		/*
1303 		 * There's a select or kqueue waiting for this; if there's
1304 		 * now stuff to read, wake it up.
1305 		 */
1306 		d->bd_state = BPF_TIMED_OUT;
1307 		if (d->bd_slen != 0) {
1308 			bpf_wakeup(d);
1309 		}
1310 	} else if (d->bd_state == BPF_DRAINING) {
1311 		/*
1312 		 * A close is waiting for this to finish.
1313 		 * Mark it as finished, and wake the close up.
1314 		 */
1315 		d->bd_state = BPF_IDLE;
1316 		bpf_wakeup(d);
1317 	}
1318 	lck_mtx_unlock(bpf_mlock);
1319 }
1320 
1321 /* keep in sync with bpf_movein above: */
1322 #define MAX_DATALINK_HDR_LEN    (sizeof(struct firewire_header))
1323 
1324 int
bpfwrite(dev_t dev,struct uio * uio,__unused int ioflag)1325 bpfwrite(dev_t dev, struct uio *uio, __unused int ioflag)
1326 {
1327 	struct bpf_d *d;
1328 	struct ifnet *ifp;
1329 	struct mbuf *m = NULL;
1330 	int error;
1331 	char              dst_buf[SOCKADDR_HDR_LEN + MAX_DATALINK_HDR_LEN];
1332 	int datlen = 0;
1333 	int bif_dlt;
1334 	int bd_hdrcmplt;
1335 
1336 	lck_mtx_lock(bpf_mlock);
1337 
1338 	d = bpf_dtab[minor(dev)];
1339 	if (d == NULL || d == BPF_DEV_RESERVED ||
1340 	    (d->bd_flags & BPF_CLOSING) != 0) {
1341 		lck_mtx_unlock(bpf_mlock);
1342 		return ENXIO;
1343 	}
1344 
1345 	bpf_acquire_d(d);
1346 
1347 	if (d->bd_bif == 0) {
1348 		bpf_release_d(d);
1349 		lck_mtx_unlock(bpf_mlock);
1350 		return ENXIO;
1351 	}
1352 
1353 	ifp = d->bd_bif->bif_ifp;
1354 
1355 	if ((ifp->if_flags & IFF_UP) == 0) {
1356 		bpf_release_d(d);
1357 		lck_mtx_unlock(bpf_mlock);
1358 		return ENETDOWN;
1359 	}
1360 	if (uio_resid(uio) == 0) {
1361 		bpf_release_d(d);
1362 		lck_mtx_unlock(bpf_mlock);
1363 		return 0;
1364 	}
1365 	((struct sockaddr *)dst_buf)->sa_len = sizeof(dst_buf);
1366 
1367 	/*
1368 	 * fix for PR-6849527
1369 	 * geting variables onto stack before dropping lock for bpf_movein()
1370 	 */
1371 	bif_dlt = (int)d->bd_bif->bif_dlt;
1372 	bd_hdrcmplt  = d->bd_hdrcmplt;
1373 
1374 	/* bpf_movein allocating mbufs; drop lock */
1375 	lck_mtx_unlock(bpf_mlock);
1376 
1377 	error = bpf_movein(uio, bif_dlt, &m,
1378 	    bd_hdrcmplt ? NULL : (struct sockaddr *)dst_buf,
1379 	    &datlen);
1380 
1381 	/* take the lock again */
1382 	lck_mtx_lock(bpf_mlock);
1383 	if (error) {
1384 		bpf_release_d(d);
1385 		lck_mtx_unlock(bpf_mlock);
1386 		return error;
1387 	}
1388 
1389 	/* verify the device is still open */
1390 	if ((d->bd_flags & BPF_CLOSING) != 0) {
1391 		bpf_release_d(d);
1392 		lck_mtx_unlock(bpf_mlock);
1393 		m_freem(m);
1394 		return ENXIO;
1395 	}
1396 
1397 	if (d->bd_bif == NULL) {
1398 		bpf_release_d(d);
1399 		lck_mtx_unlock(bpf_mlock);
1400 		m_free(m);
1401 		return ENXIO;
1402 	}
1403 
1404 	if ((unsigned)datlen > ifp->if_mtu) {
1405 		bpf_release_d(d);
1406 		lck_mtx_unlock(bpf_mlock);
1407 		m_freem(m);
1408 		return EMSGSIZE;
1409 	}
1410 
1411 	bpf_set_packet_service_class(m, d->bd_traffic_class);
1412 
1413 	lck_mtx_unlock(bpf_mlock);
1414 
1415 	/*
1416 	 * The driver frees the mbuf.
1417 	 */
1418 	if (d->bd_hdrcmplt) {
1419 		if (d->bd_bif->bif_send) {
1420 			error = d->bd_bif->bif_send(ifp, d->bd_bif->bif_dlt, m);
1421 		} else {
1422 			error = dlil_output(ifp, 0, m, NULL, NULL, 1, NULL);
1423 		}
1424 	} else {
1425 		error = dlil_output(ifp, PF_INET, m, NULL,
1426 		    (struct sockaddr *)dst_buf, 0, NULL);
1427 	}
1428 
1429 	lck_mtx_lock(bpf_mlock);
1430 	bpf_release_d(d);
1431 	lck_mtx_unlock(bpf_mlock);
1432 
1433 	return error;
1434 }
1435 
1436 /*
1437  * Reset a descriptor by flushing its packet buffer and clearing the
1438  * receive and drop counts.
1439  */
1440 static void
reset_d(struct bpf_d * d)1441 reset_d(struct bpf_d *d)
1442 {
1443 	if (d->bd_hbuf_read != 0) {
1444 		panic("resetting buffers during read");
1445 	}
1446 
1447 	if (d->bd_hbuf) {
1448 		/* Free the hold buffer. */
1449 		d->bd_fbuf = d->bd_hbuf;
1450 		d->bd_hbuf = NULL;
1451 	}
1452 	d->bd_slen = 0;
1453 	d->bd_hlen = 0;
1454 	d->bd_scnt = 0;
1455 	d->bd_hcnt = 0;
1456 	d->bd_rcount = 0;
1457 	d->bd_dcount = 0;
1458 }
1459 
1460 static struct bpf_d *
bpf_get_device_from_uuid(uuid_t uuid)1461 bpf_get_device_from_uuid(uuid_t uuid)
1462 {
1463 	unsigned int i;
1464 
1465 	for (i = 0; i < nbpfilter; i++) {
1466 		struct bpf_d *d = bpf_dtab[i];
1467 
1468 		if (d == NULL || d == BPF_DEV_RESERVED ||
1469 		    (d->bd_flags & BPF_CLOSING) != 0) {
1470 			continue;
1471 		}
1472 		if (uuid_compare(uuid, d->bd_uuid) == 0) {
1473 			return d;
1474 		}
1475 	}
1476 
1477 	return NULL;
1478 }
1479 
1480 /*
1481  * The BIOCSETUP command "atomically" attach to the interface and
1482  * copy the buffer from another interface. This minimizes the risk
1483  * of missing packet because this is done while holding
1484  * the BPF global lock
1485  */
/*
 * bpf_setup - attach d_to to ifp and clone the capture buffers of the
 * descriptor identified by uuid_from.
 *
 * Both descriptors must have been opened by the same process and use
 * the same buffer size.  Called and returns with bpf_mlock held.
 * Returns 0 or an errno (ENOENT, EACCES, ENXIO, EINVAL, or a
 * bpf_setif() error).
 */
static int
bpf_setup(struct bpf_d *d_to, uuid_t uuid_from, ifnet_t ifp)
{
	struct bpf_d *d_from;
	int error = 0;

	LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);

	/*
	 * Sanity checks
	 */
	d_from = bpf_get_device_from_uuid(uuid_from);
	if (d_from == NULL) {
		error = ENOENT;
		os_log_info(OS_LOG_DEFAULT,
		    "%s: uuids not found error %d",
		    __func__, error);
		return error;
	}
	if (d_from->bd_opened_by != d_to->bd_opened_by) {
		error = EACCES;
		os_log_info(OS_LOG_DEFAULT,
		    "%s: processes not matching error %d",
		    __func__, error);
		return error;
	}

	/*
	 * Prevent any read while copying
	 *
	 * bd_hbuf_read doubles as a busy flag here: set it on both
	 * descriptors so concurrent reads block until "done".
	 */
	while (d_to->bd_hbuf_read != 0) {
		msleep((caddr_t)d_to, bpf_mlock, PRINET, __func__, NULL);
	}
	d_to->bd_hbuf_read = 1;

	while (d_from->bd_hbuf_read != 0) {
		msleep((caddr_t)d_from, bpf_mlock, PRINET, __func__, NULL);
	}
	d_from->bd_hbuf_read = 1;

	/*
	 * Verify the devices have not been closed
	 * (the msleep calls above drop and retake bpf_mlock)
	 */
	if (d_to->bd_flags & BPF_CLOSING) {
		error = ENXIO;
		os_log_info(OS_LOG_DEFAULT,
		    "%s: d_to is closing error %d",
		    __func__, error);
		goto done;
	}
	if (d_from->bd_flags & BPF_CLOSING) {
		error = ENXIO;
		os_log_info(OS_LOG_DEFAULT,
		    "%s: d_from is closing error %d",
		    __func__, error);
		goto done;
	}

	/*
	 * For now require the same buffer size
	 */
	if (d_from->bd_bufsize != d_to->bd_bufsize) {
		error = EINVAL;
		os_log_info(OS_LOG_DEFAULT,
		    "%s: bufsizes not matching error %d",
		    __func__, error);
		goto done;
	}

	/*
	 * Attach to the interface
	 * (has_hbuf_read is true: we already own the busy flag)
	 */
	error = bpf_setif(d_to, ifp, false, true);
	if (error != 0) {
		os_log_info(OS_LOG_DEFAULT,
		    "%s: bpf_setif() failed error %d",
		    __func__, error);
		goto done;
	}

	/*
	 * Make sure the buffers are setup as expected by bpf_setif()
	 */
	ASSERT(d_to->bd_hbuf == NULL);
	ASSERT(d_to->bd_sbuf != NULL);
	ASSERT(d_to->bd_fbuf != NULL);

	/*
	 * Copy the buffers and update the pointers and counts
	 */
	memcpy(d_to->bd_sbuf, d_from->bd_sbuf, d_from->bd_slen);
	d_to->bd_slen = d_from->bd_slen;
	d_to->bd_scnt = d_from->bd_scnt;

	if (d_from->bd_hbuf != NULL) {
		/* Repurpose the fresh free buffer as the hold buffer. */
		d_to->bd_hbuf = d_to->bd_fbuf;
		d_to->bd_fbuf = NULL;
		memcpy(d_to->bd_hbuf, d_from->bd_hbuf, d_from->bd_hlen);
	}
	d_to->bd_hlen = d_from->bd_hlen;
	d_to->bd_hcnt = d_from->bd_hcnt;

	if (bpf_debug > 0) {
		os_log_info(OS_LOG_DEFAULT,
		    "%s: done slen %u scnt %u hlen %u hcnt %u",
		    __func__, d_to->bd_slen, d_to->bd_scnt,
		    d_to->bd_hlen, d_to->bd_hcnt);
	}
done:
	/* Release the busy flags and wake any blocked readers. */
	d_from->bd_hbuf_read = 0;
	wakeup((caddr_t)d_from);

	d_to->bd_hbuf_read = 0;
	wakeup((caddr_t)d_to);

	return error;
}
1603 
1604 /*
1605  *  FIONREAD		Check for read packet available.
1606  *  SIOCGIFADDR		Get interface address - convenient hook to driver.
1607  *  BIOCGBLEN		Get buffer len [for read()].
1608  *  BIOCSETF		Set ethernet read filter.
1609  *  BIOCFLUSH		Flush read packet buffer.
1610  *  BIOCPROMISC		Put interface into promiscuous mode.
1611  *  BIOCGDLT		Get link layer type.
1612  *  BIOCGETIF		Get interface name.
1613  *  BIOCSETIF		Set interface.
1614  *  BIOCSRTIMEOUT	Set read timeout.
1615  *  BIOCGRTIMEOUT	Get read timeout.
1616  *  BIOCGSTATS		Get packet stats.
1617  *  BIOCIMMEDIATE	Set immediate mode.
1618  *  BIOCVERSION		Get filter language version.
1619  *  BIOCGHDRCMPLT	Get "header already complete" flag
1620  *  BIOCSHDRCMPLT	Set "header already complete" flag
1621  *  BIOCGSEESENT	Get "see packets sent" flag
1622  *  BIOCSSEESENT	Set "see packets sent" flag
1623  *  BIOCSETTC		Set traffic class.
1624  *  BIOCGETTC		Get traffic class.
1625  *  BIOCSEXTHDR		Set "extended header" flag
1626  *  BIOCSHEADDROP	Drop head of the buffer if user is not reading
1627  *  BIOCGHEADDROP	Get "head-drop" flag
1628  */
1629 /* ARGSUSED */
/*
 * bpfioctl - control device.
 *
 * Dispatches the BIOC*/FIO*/SIOC* commands documented in the comment
 * block above.  All commands run with bpf_mlock held; any pending read
 * timer is cancelled and the descriptor forced to BPF_IDLE before
 * dispatch.  Most arguments are staged through locals with bcopy
 * rather than dereferenced via addr directly.
 * Returns 0 or an errno (EINVAL for unknown commands).
 */
int
bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags,
    struct proc *p)
{
	struct bpf_d *d;
	int error = 0;
	u_int int_arg;
	struct ifreq ifr;

	lck_mtx_lock(bpf_mlock);

	d = bpf_dtab[minor(dev)];
	if (d == NULL || d == BPF_DEV_RESERVED ||
	    (d->bd_flags & BPF_CLOSING) != 0) {
		lck_mtx_unlock(bpf_mlock);
		return ENXIO;
	}

	/* Keep the descriptor alive across any sleeps below. */
	bpf_acquire_d(d);

	if (d->bd_state == BPF_WAITING) {
		bpf_stop_timer(d);
	}
	d->bd_state = BPF_IDLE;

	switch (cmd) {
	default:
		error = EINVAL;
		break;

	/*
	 * Check for read packet available.
	 */
	case FIONREAD:                  /* int */
	{
		int n;

		/* Store buffer plus hold buffer, unless a read owns it. */
		n = d->bd_slen;
		if (d->bd_hbuf && d->bd_hbuf_read == 0) {
			n += d->bd_hlen;
		}

		bcopy(&n, addr, sizeof(n));
		break;
	}

	case SIOCGIFADDR:               /* struct ifreq */
	{
		struct ifnet *ifp;

		if (d->bd_bif == 0) {
			error = EINVAL;
		} else {
			ifp = d->bd_bif->bif_ifp;
			error = ifnet_ioctl(ifp, 0, cmd, addr);
		}
		break;
	}

	/*
	 * Get buffer len [for read()].
	 */
	case BIOCGBLEN:                 /* u_int */
		bcopy(&d->bd_bufsize, addr, sizeof(u_int));
		break;

	/*
	 * Set buffer length.
	 */
	case BIOCSBLEN: {               /* u_int */
		u_int size;
		unsigned int maxbufsize = bpf_maxbufsize;

		/*
		 * Allow larger buffer in head drop mode to with the
		 * assumption the reading process may be low priority but
		 * is interested in the most recent traffic
		 */
		if (d->bd_headdrop != 0) {
			maxbufsize = 2 * bpf_maxbufsize;
		}

		if (d->bd_bif != 0 || (d->bd_flags & BPF_DETACHING)) {
			/*
			 * Interface already attached, unable to change buffers
			 */
			error = EINVAL;
			break;
		}
		bcopy(addr, &size, sizeof(size));

		/* Clamp the request into [BPF_MINBUFSIZE, maxbufsize]. */
		if (size > maxbufsize) {
			d->bd_bufsize = maxbufsize;

			os_log_info(OS_LOG_DEFAULT,
			    "%s bufsize capped to %u from %u",
			    __func__, d->bd_bufsize, size);
		} else if (size < BPF_MINBUFSIZE) {
			d->bd_bufsize = BPF_MINBUFSIZE;

			os_log_info(OS_LOG_DEFAULT,
			    "%s bufsize bumped to %u from %u",
			    __func__, d->bd_bufsize, size);
		} else {
			d->bd_bufsize = size;
		}

		/* It's a read/write ioctl */
		bcopy(&d->bd_bufsize, addr, sizeof(u_int));
		break;
	}
	/*
	 * Set link layer read filter.
	 */
	case BIOCSETF32:
	case BIOCSETFNR32: {            /* struct bpf_program32 */
		struct bpf_program32 prg32;

		bcopy(addr, &prg32, sizeof(prg32));
		error = bpf_setf(d, prg32.bf_len,
		    CAST_USER_ADDR_T(prg32.bf_insns), cmd);
		break;
	}

	case BIOCSETF64:
	case BIOCSETFNR64: {            /* struct bpf_program64 */
		struct bpf_program64 prg64;

		bcopy(addr, &prg64, sizeof(prg64));
		error = bpf_setf(d, prg64.bf_len, CAST_USER_ADDR_T(prg64.bf_insns), cmd);
		break;
	}

	/*
	 * Flush read packet buffer.
	 */
	case BIOCFLUSH:
		/* Wait out any in-flight read before resetting buffers. */
		while (d->bd_hbuf_read != 0) {
			msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading",
			    NULL);
		}
		if ((d->bd_flags & BPF_CLOSING) != 0) {
			error = ENXIO;
			break;
		}
		reset_d(d);
		break;

	/*
	 * Put interface into promiscuous mode.
	 */
	case BIOCPROMISC:
		if (d->bd_bif == 0) {
			/*
			 * No interface attached yet.
			 */
			error = EINVAL;
			break;
		}
		if (d->bd_promisc == 0) {
			/*
			 * NOTE(review): bpf_mlock is dropped around
			 * ifnet_set_promiscuous() — presumably because
			 * it takes interface locks; confirm ordering.
			 */
			lck_mtx_unlock(bpf_mlock);
			error = ifnet_set_promiscuous(d->bd_bif->bif_ifp, 1);
			lck_mtx_lock(bpf_mlock);
			if (error == 0) {
				d->bd_promisc = 1;
			}
		}
		break;

	/*
	 * Get device parameters.
	 */
	case BIOCGDLT:                  /* u_int */
		if (d->bd_bif == 0) {
			error = EINVAL;
		} else {
			bcopy(&d->bd_bif->bif_dlt, addr, sizeof(u_int));
		}
		break;

	/*
	 * Get a list of supported data link types.
	 */
	case BIOCGDLTLIST:              /* struct bpf_dltlist */
		if (d->bd_bif == NULL) {
			error = EINVAL;
		} else {
			error = bpf_getdltlist(d, addr, p);
		}
		break;

	/*
	 * Set data link type.
	 */
	case BIOCSDLT:                  /* u_int */
		if (d->bd_bif == NULL) {
			error = EINVAL;
		} else {
			u_int dlt;

			bcopy(addr, &dlt, sizeof(dlt));

			/* DLT_PKTAP only for consumers that opted in. */
			if (dlt == DLT_PKTAP &&
			    !(d->bd_flags & BPF_WANT_PKTAP)) {
				dlt = DLT_RAW;
			}
			error = bpf_setdlt(d, dlt);
		}
		break;

	/*
	 * Get interface name.
	 */
	case BIOCGETIF:                 /* struct ifreq */
		if (d->bd_bif == 0) {
			error = EINVAL;
		} else {
			struct ifnet *const ifp = d->bd_bif->bif_ifp;

			snprintf(((struct ifreq *)(void *)addr)->ifr_name,
			    sizeof(ifr.ifr_name), "%s", if_name(ifp));
		}
		break;

	/*
	 * Set interface.
	 */
	case BIOCSETIF: {               /* struct ifreq */
		ifnet_t ifp;

		bcopy(addr, &ifr, sizeof(ifr));
		ifr.ifr_name[IFNAMSIZ - 1] = '\0';
		ifp = ifunit(ifr.ifr_name);
		if (ifp == NULL) {
			error = ENXIO;
		} else {
			error = bpf_setif(d, ifp, true, false);
		}
		break;
	}

	/*
	 * Set read timeout.
	 */
	case BIOCSRTIMEOUT32: {         /* struct user32_timeval */
		struct user32_timeval _tv;
		struct timeval tv;

		bcopy(addr, &_tv, sizeof(_tv));
		tv.tv_sec  = _tv.tv_sec;
		tv.tv_usec = _tv.tv_usec;

		/*
		 * Subtract 1 tick from tvtohz() since this isn't
		 * a one-shot timer.
		 */
		if ((error = itimerfix(&tv)) == 0) {
			d->bd_rtout = tvtohz(&tv) - 1;
		}
		break;
	}

	case BIOCSRTIMEOUT64: {         /* struct user64_timeval */
		struct user64_timeval _tv;
		struct timeval tv;

		bcopy(addr, &_tv, sizeof(_tv));
		tv.tv_sec  = (__darwin_time_t)_tv.tv_sec;
		tv.tv_usec = _tv.tv_usec;

		/*
		 * Subtract 1 tick from tvtohz() since this isn't
		 * a one-shot timer.
		 */
		if ((error = itimerfix(&tv)) == 0) {
			d->bd_rtout = tvtohz(&tv) - 1;
		}
		break;
	}

	/*
	 * Get read timeout.
	 */
	case BIOCGRTIMEOUT32: {         /* struct user32_timeval */
		struct user32_timeval tv;

		bzero(&tv, sizeof(tv));
		tv.tv_sec = d->bd_rtout / hz;
		tv.tv_usec = (d->bd_rtout % hz) * tick;
		bcopy(&tv, addr, sizeof(tv));
		break;
	}

	case BIOCGRTIMEOUT64: {         /* struct user64_timeval */
		struct user64_timeval tv;

		bzero(&tv, sizeof(tv));
		tv.tv_sec = d->bd_rtout / hz;
		tv.tv_usec = (d->bd_rtout % hz) * tick;
		bcopy(&tv, addr, sizeof(tv));
		break;
	}

	/*
	 * Get packet stats.
	 */
	case BIOCGSTATS: {              /* struct bpf_stat */
		struct bpf_stat bs;

		bzero(&bs, sizeof(bs));
		bs.bs_recv = d->bd_rcount;
		bs.bs_drop = d->bd_dcount;
		bcopy(&bs, addr, sizeof(bs));
		break;
	}

	/*
	 * Set immediate mode.
	 */
	case BIOCIMMEDIATE:             /* u_int */
		/*
		 * NOTE(review): only a single u_char is read here even
		 * though the command is tagged u_int — confirm this
		 * matches the userland ABI before changing.
		 */
		d->bd_immediate = *(u_char *)(void *)addr;
		break;

	case BIOCVERSION: {             /* struct bpf_version */
		struct bpf_version bv;

		bzero(&bv, sizeof(bv));
		bv.bv_major = BPF_MAJOR_VERSION;
		bv.bv_minor = BPF_MINOR_VERSION;
		bcopy(&bv, addr, sizeof(bv));
		break;
	}

	/*
	 * Get "header already complete" flag
	 */
	case BIOCGHDRCMPLT:             /* u_int */
		bcopy(&d->bd_hdrcmplt, addr, sizeof(u_int));
		break;

	/*
	 * Set "header already complete" flag
	 */
	case BIOCSHDRCMPLT:             /* u_int */
		bcopy(addr, &int_arg, sizeof(int_arg));
		d->bd_hdrcmplt = int_arg ? 1 : 0;
		break;

	/*
	 * Get "see sent packets" flag
	 */
	case BIOCGSEESENT:              /* u_int */
		bcopy(&d->bd_seesent, addr, sizeof(u_int));
		break;

	/*
	 * Set "see sent packets" flag
	 */
	case BIOCSSEESENT:              /* u_int */
		bcopy(addr, &d->bd_seesent, sizeof(u_int));
		break;

	/*
	 * Set traffic service class
	 */
	case BIOCSETTC: {               /* int */
		int tc;

		bcopy(addr, &tc, sizeof(int));
		error = bpf_set_traffic_class(d, tc);
		break;
	}

	/*
	 * Get traffic service class
	 */
	case BIOCGETTC:                 /* int */
		bcopy(&d->bd_traffic_class, addr, sizeof(int));
		break;

	case FIONBIO:           /* Non-blocking I/O; int */
		/* Non-blocking reads are driven by IO_NDELAY per call. */
		break;

	case FIOASYNC:          /* Send signal on receive packets; int */
		bcopy(addr, &d->bd_async, sizeof(int));
		break;
#ifndef __APPLE__
	case FIOSETOWN:
		error = fsetown(*(int *)addr, &d->bd_sigio);
		break;

	case FIOGETOWN:
		*(int *)addr = fgetown(d->bd_sigio);
		break;

	/* This is deprecated, FIOSETOWN should be used instead. */
	case TIOCSPGRP:
		error = fsetown(-(*(int *)addr), &d->bd_sigio);
		break;

	/* This is deprecated, FIOGETOWN should be used instead. */
	case TIOCGPGRP:
		*(int *)addr = -fgetown(d->bd_sigio);
		break;
#endif
	case BIOCSRSIG: {       /* Set receive signal; u_int */
		u_int sig;

		bcopy(addr, &sig, sizeof(u_int));

		if (sig >= NSIG) {
			error = EINVAL;
		} else {
			d->bd_sig = sig;
		}
		break;
	}
	case BIOCGRSIG:                 /* u_int */
		bcopy(&d->bd_sig, addr, sizeof(u_int));
		break;
#ifdef __APPLE__
	case BIOCSEXTHDR:               /* u_int */
		bcopy(addr, &int_arg, sizeof(int_arg));
		if (int_arg) {
			d->bd_flags |= BPF_EXTENDED_HDR;
		} else {
			d->bd_flags &= ~BPF_EXTENDED_HDR;
		}
		break;

	case BIOCGIFATTACHCOUNT: {              /* struct ifreq */
		ifnet_t ifp;
		struct bpf_if *bp;

		bcopy(addr, &ifr, sizeof(ifr));
		ifr.ifr_name[IFNAMSIZ - 1] = '\0';
		ifp = ifunit(ifr.ifr_name);
		if (ifp == NULL) {
			error = ENXIO;
			break;
		}
		/* Count descriptors attached to this interface. */
		ifr.ifr_intval = 0;
		for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
			struct bpf_d *bpf_d;

			if (bp->bif_ifp == NULL || bp->bif_ifp != ifp) {
				continue;
			}
			for (bpf_d = bp->bif_dlist; bpf_d;
			    bpf_d = bpf_d->bd_next) {
				ifr.ifr_intval += 1;
			}
		}
		bcopy(&ifr, addr, sizeof(ifr));
		break;
	}
	case BIOCGWANTPKTAP:                    /* u_int */
		int_arg = d->bd_flags & BPF_WANT_PKTAP ? 1 : 0;
		bcopy(&int_arg, addr, sizeof(int_arg));
		break;

	case BIOCSWANTPKTAP:                    /* u_int */
		bcopy(addr, &int_arg, sizeof(int_arg));
		if (int_arg) {
			d->bd_flags |= BPF_WANT_PKTAP;
		} else {
			d->bd_flags &= ~BPF_WANT_PKTAP;
		}
		break;
#endif

	case BIOCSHEADDROP:
		bcopy(addr, &int_arg, sizeof(int_arg));
		d->bd_headdrop = int_arg ? 1 : 0;
		break;

	case BIOCGHEADDROP:
		bcopy(&d->bd_headdrop, addr, sizeof(int));
		break;

	case BIOCSTRUNCATE:
		bcopy(addr, &int_arg, sizeof(int_arg));
		if (int_arg) {
			d->bd_flags |=  BPF_TRUNCATE;
		} else {
			d->bd_flags &= ~BPF_TRUNCATE;
		}
		break;

	case BIOCGETUUID:
		bcopy(&d->bd_uuid, addr, sizeof(uuid_t));
		break;

	case BIOCSETUP: {
		struct bpf_setup_args bsa;
		ifnet_t ifp;

		bcopy(addr, &bsa, sizeof(struct bpf_setup_args));
		bsa.bsa_ifname[IFNAMSIZ - 1] = 0;
		ifp = ifunit(bsa.bsa_ifname);
		if (ifp == NULL) {
			error = ENXIO;
			os_log_info(OS_LOG_DEFAULT,
			    "%s: ifnet not found for %s error %d",
			    __func__, bsa.bsa_ifname, error);
			break;
		}

		error = bpf_setup(d, bsa.bsa_uuid, ifp);
		break;
	}
	case BIOCSPKTHDRV2:
		bcopy(addr, &int_arg, sizeof(int_arg));
		if (int_arg != 0) {
			d->bd_flags |= BPF_PKTHDRV2;
		} else {
			d->bd_flags &= ~BPF_PKTHDRV2;
		}
		break;

	case BIOCGPKTHDRV2:
		int_arg = d->bd_flags & BPF_PKTHDRV2 ? 1 : 0;
		bcopy(&int_arg, addr, sizeof(int));
		break;
	}

	bpf_release_d(d);
	lck_mtx_unlock(bpf_mlock);

	return error;
}
2161 
2162 /*
2163  * Set d's packet filter program to fp.  If this file already has a filter,
2164  * free it and replace it.  Returns EINVAL for bogus requests.
2165  */
static int
bpf_setf(struct bpf_d *d, u_int bf_len, user_addr_t bf_insns,
    u_long cmd)
{
	struct bpf_insn *fcode, *old;
	u_int flen, size;

	/* Wait out any in-flight read; msleep drops/retakes bpf_mlock. */
	while (d->bd_hbuf_read != 0) {
		msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
	}

	if ((d->bd_flags & BPF_CLOSING) != 0) {
		return ENXIO;
	}

	/* Keep the old program until the new one is installed. */
	old = d->bd_filter;
	if (bf_insns == USER_ADDR_NULL) {
		/* A null program clears the filter; bf_len must be 0. */
		if (bf_len != 0) {
			return EINVAL;
		}
		d->bd_filter = NULL;
		reset_d(d);
		if (old != 0) {
			kfree_data_addr(old);
		}
		return 0;
	}
	flen = bf_len;
	if (flen > BPF_MAXINSNS) {
		return EINVAL;
	}

	/* flen <= BPF_MAXINSNS, so this multiply cannot overflow. */
	size = flen * sizeof(struct bpf_insn);
	fcode = (struct bpf_insn *) kalloc_data(size, M_WAIT);
#ifdef __APPLE__
	if (fcode == NULL) {
		return ENOBUFS;
	}
#endif
	/* Install only a program that copies in cleanly and validates. */
	if (copyin(bf_insns, (caddr_t)fcode, size) == 0 &&
	    bpf_validate(fcode, (int)flen)) {
		d->bd_filter = fcode;

		/* BIOCSETFNR32/64 ("no reset") skip flushing the buffers. */
		if (cmd == BIOCSETF32 || cmd == BIOCSETF64) {
			reset_d(d);
		}

		if (old != 0) {
			kfree_data_addr(old);
		}

		return 0;
	}
	kfree_data(fcode, size);
	return EINVAL;
}
2222 
2223 /*
2224  * Detach a file from its current interface (if attached at all) and attach
2225  * to the interface indicated by the name stored in ifr.
2226  * Return an errno or 0.
2227  */
2228 static int
bpf_setif(struct bpf_d * d,ifnet_t theywant,bool do_reset,bool has_hbuf_read)2229 bpf_setif(struct bpf_d *d, ifnet_t theywant, bool do_reset, bool has_hbuf_read)
2230 {
2231 	struct bpf_if *bp;
2232 	int error;
2233 
2234 	while (d->bd_hbuf_read != 0 && !has_hbuf_read) {
2235 		msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
2236 	}
2237 
2238 	if ((d->bd_flags & BPF_CLOSING) != 0) {
2239 		return ENXIO;
2240 	}
2241 
2242 	/*
2243 	 * Look through attached interfaces for the named one.
2244 	 */
2245 	for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
2246 		struct ifnet *ifp = bp->bif_ifp;
2247 
2248 		if (ifp == 0 || ifp != theywant) {
2249 			continue;
2250 		}
2251 		/*
2252 		 * Do not use DLT_PKTAP, unless requested explicitly
2253 		 */
2254 		if (bp->bif_dlt == DLT_PKTAP && !(d->bd_flags & BPF_WANT_PKTAP)) {
2255 			continue;
2256 		}
2257 		/*
2258 		 * Skip the coprocessor interface
2259 		 */
2260 		if (!intcoproc_unrestricted && IFNET_IS_INTCOPROC(ifp)) {
2261 			continue;
2262 		}
2263 		/*
2264 		 * We found the requested interface.
2265 		 * Allocate the packet buffers.
2266 		 */
2267 		error = bpf_allocbufs(d);
2268 		if (error != 0) {
2269 			return error;
2270 		}
2271 		/*
2272 		 * Detach if attached to something else.
2273 		 */
2274 		if (bp != d->bd_bif) {
2275 			if (d->bd_bif != NULL) {
2276 				if (bpf_detachd(d, 0) != 0) {
2277 					return ENXIO;
2278 				}
2279 			}
2280 			if (bpf_attachd(d, bp) != 0) {
2281 				return ENXIO;
2282 			}
2283 		}
2284 		if (do_reset) {
2285 			reset_d(d);
2286 		}
2287 		return 0;
2288 	}
2289 	/* Not found. */
2290 	return ENXIO;
2291 }
2292 
2293 /*
2294  * Get a list of available data link type of the interface.
2295  */
2296 static int
bpf_getdltlist(struct bpf_d * d,caddr_t addr,struct proc * p)2297 bpf_getdltlist(struct bpf_d *d, caddr_t addr, struct proc *p)
2298 {
2299 	u_int           n;
2300 	int             error;
2301 	struct ifnet    *ifp;
2302 	struct bpf_if   *bp;
2303 	user_addr_t     dlist;
2304 	struct bpf_dltlist bfl;
2305 
2306 	bcopy(addr, &bfl, sizeof(bfl));
2307 	if (proc_is64bit(p)) {
2308 		dlist = (user_addr_t)bfl.bfl_u.bflu_pad;
2309 	} else {
2310 		dlist = CAST_USER_ADDR_T(bfl.bfl_u.bflu_list);
2311 	}
2312 
2313 	ifp = d->bd_bif->bif_ifp;
2314 	n = 0;
2315 	error = 0;
2316 
2317 	for (bp = bpf_iflist; bp; bp = bp->bif_next) {
2318 		if (bp->bif_ifp != ifp) {
2319 			continue;
2320 		}
2321 		/*
2322 		 * Do not use DLT_PKTAP, unless requested explicitly
2323 		 */
2324 		if (bp->bif_dlt == DLT_PKTAP && !(d->bd_flags & BPF_WANT_PKTAP)) {
2325 			continue;
2326 		}
2327 		if (dlist != USER_ADDR_NULL) {
2328 			if (n >= bfl.bfl_len) {
2329 				return ENOMEM;
2330 			}
2331 			error = copyout(&bp->bif_dlt, dlist,
2332 			    sizeof(bp->bif_dlt));
2333 			if (error != 0) {
2334 				break;
2335 			}
2336 			dlist += sizeof(bp->bif_dlt);
2337 		}
2338 		n++;
2339 	}
2340 	bfl.bfl_len = n;
2341 	bcopy(&bfl, addr, sizeof(bfl));
2342 
2343 	return error;
2344 }
2345 
2346 /*
2347  * Set the data link type of a BPF instance.
2348  */
2349 static int
bpf_setdlt(struct bpf_d * d,uint32_t dlt)2350 bpf_setdlt(struct bpf_d *d, uint32_t dlt)
2351 {
2352 	int error, opromisc;
2353 	struct ifnet *ifp;
2354 	struct bpf_if *bp;
2355 
2356 	if (d->bd_bif->bif_dlt == dlt) {
2357 		return 0;
2358 	}
2359 
2360 	while (d->bd_hbuf_read != 0) {
2361 		msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
2362 	}
2363 
2364 	if ((d->bd_flags & BPF_CLOSING) != 0) {
2365 		return ENXIO;
2366 	}
2367 
2368 	ifp = d->bd_bif->bif_ifp;
2369 	for (bp = bpf_iflist; bp; bp = bp->bif_next) {
2370 		if (bp->bif_ifp == ifp && bp->bif_dlt == dlt) {
2371 			/*
2372 			 * Do not use DLT_PKTAP, unless requested explicitly
2373 			 */
2374 			if (bp->bif_dlt == DLT_PKTAP &&
2375 			    !(d->bd_flags & BPF_WANT_PKTAP)) {
2376 				continue;
2377 			}
2378 			break;
2379 		}
2380 	}
2381 	if (bp != NULL) {
2382 		opromisc = d->bd_promisc;
2383 		if (bpf_detachd(d, 0) != 0) {
2384 			return ENXIO;
2385 		}
2386 		error = bpf_attachd(d, bp);
2387 		if (error) {
2388 			printf("bpf_setdlt: bpf_attachd %s%d failed (%d)\n",
2389 			    ifnet_name(bp->bif_ifp), ifnet_unit(bp->bif_ifp),
2390 			    error);
2391 			return error;
2392 		}
2393 		reset_d(d);
2394 		if (opromisc) {
2395 			lck_mtx_unlock(bpf_mlock);
2396 			error = ifnet_set_promiscuous(bp->bif_ifp, 1);
2397 			lck_mtx_lock(bpf_mlock);
2398 			if (error) {
2399 				printf("%s: ifpromisc %s%d failed (%d)\n",
2400 				    __func__, ifnet_name(bp->bif_ifp),
2401 				    ifnet_unit(bp->bif_ifp), error);
2402 			} else {
2403 				d->bd_promisc = 1;
2404 			}
2405 		}
2406 	}
2407 	return bp == NULL ? EINVAL : 0;
2408 }
2409 
2410 static int
bpf_set_traffic_class(struct bpf_d * d,int tc)2411 bpf_set_traffic_class(struct bpf_d *d, int tc)
2412 {
2413 	int error = 0;
2414 
2415 	if (!SO_VALID_TC(tc)) {
2416 		error = EINVAL;
2417 	} else {
2418 		d->bd_traffic_class = tc;
2419 	}
2420 
2421 	return error;
2422 }
2423 
2424 static void
bpf_set_packet_service_class(struct mbuf * m,int tc)2425 bpf_set_packet_service_class(struct mbuf *m, int tc)
2426 {
2427 	if (!(m->m_flags & M_PKTHDR)) {
2428 		return;
2429 	}
2430 
2431 	VERIFY(SO_VALID_TC(tc));
2432 	(void) m_set_service_class(m, so_tc2msc(tc));
2433 }
2434 
2435 /*
2436  * Support for select()
2437  *
2438  * Return true iff the specific operation will not block indefinitely.
2439  * Otherwise, return false but make a note that a selwakeup() must be done.
2440  */
2441 int
bpfselect(dev_t dev,int which,void * wql,struct proc * p)2442 bpfselect(dev_t dev, int which, void * wql, struct proc *p)
2443 {
2444 	struct bpf_d *d;
2445 	int ret = 0;
2446 
2447 	lck_mtx_lock(bpf_mlock);
2448 
2449 	d = bpf_dtab[minor(dev)];
2450 	if (d == NULL || d == BPF_DEV_RESERVED ||
2451 	    (d->bd_flags & BPF_CLOSING) != 0) {
2452 		lck_mtx_unlock(bpf_mlock);
2453 		return ENXIO;
2454 	}
2455 
2456 	bpf_acquire_d(d);
2457 
2458 	if (d->bd_bif == NULL) {
2459 		bpf_release_d(d);
2460 		lck_mtx_unlock(bpf_mlock);
2461 		return ENXIO;
2462 	}
2463 
2464 	while (d->bd_hbuf_read != 0) {
2465 		msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
2466 	}
2467 
2468 	if ((d->bd_flags & BPF_CLOSING) != 0) {
2469 		bpf_release_d(d);
2470 		lck_mtx_unlock(bpf_mlock);
2471 		return ENXIO;
2472 	}
2473 
2474 	switch (which) {
2475 	case FREAD:
2476 		if (d->bd_hlen != 0 ||
2477 		    ((d->bd_immediate ||
2478 		    d->bd_state == BPF_TIMED_OUT) && d->bd_slen != 0)) {
2479 			ret = 1;         /* read has data to return */
2480 		} else {
2481 			/*
2482 			 * Read has no data to return.
2483 			 * Make the select wait, and start a timer if
2484 			 * necessary.
2485 			 */
2486 			selrecord(p, &d->bd_sel, wql);
2487 			bpf_start_timer(d);
2488 		}
2489 		break;
2490 
2491 	case FWRITE:
2492 		/* can't determine whether a write would block */
2493 		ret = 1;
2494 		break;
2495 	}
2496 
2497 	bpf_release_d(d);
2498 	lck_mtx_unlock(bpf_mlock);
2499 
2500 	return ret;
2501 }
2502 
2503 /*
2504  * Support for kevent() system call.  Register EVFILT_READ filters and
2505  * reject all others.
2506  */
2507 int bpfkqfilter(dev_t dev, struct knote *kn);
2508 static void filt_bpfdetach(struct knote *);
2509 static int filt_bpfread(struct knote *, long);
2510 static int filt_bpftouch(struct knote *kn, struct kevent_qos_s *kev);
2511 static int filt_bpfprocess(struct knote *kn, struct kevent_qos_s *kev);
2512 
2513 SECURITY_READ_ONLY_EARLY(struct filterops) bpfread_filtops = {
2514 	.f_isfd = 1,
2515 	.f_detach = filt_bpfdetach,
2516 	.f_event = filt_bpfread,
2517 	.f_touch = filt_bpftouch,
2518 	.f_process = filt_bpfprocess,
2519 };
2520 
/*
 * Common EVFILT_READ readiness check.
 *
 * Computes how many bytes a read would return, honoring immediate-mode
 * and read-timeout semantics (detailed inline), and compares against the
 * knote's low watermark in immediate mode.  When ready and 'kev' is
 * non-NULL, fills the kevent with that byte count; when not ready,
 * arms the read timer.  Called with bpf_mlock held.
 */
static int
filt_bpfread_common(struct knote *kn, struct kevent_qos_s *kev, struct bpf_d *d)
{
	int ready = 0;
	int64_t data = 0;

	if (d->bd_immediate) {
		/*
		 * If there's data in the hold buffer, it's the
		 * amount of data a read will return.
		 *
		 * If there's no data in the hold buffer, but
		 * there's data in the store buffer, a read will
		 * immediately rotate the store buffer to the
		 * hold buffer, the amount of data in the store
		 * buffer is the amount of data a read will
		 * return.
		 *
		 * If there's no data in either buffer, we're not
		 * ready to read.
		 */
		data = (d->bd_hlen == 0 || d->bd_hbuf_read != 0 ?
		    d->bd_slen : d->bd_hlen);
		/* Clamp the low watermark to the buffer size */
		int64_t lowwat = knote_low_watermark(kn);
		if (lowwat > d->bd_bufsize) {
			lowwat = d->bd_bufsize;
		}
		ready = (data >= lowwat);
	} else {
		/*
		 * If there's data in the hold buffer, it's the
		 * amount of data a read will return.
		 *
		 * If there's no data in the hold buffer, but
		 * there's data in the store buffer, if the
		 * timer has expired a read will immediately
		 * rotate the store buffer to the hold buffer,
		 * so the amount of data in the store buffer is
		 * the amount of data a read will return.
		 *
		 * If there's no data in either buffer, or there's
		 * no data in the hold buffer and the timer hasn't
		 * expired, we're not ready to read.
		 */
		data = ((d->bd_hlen == 0 || d->bd_hbuf_read != 0) &&
		    d->bd_state == BPF_TIMED_OUT ? d->bd_slen : d->bd_hlen);
		ready = (data > 0);
	}
	if (!ready) {
		bpf_start_timer(d);
	} else if (kev) {
		knote_fill_kevent(kn, kev, data);
	}

	return ready;
}
2577 
/*
 * EVFILT_READ attach for bpf devices: validate the device and filter
 * type, attach the knote to the descriptor, and report the initial
 * readiness state.  Errors are delivered via knote_set_error().
 */
int
bpfkqfilter(dev_t dev, struct knote *kn)
{
	struct bpf_d *d;
	int res;

	/*
	 * Is this device a bpf?
	 */
	if (major(dev) != CDEV_MAJOR || kn->kn_filter != EVFILT_READ) {
		knote_set_error(kn, EINVAL);
		return 0;
	}

	lck_mtx_lock(bpf_mlock);

	d = bpf_dtab[minor(dev)];

	if (d == NULL || d == BPF_DEV_RESERVED ||
	    (d->bd_flags & BPF_CLOSING) != 0 ||
	    d->bd_bif == NULL) {
		lck_mtx_unlock(bpf_mlock);
		knote_set_error(kn, ENXIO);
		return 0;
	}

	kn->kn_hook = d;
	kn->kn_filtid = EVFILTID_BPFREAD;
	KNOTE_ATTACH(&d->bd_sel.si_note, kn);
	/* BPF_KNOTE tells filt_bpfdetach there is a knote to detach */
	d->bd_flags |= BPF_KNOTE;

	/* capture the current state */
	res = filt_bpfread_common(kn, NULL, d);

	lck_mtx_unlock(bpf_mlock);

	return res;
}
2616 
/*
 * EVFILT_READ f_detach: remove the knote from the descriptor's note
 * list.  BPF_KNOTE may already be clear if the descriptor side detached
 * the knote first, in which case there is nothing to do.
 */
static void
filt_bpfdetach(struct knote *kn)
{
	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;

	lck_mtx_lock(bpf_mlock);
	if (d->bd_flags & BPF_KNOTE) {
		KNOTE_DETACH(&d->bd_sel.si_note, kn);
		d->bd_flags &= ~BPF_KNOTE;
	}
	lck_mtx_unlock(bpf_mlock);
}
2629 
2630 static int
filt_bpfread(struct knote * kn,long hint)2631 filt_bpfread(struct knote *kn, long hint)
2632 {
2633 #pragma unused(hint)
2634 	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2635 
2636 	return filt_bpfread_common(kn, NULL, d);
2637 }
2638 
/*
 * EVFILT_READ f_touch: absorb updated kevent parameters (low watermark
 * and filter flags) into the knote, then re-evaluate readiness.
 */
static int
filt_bpftouch(struct knote *kn, struct kevent_qos_s *kev)
{
	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
	int res;

	lck_mtx_lock(bpf_mlock);

	/* save off the lowat threshold and flag */
	kn->kn_sdata = kev->data;
	kn->kn_sfflags = kev->fflags;

	/* output data will be re-generated here */
	res = filt_bpfread_common(kn, NULL, d);

	lck_mtx_unlock(bpf_mlock);

	return res;
}
2658 
2659 static int
filt_bpfprocess(struct knote * kn,struct kevent_qos_s * kev)2660 filt_bpfprocess(struct knote *kn, struct kevent_qos_s *kev)
2661 {
2662 	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2663 	int res;
2664 
2665 	lck_mtx_lock(bpf_mlock);
2666 	res = filt_bpfread_common(kn, kev, d);
2667 	lck_mtx_unlock(bpf_mlock);
2668 
2669 	return res;
2670 }
2671 
2672 /*
2673  * Copy data from an mbuf chain into a buffer.	This code is derived
2674  * from m_copydata in kern/uipc_mbuf.c.
2675  */
2676 static void
bpf_mcopy(struct mbuf * m,void * dst_arg,size_t len)2677 bpf_mcopy(struct mbuf * m, void *dst_arg, size_t len)
2678 {
2679 	u_int count;
2680 	u_char *dst;
2681 
2682 	dst = dst_arg;
2683 	while (len > 0) {
2684 		if (m == 0) {
2685 			panic("bpf_mcopy");
2686 		}
2687 		count = MIN(m->m_len, (u_int)len);
2688 		bcopy(mbuf_data(m), dst, count);
2689 		m = m->m_next;
2690 		dst += count;
2691 		len -= count;
2692 	}
2693 }
2694 
/*
 * Deliver a packet to every BPF descriptor listening on (ifp, dlt).
 *
 * Runs each descriptor's filter over the packet and, on a match, copies
 * the (possibly truncated) packet into the descriptor's buffers via
 * catchpacket().  'outbound' distinguishes transmit from receive.
 * 'dlt' == 0 means tap the interface's default (first) attachment.
 */
static inline void
bpf_tap_imp(
	ifnet_t         ifp,
	u_int32_t       dlt,
	struct bpf_packet *bpf_pkt,
	int             outbound)
{
	struct bpf_d    *d;
	u_int slen;
	struct bpf_if *bp;

	/*
	 * It's possible that we get here after the bpf descriptor has been
	 * detached from the interface; in such a case we simply return.
	 * Lock ordering is important since we can be called asynchronously
	 * (from IOKit) to process an inbound packet; when that happens
	 * we would have been holding its "gateLock" and will be acquiring
	 * "bpf_mlock" upon entering this routine.  Due to that, we release
	 * "bpf_mlock" prior to calling ifnet_set_promiscuous (which will
	 * acquire "gateLock" in the IOKit), in order to avoid a deadlock
	 * when a ifnet_set_promiscuous request simultaneously collides with
	 * an inbound packet being passed into the tap callback.
	 */
	lck_mtx_lock(bpf_mlock);
	if (ifp->if_bpf == NULL) {
		lck_mtx_unlock(bpf_mlock);
		return;
	}
	/* Locate the attachment for the requested DLT on this interface */
	for (bp = ifp->if_bpf; bp != NULL; bp = bp->bif_next) {
		if (bp->bif_ifp != ifp) {
			/* wrong interface */
			bp = NULL;
			break;
		}
		if (dlt == 0 || bp->bif_dlt == dlt) {
			/* tapping default DLT or DLT matches */
			break;
		}
	}
	if (bp == NULL) {
		goto done;
	}
	for (d = bp->bif_dlist; d != NULL; d = d->bd_next) {
		struct bpf_packet *bpf_pkt_saved = bpf_pkt;
		struct bpf_packet bpf_pkt_tmp = {};
		struct pktap_header_buffer bpfp_header_tmp = {};

		/* Skip outbound packets if the descriptor opted out */
		if (outbound && !d->bd_seesent) {
			continue;
		}

		++d->bd_rcount;
		slen = bpf_filter(d->bd_filter, (u_char *)bpf_pkt,
		    (u_int)bpf_pkt->bpfp_total_length, 0);

		if (bp->bif_ifp->if_type == IFT_PKTAP &&
		    bp->bif_dlt == DLT_PKTAP) {
			/* Limit the capture to the interesting headers */
			if (d->bd_flags & BPF_TRUNCATE) {
				slen = min(slen, get_pkt_trunc_len(bpf_pkt));
			}
			/*
			 * Need to copy the bpf_pkt because the conversion
			 * to v2 pktap header modifies the content of the
			 * bpfp_header
			 */
			if ((d->bd_flags & BPF_PKTHDRV2) &&
			    bpf_pkt->bpfp_header_length <= sizeof(bpfp_header_tmp)) {
				bpf_pkt_tmp = *bpf_pkt;

				bpf_pkt = &bpf_pkt_tmp;

				memcpy(&bpfp_header_tmp, bpf_pkt->bpfp_header,
				    bpf_pkt->bpfp_header_length);

				bpf_pkt->bpfp_header = &bpfp_header_tmp;

				convert_to_pktap_header_to_v2(bpf_pkt,
				    !!(d->bd_flags & BPF_TRUNCATE));
			}
		}
		if (slen != 0) {
			catchpacket(d, bpf_pkt, slen, outbound);
		}
		/* Restore the original packet for the next descriptor */
		bpf_pkt = bpf_pkt_saved;
	}

done:
	lck_mtx_unlock(bpf_mlock);
}
2784 
/*
 * Build a struct bpf_packet around an mbuf chain (plus an optional
 * out-of-band link-layer header that is not part of the chain) and
 * pass it to bpf_tap_imp().
 */
static inline void
bpf_tap_mbuf(
	ifnet_t         ifp,
	u_int32_t       dlt,
	mbuf_t          m,
	void*           hdr,
	size_t          hlen,
	int             outbound)
{
	struct bpf_packet bpf_pkt;
	struct mbuf *m0;

	if (ifp->if_bpf == NULL) {
		/* quickly check without taking lock */
		return;
	}
	bpf_pkt.bpfp_type = BPF_PACKET_TYPE_MBUF;
	bpf_pkt.bpfp_mbuf = m;
	bpf_pkt.bpfp_total_length = 0;
	/* Total length is the sum of all mbufs in the chain */
	for (m0 = m; m0 != NULL; m0 = m0->m_next) {
		bpf_pkt.bpfp_total_length += m0->m_len;
	}
	bpf_pkt.bpfp_header = hdr;
	if (hdr != NULL) {
		/* The out-of-band header counts toward the total length */
		bpf_pkt.bpfp_total_length += hlen;
		bpf_pkt.bpfp_header_length = hlen;
	} else {
		bpf_pkt.bpfp_header_length = 0;
	}
	bpf_tap_imp(ifp, dlt, &bpf_pkt, outbound);
}
2816 
2817 void
bpf_tap_out(ifnet_t ifp,u_int32_t dlt,mbuf_t m,void * hdr,size_t hlen)2818 bpf_tap_out(
2819 	ifnet_t         ifp,
2820 	u_int32_t       dlt,
2821 	mbuf_t          m,
2822 	void*           hdr,
2823 	size_t          hlen)
2824 {
2825 	bpf_tap_mbuf(ifp, dlt, m, hdr, hlen, 1);
2826 }
2827 
2828 void
bpf_tap_in(ifnet_t ifp,u_int32_t dlt,mbuf_t m,void * hdr,size_t hlen)2829 bpf_tap_in(
2830 	ifnet_t         ifp,
2831 	u_int32_t       dlt,
2832 	mbuf_t          m,
2833 	void*           hdr,
2834 	size_t          hlen)
2835 {
2836 	bpf_tap_mbuf(ifp, dlt, m, hdr, hlen, 0);
2837 }
2838 
/* Callback registered with Ethernet driver. */
static int
bpf_tap_callback(struct ifnet *ifp, struct mbuf *m)
{
	/*
	 * Tap with the interface's default DLT (0); a packet without a
	 * receive interface is treated as outbound.
	 */
	bpf_tap_mbuf(ifp, 0, m, NULL, 0, mbuf_pkthdr_rcvif(m) == NULL);

	return 0;
}
2847 
2848 #if SKYWALK
2849 #include <skywalk/os_skywalk_private.h>
2850 
/*
 * Copy 'len' bytes from a kern_packet's buflet chain into 'dst_arg'.
 * VERIFY-asserts that the chain supplies at least 'len' bytes.
 */
static void
bpf_pktcopy(kern_packet_t pkt, void *dst_arg, size_t len)
{
	kern_buflet_t   buflet = NULL;
	size_t count;
	u_char *dst;

	dst = dst_arg;
	while (len > 0) {
		uint8_t         *addr;

		u_int32_t       buflet_length;

		/* Advance to the next buflet (NULL yields the first one) */
		buflet = kern_packet_get_next_buflet(pkt, buflet);
		VERIFY(buflet != NULL);
		addr = kern_buflet_get_data_address(buflet);
		VERIFY(addr != NULL);
		addr += kern_buflet_get_data_offset(buflet);
		buflet_length = kern_buflet_get_data_length(buflet);
		count = MIN(buflet_length, len);
		bcopy((void *)addr, (void *)dst, count);
		dst += count;
		len -= count;
	}
}
2876 
/*
 * Build a struct bpf_packet around a Skywalk kern_packet and pass it to
 * bpf_tap_imp().  If the packet has an attached mbuf, capture the mbuf
 * representation instead.
 */
static inline void
bpf_tap_packet(
	ifnet_t         ifp,
	u_int32_t       dlt,
	kern_packet_t   pkt,
	void*           hdr,
	size_t          hlen,
	int             outbound)
{
	struct bpf_packet       bpf_pkt;
	struct mbuf *           m;

	if (ifp->if_bpf == NULL) {
		/* quickly check without taking lock */
		return;
	}
	m = kern_packet_get_mbuf(pkt);
	if (m != NULL) {
		bpf_pkt.bpfp_type = BPF_PACKET_TYPE_MBUF;
		bpf_pkt.bpfp_mbuf = m;
		bpf_pkt.bpfp_total_length = m_length(m);
	} else {
		bpf_pkt.bpfp_type = BPF_PACKET_TYPE_PKT;
		bpf_pkt.bpfp_pkt = pkt;
		bpf_pkt.bpfp_total_length = kern_packet_get_data_length(pkt);
	}
	bpf_pkt.bpfp_header = hdr;
	bpf_pkt.bpfp_header_length = hlen;
	if (hlen != 0) {
		/* The out-of-band header counts toward the total length */
		bpf_pkt.bpfp_total_length += hlen;
	}
	bpf_tap_imp(ifp, dlt, &bpf_pkt, outbound);
}
2910 
2911 void
bpf_tap_packet_out(ifnet_t ifp,u_int32_t dlt,kern_packet_t pkt,void * hdr,size_t hlen)2912 bpf_tap_packet_out(
2913 	ifnet_t         ifp,
2914 	u_int32_t       dlt,
2915 	kern_packet_t   pkt,
2916 	void*           hdr,
2917 	size_t          hlen)
2918 {
2919 	bpf_tap_packet(ifp, dlt, pkt, hdr, hlen, 1);
2920 }
2921 
2922 void
bpf_tap_packet_in(ifnet_t ifp,u_int32_t dlt,kern_packet_t pkt,void * hdr,size_t hlen)2923 bpf_tap_packet_in(
2924 	ifnet_t         ifp,
2925 	u_int32_t       dlt,
2926 	kern_packet_t   pkt,
2927 	void*           hdr,
2928 	size_t          hlen)
2929 {
2930 	bpf_tap_packet(ifp, dlt, pkt, hdr, hlen, 0);
2931 }
2932 
2933 #endif /* SKYWALK */
2934 
2935 static errno_t
bpf_copydata(struct bpf_packet * pkt,size_t off,size_t len,void * out_data)2936 bpf_copydata(struct bpf_packet *pkt, size_t off, size_t len, void* out_data)
2937 {
2938 	errno_t err = 0;
2939 	if (pkt->bpfp_type == BPF_PACKET_TYPE_MBUF) {
2940 		err = mbuf_copydata(pkt->bpfp_mbuf, off, len, out_data);
2941 #if SKYWALK
2942 	} else if (pkt->bpfp_type == BPF_PACKET_TYPE_PKT) {
2943 		err = kern_packet_copy_bytes(pkt->bpfp_pkt, off, len, out_data);
2944 #endif /* SKYWALK */
2945 	} else {
2946 		err = EINVAL;
2947 	}
2948 
2949 	return err;
2950 }
2951 
/*
 * Copy at most 'len' bytes of a BPF packet into 'dst': the optional
 * out-of-band header first, then the packet payload.
 */
static void
copy_bpf_packet(struct bpf_packet * pkt, void * dst, size_t len)
{
	/* copy the optional header */
	if (pkt->bpfp_header_length != 0) {
		size_t  count = MIN(len, pkt->bpfp_header_length);
		bcopy(pkt->bpfp_header, dst, count);
		len -= count;
		dst = (void *)((uintptr_t)dst + count);
	}
	if (len == 0) {
		/* nothing past the header */
		return;
	}
	/* copy the packet */
	switch (pkt->bpfp_type) {
	case BPF_PACKET_TYPE_MBUF:
		bpf_mcopy(pkt->bpfp_mbuf, dst, len);
		break;
#if SKYWALK
	case BPF_PACKET_TYPE_PKT:
		bpf_pktcopy(pkt->bpfp_pkt, dst, len);
		break;
#endif /* SKYWALK */
	default:
		break;
	}
}
2980 
2981 static uint32_t
get_esp_trunc_len(__unused struct bpf_packet * pkt,__unused uint32_t off,const uint32_t remaining_caplen)2982 get_esp_trunc_len(__unused struct bpf_packet *pkt, __unused uint32_t off,
2983     const uint32_t remaining_caplen)
2984 {
2985 	/*
2986 	 * For some reason tcpdump expects to have one byte beyond the ESP header
2987 	 */
2988 	uint32_t trunc_len = ESP_HDR_SIZE + 1;
2989 
2990 	if (trunc_len > remaining_caplen) {
2991 		return remaining_caplen;
2992 	}
2993 
2994 	return trunc_len;
2995 }
2996 
2997 static uint32_t
get_isakmp_trunc_len(__unused struct bpf_packet * pkt,__unused uint32_t off,const uint32_t remaining_caplen)2998 get_isakmp_trunc_len(__unused struct bpf_packet *pkt, __unused uint32_t off,
2999     const uint32_t remaining_caplen)
3000 {
3001 	/*
3002 	 * Include the payload generic header
3003 	 */
3004 	uint32_t trunc_len = ISAKMP_HDR_SIZE;
3005 
3006 	if (trunc_len > remaining_caplen) {
3007 		return remaining_caplen;
3008 	}
3009 
3010 	return trunc_len;
3011 }
3012 
3013 static uint32_t
get_isakmp_natt_trunc_len(struct bpf_packet * pkt,uint32_t off,const uint32_t remaining_caplen)3014 get_isakmp_natt_trunc_len(struct bpf_packet *pkt, uint32_t off,
3015     const uint32_t remaining_caplen)
3016 {
3017 	int err = 0;
3018 	uint32_t trunc_len = 0;
3019 	char payload[remaining_caplen];
3020 
3021 	err = bpf_copydata(pkt, off, remaining_caplen, payload);
3022 	if (err != 0) {
3023 		return remaining_caplen;
3024 	}
3025 	/*
3026 	 * They are three cases:
3027 	 * - IKE: payload start with 4 bytes header set to zero before ISAKMP header
3028 	 * - keep alive: 1 byte payload
3029 	 * - otherwise it's ESP
3030 	 */
3031 	if (remaining_caplen >= 4 &&
3032 	    payload[0] == 0 && payload[1] == 0 &&
3033 	    payload[2] == 0 && payload[3] == 0) {
3034 		trunc_len = 4 + get_isakmp_trunc_len(pkt, off + 4, remaining_caplen - 4);
3035 	} else if (remaining_caplen == 1) {
3036 		trunc_len = 1;
3037 	} else {
3038 		trunc_len = get_esp_trunc_len(pkt, off, remaining_caplen);
3039 	}
3040 
3041 	if (trunc_len > remaining_caplen) {
3042 		return remaining_caplen;
3043 	}
3044 
3045 	return trunc_len;
3046 }
3047 
/*
 * Truncation length for a UDP datagram: the UDP header plus, for a few
 * well-known ports (DNS, BOOTP/DHCP, ISAKMP, ISAKMP NAT-T), enough of
 * the payload to be useful, clamped to the remaining capture length.
 */
static uint32_t
get_udp_trunc_len(struct bpf_packet *pkt, uint32_t off, const uint32_t remaining_caplen)
{
	int err = 0;
	uint32_t trunc_len = sizeof(struct udphdr); /* By default no UDP payload */

	if (trunc_len >= remaining_caplen) {
		return remaining_caplen;
	}

	struct udphdr udphdr;
	err = bpf_copydata(pkt, off, sizeof(struct udphdr), &udphdr);
	if (err != 0) {
		/* Cannot read the header: keep everything we have */
		return remaining_caplen;
	}

	u_short sport, dport;

	sport = EXTRACT_SHORT(&udphdr.uh_sport);
	dport = EXTRACT_SHORT(&udphdr.uh_dport);

	if (dport == PORT_DNS || sport == PORT_DNS) {
		/*
		 * Full UDP payload for DNS
		 */
		trunc_len = remaining_caplen;
	} else if ((sport == PORT_BOOTPS && dport == PORT_BOOTPC) ||
	    (sport == PORT_BOOTPC && dport == PORT_BOOTPS)) {
		/*
		 * Full UDP payload for BOOTP and DHCP
		 */
		trunc_len = remaining_caplen;
	} else if (dport == PORT_ISAKMP && sport == PORT_ISAKMP) {
		/*
		 * Return the ISAKMP header
		 */
		trunc_len += get_isakmp_trunc_len(pkt, off + sizeof(struct udphdr),
		    remaining_caplen - sizeof(struct udphdr));
	} else if (dport == PORT_ISAKMP_NATT && sport == PORT_ISAKMP_NATT) {
		trunc_len += get_isakmp_natt_trunc_len(pkt, off + sizeof(struct udphdr),
		    remaining_caplen - sizeof(struct udphdr));
	}
	if (trunc_len >= remaining_caplen) {
		return remaining_caplen;
	}

	return trunc_len;
}
3096 
/*
 * Truncation length for a TCP segment: the full payload for DNS,
 * otherwise just the TCP header including options, clamped to the
 * remaining capture length.
 */
static uint32_t
get_tcp_trunc_len(struct bpf_packet *pkt, uint32_t off, const uint32_t remaining_caplen)
{
	int err = 0;
	uint32_t trunc_len = sizeof(struct tcphdr); /* By default no TCP payload */
	if (trunc_len >= remaining_caplen) {
		return remaining_caplen;
	}

	struct tcphdr tcphdr;
	err = bpf_copydata(pkt, off, sizeof(struct tcphdr), &tcphdr);
	if (err != 0) {
		/* Cannot read the header: keep everything we have */
		return remaining_caplen;
	}

	u_short sport, dport;
	sport = EXTRACT_SHORT(&tcphdr.th_sport);
	dport = EXTRACT_SHORT(&tcphdr.th_dport);

	if (dport == PORT_DNS || sport == PORT_DNS) {
		/*
		 * Full TCP payload  for DNS
		 */
		trunc_len = remaining_caplen;
	} else {
		/* Header length in bytes (th_off is in 32-bit words) */
		trunc_len = (uint16_t)(tcphdr.th_off << 2);
	}
	if (trunc_len >= remaining_caplen) {
		return remaining_caplen;
	}

	return trunc_len;
}
3130 
/*
 * Dispatch on the IP protocol number to compute how much transport
 * payload to keep, clamped to the remaining capture length.  Unknown
 * protocols keep nothing beyond the IP header.
 */
static uint32_t
get_proto_trunc_len(uint8_t proto, struct bpf_packet *pkt, uint32_t off, const uint32_t remaining_caplen)
{
	uint32_t trunc_len;

	switch (proto) {
	case IPPROTO_ICMP: {
		/*
		 * Full ICMP payload
		 */
		trunc_len = remaining_caplen;
		break;
	}
	case IPPROTO_ICMPV6: {
		/*
		 * Full ICMPv6 payload
		 */
		trunc_len = remaining_caplen;
		break;
	}
	case IPPROTO_IGMP: {
		/*
		 * Full IGMP payload
		 */
		trunc_len = remaining_caplen;
		break;
	}
	case IPPROTO_UDP: {
		trunc_len = get_udp_trunc_len(pkt, off, remaining_caplen);
		break;
	}
	case IPPROTO_TCP: {
		trunc_len = get_tcp_trunc_len(pkt, off, remaining_caplen);
		break;
	}
	case IPPROTO_ESP: {
		trunc_len = get_esp_trunc_len(pkt, off, remaining_caplen);
		break;
	}
	default: {
		/*
		 * By default we only include the IP header
		 */
		trunc_len = 0;
		break;
	}
	}
	if (trunc_len >= remaining_caplen) {
		return remaining_caplen;
	}

	return trunc_len;
}
3184 
/*
 * Truncation length for an IPv4 packet: the IP header (including
 * options) plus the per-protocol payload length, clamped to the
 * remaining capture length.
 */
static uint32_t
get_ip_trunc_len(struct bpf_packet *pkt, uint32_t off, const uint32_t remaining_caplen)
{
	int err = 0;
	uint32_t iplen = sizeof(struct ip);
	if (iplen >= remaining_caplen) {
		return remaining_caplen;
	}

	struct ip iphdr;
	err =  bpf_copydata(pkt, off, sizeof(struct ip), &iphdr);
	if (err != 0) {
		/* Cannot read the header: keep everything we have */
		return remaining_caplen;
	}

	uint8_t proto = 0;

	/* Actual header length in bytes (ip_hl is in 32-bit words) */
	iplen = (uint16_t)(iphdr.ip_hl << 2);
	if (iplen >= remaining_caplen) {
		return remaining_caplen;
	}

	proto = iphdr.ip_p;
	iplen += get_proto_trunc_len(proto, pkt, off + iplen, remaining_caplen - iplen);

	if (iplen >= remaining_caplen) {
		return remaining_caplen;
	}

	return iplen;
}
3216 
/*
 * Truncation length for an IPv6 packet: the fixed IPv6 header plus the
 * per-protocol payload length, clamped to the remaining capture length.
 */
static uint32_t
get_ip6_trunc_len(struct bpf_packet *pkt, uint32_t off, const uint32_t remaining_caplen)
{
	int err = 0;
	uint32_t iplen = sizeof(struct ip6_hdr);
	if (iplen >= remaining_caplen) {
		return remaining_caplen;
	}

	struct ip6_hdr ip6hdr;
	err = bpf_copydata(pkt, off, sizeof(struct ip6_hdr), &ip6hdr);
	if (err != 0) {
		/* Cannot read the header: keep everything we have */
		return remaining_caplen;
	}

	uint8_t proto = 0;

	/*
	 * TBD: process the extension headers
	 */
	proto = ip6hdr.ip6_nxt;
	iplen += get_proto_trunc_len(proto, pkt, off + iplen, remaining_caplen - iplen);

	if (iplen >= remaining_caplen) {
		return remaining_caplen;
	}

	return iplen;
}
3246 
/*
 * Truncation length for an Ethernet frame: the full frame for ARP, the
 * Ethernet header plus protocol-specific length for IPv4/IPv6, otherwise
 * a minimal BPF_MIN_PKT_SIZE snippet; clamped to the remaining capture
 * length.
 */
static uint32_t
get_ether_trunc_len(struct bpf_packet *pkt, uint32_t off, const uint32_t remaining_caplen)
{
	int err = 0;
	uint32_t ethlen = sizeof(struct ether_header);
	if (ethlen >= remaining_caplen) {
		return remaining_caplen;
	}

	struct ether_header eh = {};
	err = bpf_copydata(pkt, off, sizeof(struct ether_header), &eh);
	if (err != 0) {
		/* Cannot read the header: keep everything we have */
		return remaining_caplen;
	}

	u_short type = EXTRACT_SHORT(&eh.ether_type);
	/* Include full ARP */
	if (type == ETHERTYPE_ARP) {
		ethlen = remaining_caplen;
	} else if (type == ETHERTYPE_IP) {
		ethlen += get_ip_trunc_len(pkt, off + sizeof(struct ether_header),
		    remaining_caplen - ethlen);
	} else if (type == ETHERTYPE_IPV6) {
		ethlen += get_ip6_trunc_len(pkt, off + sizeof(struct ether_header),
		    remaining_caplen - ethlen);
	} else {
		/* Unknown ethertype: keep only a minimal snippet */
		ethlen = MIN(BPF_MIN_PKT_SIZE, remaining_caplen);
	}
	return ethlen;
}
3277 
3278 #include <kern/assert.h>
3279 
static uint32_t
get_pkt_trunc_len(struct bpf_packet *pkt)
{
	/*
	 * Compute how many bytes of this pktap-encapsulated packet to keep
	 * when truncation mode is in effect: the pktap metadata header, the
	 * (possibly pseudo) L2 header, plus a protocol-aware truncation of
	 * the payload (full ARP, parsed IPv4/IPv6, or a minimal prefix for
	 * protocols we do not know how to parse).
	 *
	 * The returned length never exceeds pkt->bpfp_total_length: an
	 * overflow is clamped (and panics under DEBUG or bpf_debug).
	 */
	struct pktap_header *pktap = (struct pktap_header *) (pkt->bpfp_header);
	uint32_t in_pkt_len = 0;        // payload bytes available to the parser
	uint32_t out_pkt_len = 0;       // truncated payload length from the parser
	uint32_t tlen = 0;
	uint32_t pre_adjust;    // L2 header not in mbuf or kern_packet

	// bpfp_total_length must contain the BPF packet header
	assert3u(pkt->bpfp_total_length, >=, pkt->bpfp_header_length);

	// The BPF packet header must contain the pktap header
	assert3u(pkt->bpfp_header_length, >=, pktap->pth_length);

	// The pre frame length (L2 header) must be contained in the packet
	assert3u(pkt->bpfp_total_length, >=, pktap->pth_length + pktap->pth_frame_pre_length);

	/*
	 * pktap->pth_frame_pre_length is the L2 header length and accounts
	 * for both L2 header in the packet payload and pre_adjust.
	 *
	 * pre_adjust represents an adjustment for a pseudo L2 header that is not
	 * part of packet payload -- not in the mbuf or kern_packet -- and comes
	 * just after the pktap header.
	 *
	 * pktap->pth_length is the size of the pktap header (exclude pre_adjust)
	 *
	 * pkt->bpfp_header_length is (pktap->pth_length + pre_adjust)
	 */
	pre_adjust = (uint32_t)(pkt->bpfp_header_length - pktap->pth_length);

	if (pktap->pth_iftype == IFT_ETHER) {
		/*
		 * We need to parse the Ethernet header to find the network layer
		 * protocol
		 */
		in_pkt_len = (uint32_t)(pkt->bpfp_total_length - pktap->pth_length - pre_adjust);

		out_pkt_len = get_ether_trunc_len(pkt, 0, in_pkt_len);

		tlen = pktap->pth_length + pre_adjust + out_pkt_len;
	} else {
		/*
		 * For other interface types, we only know to parse IPv4 and IPv6.
		 *
		 * To get to the beginning of the IPv4 or IPv6 packet, we need to to skip
		 * over the L2 header that is the actual packet payload (mbuf or kern_packet)
		 */
		uint32_t off;   // offset past the L2 header in the actual packet payload

		off = pktap->pth_frame_pre_length - pre_adjust;

		in_pkt_len = (uint32_t)(pkt->bpfp_total_length - pktap->pth_length - pktap->pth_frame_pre_length);

		if (pktap->pth_protocol_family == AF_INET) {
			out_pkt_len = get_ip_trunc_len(pkt, off, in_pkt_len);
		} else if (pktap->pth_protocol_family == AF_INET6) {
			out_pkt_len = get_ip6_trunc_len(pkt, off, in_pkt_len);
		} else {
			// Unknown family: keep only a minimal prefix of the payload
			out_pkt_len = MIN(BPF_MIN_PKT_SIZE, in_pkt_len);
		}
		tlen = pktap->pth_length + pktap->pth_frame_pre_length + out_pkt_len;
	}

	// Verify we do not overflow the buffer
	if (__improbable(tlen > pkt->bpfp_total_length)) {
		// Panic when debugging (bpf_debug sysctl or DEBUG build),
		// otherwise log, count the event, and clamp to the packet size.
		bool do_panic = bpf_debug != 0 ? true : false;

#if DEBUG
		do_panic = true;
#endif /* DEBUG */
		if (do_panic) {
			panic("%s:%d tlen %u > bpfp_total_length %lu bpfp_header_length %lu pth_frame_pre_length %u pre_adjust %u in_pkt_len %u out_pkt_len %u\n",
			    __func__, __LINE__,
			    tlen, pkt->bpfp_total_length, pkt->bpfp_header_length, pktap->pth_frame_pre_length, pre_adjust, in_pkt_len, out_pkt_len);
		} else {
			os_log(OS_LOG_DEFAULT,
			    "%s:%d tlen %u > bpfp_total_length %lu bpfp_header_length %lu pth_frame_pre_length %u pre_adjust %u in_pkt_len %u out_pkt_len %u",
			    __func__, __LINE__,
			    tlen, pkt->bpfp_total_length, pkt->bpfp_header_length, pktap->pth_frame_pre_length, pre_adjust, in_pkt_len, out_pkt_len);
		}
		bpf_trunc_overflow += 1;
		tlen = (uint32_t)pkt->bpfp_total_length;
	}

	return tlen;
}
3368 
3369 /*
3370  * Move the packet data from interface memory (pkt) into the
3371  * store buffer.  Return 1 if it's time to wakeup a listener (buffer full),
3372  * otherwise 0.
3373  */
static void
catchpacket(struct bpf_d *d, struct bpf_packet * pkt,
    u_int snaplen, int outbound)
{
	struct bpf_hdr *hp;
	struct bpf_hdr_ext *ehp;
	int totlen, curlen;
	int hdrlen, caplen;
	int do_wakeup = 0;
	u_char *payload;
	struct timeval tv;

	// Per-packet header size depends on whether this descriptor
	// requested the extended BPF header.
	hdrlen = (d->bd_flags & BPF_EXTENDED_HDR) ? d->bd_bif->bif_exthdrlen :
	    d->bd_bif->bif_hdrlen;
	/*
	 * Figure out how many bytes to move.  If the packet is
	 * greater or equal to the snapshot length, transfer that
	 * much.  Otherwise, transfer the whole packet (unless
	 * we hit the buffer size limit).
	 */
	totlen = hdrlen + MIN(snaplen, (int)pkt->bpfp_total_length);
	if (totlen > d->bd_bufsize) {
		totlen = d->bd_bufsize;
	}

	// Cannot fit even the BPF header in the buffer: nothing to record.
	if (hdrlen > totlen) {
		return;
	}

	/*
	 * Round up the end of the previous packet to the next longword.
	 */
	curlen = BPF_WORDALIGN(d->bd_slen);
	if (curlen + totlen > d->bd_bufsize) {
		/*
		 * This packet will overflow the storage buffer.
		 * Rotate the buffers if we can, then wakeup any
		 * pending reads.
		 *
		 * We cannot rotate buffers if a read is in progress
		 * so drop the packet
		 */
		if (d->bd_hbuf_read != 0) {
			++d->bd_dcount;
			return;
		}

		if (d->bd_fbuf == NULL) {
			if (d->bd_headdrop == 0) {
				/*
				 * We haven't completed the previous read yet,
				 * so drop the packet.
				 */
				++d->bd_dcount;
				return;
			}
			/*
			 * Drop the hold buffer as it contains older packets
			 */
			d->bd_dcount += d->bd_hcnt;
			d->bd_fbuf = d->bd_hbuf;
			ROTATE_BUFFERS(d);
		} else {
			ROTATE_BUFFERS(d);
		}
		do_wakeup = 1;
		curlen = 0;
	} else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT) {
		/*
		 * Immediate mode is set, or the read timeout has
		 * already expired during a select call. A packet
		 * arrived, so the reader should be woken up.
		 */
		do_wakeup = 1;
	}

	/*
	 * Append the bpf header.
	 */
	microtime(&tv);
	if (d->bd_flags & BPF_EXTENDED_HDR) {
		// Extended header: also records direction, flow id, service
		// class and per-packet flags taken from the packet metadata.
		ehp = (struct bpf_hdr_ext *)(void *)(d->bd_sbuf + curlen);
		memset(ehp, 0, sizeof(*ehp));
		ehp->bh_tstamp.tv_sec = (int)tv.tv_sec;
		ehp->bh_tstamp.tv_usec = tv.tv_usec;

		ehp->bh_datalen = (bpf_u_int32)pkt->bpfp_total_length;
		ehp->bh_hdrlen = (u_short)hdrlen;
		caplen = ehp->bh_caplen = totlen - hdrlen;
		payload = (u_char *)ehp + hdrlen;

		if (outbound) {
			ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_OUT;
		} else {
			ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_IN;
		}

		// Metadata source differs by packet representation:
		// mbuf pkthdr vs. (SKYWALK) kern_packet accessors.
		if (pkt->bpfp_type == BPF_PACKET_TYPE_MBUF) {
			struct mbuf *m = pkt->bpfp_mbuf;

			if (outbound) {
				/* only do lookups on non-raw INPCB */
				if ((m->m_pkthdr.pkt_flags & (PKTF_FLOW_ID |
				    PKTF_FLOW_LOCALSRC | PKTF_FLOW_RAWSOCK)) ==
				    (PKTF_FLOW_ID | PKTF_FLOW_LOCALSRC) &&
				    m->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
					ehp->bh_flowid = m->m_pkthdr.pkt_flowid;
					ehp->bh_proto = m->m_pkthdr.pkt_proto;
				}
				ehp->bh_svc = so_svc2tc(m->m_pkthdr.pkt_svc);
				if (m->m_pkthdr.pkt_flags & PKTF_TCP_REXMT) {
					ehp->bh_pktflags |= BPF_PKTFLAGS_TCP_REXMT;
				}
				if (m->m_pkthdr.pkt_flags & PKTF_START_SEQ) {
					ehp->bh_pktflags |= BPF_PKTFLAGS_START_SEQ;
				}
				if (m->m_pkthdr.pkt_flags & PKTF_LAST_PKT) {
					ehp->bh_pktflags |= BPF_PKTFLAGS_LAST_PKT;
				}
				if (m->m_pkthdr.pkt_flags & PKTF_VALID_UNSENT_DATA) {
					ehp->bh_unsent_bytes =
					    m->m_pkthdr.bufstatus_if;
					ehp->bh_unsent_snd =
					    m->m_pkthdr.bufstatus_sndbuf;
				}
			} else {
				if (m->m_pkthdr.pkt_flags & PKTF_WAKE_PKT) {
					ehp->bh_pktflags |= BPF_PKTFLAGS_WAKE_PKT;
				}
			}
#if SKYWALK
		} else {
			kern_packet_t kern_pkt = pkt->bpfp_pkt;

			if (outbound) {
				/*
				 * Note: pp_init() asserts that kern_packet_svc_class_t is equivalent
				 * to mbuf_svc_class_t
				 */
				ehp->bh_svc = so_svc2tc((mbuf_svc_class_t)kern_packet_get_service_class(kern_pkt));
				if (kern_packet_get_transport_retransmit(kern_pkt)) {
					ehp->bh_pktflags |= BPF_PKTFLAGS_TCP_REXMT;
				}
				if (kern_packet_get_transport_last_packet(kern_pkt)) {
					ehp->bh_pktflags |= BPF_PKTFLAGS_LAST_PKT;
				}
			} else {
				if (kern_packet_get_wake_flag(kern_pkt)) {
					ehp->bh_pktflags |= BPF_PKTFLAGS_WAKE_PKT;
				}
			}
#endif /* SKYWALK */
		}
	} else {
		// Classic (non-extended) BPF header: timestamp and lengths only.
		hp = (struct bpf_hdr *)(void *)(d->bd_sbuf + curlen);
		hp->bh_tstamp.tv_sec = (int)tv.tv_sec;
		hp->bh_tstamp.tv_usec = tv.tv_usec;
		hp->bh_datalen = (bpf_u_int32)pkt->bpfp_total_length;
		hp->bh_hdrlen = (u_short)hdrlen;
		caplen = hp->bh_caplen = totlen - hdrlen;
		payload = (u_char *)hp + hdrlen;
	}
	/*
	 * Copy the packet data into the store buffer and update its length.
	 */
	copy_bpf_packet(pkt, payload, caplen);
	d->bd_slen = curlen + totlen;
	d->bd_scnt += 1;

	if (do_wakeup) {
		bpf_wakeup(d);
	}
}
3547 
3548 /*
3549  * Initialize all nonzero fields of a descriptor.
3550  */
3551 static int
bpf_allocbufs(struct bpf_d * d)3552 bpf_allocbufs(struct bpf_d *d)
3553 {
3554 	if (d->bd_sbuf != NULL) {
3555 		kfree_data_addr(d->bd_sbuf);
3556 		d->bd_sbuf = NULL;
3557 	}
3558 	if (d->bd_hbuf != NULL) {
3559 		kfree_data_addr(d->bd_hbuf);
3560 		d->bd_hbuf = NULL;
3561 	}
3562 	if (d->bd_fbuf != NULL) {
3563 		kfree_data_addr(d->bd_fbuf);
3564 		d->bd_fbuf = NULL;
3565 	}
3566 
3567 	d->bd_fbuf = (caddr_t) kalloc_data(d->bd_bufsize, M_WAIT);
3568 	if (d->bd_fbuf == NULL) {
3569 		return ENOBUFS;
3570 	}
3571 
3572 	d->bd_sbuf = (caddr_t) kalloc_data(d->bd_bufsize, M_WAIT);
3573 	if (d->bd_sbuf == NULL) {
3574 		kfree_data(d->bd_fbuf, d->bd_bufsize);
3575 		d->bd_fbuf = NULL;
3576 		return ENOBUFS;
3577 	}
3578 	d->bd_slen = 0;
3579 	d->bd_hlen = 0;
3580 	d->bd_scnt = 0;
3581 	d->bd_hcnt = 0;
3582 	return 0;
3583 }
3584 
3585 /*
3586  * Free buffers currently in use by a descriptor.
3587  * Called on close.
3588  */
static void
bpf_freed(struct bpf_d *d)
{
	/*
	 * Release the descriptor's capture buffers and filter program.
	 *
	 * We don't need to lock out interrupts since this descriptor has
	 * been detached from its interface and it yet hasn't been marked
	 * free.
	 */
	if (d->bd_hbuf_read != 0) {
		panic("bpf buffer freed during read");
	}

	/*
	 * The hold and free buffers only exist when the store buffer
	 * does (they are allocated together -- see bpf_allocbufs),
	 * hence the nested checks.
	 */
	if (d->bd_sbuf != 0) {
		kfree_data_addr(d->bd_sbuf);
		if (d->bd_hbuf != 0) {
			kfree_data_addr(d->bd_hbuf);
		}
		if (d->bd_fbuf != 0) {
			kfree_data_addr(d->bd_fbuf);
		}
	}
	if (d->bd_filter) {
		kfree_data_addr(d->bd_filter);
	}
}
3614 
3615 /*
3616  * Attach an interface to bpf.	driverp is a pointer to a (struct bpf_if *)
3617  * in the driver's softc; dlt is the link layer type; hdrlen is the fixed
3618  * size of the link header (variable length headers not yet supported).
3619  */
void
bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
{
	/* Legacy entry point: attach with no send/tap callbacks. */
	bpf_attach(ifp, dlt, hdrlen, NULL, NULL);
}
3625 
3626 errno_t
bpf_attach(ifnet_t ifp,u_int32_t dlt,u_int32_t hdrlen,bpf_send_func send,bpf_tap_func tap)3627 bpf_attach(
3628 	ifnet_t                 ifp,
3629 	u_int32_t               dlt,
3630 	u_int32_t               hdrlen,
3631 	bpf_send_func   send,
3632 	bpf_tap_func    tap)
3633 {
3634 	struct bpf_if *bp;
3635 	struct bpf_if *bp_new;
3636 	struct bpf_if *bp_before_first = NULL;
3637 	struct bpf_if *bp_first = NULL;
3638 	struct bpf_if *bp_last = NULL;
3639 	boolean_t found;
3640 
3641 	bp_new = kalloc_type(struct bpf_if, M_WAIT | Z_ZERO);
3642 	if (bp_new == 0) {
3643 		panic("bpfattach");
3644 	}
3645 
3646 	lck_mtx_lock(bpf_mlock);
3647 
3648 	/*
3649 	 * Check if this interface/dlt is already attached. Remember the
3650 	 * first and last attachment for this interface, as well as the
3651 	 * element before the first attachment.
3652 	 */
3653 	found = FALSE;
3654 	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
3655 		if (bp->bif_ifp != ifp) {
3656 			if (bp_first != NULL) {
3657 				/* no more elements for this interface */
3658 				break;
3659 			}
3660 			bp_before_first = bp;
3661 		} else {
3662 			if (bp->bif_dlt == dlt) {
3663 				found = TRUE;
3664 				break;
3665 			}
3666 			if (bp_first == NULL) {
3667 				bp_first = bp;
3668 			}
3669 			bp_last = bp;
3670 		}
3671 	}
3672 	if (found) {
3673 		lck_mtx_unlock(bpf_mlock);
3674 		printf("bpfattach - %s with dlt %d is already attached\n",
3675 		    if_name(ifp), dlt);
3676 		kfree_type(struct bpf_if, bp_new);
3677 		return EEXIST;
3678 	}
3679 
3680 	bp_new->bif_ifp = ifp;
3681 	bp_new->bif_dlt = dlt;
3682 	bp_new->bif_send = send;
3683 	bp_new->bif_tap = tap;
3684 
3685 	if (bp_first == NULL) {
3686 		/* No other entries for this ifp */
3687 		bp_new->bif_next = bpf_iflist;
3688 		bpf_iflist = bp_new;
3689 	} else {
3690 		if (ifnet_type(ifp) == IFT_ETHER && dlt == DLT_EN10MB) {
3691 			/* Make this the first entry for this interface */
3692 			if (bp_before_first != NULL) {
3693 				/*  point the previous to us */
3694 				bp_before_first->bif_next = bp_new;
3695 			} else {
3696 				/* we're the new head */
3697 				bpf_iflist = bp_new;
3698 			}
3699 			bp_new->bif_next = bp_first;
3700 		} else {
3701 			/* Add this after the last entry for this interface */
3702 			bp_new->bif_next = bp_last->bif_next;
3703 			bp_last->bif_next = bp_new;
3704 		}
3705 	}
3706 
3707 	/*
3708 	 * Compute the length of the bpf header.  This is not necessarily
3709 	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
3710 	 * that the network layer header begins on a longword boundary (for
3711 	 * performance reasons and to alleviate alignment restrictions).
3712 	 */
3713 	bp_new->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;
3714 	bp_new->bif_exthdrlen = BPF_WORDALIGN(hdrlen +
3715 	    sizeof(struct bpf_hdr_ext)) - hdrlen;
3716 
3717 	/* Take a reference on the interface */
3718 	ifnet_reference(ifp);
3719 
3720 	lck_mtx_unlock(bpf_mlock);
3721 
3722 #ifndef __APPLE__
3723 	if (bootverbose) {
3724 		printf("bpf: %s attached\n", if_name(ifp));
3725 	}
3726 #endif
3727 
3728 	return 0;
3729 }
3730 
3731 /*
3732  * Detach bpf from an interface.  This involves detaching each descriptor
3733  * associated with the interface, and leaving bd_bif NULL.  Notify each
3734  * descriptor as it's detached so that any sleepers wake up and get
3735  * ENXIO.
3736  */
3737 void
bpfdetach(struct ifnet * ifp)3738 bpfdetach(struct ifnet *ifp)
3739 {
3740 	struct bpf_if   *bp, *bp_prev, *bp_next;
3741 	struct bpf_d    *d;
3742 
3743 	if (bpf_debug != 0) {
3744 		printf("%s: %s\n", __func__, if_name(ifp));
3745 	}
3746 
3747 	lck_mtx_lock(bpf_mlock);
3748 
3749 	/*
3750 	 * Build the list of devices attached to that interface
3751 	 * that we need to free while keeping the lock to maintain
3752 	 * the integrity of the interface list
3753 	 */
3754 	bp_prev = NULL;
3755 	for (bp = bpf_iflist; bp != NULL; bp = bp_next) {
3756 		bp_next = bp->bif_next;
3757 
3758 		if (ifp != bp->bif_ifp) {
3759 			bp_prev = bp;
3760 			continue;
3761 		}
3762 		/* Unlink from the interface list */
3763 		if (bp_prev) {
3764 			bp_prev->bif_next = bp->bif_next;
3765 		} else {
3766 			bpf_iflist = bp->bif_next;
3767 		}
3768 
3769 		/* Detach the devices attached to the interface */
3770 		while ((d = bp->bif_dlist) != NULL) {
3771 			/*
3772 			 * Take an extra reference to prevent the device
3773 			 * from being freed when bpf_detachd() releases
3774 			 * the reference for the interface list
3775 			 */
3776 			bpf_acquire_d(d);
3777 			bpf_detachd(d, 0);
3778 			bpf_wakeup(d);
3779 			bpf_release_d(d);
3780 		}
3781 		ifnet_release(ifp);
3782 	}
3783 
3784 	lck_mtx_unlock(bpf_mlock);
3785 }
3786 
3787 void
bpf_init(__unused void * unused)3788 bpf_init(__unused void *unused)
3789 {
3790 #ifdef __APPLE__
3791 	int     maj;
3792 
3793 	if (bpf_devsw_installed == 0) {
3794 		bpf_devsw_installed = 1;
3795 		maj = cdevsw_add(CDEV_MAJOR, &bpf_cdevsw);
3796 		if (maj == -1) {
3797 			bpf_devsw_installed = 0;
3798 			printf("bpf_init: failed to allocate a major number\n");
3799 			return;
3800 		}
3801 
3802 		for (int i = 0; i < NBPFILTER; i++) {
3803 			bpf_make_dev_t(maj);
3804 		}
3805 	}
3806 #else
3807 	cdevsw_add(&bpf_cdevsw);
3808 #endif
3809 }
3810 
3811 #ifndef __APPLE__
3812 SYSINIT(bpfdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE + CDEV_MAJOR, bpf_drvinit, NULL);
3813 #endif
3814 
3815 static int
3816 sysctl_bpf_maxbufsize SYSCTL_HANDLER_ARGS
3817 {
3818 #pragma unused(arg1, arg2)
3819 	int i, err;
3820 
3821 	i = bpf_maxbufsize;
3822 
3823 	err = sysctl_handle_int(oidp, &i, 0, req);
3824 	if (err != 0 || req->newptr == USER_ADDR_NULL) {
3825 		return err;
3826 	}
3827 
3828 	if (i < 0 || i > BPF_MAXSIZE_CAP) {
3829 		i = BPF_MAXSIZE_CAP;
3830 	}
3831 
3832 	bpf_maxbufsize = i;
3833 	return err;
3834 }
3835