xref: /xnu-8019.80.24/bsd/net/bpf.c (revision a325d9c4a84054e40bbe985afedcb50ab80993ea)
1 /*
2  * Copyright (c) 2000-2021 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * Copyright (c) 1990, 1991, 1993
30  *	The Regents of the University of California.  All rights reserved.
31  *
32  * This code is derived from the Stanford/CMU enet packet filter,
33  * (net/enet.c) distributed as part of 4.3BSD, and code contributed
34  * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
35  * Berkeley Laboratory.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. All advertising materials mentioning features or use of this software
46  *    must display the following acknowledgement:
47  *	This product includes software developed by the University of
48  *	California, Berkeley and its contributors.
49  * 4. Neither the name of the University nor the names of its contributors
50  *    may be used to endorse or promote products derived from this software
51  *    without specific prior written permission.
52  *
53  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63  * SUCH DAMAGE.
64  *
65  *	@(#)bpf.c	8.2 (Berkeley) 3/28/94
66  *
67  * $FreeBSD: src/sys/net/bpf.c,v 1.59.2.5 2001/01/05 04:49:09 jdp Exp $
68  */
69 /*
70  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
71  * support for mandatory and extensible security protections.  This notice
72  * is included in support of clause 2.2 (b) of the Apple Public License,
73  * Version 2.0.
74  */
75 
76 #include "bpf.h"
77 
78 #ifndef __GNUC__
79 #define inline
80 #else
81 #define inline __inline
82 #endif
83 
84 #include <sys/param.h>
85 #include <sys/systm.h>
86 #include <sys/conf.h>
87 #include <sys/malloc.h>
88 #include <sys/mbuf.h>
89 #include <sys/time.h>
90 #include <sys/proc.h>
91 #include <sys/signalvar.h>
92 #include <sys/filio.h>
93 #include <sys/sockio.h>
94 #include <sys/ttycom.h>
95 #include <sys/filedesc.h>
96 #include <sys/uio_internal.h>
97 #include <sys/file_internal.h>
98 #include <sys/event.h>
99 
100 #include <sys/poll.h>
101 
102 #include <sys/socket.h>
103 #include <sys/socketvar.h>
104 #include <sys/vnode.h>
105 
106 #include <net/if.h>
107 #include <net/bpf.h>
108 #include <net/bpfdesc.h>
109 
110 #include <netinet/in.h>
111 #include <netinet/ip.h>
112 #include <netinet/ip6.h>
113 #include <netinet/in_pcb.h>
114 #include <netinet/in_var.h>
115 #include <netinet/ip_var.h>
116 #include <netinet/tcp.h>
117 #include <netinet/tcp_var.h>
118 #include <netinet/udp.h>
119 #include <netinet/udp_var.h>
120 #include <netinet/if_ether.h>
121 #include <netinet/isakmp.h>
122 #include <netinet6/esp.h>
123 #include <sys/kernel.h>
124 #include <sys/sysctl.h>
125 #include <net/firewire.h>
126 
127 #include <miscfs/devfs/devfs.h>
128 #include <net/dlil.h>
129 #include <net/pktap.h>
130 
131 #include <kern/locks.h>
132 #include <kern/thread_call.h>
133 #include <libkern/section_keywords.h>
134 
135 #include <os/log.h>
136 
137 extern int tvtohz(struct timeval *);
138 
139 #define BPF_BUFSIZE 4096
140 #define UIOMOVE(cp, len, code, uio) uiomove(cp, len, uio)
141 
142 #define PRINET  26                      /* interruptible */
143 
144 #define ISAKMP_HDR_SIZE (sizeof(struct isakmp) + sizeof(struct isakmp_gen))
145 #define ESP_HDR_SIZE sizeof(struct newesp)
146 
147 typedef void (*pktcopyfunc_t)(const void *, void *, size_t);
148 
149 /*
150  * The default read buffer size is patchable.
151  */
152 static unsigned int bpf_bufsize = BPF_BUFSIZE;
153 SYSCTL_INT(_debug, OID_AUTO, bpf_bufsize, CTLFLAG_RW | CTLFLAG_LOCKED,
154     &bpf_bufsize, 0, "");
155 
156 static int sysctl_bpf_maxbufsize SYSCTL_HANDLER_ARGS;
157 extern const int copysize_limit_panic;
158 #define BPF_MAXSIZE_CAP (copysize_limit_panic >> 1)
159 __private_extern__ unsigned int bpf_maxbufsize = BPF_MAXBUFSIZE;
160 SYSCTL_PROC(_debug, OID_AUTO, bpf_maxbufsize, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
161     &bpf_maxbufsize, 0,
162     sysctl_bpf_maxbufsize, "I", "Default BPF max buffer size");
163 
164 static unsigned int bpf_maxdevices = 256;
165 SYSCTL_UINT(_debug, OID_AUTO, bpf_maxdevices, CTLFLAG_RW | CTLFLAG_LOCKED,
166     &bpf_maxdevices, 0, "");
167 /*
168  * bpf_wantpktap controls the defaul visibility of DLT_PKTAP
169  * For OS X is off by default so process need to use the ioctl BPF_WANT_PKTAP
170  * explicitly to be able to use DLT_PKTAP.
171  */
172 #if !XNU_TARGET_OS_OSX
173 static unsigned int bpf_wantpktap = 1;
174 #else /* XNU_TARGET_OS_OSX */
175 static unsigned int bpf_wantpktap = 0;
176 #endif /* XNU_TARGET_OS_OSX */
177 SYSCTL_UINT(_debug, OID_AUTO, bpf_wantpktap, CTLFLAG_RW | CTLFLAG_LOCKED,
178     &bpf_wantpktap, 0, "");
179 
180 static int bpf_debug = 0;
181 SYSCTL_INT(_debug, OID_AUTO, bpf_debug, CTLFLAG_RW | CTLFLAG_LOCKED,
182     &bpf_debug, 0, "");
183 
184 static unsigned long bpf_trunc_overflow = 0;
185 SYSCTL_ULONG(_debug, OID_AUTO, bpf_trunc_overflow, CTLFLAG_RD | CTLFLAG_LOCKED,
186     &bpf_trunc_overflow, "");
187 
/*
 * bpf_iflist: head of the list of bpf interface records; each record
 *             refers to an ifnet.
 * bpf_dtab:   table of descriptor pointers indexed by minor device number.
 */
static struct bpf_if    *bpf_iflist;
#ifdef __APPLE__
/*
 * BSD keeps the bpf_d inside its dev_t, which is a structure there.
 * Here dev_t is a plain integer, so descriptors live in a separate
 * table indexed by minor device number.
 *
 * A slot bpf_dtab[n] is in one of three states:
 *   NULL               device n is not open
 *   BPF_DEV_RESERVED   device n is in the middle of an open or a close
 *   anything else      device n is open; the value points to its bpf_d
 */
#define BPF_DEV_RESERVED ((struct bpf_d *)(uintptr_t)1)
static struct bpf_d     **bpf_dtab = NULL;
static unsigned int     bpf_dtab_size = 0;      /* slots allocated in bpf_dtab */
static unsigned int     nbpfilter = 0;          /* device nodes created so far */

/* One mutex, bpf_mlock, serializes access to the state above. */
static LCK_GRP_DECLARE(bpf_mlock_grp, "bpf");
static LCK_MTX_DECLARE(bpf_mlock_data, &bpf_mlock_grp);
static lck_mtx_t *const bpf_mlock = &bpf_mlock_data;

#endif /* __APPLE__ */
214 
215 static int      bpf_allocbufs(struct bpf_d *);
216 static errno_t  bpf_attachd(struct bpf_d *d, struct bpf_if *bp);
217 static int      bpf_detachd(struct bpf_d *d, int);
218 static void     bpf_freed(struct bpf_d *);
219 static int      bpf_movein(struct uio *, int,
220     struct mbuf **, struct sockaddr *, int *);
221 static int      bpf_setif(struct bpf_d *, ifnet_t ifp, bool, bool);
222 static void     bpf_timed_out(void *, void *);
223 static void     bpf_wakeup(struct bpf_d *);
224 static uint32_t get_pkt_trunc_len(struct bpf_packet *);
225 static void     catchpacket(struct bpf_d *, struct bpf_packet *, u_int, int);
226 static void     reset_d(struct bpf_d *);
227 static int      bpf_setf(struct bpf_d *, u_int, user_addr_t, u_long);
228 static int      bpf_getdltlist(struct bpf_d *, caddr_t, struct proc *);
229 static int      bpf_setdlt(struct bpf_d *, u_int);
230 static int      bpf_set_traffic_class(struct bpf_d *, int);
231 static void     bpf_set_packet_service_class(struct mbuf *, int);
232 
233 static void     bpf_acquire_d(struct bpf_d *);
234 static void     bpf_release_d(struct bpf_d *);
235 
236 static  int bpf_devsw_installed;
237 
238 void bpf_init(void *unused);
239 static int bpf_tap_callback(struct ifnet *ifp, struct mbuf *m);
240 
241 /*
242  * Darwin differs from BSD here, the following are static
243  * on BSD and not static on Darwin.
244  */
245 d_open_t            bpfopen;
246 d_close_t           bpfclose;
247 d_read_t            bpfread;
248 d_write_t           bpfwrite;
249 ioctl_fcn_t         bpfioctl;
250 select_fcn_t        bpfselect;
251 
252 /* Darwin's cdevsw struct differs slightly from BSDs */
253 #define CDEV_MAJOR 23
254 static const struct cdevsw bpf_cdevsw = {
255 	.d_open       = bpfopen,
256 	.d_close      = bpfclose,
257 	.d_read       = bpfread,
258 	.d_write      = bpfwrite,
259 	.d_ioctl      = bpfioctl,
260 	.d_stop       = eno_stop,
261 	.d_reset      = eno_reset,
262 	.d_ttys       = NULL,
263 	.d_select     = bpfselect,
264 	.d_mmap       = eno_mmap,
265 	.d_strategy   = eno_strat,
266 	.d_reserved_1 = eno_getc,
267 	.d_reserved_2 = eno_putc,
268 	.d_type       = 0
269 };
270 
271 #define SOCKADDR_HDR_LEN           offsetof(struct sockaddr, sa_data)
272 
273 static int
bpf_movein(struct uio * uio,int linktype,struct mbuf ** mp,struct sockaddr * sockp,int * datlen)274 bpf_movein(struct uio *uio, int linktype, struct mbuf **mp,
275     struct sockaddr *sockp, int *datlen)
276 {
277 	struct mbuf *m;
278 	int error;
279 	int len;
280 	uint8_t sa_family;
281 	int hlen;
282 
283 	switch (linktype) {
284 #if SLIP
285 	case DLT_SLIP:
286 		sa_family = AF_INET;
287 		hlen = 0;
288 		break;
289 #endif /* SLIP */
290 
291 	case DLT_EN10MB:
292 		sa_family = AF_UNSPEC;
293 		/* XXX Would MAXLINKHDR be better? */
294 		hlen = sizeof(struct ether_header);
295 		break;
296 
297 #if FDDI
298 	case DLT_FDDI:
299 #if defined(__FreeBSD__) || defined(__bsdi__)
300 		sa_family = AF_IMPLINK;
301 		hlen = 0;
302 #else
303 		sa_family = AF_UNSPEC;
304 		/* XXX 4(FORMAC)+6(dst)+6(src)+3(LLC)+5(SNAP) */
305 		hlen = 24;
306 #endif
307 		break;
308 #endif /* FDDI */
309 
310 	case DLT_RAW:
311 	case DLT_NULL:
312 		sa_family = AF_UNSPEC;
313 		hlen = 0;
314 		break;
315 
316 #ifdef __FreeBSD__
317 	case DLT_ATM_RFC1483:
318 		/*
319 		 * en atm driver requires 4-byte atm pseudo header.
320 		 * though it isn't standard, vpi:vci needs to be
321 		 * specified anyway.
322 		 */
323 		sa_family = AF_UNSPEC;
324 		hlen = 12;      /* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
325 		break;
326 #endif
327 
328 	case DLT_PPP:
329 		sa_family = AF_UNSPEC;
330 		hlen = 4;       /* This should match PPP_HDRLEN */
331 		break;
332 
333 	case DLT_APPLE_IP_OVER_IEEE1394:
334 		sa_family = AF_UNSPEC;
335 		hlen = sizeof(struct firewire_header);
336 		break;
337 
338 	case DLT_IEEE802_11:            /* IEEE 802.11 wireless */
339 		sa_family = AF_IEEE80211;
340 		hlen = 0;
341 		break;
342 
343 	case DLT_IEEE802_11_RADIO:
344 		sa_family = AF_IEEE80211;
345 		hlen = 0;
346 		break;
347 
348 	default:
349 		return EIO;
350 	}
351 
352 	// LP64todo - fix this!
353 	len = (int)uio_resid(uio);
354 	if (len < hlen || (unsigned)len > MCLBYTES || len - hlen > MCLBYTES) {
355 		return EIO;
356 	}
357 
358 	*datlen = len - hlen;
359 
360 	if (sockp) {
361 		/*
362 		 * Build a sockaddr based on the data link layer type.
363 		 * We do this at this level because the ethernet header
364 		 * is copied directly into the data field of the sockaddr.
365 		 * In the case of SLIP, there is no header and the packet
366 		 * is forwarded as is.
367 		 * Also, we are careful to leave room at the front of the mbuf
368 		 * for the link level header.
369 		 */
370 		if ((hlen + SOCKADDR_HDR_LEN) > sockp->sa_len) {
371 			return EIO;
372 		}
373 		sockp->sa_family = sa_family;
374 	} else {
375 		/*
376 		 * We're directly sending the packet data supplied by
377 		 * the user; we don't need to make room for the link
378 		 * header, and don't need the header length value any
379 		 * more, so set it to 0.
380 		 */
381 		hlen = 0;
382 	}
383 
384 	MGETHDR(m, M_WAIT, MT_DATA);
385 	if (m == 0) {
386 		return ENOBUFS;
387 	}
388 	if ((unsigned)len > MHLEN) {
389 		MCLGET(m, M_WAIT);
390 		if ((m->m_flags & M_EXT) == 0) {
391 			error = ENOBUFS;
392 			goto bad;
393 		}
394 	}
395 	m->m_pkthdr.len = m->m_len = len;
396 	m->m_pkthdr.rcvif = NULL;
397 	*mp = m;
398 
399 	/*
400 	 * Make room for link header.
401 	 */
402 	if (hlen != 0) {
403 		m->m_pkthdr.len -= hlen;
404 		m->m_len -= hlen;
405 		m->m_data += hlen; /* XXX */
406 		error = UIOMOVE((caddr_t)sockp->sa_data, hlen, UIO_WRITE, uio);
407 		if (error) {
408 			goto bad;
409 		}
410 	}
411 	error = UIOMOVE(mtod(m, caddr_t), len - hlen, UIO_WRITE, uio);
412 	if (error) {
413 		goto bad;
414 	}
415 
416 	/* Check for multicast destination */
417 	switch (linktype) {
418 	case DLT_EN10MB: {
419 		struct ether_header *eh;
420 
421 		eh = mtod(m, struct ether_header *);
422 		if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
423 			if (_ether_cmp(etherbroadcastaddr,
424 			    eh->ether_dhost) == 0) {
425 				m->m_flags |= M_BCAST;
426 			} else {
427 				m->m_flags |= M_MCAST;
428 			}
429 		}
430 		break;
431 	}
432 	}
433 
434 	return 0;
435 bad:
436 	m_freem(m);
437 	return error;
438 }
439 
440 #ifdef __APPLE__
441 
442 /*
443  * The dynamic addition of a new device node must block all processes that
444  * are opening the last device so that no process will get an unexpected
445  * ENOENT
446  */
447 static void
bpf_make_dev_t(int maj)448 bpf_make_dev_t(int maj)
449 {
450 	static int              bpf_growing = 0;
451 	unsigned int    cur_size = nbpfilter, i;
452 
453 	if (nbpfilter >= bpf_maxdevices) {
454 		return;
455 	}
456 
457 	while (bpf_growing) {
458 		/* Wait until new device has been created */
459 		(void) tsleep((caddr_t)&bpf_growing, PZERO, "bpf_growing", 0);
460 	}
461 	if (nbpfilter > cur_size) {
462 		/* other thread grew it already */
463 		return;
464 	}
465 	bpf_growing = 1;
466 
467 	/* need to grow bpf_dtab first */
468 	if (nbpfilter == bpf_dtab_size) {
469 		int new_dtab_size;
470 		struct bpf_d **new_dtab = NULL;
471 		struct bpf_d **old_dtab = NULL;
472 
473 		new_dtab_size = bpf_dtab_size + NBPFILTER;
474 		new_dtab = (struct bpf_d **)_MALLOC(
475 			sizeof(struct bpf_d *) * new_dtab_size, M_DEVBUF, M_WAIT);
476 		if (new_dtab == 0) {
477 			printf("bpf_make_dev_t: malloc bpf_dtab failed\n");
478 			goto done;
479 		}
480 		if (bpf_dtab) {
481 			bcopy(bpf_dtab, new_dtab,
482 			    sizeof(struct bpf_d *) * bpf_dtab_size);
483 		}
484 		bzero(new_dtab + bpf_dtab_size,
485 		    sizeof(struct bpf_d *) * NBPFILTER);
486 		old_dtab = bpf_dtab;
487 		bpf_dtab = new_dtab;
488 		bpf_dtab_size = new_dtab_size;
489 		if (old_dtab != NULL) {
490 			_FREE(old_dtab, M_DEVBUF);
491 		}
492 	}
493 	i = nbpfilter++;
494 	(void) devfs_make_node(makedev(maj, i),
495 	    DEVFS_CHAR, UID_ROOT, GID_WHEEL, 0600,
496 	    "bpf%d", i);
497 done:
498 	bpf_growing = 0;
499 	wakeup((caddr_t)&bpf_growing);
500 }
501 
502 #endif
503 
504 /*
505  * Attach file to the bpf interface, i.e. make d listen on bp.
506  */
507 static errno_t
bpf_attachd(struct bpf_d * d,struct bpf_if * bp)508 bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
509 {
510 	int first = bp->bif_dlist == NULL;
511 	int     error = 0;
512 
513 	/*
514 	 * Point d at bp, and add d to the interface's list of listeners.
515 	 * Finally, point the driver's bpf cookie at the interface so
516 	 * it will divert packets to bpf.
517 	 */
518 	d->bd_bif = bp;
519 	d->bd_next = bp->bif_dlist;
520 	bp->bif_dlist = d;
521 
522 	/*
523 	 * Take a reference on the device even if an error is returned
524 	 * because we keep the device in the interface's list of listeners
525 	 */
526 	bpf_acquire_d(d);
527 
528 	if (first) {
529 		/* Find the default bpf entry for this ifp */
530 		if (bp->bif_ifp->if_bpf == NULL) {
531 			struct bpf_if   *tmp, *primary = NULL;
532 
533 			for (tmp = bpf_iflist; tmp; tmp = tmp->bif_next) {
534 				if (tmp->bif_ifp == bp->bif_ifp) {
535 					primary = tmp;
536 					break;
537 				}
538 			}
539 			bp->bif_ifp->if_bpf = primary;
540 		}
541 		/* Only call dlil_set_bpf_tap for primary dlt */
542 		if (bp->bif_ifp->if_bpf == bp) {
543 			dlil_set_bpf_tap(bp->bif_ifp, BPF_TAP_INPUT_OUTPUT,
544 			    bpf_tap_callback);
545 		}
546 
547 		if (bp->bif_tap != NULL) {
548 			error = bp->bif_tap(bp->bif_ifp, bp->bif_dlt,
549 			    BPF_TAP_INPUT_OUTPUT);
550 		}
551 	}
552 
553 	/*
554 	 * Reset the detach flags in case we previously detached an interface
555 	 */
556 	d->bd_flags &= ~(BPF_DETACHING | BPF_DETACHED);
557 
558 	if (bp->bif_dlt == DLT_PKTAP) {
559 		d->bd_flags |= BPF_FINALIZE_PKTAP;
560 	} else {
561 		d->bd_flags &= ~BPF_FINALIZE_PKTAP;
562 	}
563 	return error;
564 }
565 
566 /*
567  * Detach a file from its interface.
568  *
569  * Return 1 if was closed by some thread, 0 otherwise
570  */
571 static int
bpf_detachd(struct bpf_d * d,int closing)572 bpf_detachd(struct bpf_d *d, int closing)
573 {
574 	struct bpf_d **p;
575 	struct bpf_if *bp;
576 	struct ifnet  *ifp;
577 
578 	int bpf_closed = d->bd_flags & BPF_CLOSING;
579 	/*
580 	 * Some other thread already detached
581 	 */
582 	if ((d->bd_flags & (BPF_DETACHED | BPF_DETACHING)) != 0) {
583 		goto done;
584 	}
585 	/*
586 	 * This thread is doing the detach
587 	 */
588 	d->bd_flags |= BPF_DETACHING;
589 
590 	ifp = d->bd_bif->bif_ifp;
591 	bp = d->bd_bif;
592 
593 	if (bpf_debug != 0) {
594 		printf("%s: %llx %s%s\n",
595 		    __func__, (uint64_t)VM_KERNEL_ADDRPERM(d),
596 		    if_name(ifp), closing ? " closing" : "");
597 	}
598 
599 	/* Remove d from the interface's descriptor list. */
600 	p = &bp->bif_dlist;
601 	while (*p != d) {
602 		p = &(*p)->bd_next;
603 		if (*p == 0) {
604 			panic("bpf_detachd: descriptor not in list");
605 		}
606 	}
607 	*p = (*p)->bd_next;
608 	if (bp->bif_dlist == 0) {
609 		/*
610 		 * Let the driver know that there are no more listeners.
611 		 */
612 		/* Only call dlil_set_bpf_tap for primary dlt */
613 		if (bp->bif_ifp->if_bpf == bp) {
614 			dlil_set_bpf_tap(ifp, BPF_TAP_DISABLE, NULL);
615 		}
616 		if (bp->bif_tap) {
617 			bp->bif_tap(ifp, bp->bif_dlt, BPF_TAP_DISABLE);
618 		}
619 
620 		for (bp = bpf_iflist; bp; bp = bp->bif_next) {
621 			if (bp->bif_ifp == ifp && bp->bif_dlist != 0) {
622 				break;
623 			}
624 		}
625 		if (bp == NULL) {
626 			ifp->if_bpf = NULL;
627 		}
628 	}
629 	d->bd_bif = NULL;
630 	/*
631 	 * Check if this descriptor had requested promiscuous mode.
632 	 * If so, turn it off.
633 	 */
634 	if (d->bd_promisc) {
635 		d->bd_promisc = 0;
636 		lck_mtx_unlock(bpf_mlock);
637 		if (ifnet_set_promiscuous(ifp, 0)) {
638 			/*
639 			 * Something is really wrong if we were able to put
640 			 * the driver into promiscuous mode, but can't
641 			 * take it out.
642 			 * Most likely the network interface is gone.
643 			 */
644 			printf("%s: ifnet_set_promiscuous failed\n", __func__);
645 		}
646 		lck_mtx_lock(bpf_mlock);
647 	}
648 
649 	/*
650 	 * Wake up other thread that are waiting for this thread to finish
651 	 * detaching
652 	 */
653 	d->bd_flags &= ~BPF_DETACHING;
654 	d->bd_flags |= BPF_DETACHED;
655 
656 	/* Refresh the local variable as d could have been modified */
657 	bpf_closed = d->bd_flags & BPF_CLOSING;
658 	/*
659 	 * Note that We've kept the reference because we may have dropped
660 	 * the lock when turning off promiscuous mode
661 	 */
662 	bpf_release_d(d);
663 
664 done:
665 	/*
666 	 * When closing makes sure no other thread refer to the bpf_d
667 	 */
668 	if (bpf_debug != 0) {
669 		printf("%s: %llx done\n",
670 		    __func__, (uint64_t)VM_KERNEL_ADDRPERM(d));
671 	}
672 	/*
673 	 * Let the caller know the bpf_d is closed
674 	 */
675 	if (bpf_closed) {
676 		return 1;
677 	} else {
678 		return 0;
679 	}
680 }
681 
682 /*
683  * Start asynchronous timer, if necessary.
684  * Must be called with bpf_mlock held.
685  */
686 static void
bpf_start_timer(struct bpf_d * d)687 bpf_start_timer(struct bpf_d *d)
688 {
689 	uint64_t deadline;
690 	struct timeval tv;
691 
692 	if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
693 		tv.tv_sec = d->bd_rtout / hz;
694 		tv.tv_usec = (d->bd_rtout % hz) * tick;
695 
696 		clock_interval_to_deadline(
697 			(uint32_t)tv.tv_sec * USEC_PER_SEC + tv.tv_usec,
698 			NSEC_PER_USEC, &deadline);
699 		/*
700 		 * The state is BPF_IDLE, so the timer hasn't
701 		 * been started yet, and hasn't gone off yet;
702 		 * there is no thread call scheduled, so this
703 		 * won't change the schedule.
704 		 *
705 		 * XXX - what if, by the time it gets entered,
706 		 * the deadline has already passed?
707 		 */
708 		thread_call_enter_delayed(d->bd_thread_call, deadline);
709 		d->bd_state = BPF_WAITING;
710 	}
711 }
712 
713 /*
714  * Cancel asynchronous timer.
715  * Must be called with bpf_mlock held.
716  */
717 static boolean_t
bpf_stop_timer(struct bpf_d * d)718 bpf_stop_timer(struct bpf_d *d)
719 {
720 	/*
721 	 * If the timer has already gone off, this does nothing.
722 	 * Our caller is expected to set d->bd_state to BPF_IDLE,
723 	 * with the bpf_mlock, after we are called. bpf_timed_out()
724 	 * also grabs bpf_mlock, so, if the timer has gone off and
725 	 * bpf_timed_out() hasn't finished, it's waiting for the
726 	 * lock; when this thread releases the lock, it will
727 	 * find the state is BPF_IDLE, and just release the
728 	 * lock and return.
729 	 */
730 	return thread_call_cancel(d->bd_thread_call);
731 }
732 
733 void
bpf_acquire_d(struct bpf_d * d)734 bpf_acquire_d(struct bpf_d *d)
735 {
736 	void *lr_saved =  __builtin_return_address(0);
737 
738 	LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);
739 
740 	d->bd_refcnt += 1;
741 
742 	d->bd_ref_lr[d->bd_next_ref_lr] = lr_saved;
743 	d->bd_next_ref_lr = (d->bd_next_ref_lr + 1) % BPF_REF_HIST;
744 }
745 
746 void
bpf_release_d(struct bpf_d * d)747 bpf_release_d(struct bpf_d *d)
748 {
749 	void *lr_saved =  __builtin_return_address(0);
750 
751 	LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);
752 
753 	if (d->bd_refcnt <= 0) {
754 		panic("%s: %p refcnt <= 0", __func__, d);
755 	}
756 
757 	d->bd_refcnt -= 1;
758 
759 	d->bd_unref_lr[d->bd_next_unref_lr] = lr_saved;
760 	d->bd_next_unref_lr = (d->bd_next_unref_lr + 1) % BPF_REF_HIST;
761 
762 	if (d->bd_refcnt == 0) {
763 		/* Assert the device is detached */
764 		if ((d->bd_flags & BPF_DETACHED) == 0) {
765 			panic("%s: %p BPF_DETACHED not set", __func__, d);
766 		}
767 
768 		kfree_type(struct bpf_d, d);
769 	}
770 }
771 
772 /*
773  * Open ethernet device.  Returns ENXIO for illegal minor device number,
774  * EBUSY if file is open by another process.
775  */
776 /* ARGSUSED */
777 int
bpfopen(dev_t dev,int flags,__unused int fmt,struct proc * p)778 bpfopen(dev_t dev, int flags, __unused int fmt,
779     struct proc *p)
780 {
781 	struct bpf_d *d;
782 
783 	lck_mtx_lock(bpf_mlock);
784 	if ((unsigned int) minor(dev) >= nbpfilter) {
785 		lck_mtx_unlock(bpf_mlock);
786 		return ENXIO;
787 	}
788 	/*
789 	 * New device nodes are created on demand when opening the last one.
790 	 * The programming model is for processes to loop on the minor starting
791 	 * at 0 as long as EBUSY is returned. The loop stops when either the
792 	 * open succeeds or an error other that EBUSY is returned. That means
793 	 * that bpf_make_dev_t() must block all processes that are opening the
794 	 * last  node. If not all processes are blocked, they could unexpectedly
795 	 * get ENOENT and abort their opening loop.
796 	 */
797 	if ((unsigned int) minor(dev) == (nbpfilter - 1)) {
798 		bpf_make_dev_t(major(dev));
799 	}
800 
801 	/*
802 	 * Each minor can be opened by only one process.  If the requested
803 	 * minor is in use, return EBUSY.
804 	 *
805 	 * Important: bpfopen() and bpfclose() have to check and set the status
806 	 * of a device in the same lockin context otherwise the device may be
807 	 * leaked because the vnode use count will be unpextectly greater than 1
808 	 * when close() is called.
809 	 */
810 	if (bpf_dtab[minor(dev)] == NULL) {
811 		/* Reserve while opening */
812 		bpf_dtab[minor(dev)] = BPF_DEV_RESERVED;
813 	} else {
814 		lck_mtx_unlock(bpf_mlock);
815 		return EBUSY;
816 	}
817 	d = kalloc_type(struct bpf_d, M_WAIT | Z_ZERO);
818 	if (d == NULL) {
819 		/* this really is a catastrophic failure */
820 		printf("bpfopen: malloc bpf_d failed\n");
821 		bpf_dtab[minor(dev)] = NULL;
822 		lck_mtx_unlock(bpf_mlock);
823 		return ENOMEM;
824 	}
825 
826 	/* Mark "in use" and do most initialization. */
827 	bpf_acquire_d(d);
828 	d->bd_bufsize = bpf_bufsize;
829 	d->bd_sig = SIGIO;
830 	d->bd_seesent = 1;
831 	d->bd_oflags = flags;
832 	d->bd_state = BPF_IDLE;
833 	d->bd_traffic_class = SO_TC_BE;
834 	d->bd_flags |= BPF_DETACHED;
835 	if (bpf_wantpktap) {
836 		d->bd_flags |= BPF_WANT_PKTAP;
837 	} else {
838 		d->bd_flags &= ~BPF_WANT_PKTAP;
839 	}
840 	d->bd_thread_call = thread_call_allocate(bpf_timed_out, d);
841 	if (d->bd_thread_call == NULL) {
842 		printf("bpfopen: malloc thread call failed\n");
843 		bpf_dtab[minor(dev)] = NULL;
844 		bpf_release_d(d);
845 		lck_mtx_unlock(bpf_mlock);
846 
847 		return ENOMEM;
848 	}
849 	d->bd_opened_by = p;
850 	uuid_generate(d->bd_uuid);
851 
852 	bpf_dtab[minor(dev)] = d; /* Mark opened */
853 	lck_mtx_unlock(bpf_mlock);
854 
855 	return 0;
856 }
857 
858 /*
859  * Close the descriptor by detaching it from its interface,
860  * deallocating its buffers, and marking it free.
861  */
862 /* ARGSUSED */
863 int
bpfclose(dev_t dev,__unused int flags,__unused int fmt,__unused struct proc * p)864 bpfclose(dev_t dev, __unused int flags, __unused int fmt,
865     __unused struct proc *p)
866 {
867 	struct bpf_d *d;
868 
869 	/* Take BPF lock to ensure no other thread is using the device */
870 	lck_mtx_lock(bpf_mlock);
871 
872 	d = bpf_dtab[minor(dev)];
873 	if (d == NULL || d == BPF_DEV_RESERVED) {
874 		lck_mtx_unlock(bpf_mlock);
875 		return ENXIO;
876 	}
877 
878 	/*
879 	 * Other threads may call bpd_detachd() if we drop the bpf_mlock
880 	 */
881 	d->bd_flags |= BPF_CLOSING;
882 
883 	if (bpf_debug != 0) {
884 		printf("%s: %llx\n",
885 		    __func__, (uint64_t)VM_KERNEL_ADDRPERM(d));
886 	}
887 
888 	bpf_dtab[minor(dev)] = BPF_DEV_RESERVED; /* Reserve while closing */
889 
890 	/*
891 	 * Deal with any in-progress timeouts.
892 	 */
893 	switch (d->bd_state) {
894 	case BPF_IDLE:
895 		/*
896 		 * Not waiting for a timeout, and no timeout happened.
897 		 */
898 		break;
899 
900 	case BPF_WAITING:
901 		/*
902 		 * Waiting for a timeout.
903 		 * Cancel any timer that has yet to go off,
904 		 * and mark the state as "closing".
905 		 * Then drop the lock to allow any timers that
906 		 * *have* gone off to run to completion, and wait
907 		 * for them to finish.
908 		 */
909 		if (!bpf_stop_timer(d)) {
910 			/*
911 			 * There was no pending call, so the call must
912 			 * have been in progress. Wait for the call to
913 			 * complete; we have to drop the lock while
914 			 * waiting. to let the in-progrss call complete
915 			 */
916 			d->bd_state = BPF_DRAINING;
917 			while (d->bd_state == BPF_DRAINING) {
918 				msleep((caddr_t)d, bpf_mlock, PRINET,
919 				    "bpfdraining", NULL);
920 			}
921 		}
922 		d->bd_state = BPF_IDLE;
923 		break;
924 
925 	case BPF_TIMED_OUT:
926 		/*
927 		 * Timer went off, and the timeout routine finished.
928 		 */
929 		d->bd_state = BPF_IDLE;
930 		break;
931 
932 	case BPF_DRAINING:
933 		/*
934 		 * Another thread is blocked on a close waiting for
935 		 * a timeout to finish.
936 		 * This "shouldn't happen", as the first thread to enter
937 		 * bpfclose() will set bpf_dtab[minor(dev)] to 1, and
938 		 * all subsequent threads should see that and fail with
939 		 * ENXIO.
940 		 */
941 		panic("Two threads blocked in a BPF close");
942 		break;
943 	}
944 
945 	if (d->bd_bif) {
946 		bpf_detachd(d, 1);
947 	}
948 	selthreadclear(&d->bd_sel);
949 	thread_call_free(d->bd_thread_call);
950 
951 	while (d->bd_hbuf_read != 0) {
952 		msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
953 	}
954 
955 	bpf_freed(d);
956 
957 	/* Mark free in same context as bpfopen comes to check */
958 	bpf_dtab[minor(dev)] = NULL;                    /* Mark closed */
959 
960 	bpf_release_d(d);
961 
962 	lck_mtx_unlock(bpf_mlock);
963 
964 	return 0;
965 }
966 
967 #define BPF_SLEEP bpf_sleep
968 
969 static int
bpf_sleep(struct bpf_d * d,int pri,const char * wmesg,int timo)970 bpf_sleep(struct bpf_d *d, int pri, const char *wmesg, int timo)
971 {
972 	u_int64_t abstime = 0;
973 
974 	if (timo != 0) {
975 		clock_interval_to_deadline(timo, NSEC_PER_SEC / hz, &abstime);
976 	}
977 
978 	return msleep1((caddr_t)d, bpf_mlock, pri, wmesg, abstime);
979 }
980 
981 static void
bpf_finalize_pktap(struct bpf_hdr * hp,struct pktap_header * pktaphdr)982 bpf_finalize_pktap(struct bpf_hdr *hp, struct pktap_header *pktaphdr)
983 {
984 	if (pktaphdr->pth_flags & PTH_FLAG_V2_HDR) {
985 		struct pktap_v2_hdr *pktap_v2_hdr;
986 
987 		pktap_v2_hdr = (struct pktap_v2_hdr *)pktaphdr;
988 
989 		if (pktap_v2_hdr->pth_flags & PTH_FLAG_DELAY_PKTAP) {
990 			pktap_v2_finalize_proc_info(pktap_v2_hdr);
991 		}
992 	} else {
993 		if (pktaphdr->pth_flags & PTH_FLAG_DELAY_PKTAP) {
994 			pktap_finalize_proc_info(pktaphdr);
995 		}
996 
997 		if (pktaphdr->pth_flags & PTH_FLAG_TSTAMP) {
998 			hp->bh_tstamp.tv_sec = pktaphdr->pth_tstamp.tv_sec;
999 			hp->bh_tstamp.tv_usec = pktaphdr->pth_tstamp.tv_usec;
1000 		}
1001 	}
1002 }
1003 
/*
 * Rotate the packet buffers in descriptor d.  Move the store buffer
 * into the hold slot, and the free buffer into the store slot.
 * Zero the length and packet count of the new store buffer and leave
 * the free slot empty.
 *
 * Panics if a read of the hold buffer is in progress (bd_hbuf_read set).
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and is safe in an unbraced if/else; invoke it with a
 * trailing semicolon.  The argument is evaluated more than once, so it
 * must not have side effects.
 */
#define ROTATE_BUFFERS(d) do { \
	if ((d)->bd_hbuf_read != 0) { \
	        panic("rotating bpf buffers during read"); \
	} \
	(d)->bd_hbuf = (d)->bd_sbuf; \
	(d)->bd_hlen = (d)->bd_slen; \
	(d)->bd_hcnt = (d)->bd_scnt; \
	(d)->bd_sbuf = (d)->bd_fbuf; \
	(d)->bd_slen = 0; \
	(d)->bd_scnt = 0; \
	(d)->bd_fbuf = NULL; \
} while (0)
1019 /*
1020  *  bpfread - read next chunk of packets from buffers
1021  */
1022 int
bpfread(dev_t dev,struct uio * uio,int ioflag)1023 bpfread(dev_t dev, struct uio *uio, int ioflag)
1024 {
1025 	struct bpf_d *d;
1026 	caddr_t hbuf;
1027 	int timed_out, hbuf_len;
1028 	int error;
1029 	int flags;
1030 
1031 	lck_mtx_lock(bpf_mlock);
1032 
1033 	d = bpf_dtab[minor(dev)];
1034 	if (d == NULL || d == BPF_DEV_RESERVED ||
1035 	    (d->bd_flags & BPF_CLOSING) != 0) {
1036 		lck_mtx_unlock(bpf_mlock);
1037 		return ENXIO;
1038 	}
1039 
1040 	bpf_acquire_d(d);
1041 
1042 	/*
1043 	 * Restrict application to use a buffer the same size as
1044 	 * as kernel buffers.
1045 	 */
1046 	if (uio_resid(uio) != d->bd_bufsize) {
1047 		bpf_release_d(d);
1048 		lck_mtx_unlock(bpf_mlock);
1049 		return EINVAL;
1050 	}
1051 
1052 	if (d->bd_state == BPF_WAITING) {
1053 		bpf_stop_timer(d);
1054 	}
1055 
1056 	timed_out = (d->bd_state == BPF_TIMED_OUT);
1057 	d->bd_state = BPF_IDLE;
1058 
1059 	while (d->bd_hbuf_read != 0) {
1060 		msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
1061 	}
1062 
1063 	if ((d->bd_flags & BPF_CLOSING) != 0) {
1064 		bpf_release_d(d);
1065 		lck_mtx_unlock(bpf_mlock);
1066 		return ENXIO;
1067 	}
1068 	/*
1069 	 * If the hold buffer is empty, then do a timed sleep, which
1070 	 * ends when the timeout expires or when enough packets
1071 	 * have arrived to fill the store buffer.
1072 	 */
1073 	while (d->bd_hbuf == 0) {
1074 		if ((d->bd_immediate || timed_out || (ioflag & IO_NDELAY)) &&
1075 		    d->bd_slen != 0) {
1076 			/*
1077 			 * We're in immediate mode, or are reading
1078 			 * in non-blocking mode, or a timer was
1079 			 * started before the read (e.g., by select()
1080 			 * or poll()) and has expired and a packet(s)
1081 			 * either arrived since the previous
1082 			 * read or arrived while we were asleep.
1083 			 * Rotate the buffers and return what's here.
1084 			 */
1085 			ROTATE_BUFFERS(d);
1086 			break;
1087 		}
1088 
1089 		/*
1090 		 * No data is available, check to see if the bpf device
1091 		 * is still pointed at a real interface.  If not, return
1092 		 * ENXIO so that the userland process knows to rebind
1093 		 * it before using it again.
1094 		 */
1095 		if (d->bd_bif == NULL) {
1096 			bpf_release_d(d);
1097 			lck_mtx_unlock(bpf_mlock);
1098 			return ENXIO;
1099 		}
1100 		if (ioflag & IO_NDELAY) {
1101 			bpf_release_d(d);
1102 			lck_mtx_unlock(bpf_mlock);
1103 			return EWOULDBLOCK;
1104 		}
1105 		error = BPF_SLEEP(d, PRINET | PCATCH, "bpf", d->bd_rtout);
1106 		/*
1107 		 * Make sure device is still opened
1108 		 */
1109 		if ((d->bd_flags & BPF_CLOSING) != 0) {
1110 			bpf_release_d(d);
1111 			lck_mtx_unlock(bpf_mlock);
1112 			return ENXIO;
1113 		}
1114 
1115 		while (d->bd_hbuf_read != 0) {
1116 			msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading",
1117 			    NULL);
1118 		}
1119 
1120 		if ((d->bd_flags & BPF_CLOSING) != 0) {
1121 			bpf_release_d(d);
1122 			lck_mtx_unlock(bpf_mlock);
1123 			return ENXIO;
1124 		}
1125 
1126 		if (error == EINTR || error == ERESTART) {
1127 			if (d->bd_hbuf != NULL) {
1128 				/*
1129 				 * Because we msleep, the hold buffer might
1130 				 * be filled when we wake up.  Avoid rotating
1131 				 * in this case.
1132 				 */
1133 				break;
1134 			}
1135 			if (d->bd_slen != 0) {
1136 				/*
1137 				 * Sometimes we may be interrupted often and
1138 				 * the sleep above will not timeout.
1139 				 * Regardless, we should rotate the buffers
1140 				 * if there's any new data pending and
1141 				 * return it.
1142 				 */
1143 				ROTATE_BUFFERS(d);
1144 				break;
1145 			}
1146 			bpf_release_d(d);
1147 			lck_mtx_unlock(bpf_mlock);
1148 			if (error == ERESTART) {
1149 				printf("%s: %llx ERESTART to EINTR\n",
1150 				    __func__, (uint64_t)VM_KERNEL_ADDRPERM(d));
1151 				error = EINTR;
1152 			}
1153 			return error;
1154 		}
1155 		if (error == EWOULDBLOCK) {
1156 			/*
1157 			 * On a timeout, return what's in the buffer,
1158 			 * which may be nothing.  If there is something
1159 			 * in the store buffer, we can rotate the buffers.
1160 			 */
1161 			if (d->bd_hbuf) {
1162 				/*
1163 				 * We filled up the buffer in between
1164 				 * getting the timeout and arriving
1165 				 * here, so we don't need to rotate.
1166 				 */
1167 				break;
1168 			}
1169 
1170 			if (d->bd_slen == 0) {
1171 				bpf_release_d(d);
1172 				lck_mtx_unlock(bpf_mlock);
1173 				return 0;
1174 			}
1175 			ROTATE_BUFFERS(d);
1176 			break;
1177 		}
1178 	}
1179 	/*
1180 	 * At this point, we know we have something in the hold slot.
1181 	 */
1182 
1183 	/*
1184 	 * Set the hold buffer read. So we do not
1185 	 * rotate the buffers until the hold buffer
1186 	 * read is complete. Also to avoid issues resulting
1187 	 * from page faults during disk sleep (<rdar://problem/13436396>).
1188 	 */
1189 	d->bd_hbuf_read = 1;
1190 	hbuf = d->bd_hbuf;
1191 	hbuf_len = d->bd_hlen;
1192 	flags = d->bd_flags;
1193 	lck_mtx_unlock(bpf_mlock);
1194 
1195 #ifdef __APPLE__
1196 	/*
1197 	 * Before we move data to userland, we fill out the extended
1198 	 * header fields.
1199 	 */
1200 	if (flags & BPF_EXTENDED_HDR) {
1201 		char *p;
1202 
1203 		p = hbuf;
1204 		while (p < hbuf + hbuf_len) {
1205 			struct bpf_hdr_ext *ehp;
1206 			uint32_t flowid;
1207 			struct so_procinfo soprocinfo;
1208 			int found = 0;
1209 
1210 			ehp = (struct bpf_hdr_ext *)(void *)p;
1211 			if ((flowid = ehp->bh_flowid) != 0) {
1212 				if (ehp->bh_proto == IPPROTO_TCP) {
1213 					found = inp_findinpcb_procinfo(&tcbinfo,
1214 					    flowid, &soprocinfo);
1215 				} else if (ehp->bh_proto == IPPROTO_UDP) {
1216 					found = inp_findinpcb_procinfo(&udbinfo,
1217 					    flowid, &soprocinfo);
1218 				}
1219 				if (found == 1) {
1220 					ehp->bh_pid = soprocinfo.spi_pid;
1221 					strlcpy(&ehp->bh_comm[0], &soprocinfo.spi_proc_name[0], sizeof(ehp->bh_comm));
1222 				}
1223 				ehp->bh_flowid = 0;
1224 			}
1225 
1226 			if (flags & BPF_FINALIZE_PKTAP) {
1227 				struct pktap_header *pktaphdr;
1228 
1229 				pktaphdr = (struct pktap_header *)(void *)
1230 				    (p + BPF_WORDALIGN(ehp->bh_hdrlen));
1231 
1232 				bpf_finalize_pktap((struct bpf_hdr *) ehp,
1233 				    pktaphdr);
1234 			}
1235 			p += BPF_WORDALIGN(ehp->bh_hdrlen + ehp->bh_caplen);
1236 		}
1237 	} else if (flags & BPF_FINALIZE_PKTAP) {
1238 		char *p;
1239 
1240 		p = hbuf;
1241 		while (p < hbuf + hbuf_len) {
1242 			struct bpf_hdr *hp;
1243 			struct pktap_header *pktaphdr;
1244 
1245 			hp = (struct bpf_hdr *)(void *)p;
1246 			pktaphdr = (struct pktap_header *)(void *)
1247 			    (p + BPF_WORDALIGN(hp->bh_hdrlen));
1248 
1249 			bpf_finalize_pktap(hp, pktaphdr);
1250 
1251 			p += BPF_WORDALIGN(hp->bh_hdrlen + hp->bh_caplen);
1252 		}
1253 	}
1254 #endif
1255 
1256 	/*
1257 	 * Move data from hold buffer into user space.
1258 	 * We know the entire buffer is transferred since
1259 	 * we checked above that the read buffer is bpf_bufsize bytes.
1260 	 */
1261 	error = UIOMOVE(hbuf, hbuf_len, UIO_READ, uio);
1262 
1263 	lck_mtx_lock(bpf_mlock);
1264 	/*
1265 	 * Make sure device is still opened
1266 	 */
1267 	if ((d->bd_flags & BPF_CLOSING) != 0) {
1268 		bpf_release_d(d);
1269 		lck_mtx_unlock(bpf_mlock);
1270 		return ENXIO;
1271 	}
1272 
1273 	d->bd_hbuf_read = 0;
1274 	d->bd_fbuf = d->bd_hbuf;
1275 	d->bd_hbuf = NULL;
1276 	d->bd_hlen = 0;
1277 	d->bd_hcnt = 0;
1278 	wakeup((caddr_t)d);
1279 
1280 	bpf_release_d(d);
1281 	lck_mtx_unlock(bpf_mlock);
1282 	return error;
1283 }
1284 
1285 /*
1286  * If there are processes sleeping on this descriptor, wake them up.
1287  */
1288 static void
bpf_wakeup(struct bpf_d * d)1289 bpf_wakeup(struct bpf_d *d)
1290 {
1291 	if (d->bd_state == BPF_WAITING) {
1292 		bpf_stop_timer(d);
1293 		d->bd_state = BPF_IDLE;
1294 	}
1295 	wakeup((caddr_t)d);
1296 	if (d->bd_async && d->bd_sig && d->bd_sigio) {
1297 		pgsigio(d->bd_sigio, d->bd_sig);
1298 	}
1299 
1300 	selwakeup(&d->bd_sel);
1301 	if ((d->bd_flags & BPF_KNOTE)) {
1302 		KNOTE(&d->bd_sel.si_note, 1);
1303 	}
1304 }
1305 
1306 static void
bpf_timed_out(void * arg,__unused void * dummy)1307 bpf_timed_out(void *arg, __unused void *dummy)
1308 {
1309 	struct bpf_d *d = (struct bpf_d *)arg;
1310 
1311 	lck_mtx_lock(bpf_mlock);
1312 	if (d->bd_state == BPF_WAITING) {
1313 		/*
1314 		 * There's a select or kqueue waiting for this; if there's
1315 		 * now stuff to read, wake it up.
1316 		 */
1317 		d->bd_state = BPF_TIMED_OUT;
1318 		if (d->bd_slen != 0) {
1319 			bpf_wakeup(d);
1320 		}
1321 	} else if (d->bd_state == BPF_DRAINING) {
1322 		/*
1323 		 * A close is waiting for this to finish.
1324 		 * Mark it as finished, and wake the close up.
1325 		 */
1326 		d->bd_state = BPF_IDLE;
1327 		bpf_wakeup(d);
1328 	}
1329 	lck_mtx_unlock(bpf_mlock);
1330 }
1331 
/*
 * Space reserved for a link-layer header in the destination sockaddr
 * buffer that bpfwrite() hands to bpf_movein() (the buffer is
 * SOCKADDR_HDR_LEN + MAX_DATALINK_HDR_LEN bytes).
 * NOTE(review): presumably sized for the largest header bpf_movein()
 * can produce -- confirm when adding a new data link type.
 */
/* keep in sync with bpf_movein above: */
#define MAX_DATALINK_HDR_LEN    (sizeof(struct firewire_header))
1334 
1335 int
bpfwrite(dev_t dev,struct uio * uio,__unused int ioflag)1336 bpfwrite(dev_t dev, struct uio *uio, __unused int ioflag)
1337 {
1338 	struct bpf_d *d;
1339 	struct ifnet *ifp;
1340 	struct mbuf *m = NULL;
1341 	int error;
1342 	char              dst_buf[SOCKADDR_HDR_LEN + MAX_DATALINK_HDR_LEN];
1343 	int datlen = 0;
1344 	int bif_dlt;
1345 	int bd_hdrcmplt;
1346 
1347 	lck_mtx_lock(bpf_mlock);
1348 
1349 	d = bpf_dtab[minor(dev)];
1350 	if (d == NULL || d == BPF_DEV_RESERVED ||
1351 	    (d->bd_flags & BPF_CLOSING) != 0) {
1352 		lck_mtx_unlock(bpf_mlock);
1353 		return ENXIO;
1354 	}
1355 
1356 	bpf_acquire_d(d);
1357 
1358 	if (d->bd_bif == 0) {
1359 		bpf_release_d(d);
1360 		lck_mtx_unlock(bpf_mlock);
1361 		return ENXIO;
1362 	}
1363 
1364 	ifp = d->bd_bif->bif_ifp;
1365 
1366 	if ((ifp->if_flags & IFF_UP) == 0) {
1367 		bpf_release_d(d);
1368 		lck_mtx_unlock(bpf_mlock);
1369 		return ENETDOWN;
1370 	}
1371 	if (uio_resid(uio) == 0) {
1372 		bpf_release_d(d);
1373 		lck_mtx_unlock(bpf_mlock);
1374 		return 0;
1375 	}
1376 	((struct sockaddr *)dst_buf)->sa_len = sizeof(dst_buf);
1377 
1378 	/*
1379 	 * fix for PR-6849527
1380 	 * geting variables onto stack before dropping lock for bpf_movein()
1381 	 */
1382 	bif_dlt = (int)d->bd_bif->bif_dlt;
1383 	bd_hdrcmplt  = d->bd_hdrcmplt;
1384 
1385 	/* bpf_movein allocating mbufs; drop lock */
1386 	lck_mtx_unlock(bpf_mlock);
1387 
1388 	error = bpf_movein(uio, bif_dlt, &m,
1389 	    bd_hdrcmplt ? NULL : (struct sockaddr *)dst_buf,
1390 	    &datlen);
1391 
1392 	/* take the lock again */
1393 	lck_mtx_lock(bpf_mlock);
1394 	if (error) {
1395 		bpf_release_d(d);
1396 		lck_mtx_unlock(bpf_mlock);
1397 		return error;
1398 	}
1399 
1400 	/* verify the device is still open */
1401 	if ((d->bd_flags & BPF_CLOSING) != 0) {
1402 		bpf_release_d(d);
1403 		lck_mtx_unlock(bpf_mlock);
1404 		m_freem(m);
1405 		return ENXIO;
1406 	}
1407 
1408 	if (d->bd_bif == NULL) {
1409 		bpf_release_d(d);
1410 		lck_mtx_unlock(bpf_mlock);
1411 		m_free(m);
1412 		return ENXIO;
1413 	}
1414 
1415 	if ((unsigned)datlen > ifp->if_mtu) {
1416 		bpf_release_d(d);
1417 		lck_mtx_unlock(bpf_mlock);
1418 		m_freem(m);
1419 		return EMSGSIZE;
1420 	}
1421 
1422 	bpf_set_packet_service_class(m, d->bd_traffic_class);
1423 
1424 	lck_mtx_unlock(bpf_mlock);
1425 
1426 	/*
1427 	 * The driver frees the mbuf.
1428 	 */
1429 	if (d->bd_hdrcmplt) {
1430 		if (d->bd_bif->bif_send) {
1431 			error = d->bd_bif->bif_send(ifp, d->bd_bif->bif_dlt, m);
1432 		} else {
1433 			error = dlil_output(ifp, 0, m, NULL, NULL, 1, NULL);
1434 		}
1435 	} else {
1436 		error = dlil_output(ifp, PF_INET, m, NULL,
1437 		    (struct sockaddr *)dst_buf, 0, NULL);
1438 	}
1439 
1440 	lck_mtx_lock(bpf_mlock);
1441 	bpf_release_d(d);
1442 	lck_mtx_unlock(bpf_mlock);
1443 
1444 	return error;
1445 }
1446 
1447 /*
1448  * Reset a descriptor by flushing its packet buffer and clearing the
1449  * receive and drop counts.
1450  */
1451 static void
reset_d(struct bpf_d * d)1452 reset_d(struct bpf_d *d)
1453 {
1454 	if (d->bd_hbuf_read != 0) {
1455 		panic("resetting buffers during read");
1456 	}
1457 
1458 	if (d->bd_hbuf) {
1459 		/* Free the hold buffer. */
1460 		d->bd_fbuf = d->bd_hbuf;
1461 		d->bd_hbuf = NULL;
1462 	}
1463 	d->bd_slen = 0;
1464 	d->bd_hlen = 0;
1465 	d->bd_scnt = 0;
1466 	d->bd_hcnt = 0;
1467 	d->bd_rcount = 0;
1468 	d->bd_dcount = 0;
1469 }
1470 
1471 static struct bpf_d *
bpf_get_device_from_uuid(uuid_t uuid)1472 bpf_get_device_from_uuid(uuid_t uuid)
1473 {
1474 	unsigned int i;
1475 
1476 	for (i = 0; i < nbpfilter; i++) {
1477 		struct bpf_d *d = bpf_dtab[i];
1478 
1479 		if (d == NULL || d == BPF_DEV_RESERVED ||
1480 		    (d->bd_flags & BPF_CLOSING) != 0) {
1481 			continue;
1482 		}
1483 		if (uuid_compare(uuid, d->bd_uuid) == 0) {
1484 			return d;
1485 		}
1486 	}
1487 
1488 	return NULL;
1489 }
1490 
1491 /*
1492  * The BIOCSETUP command "atomically" attach to the interface and
1493  * copy the buffer from another interface. This minimizes the risk
1494  * of missing packet because this is done while holding
1495  * the BPF global lock
1496  */
1497 static int
bpf_setup(struct bpf_d * d_to,uuid_t uuid_from,ifnet_t ifp)1498 bpf_setup(struct bpf_d *d_to, uuid_t uuid_from, ifnet_t ifp)
1499 {
1500 	struct bpf_d *d_from;
1501 	int error = 0;
1502 
1503 	LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);
1504 
1505 	/*
1506 	 * Sanity checks
1507 	 */
1508 	d_from = bpf_get_device_from_uuid(uuid_from);
1509 	if (d_from == NULL) {
1510 		error = ENOENT;
1511 		os_log_info(OS_LOG_DEFAULT,
1512 		    "%s: uuids not found error %d",
1513 		    __func__, error);
1514 		return error;
1515 	}
1516 	if (d_from->bd_opened_by != d_to->bd_opened_by) {
1517 		error = EACCES;
1518 		os_log_info(OS_LOG_DEFAULT,
1519 		    "%s: processes not matching error %d",
1520 		    __func__, error);
1521 		return error;
1522 	}
1523 
1524 	/*
1525 	 * Prevent any read while copying
1526 	 */
1527 	while (d_to->bd_hbuf_read != 0) {
1528 		msleep((caddr_t)d_to, bpf_mlock, PRINET, __func__, NULL);
1529 	}
1530 	d_to->bd_hbuf_read = 1;
1531 
1532 	while (d_from->bd_hbuf_read != 0) {
1533 		msleep((caddr_t)d_from, bpf_mlock, PRINET, __func__, NULL);
1534 	}
1535 	d_from->bd_hbuf_read = 1;
1536 
1537 	/*
1538 	 * Verify the devices have not been closed
1539 	 */
1540 	if (d_to->bd_flags & BPF_CLOSING) {
1541 		error = ENXIO;
1542 		os_log_info(OS_LOG_DEFAULT,
1543 		    "%s: d_to is closing error %d",
1544 		    __func__, error);
1545 		goto done;
1546 	}
1547 	if (d_from->bd_flags & BPF_CLOSING) {
1548 		error = ENXIO;
1549 		os_log_info(OS_LOG_DEFAULT,
1550 		    "%s: d_from is closing error %d",
1551 		    __func__, error);
1552 		goto done;
1553 	}
1554 
1555 	/*
1556 	 * For now require the same buffer size
1557 	 */
1558 	if (d_from->bd_bufsize != d_to->bd_bufsize) {
1559 		error = EINVAL;
1560 		os_log_info(OS_LOG_DEFAULT,
1561 		    "%s: bufsizes not matching error %d",
1562 		    __func__, error);
1563 		goto done;
1564 	}
1565 
1566 	/*
1567 	 * Attach to the interface
1568 	 */
1569 	error = bpf_setif(d_to, ifp, false, true);
1570 	if (error != 0) {
1571 		os_log_info(OS_LOG_DEFAULT,
1572 		    "%s: bpf_setif() failed error %d",
1573 		    __func__, error);
1574 		goto done;
1575 	}
1576 
1577 	/*
1578 	 * Make sure the buffers are setup as expected by bpf_setif()
1579 	 */
1580 	ASSERT(d_to->bd_hbuf == NULL);
1581 	ASSERT(d_to->bd_sbuf != NULL);
1582 	ASSERT(d_to->bd_fbuf != NULL);
1583 
1584 	/*
1585 	 * Copy the buffers and update the pointers and counts
1586 	 */
1587 	memcpy(d_to->bd_sbuf, d_from->bd_sbuf, d_from->bd_slen);
1588 	d_to->bd_slen = d_from->bd_slen;
1589 	d_to->bd_scnt = d_from->bd_scnt;
1590 
1591 	if (d_from->bd_hbuf != NULL) {
1592 		d_to->bd_hbuf = d_to->bd_fbuf;
1593 		d_to->bd_fbuf = NULL;
1594 		memcpy(d_to->bd_hbuf, d_from->bd_hbuf, d_from->bd_hlen);
1595 	}
1596 	d_to->bd_hlen = d_from->bd_hlen;
1597 	d_to->bd_hcnt = d_from->bd_hcnt;
1598 
1599 	if (bpf_debug > 0) {
1600 		os_log_info(OS_LOG_DEFAULT,
1601 		    "%s: done slen %u scnt %u hlen %u hcnt %u",
1602 		    __func__, d_to->bd_slen, d_to->bd_scnt,
1603 		    d_to->bd_hlen, d_to->bd_hcnt);
1604 	}
1605 done:
1606 	d_from->bd_hbuf_read = 0;
1607 	wakeup((caddr_t)d_from);
1608 
1609 	d_to->bd_hbuf_read = 0;
1610 	wakeup((caddr_t)d_to);
1611 
1612 	return error;
1613 }
1614 
1615 /*
1616  *  FIONREAD		Check for read packet available.
1617  *  SIOCGIFADDR		Get interface address - convenient hook to driver.
1618  *  BIOCGBLEN		Get buffer len [for read()].
1619  *  BIOCSETF		Set ethernet read filter.
1620  *  BIOCFLUSH		Flush read packet buffer.
1621  *  BIOCPROMISC		Put interface into promiscuous mode.
1622  *  BIOCGDLT		Get link layer type.
1623  *  BIOCGETIF		Get interface name.
1624  *  BIOCSETIF		Set interface.
1625  *  BIOCSRTIMEOUT	Set read timeout.
1626  *  BIOCGRTIMEOUT	Get read timeout.
1627  *  BIOCGSTATS		Get packet stats.
1628  *  BIOCIMMEDIATE	Set immediate mode.
1629  *  BIOCVERSION		Get filter language version.
1630  *  BIOCGHDRCMPLT	Get "header already complete" flag
1631  *  BIOCSHDRCMPLT	Set "header already complete" flag
1632  *  BIOCGSEESENT	Get "see packets sent" flag
1633  *  BIOCSSEESENT	Set "see packets sent" flag
1634  *  BIOCSETTC		Set traffic class.
1635  *  BIOCGETTC		Get traffic class.
1636  *  BIOCSEXTHDR		Set "extended header" flag
1637  *  BIOCSHEADDROP	Drop head of the buffer if user is not reading
1638  *  BIOCGHEADDROP	Get "head-drop" flag
1639  */
1640 /* ARGSUSED */
1641 int
bpfioctl(dev_t dev,u_long cmd,caddr_t addr,__unused int flags,struct proc * p)1642 bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags,
1643     struct proc *p)
1644 {
1645 	struct bpf_d *d;
1646 	int error = 0;
1647 	u_int int_arg;
1648 	struct ifreq ifr;
1649 
1650 	lck_mtx_lock(bpf_mlock);
1651 
1652 	d = bpf_dtab[minor(dev)];
1653 	if (d == NULL || d == BPF_DEV_RESERVED ||
1654 	    (d->bd_flags & BPF_CLOSING) != 0) {
1655 		lck_mtx_unlock(bpf_mlock);
1656 		return ENXIO;
1657 	}
1658 
1659 	bpf_acquire_d(d);
1660 
1661 	if (d->bd_state == BPF_WAITING) {
1662 		bpf_stop_timer(d);
1663 	}
1664 	d->bd_state = BPF_IDLE;
1665 
1666 	switch (cmd) {
1667 	default:
1668 		error = EINVAL;
1669 		break;
1670 
1671 	/*
1672 	 * Check for read packet available.
1673 	 */
1674 	case FIONREAD:                  /* int */
1675 	{
1676 		int n;
1677 
1678 		n = d->bd_slen;
1679 		if (d->bd_hbuf && d->bd_hbuf_read == 0) {
1680 			n += d->bd_hlen;
1681 		}
1682 
1683 		bcopy(&n, addr, sizeof(n));
1684 		break;
1685 	}
1686 
1687 	case SIOCGIFADDR:               /* struct ifreq */
1688 	{
1689 		struct ifnet *ifp;
1690 
1691 		if (d->bd_bif == 0) {
1692 			error = EINVAL;
1693 		} else {
1694 			ifp = d->bd_bif->bif_ifp;
1695 			error = ifnet_ioctl(ifp, 0, cmd, addr);
1696 		}
1697 		break;
1698 	}
1699 
1700 	/*
1701 	 * Get buffer len [for read()].
1702 	 */
1703 	case BIOCGBLEN:                 /* u_int */
1704 		bcopy(&d->bd_bufsize, addr, sizeof(u_int));
1705 		break;
1706 
1707 	/*
1708 	 * Set buffer length.
1709 	 */
1710 	case BIOCSBLEN: {               /* u_int */
1711 		u_int size;
1712 		unsigned int maxbufsize = bpf_maxbufsize;
1713 
1714 		/*
1715 		 * Allow larger buffer in head drop mode to with the
1716 		 * assumption the reading process may be low priority but
1717 		 * is interested in the most recent traffic
1718 		 */
1719 		if (d->bd_headdrop != 0) {
1720 			maxbufsize = 2 * bpf_maxbufsize;
1721 		}
1722 
1723 		if (d->bd_bif != 0 || (d->bd_flags & BPF_DETACHING)) {
1724 			/*
1725 			 * Interface already attached, unable to change buffers
1726 			 */
1727 			error = EINVAL;
1728 			break;
1729 		}
1730 		bcopy(addr, &size, sizeof(size));
1731 
1732 		if (size > maxbufsize) {
1733 			d->bd_bufsize = maxbufsize;
1734 
1735 			os_log_info(OS_LOG_DEFAULT,
1736 			    "%s bufsize capped to %u from %u",
1737 			    __func__, d->bd_bufsize, size);
1738 		} else if (size < BPF_MINBUFSIZE) {
1739 			d->bd_bufsize = BPF_MINBUFSIZE;
1740 
1741 			os_log_info(OS_LOG_DEFAULT,
1742 			    "%s bufsize bumped to %u from %u",
1743 			    __func__, d->bd_bufsize, size);
1744 		} else {
1745 			d->bd_bufsize = size;
1746 		}
1747 
1748 		/* It's a read/write ioctl */
1749 		bcopy(&d->bd_bufsize, addr, sizeof(u_int));
1750 		break;
1751 	}
1752 	/*
1753 	 * Set link layer read filter.
1754 	 */
1755 	case BIOCSETF32:
1756 	case BIOCSETFNR32: {            /* struct bpf_program32 */
1757 		struct bpf_program32 prg32;
1758 
1759 		bcopy(addr, &prg32, sizeof(prg32));
1760 		error = bpf_setf(d, prg32.bf_len,
1761 		    CAST_USER_ADDR_T(prg32.bf_insns), cmd);
1762 		break;
1763 	}
1764 
1765 	case BIOCSETF64:
1766 	case BIOCSETFNR64: {            /* struct bpf_program64 */
1767 		struct bpf_program64 prg64;
1768 
1769 		bcopy(addr, &prg64, sizeof(prg64));
1770 		error = bpf_setf(d, prg64.bf_len, CAST_USER_ADDR_T(prg64.bf_insns), cmd);
1771 		break;
1772 	}
1773 
1774 	/*
1775 	 * Flush read packet buffer.
1776 	 */
1777 	case BIOCFLUSH:
1778 		while (d->bd_hbuf_read != 0) {
1779 			msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading",
1780 			    NULL);
1781 		}
1782 		if ((d->bd_flags & BPF_CLOSING) != 0) {
1783 			error = ENXIO;
1784 			break;
1785 		}
1786 		reset_d(d);
1787 		break;
1788 
1789 	/*
1790 	 * Put interface into promiscuous mode.
1791 	 */
1792 	case BIOCPROMISC:
1793 		if (d->bd_bif == 0) {
1794 			/*
1795 			 * No interface attached yet.
1796 			 */
1797 			error = EINVAL;
1798 			break;
1799 		}
1800 		if (d->bd_promisc == 0) {
1801 			lck_mtx_unlock(bpf_mlock);
1802 			error = ifnet_set_promiscuous(d->bd_bif->bif_ifp, 1);
1803 			lck_mtx_lock(bpf_mlock);
1804 			if (error == 0) {
1805 				d->bd_promisc = 1;
1806 			}
1807 		}
1808 		break;
1809 
1810 	/*
1811 	 * Get device parameters.
1812 	 */
1813 	case BIOCGDLT:                  /* u_int */
1814 		if (d->bd_bif == 0) {
1815 			error = EINVAL;
1816 		} else {
1817 			bcopy(&d->bd_bif->bif_dlt, addr, sizeof(u_int));
1818 		}
1819 		break;
1820 
1821 	/*
1822 	 * Get a list of supported data link types.
1823 	 */
1824 	case BIOCGDLTLIST:              /* struct bpf_dltlist */
1825 		if (d->bd_bif == NULL) {
1826 			error = EINVAL;
1827 		} else {
1828 			error = bpf_getdltlist(d, addr, p);
1829 		}
1830 		break;
1831 
1832 	/*
1833 	 * Set data link type.
1834 	 */
1835 	case BIOCSDLT:                  /* u_int */
1836 		if (d->bd_bif == NULL) {
1837 			error = EINVAL;
1838 		} else {
1839 			u_int dlt;
1840 
1841 			bcopy(addr, &dlt, sizeof(dlt));
1842 
1843 			if (dlt == DLT_PKTAP &&
1844 			    !(d->bd_flags & BPF_WANT_PKTAP)) {
1845 				dlt = DLT_RAW;
1846 			}
1847 			error = bpf_setdlt(d, dlt);
1848 		}
1849 		break;
1850 
1851 	/*
1852 	 * Get interface name.
1853 	 */
1854 	case BIOCGETIF:                 /* struct ifreq */
1855 		if (d->bd_bif == 0) {
1856 			error = EINVAL;
1857 		} else {
1858 			struct ifnet *const ifp = d->bd_bif->bif_ifp;
1859 
1860 			snprintf(((struct ifreq *)(void *)addr)->ifr_name,
1861 			    sizeof(ifr.ifr_name), "%s", if_name(ifp));
1862 		}
1863 		break;
1864 
1865 	/*
1866 	 * Set interface.
1867 	 */
1868 	case BIOCSETIF: {               /* struct ifreq */
1869 		ifnet_t ifp;
1870 
1871 		bcopy(addr, &ifr, sizeof(ifr));
1872 		ifr.ifr_name[IFNAMSIZ - 1] = '\0';
1873 		ifp = ifunit(ifr.ifr_name);
1874 		if (ifp == NULL) {
1875 			error = ENXIO;
1876 		} else {
1877 			error = bpf_setif(d, ifp, true, false);
1878 		}
1879 		break;
1880 	}
1881 
1882 	/*
1883 	 * Set read timeout.
1884 	 */
1885 	case BIOCSRTIMEOUT32: {         /* struct user32_timeval */
1886 		struct user32_timeval _tv;
1887 		struct timeval tv;
1888 
1889 		bcopy(addr, &_tv, sizeof(_tv));
1890 		tv.tv_sec  = _tv.tv_sec;
1891 		tv.tv_usec = _tv.tv_usec;
1892 
1893 		/*
1894 		 * Subtract 1 tick from tvtohz() since this isn't
1895 		 * a one-shot timer.
1896 		 */
1897 		if ((error = itimerfix(&tv)) == 0) {
1898 			d->bd_rtout = tvtohz(&tv) - 1;
1899 		}
1900 		break;
1901 	}
1902 
1903 	case BIOCSRTIMEOUT64: {         /* struct user64_timeval */
1904 		struct user64_timeval _tv;
1905 		struct timeval tv;
1906 
1907 		bcopy(addr, &_tv, sizeof(_tv));
1908 		tv.tv_sec  = (__darwin_time_t)_tv.tv_sec;
1909 		tv.tv_usec = _tv.tv_usec;
1910 
1911 		/*
1912 		 * Subtract 1 tick from tvtohz() since this isn't
1913 		 * a one-shot timer.
1914 		 */
1915 		if ((error = itimerfix(&tv)) == 0) {
1916 			d->bd_rtout = tvtohz(&tv) - 1;
1917 		}
1918 		break;
1919 	}
1920 
1921 	/*
1922 	 * Get read timeout.
1923 	 */
1924 	case BIOCGRTIMEOUT32: {         /* struct user32_timeval */
1925 		struct user32_timeval tv;
1926 
1927 		bzero(&tv, sizeof(tv));
1928 		tv.tv_sec = d->bd_rtout / hz;
1929 		tv.tv_usec = (d->bd_rtout % hz) * tick;
1930 		bcopy(&tv, addr, sizeof(tv));
1931 		break;
1932 	}
1933 
1934 	case BIOCGRTIMEOUT64: {         /* struct user64_timeval */
1935 		struct user64_timeval tv;
1936 
1937 		bzero(&tv, sizeof(tv));
1938 		tv.tv_sec = d->bd_rtout / hz;
1939 		tv.tv_usec = (d->bd_rtout % hz) * tick;
1940 		bcopy(&tv, addr, sizeof(tv));
1941 		break;
1942 	}
1943 
1944 	/*
1945 	 * Get packet stats.
1946 	 */
1947 	case BIOCGSTATS: {              /* struct bpf_stat */
1948 		struct bpf_stat bs;
1949 
1950 		bzero(&bs, sizeof(bs));
1951 		bs.bs_recv = d->bd_rcount;
1952 		bs.bs_drop = d->bd_dcount;
1953 		bcopy(&bs, addr, sizeof(bs));
1954 		break;
1955 	}
1956 
1957 	/*
1958 	 * Set immediate mode.
1959 	 */
1960 	case BIOCIMMEDIATE:             /* u_int */
1961 		d->bd_immediate = *(u_char *)(void *)addr;
1962 		break;
1963 
1964 	case BIOCVERSION: {             /* struct bpf_version */
1965 		struct bpf_version bv;
1966 
1967 		bzero(&bv, sizeof(bv));
1968 		bv.bv_major = BPF_MAJOR_VERSION;
1969 		bv.bv_minor = BPF_MINOR_VERSION;
1970 		bcopy(&bv, addr, sizeof(bv));
1971 		break;
1972 	}
1973 
1974 	/*
1975 	 * Get "header already complete" flag
1976 	 */
1977 	case BIOCGHDRCMPLT:             /* u_int */
1978 		bcopy(&d->bd_hdrcmplt, addr, sizeof(u_int));
1979 		break;
1980 
1981 	/*
1982 	 * Set "header already complete" flag
1983 	 */
1984 	case BIOCSHDRCMPLT:             /* u_int */
1985 		bcopy(addr, &int_arg, sizeof(int_arg));
1986 		d->bd_hdrcmplt = int_arg ? 1 : 0;
1987 		break;
1988 
1989 	/*
1990 	 * Get "see sent packets" flag
1991 	 */
1992 	case BIOCGSEESENT:              /* u_int */
1993 		bcopy(&d->bd_seesent, addr, sizeof(u_int));
1994 		break;
1995 
1996 	/*
1997 	 * Set "see sent packets" flag
1998 	 */
1999 	case BIOCSSEESENT:              /* u_int */
2000 		bcopy(addr, &d->bd_seesent, sizeof(u_int));
2001 		break;
2002 
2003 	/*
2004 	 * Set traffic service class
2005 	 */
2006 	case BIOCSETTC: {               /* int */
2007 		int tc;
2008 
2009 		bcopy(addr, &tc, sizeof(int));
2010 		error = bpf_set_traffic_class(d, tc);
2011 		break;
2012 	}
2013 
2014 	/*
2015 	 * Get traffic service class
2016 	 */
2017 	case BIOCGETTC:                 /* int */
2018 		bcopy(&d->bd_traffic_class, addr, sizeof(int));
2019 		break;
2020 
2021 	case FIONBIO:           /* Non-blocking I/O; int */
2022 		break;
2023 
2024 	case FIOASYNC:          /* Send signal on receive packets; int */
2025 		bcopy(addr, &d->bd_async, sizeof(int));
2026 		break;
2027 #ifndef __APPLE__
2028 	case FIOSETOWN:
2029 		error = fsetown(*(int *)addr, &d->bd_sigio);
2030 		break;
2031 
2032 	case FIOGETOWN:
2033 		*(int *)addr = fgetown(d->bd_sigio);
2034 		break;
2035 
2036 	/* This is deprecated, FIOSETOWN should be used instead. */
2037 	case TIOCSPGRP:
2038 		error = fsetown(-(*(int *)addr), &d->bd_sigio);
2039 		break;
2040 
2041 	/* This is deprecated, FIOGETOWN should be used instead. */
2042 	case TIOCGPGRP:
2043 		*(int *)addr = -fgetown(d->bd_sigio);
2044 		break;
2045 #endif
2046 	case BIOCSRSIG: {       /* Set receive signal; u_int */
2047 		u_int sig;
2048 
2049 		bcopy(addr, &sig, sizeof(u_int));
2050 
2051 		if (sig >= NSIG) {
2052 			error = EINVAL;
2053 		} else {
2054 			d->bd_sig = sig;
2055 		}
2056 		break;
2057 	}
2058 	case BIOCGRSIG:                 /* u_int */
2059 		bcopy(&d->bd_sig, addr, sizeof(u_int));
2060 		break;
2061 #ifdef __APPLE__
2062 	case BIOCSEXTHDR:               /* u_int */
2063 		bcopy(addr, &int_arg, sizeof(int_arg));
2064 		if (int_arg) {
2065 			d->bd_flags |= BPF_EXTENDED_HDR;
2066 		} else {
2067 			d->bd_flags &= ~BPF_EXTENDED_HDR;
2068 		}
2069 		break;
2070 
2071 	case BIOCGIFATTACHCOUNT: {              /* struct ifreq */
2072 		ifnet_t ifp;
2073 		struct bpf_if *bp;
2074 
2075 		bcopy(addr, &ifr, sizeof(ifr));
2076 		ifr.ifr_name[IFNAMSIZ - 1] = '\0';
2077 		ifp = ifunit(ifr.ifr_name);
2078 		if (ifp == NULL) {
2079 			error = ENXIO;
2080 			break;
2081 		}
2082 		ifr.ifr_intval = 0;
2083 		for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
2084 			struct bpf_d *bpf_d;
2085 
2086 			if (bp->bif_ifp == NULL || bp->bif_ifp != ifp) {
2087 				continue;
2088 			}
2089 			for (bpf_d = bp->bif_dlist; bpf_d;
2090 			    bpf_d = bpf_d->bd_next) {
2091 				ifr.ifr_intval += 1;
2092 			}
2093 		}
2094 		bcopy(&ifr, addr, sizeof(ifr));
2095 		break;
2096 	}
2097 	case BIOCGWANTPKTAP:                    /* u_int */
2098 		int_arg = d->bd_flags & BPF_WANT_PKTAP ? 1 : 0;
2099 		bcopy(&int_arg, addr, sizeof(int_arg));
2100 		break;
2101 
2102 	case BIOCSWANTPKTAP:                    /* u_int */
2103 		bcopy(addr, &int_arg, sizeof(int_arg));
2104 		if (int_arg) {
2105 			d->bd_flags |= BPF_WANT_PKTAP;
2106 		} else {
2107 			d->bd_flags &= ~BPF_WANT_PKTAP;
2108 		}
2109 		break;
2110 #endif
2111 
2112 	case BIOCSHEADDROP:
2113 		bcopy(addr, &int_arg, sizeof(int_arg));
2114 		d->bd_headdrop = int_arg ? 1 : 0;
2115 		break;
2116 
2117 	case BIOCGHEADDROP:
2118 		bcopy(&d->bd_headdrop, addr, sizeof(int));
2119 		break;
2120 
2121 	case BIOCSTRUNCATE:
2122 		bcopy(addr, &int_arg, sizeof(int_arg));
2123 		if (int_arg) {
2124 			d->bd_flags |=  BPF_TRUNCATE;
2125 		} else {
2126 			d->bd_flags &= ~BPF_TRUNCATE;
2127 		}
2128 		break;
2129 
2130 	case BIOCGETUUID:
2131 		bcopy(&d->bd_uuid, addr, sizeof(uuid_t));
2132 		break;
2133 
2134 	case BIOCSETUP: {
2135 		struct bpf_setup_args bsa;
2136 		ifnet_t ifp;
2137 
2138 		bcopy(addr, &bsa, sizeof(struct bpf_setup_args));
2139 		bsa.bsa_ifname[IFNAMSIZ - 1] = 0;
2140 		ifp = ifunit(bsa.bsa_ifname);
2141 		if (ifp == NULL) {
2142 			error = ENXIO;
2143 			os_log_info(OS_LOG_DEFAULT,
2144 			    "%s: ifnet not found for %s error %d",
2145 			    __func__, bsa.bsa_ifname, error);
2146 			break;
2147 		}
2148 
2149 		error = bpf_setup(d, bsa.bsa_uuid, ifp);
2150 		break;
2151 	}
2152 	case BIOCSPKTHDRV2:
2153 		bcopy(addr, &int_arg, sizeof(int_arg));
2154 		if (int_arg != 0) {
2155 			d->bd_flags |= BPF_PKTHDRV2;
2156 		} else {
2157 			d->bd_flags &= ~BPF_PKTHDRV2;
2158 		}
2159 		break;
2160 
2161 	case BIOCGPKTHDRV2:
2162 		int_arg = d->bd_flags & BPF_PKTHDRV2 ? 1 : 0;
2163 		bcopy(&int_arg, addr, sizeof(int));
2164 		break;
2165 	}
2166 
2167 	bpf_release_d(d);
2168 	lck_mtx_unlock(bpf_mlock);
2169 
2170 	return error;
2171 }
2172 
2173 /*
2174  * Set d's packet filter program to fp.  If this file already has a filter,
2175  * free it and replace it.  Returns EINVAL for bogus requests.
2176  */
2177 static int
bpf_setf(struct bpf_d * d,u_int bf_len,user_addr_t bf_insns,u_long cmd)2178 bpf_setf(struct bpf_d *d, u_int bf_len, user_addr_t bf_insns,
2179     u_long cmd)
2180 {
2181 	struct bpf_insn *fcode, *old;
2182 	u_int flen, size;
2183 
2184 	while (d->bd_hbuf_read != 0) {
2185 		msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
2186 	}
2187 
2188 	if ((d->bd_flags & BPF_CLOSING) != 0) {
2189 		return ENXIO;
2190 	}
2191 
2192 	old = d->bd_filter;
2193 	if (bf_insns == USER_ADDR_NULL) {
2194 		if (bf_len != 0) {
2195 			return EINVAL;
2196 		}
2197 		d->bd_filter = NULL;
2198 		reset_d(d);
2199 		if (old != 0) {
2200 			kfree_data_addr(old);
2201 		}
2202 		return 0;
2203 	}
2204 	flen = bf_len;
2205 	if (flen > BPF_MAXINSNS) {
2206 		return EINVAL;
2207 	}
2208 
2209 	size = flen * sizeof(struct bpf_insn);
2210 	fcode = (struct bpf_insn *) kalloc_data(size, M_WAIT);
2211 #ifdef __APPLE__
2212 	if (fcode == NULL) {
2213 		return ENOBUFS;
2214 	}
2215 #endif
2216 	if (copyin(bf_insns, (caddr_t)fcode, size) == 0 &&
2217 	    bpf_validate(fcode, (int)flen)) {
2218 		d->bd_filter = fcode;
2219 
2220 		if (cmd == BIOCSETF32 || cmd == BIOCSETF64) {
2221 			reset_d(d);
2222 		}
2223 
2224 		if (old != 0) {
2225 			kfree_data_addr(old);
2226 		}
2227 
2228 		return 0;
2229 	}
2230 	kfree_data(fcode, size);
2231 	return EINVAL;
2232 }
2233 
2234 /*
2235  * Detach a file from its current interface (if attached at all) and attach
2236  * to the interface indicated by the name stored in ifr.
2237  * Return an errno or 0.
2238  */
2239 static int
bpf_setif(struct bpf_d * d,ifnet_t theywant,bool do_reset,bool has_hbuf_read)2240 bpf_setif(struct bpf_d *d, ifnet_t theywant, bool do_reset, bool has_hbuf_read)
2241 {
2242 	struct bpf_if *bp;
2243 	int error;
2244 
2245 	while (d->bd_hbuf_read != 0 && !has_hbuf_read) {
2246 		msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
2247 	}
2248 
2249 	if ((d->bd_flags & BPF_CLOSING) != 0) {
2250 		return ENXIO;
2251 	}
2252 
2253 	/*
2254 	 * Look through attached interfaces for the named one.
2255 	 */
2256 	for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
2257 		struct ifnet *ifp = bp->bif_ifp;
2258 
2259 		if (ifp == 0 || ifp != theywant) {
2260 			continue;
2261 		}
2262 		/*
2263 		 * Do not use DLT_PKTAP, unless requested explicitly
2264 		 */
2265 		if (bp->bif_dlt == DLT_PKTAP && !(d->bd_flags & BPF_WANT_PKTAP)) {
2266 			continue;
2267 		}
2268 		/*
2269 		 * Skip the coprocessor interface
2270 		 */
2271 		if (!intcoproc_unrestricted && IFNET_IS_INTCOPROC(ifp)) {
2272 			continue;
2273 		}
2274 		/*
2275 		 * We found the requested interface.
2276 		 * Allocate the packet buffers.
2277 		 */
2278 		error = bpf_allocbufs(d);
2279 		if (error != 0) {
2280 			return error;
2281 		}
2282 		/*
2283 		 * Detach if attached to something else.
2284 		 */
2285 		if (bp != d->bd_bif) {
2286 			if (d->bd_bif != NULL) {
2287 				if (bpf_detachd(d, 0) != 0) {
2288 					return ENXIO;
2289 				}
2290 			}
2291 			if (bpf_attachd(d, bp) != 0) {
2292 				return ENXIO;
2293 			}
2294 		}
2295 		if (do_reset) {
2296 			reset_d(d);
2297 		}
2298 		return 0;
2299 	}
2300 	/* Not found. */
2301 	return ENXIO;
2302 }
2303 
2304 /*
2305  * Get a list of available data link type of the interface.
2306  */
2307 static int
bpf_getdltlist(struct bpf_d * d,caddr_t addr,struct proc * p)2308 bpf_getdltlist(struct bpf_d *d, caddr_t addr, struct proc *p)
2309 {
2310 	u_int           n;
2311 	int             error;
2312 	struct ifnet    *ifp;
2313 	struct bpf_if   *bp;
2314 	user_addr_t     dlist;
2315 	struct bpf_dltlist bfl;
2316 
2317 	bcopy(addr, &bfl, sizeof(bfl));
2318 	if (proc_is64bit(p)) {
2319 		dlist = (user_addr_t)bfl.bfl_u.bflu_pad;
2320 	} else {
2321 		dlist = CAST_USER_ADDR_T(bfl.bfl_u.bflu_list);
2322 	}
2323 
2324 	ifp = d->bd_bif->bif_ifp;
2325 	n = 0;
2326 	error = 0;
2327 
2328 	for (bp = bpf_iflist; bp; bp = bp->bif_next) {
2329 		if (bp->bif_ifp != ifp) {
2330 			continue;
2331 		}
2332 		/*
2333 		 * Do not use DLT_PKTAP, unless requested explicitly
2334 		 */
2335 		if (bp->bif_dlt == DLT_PKTAP && !(d->bd_flags & BPF_WANT_PKTAP)) {
2336 			continue;
2337 		}
2338 		if (dlist != USER_ADDR_NULL) {
2339 			if (n >= bfl.bfl_len) {
2340 				return ENOMEM;
2341 			}
2342 			error = copyout(&bp->bif_dlt, dlist,
2343 			    sizeof(bp->bif_dlt));
2344 			if (error != 0) {
2345 				break;
2346 			}
2347 			dlist += sizeof(bp->bif_dlt);
2348 		}
2349 		n++;
2350 	}
2351 	bfl.bfl_len = n;
2352 	bcopy(&bfl, addr, sizeof(bfl));
2353 
2354 	return error;
2355 }
2356 
2357 /*
2358  * Set the data link type of a BPF instance.
2359  */
2360 static int
bpf_setdlt(struct bpf_d * d,uint32_t dlt)2361 bpf_setdlt(struct bpf_d *d, uint32_t dlt)
2362 {
2363 	int error, opromisc;
2364 	struct ifnet *ifp;
2365 	struct bpf_if *bp;
2366 
2367 	if (d->bd_bif->bif_dlt == dlt) {
2368 		return 0;
2369 	}
2370 
2371 	while (d->bd_hbuf_read != 0) {
2372 		msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
2373 	}
2374 
2375 	if ((d->bd_flags & BPF_CLOSING) != 0) {
2376 		return ENXIO;
2377 	}
2378 
2379 	ifp = d->bd_bif->bif_ifp;
2380 	for (bp = bpf_iflist; bp; bp = bp->bif_next) {
2381 		if (bp->bif_ifp == ifp && bp->bif_dlt == dlt) {
2382 			/*
2383 			 * Do not use DLT_PKTAP, unless requested explicitly
2384 			 */
2385 			if (bp->bif_dlt == DLT_PKTAP &&
2386 			    !(d->bd_flags & BPF_WANT_PKTAP)) {
2387 				continue;
2388 			}
2389 			break;
2390 		}
2391 	}
2392 	if (bp != NULL) {
2393 		opromisc = d->bd_promisc;
2394 		if (bpf_detachd(d, 0) != 0) {
2395 			return ENXIO;
2396 		}
2397 		error = bpf_attachd(d, bp);
2398 		if (error) {
2399 			printf("bpf_setdlt: bpf_attachd %s%d failed (%d)\n",
2400 			    ifnet_name(bp->bif_ifp), ifnet_unit(bp->bif_ifp),
2401 			    error);
2402 			return error;
2403 		}
2404 		reset_d(d);
2405 		if (opromisc) {
2406 			lck_mtx_unlock(bpf_mlock);
2407 			error = ifnet_set_promiscuous(bp->bif_ifp, 1);
2408 			lck_mtx_lock(bpf_mlock);
2409 			if (error) {
2410 				printf("%s: ifpromisc %s%d failed (%d)\n",
2411 				    __func__, ifnet_name(bp->bif_ifp),
2412 				    ifnet_unit(bp->bif_ifp), error);
2413 			} else {
2414 				d->bd_promisc = 1;
2415 			}
2416 		}
2417 	}
2418 	return bp == NULL ? EINVAL : 0;
2419 }
2420 
2421 static int
bpf_set_traffic_class(struct bpf_d * d,int tc)2422 bpf_set_traffic_class(struct bpf_d *d, int tc)
2423 {
2424 	int error = 0;
2425 
2426 	if (!SO_VALID_TC(tc)) {
2427 		error = EINVAL;
2428 	} else {
2429 		d->bd_traffic_class = tc;
2430 	}
2431 
2432 	return error;
2433 }
2434 
2435 static void
bpf_set_packet_service_class(struct mbuf * m,int tc)2436 bpf_set_packet_service_class(struct mbuf *m, int tc)
2437 {
2438 	if (!(m->m_flags & M_PKTHDR)) {
2439 		return;
2440 	}
2441 
2442 	VERIFY(SO_VALID_TC(tc));
2443 	(void) m_set_service_class(m, so_tc2msc(tc));
2444 }
2445 
2446 /*
2447  * Support for select()
2448  *
2449  * Return true iff the specific operation will not block indefinitely.
2450  * Otherwise, return false but make a note that a selwakeup() must be done.
2451  */
2452 int
bpfselect(dev_t dev,int which,void * wql,struct proc * p)2453 bpfselect(dev_t dev, int which, void * wql, struct proc *p)
2454 {
2455 	struct bpf_d *d;
2456 	int ret = 0;
2457 
2458 	lck_mtx_lock(bpf_mlock);
2459 
2460 	d = bpf_dtab[minor(dev)];
2461 	if (d == NULL || d == BPF_DEV_RESERVED ||
2462 	    (d->bd_flags & BPF_CLOSING) != 0) {
2463 		lck_mtx_unlock(bpf_mlock);
2464 		return ENXIO;
2465 	}
2466 
2467 	bpf_acquire_d(d);
2468 
2469 	if (d->bd_bif == NULL) {
2470 		bpf_release_d(d);
2471 		lck_mtx_unlock(bpf_mlock);
2472 		return ENXIO;
2473 	}
2474 
2475 	while (d->bd_hbuf_read != 0) {
2476 		msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
2477 	}
2478 
2479 	if ((d->bd_flags & BPF_CLOSING) != 0) {
2480 		bpf_release_d(d);
2481 		lck_mtx_unlock(bpf_mlock);
2482 		return ENXIO;
2483 	}
2484 
2485 	switch (which) {
2486 	case FREAD:
2487 		if (d->bd_hlen != 0 ||
2488 		    ((d->bd_immediate ||
2489 		    d->bd_state == BPF_TIMED_OUT) && d->bd_slen != 0)) {
2490 			ret = 1;         /* read has data to return */
2491 		} else {
2492 			/*
2493 			 * Read has no data to return.
2494 			 * Make the select wait, and start a timer if
2495 			 * necessary.
2496 			 */
2497 			selrecord(p, &d->bd_sel, wql);
2498 			bpf_start_timer(d);
2499 		}
2500 		break;
2501 
2502 	case FWRITE:
2503 		/* can't determine whether a write would block */
2504 		ret = 1;
2505 		break;
2506 	}
2507 
2508 	bpf_release_d(d);
2509 	lck_mtx_unlock(bpf_mlock);
2510 
2511 	return ret;
2512 }
2513 
2514 /*
2515  * Support for kevent() system call.  Register EVFILT_READ filters and
2516  * reject all others.
2517  */
2518 int bpfkqfilter(dev_t dev, struct knote *kn);
2519 static void filt_bpfdetach(struct knote *);
2520 static int filt_bpfread(struct knote *, long);
2521 static int filt_bpftouch(struct knote *kn, struct kevent_qos_s *kev);
2522 static int filt_bpfprocess(struct knote *kn, struct kevent_qos_s *kev);
2523 
2524 SECURITY_READ_ONLY_EARLY(struct filterops) bpfread_filtops = {
2525 	.f_isfd = 1,
2526 	.f_detach = filt_bpfdetach,
2527 	.f_event = filt_bpfread,
2528 	.f_touch = filt_bpftouch,
2529 	.f_process = filt_bpfprocess,
2530 };
2531 
2532 static int
filt_bpfread_common(struct knote * kn,struct kevent_qos_s * kev,struct bpf_d * d)2533 filt_bpfread_common(struct knote *kn, struct kevent_qos_s *kev, struct bpf_d *d)
2534 {
2535 	int ready = 0;
2536 	int64_t data = 0;
2537 
2538 	if (d->bd_immediate) {
2539 		/*
2540 		 * If there's data in the hold buffer, it's the
2541 		 * amount of data a read will return.
2542 		 *
2543 		 * If there's no data in the hold buffer, but
2544 		 * there's data in the store buffer, a read will
2545 		 * immediately rotate the store buffer to the
2546 		 * hold buffer, the amount of data in the store
2547 		 * buffer is the amount of data a read will
2548 		 * return.
2549 		 *
2550 		 * If there's no data in either buffer, we're not
2551 		 * ready to read.
2552 		 */
2553 		data = (d->bd_hlen == 0 || d->bd_hbuf_read != 0 ?
2554 		    d->bd_slen : d->bd_hlen);
2555 		int64_t lowwat = knote_low_watermark(kn);
2556 		if (lowwat > d->bd_bufsize) {
2557 			lowwat = d->bd_bufsize;
2558 		}
2559 		ready = (data >= lowwat);
2560 	} else {
2561 		/*
2562 		 * If there's data in the hold buffer, it's the
2563 		 * amount of data a read will return.
2564 		 *
2565 		 * If there's no data in the hold buffer, but
2566 		 * there's data in the store buffer, if the
2567 		 * timer has expired a read will immediately
2568 		 * rotate the store buffer to the hold buffer,
2569 		 * so the amount of data in the store buffer is
2570 		 * the amount of data a read will return.
2571 		 *
2572 		 * If there's no data in either buffer, or there's
2573 		 * no data in the hold buffer and the timer hasn't
2574 		 * expired, we're not ready to read.
2575 		 */
2576 		data = ((d->bd_hlen == 0 || d->bd_hbuf_read != 0) &&
2577 		    d->bd_state == BPF_TIMED_OUT ? d->bd_slen : d->bd_hlen);
2578 		ready = (data > 0);
2579 	}
2580 	if (!ready) {
2581 		bpf_start_timer(d);
2582 	} else if (kev) {
2583 		knote_fill_kevent(kn, kev, data);
2584 	}
2585 
2586 	return ready;
2587 }
2588 
2589 int
bpfkqfilter(dev_t dev,struct knote * kn)2590 bpfkqfilter(dev_t dev, struct knote *kn)
2591 {
2592 	struct bpf_d *d;
2593 	int res;
2594 
2595 	/*
2596 	 * Is this device a bpf?
2597 	 */
2598 	if (major(dev) != CDEV_MAJOR || kn->kn_filter != EVFILT_READ) {
2599 		knote_set_error(kn, EINVAL);
2600 		return 0;
2601 	}
2602 
2603 	lck_mtx_lock(bpf_mlock);
2604 
2605 	d = bpf_dtab[minor(dev)];
2606 
2607 	if (d == NULL || d == BPF_DEV_RESERVED ||
2608 	    (d->bd_flags & BPF_CLOSING) != 0 ||
2609 	    d->bd_bif == NULL) {
2610 		lck_mtx_unlock(bpf_mlock);
2611 		knote_set_error(kn, ENXIO);
2612 		return 0;
2613 	}
2614 
2615 	kn->kn_hook = d;
2616 	kn->kn_filtid = EVFILTID_BPFREAD;
2617 	KNOTE_ATTACH(&d->bd_sel.si_note, kn);
2618 	d->bd_flags |= BPF_KNOTE;
2619 
2620 	/* capture the current state */
2621 	res = filt_bpfread_common(kn, NULL, d);
2622 
2623 	lck_mtx_unlock(bpf_mlock);
2624 
2625 	return res;
2626 }
2627 
2628 static void
filt_bpfdetach(struct knote * kn)2629 filt_bpfdetach(struct knote *kn)
2630 {
2631 	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2632 
2633 	lck_mtx_lock(bpf_mlock);
2634 	if (d->bd_flags & BPF_KNOTE) {
2635 		KNOTE_DETACH(&d->bd_sel.si_note, kn);
2636 		d->bd_flags &= ~BPF_KNOTE;
2637 	}
2638 	lck_mtx_unlock(bpf_mlock);
2639 }
2640 
2641 static int
filt_bpfread(struct knote * kn,long hint)2642 filt_bpfread(struct knote *kn, long hint)
2643 {
2644 #pragma unused(hint)
2645 	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2646 
2647 	return filt_bpfread_common(kn, NULL, d);
2648 }
2649 
2650 static int
filt_bpftouch(struct knote * kn,struct kevent_qos_s * kev)2651 filt_bpftouch(struct knote *kn, struct kevent_qos_s *kev)
2652 {
2653 	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2654 	int res;
2655 
2656 	lck_mtx_lock(bpf_mlock);
2657 
2658 	/* save off the lowat threshold and flag */
2659 	kn->kn_sdata = kev->data;
2660 	kn->kn_sfflags = kev->fflags;
2661 
2662 	/* output data will be re-generated here */
2663 	res = filt_bpfread_common(kn, NULL, d);
2664 
2665 	lck_mtx_unlock(bpf_mlock);
2666 
2667 	return res;
2668 }
2669 
2670 static int
filt_bpfprocess(struct knote * kn,struct kevent_qos_s * kev)2671 filt_bpfprocess(struct knote *kn, struct kevent_qos_s *kev)
2672 {
2673 	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2674 	int res;
2675 
2676 	lck_mtx_lock(bpf_mlock);
2677 	res = filt_bpfread_common(kn, kev, d);
2678 	lck_mtx_unlock(bpf_mlock);
2679 
2680 	return res;
2681 }
2682 
2683 /*
2684  * Copy data from an mbuf chain into a buffer.	This code is derived
2685  * from m_copydata in kern/uipc_mbuf.c.
2686  */
2687 static void
bpf_mcopy(struct mbuf * m,void * dst_arg,size_t len)2688 bpf_mcopy(struct mbuf * m, void *dst_arg, size_t len)
2689 {
2690 	u_int count;
2691 	u_char *dst;
2692 
2693 	dst = dst_arg;
2694 	while (len > 0) {
2695 		if (m == 0) {
2696 			panic("bpf_mcopy");
2697 		}
2698 		count = MIN(m->m_len, (u_int)len);
2699 		bcopy(mbuf_data(m), dst, count);
2700 		m = m->m_next;
2701 		dst += count;
2702 		len -= count;
2703 	}
2704 }
2705 
2706 static inline void
bpf_tap_imp(ifnet_t ifp,u_int32_t dlt,struct bpf_packet * bpf_pkt,int outbound)2707 bpf_tap_imp(
2708 	ifnet_t         ifp,
2709 	u_int32_t       dlt,
2710 	struct bpf_packet *bpf_pkt,
2711 	int             outbound)
2712 {
2713 	struct bpf_d    *d;
2714 	u_int slen;
2715 	struct bpf_if *bp;
2716 
2717 	/*
2718 	 * It's possible that we get here after the bpf descriptor has been
2719 	 * detached from the interface; in such a case we simply return.
2720 	 * Lock ordering is important since we can be called asynchronously
2721 	 * (from IOKit) to process an inbound packet; when that happens
2722 	 * we would have been holding its "gateLock" and will be acquiring
2723 	 * "bpf_mlock" upon entering this routine.  Due to that, we release
2724 	 * "bpf_mlock" prior to calling ifnet_set_promiscuous (which will
2725 	 * acquire "gateLock" in the IOKit), in order to avoid a deadlock
2726 	 * when a ifnet_set_promiscuous request simultaneously collides with
2727 	 * an inbound packet being passed into the tap callback.
2728 	 */
2729 	lck_mtx_lock(bpf_mlock);
2730 	if (ifp->if_bpf == NULL) {
2731 		lck_mtx_unlock(bpf_mlock);
2732 		return;
2733 	}
2734 	for (bp = ifp->if_bpf; bp != NULL; bp = bp->bif_next) {
2735 		if (bp->bif_ifp != ifp) {
2736 			/* wrong interface */
2737 			bp = NULL;
2738 			break;
2739 		}
2740 		if (dlt == 0 || bp->bif_dlt == dlt) {
2741 			/* tapping default DLT or DLT matches */
2742 			break;
2743 		}
2744 	}
2745 	if (bp == NULL) {
2746 		goto done;
2747 	}
2748 	for (d = bp->bif_dlist; d != NULL; d = d->bd_next) {
2749 		struct bpf_packet *bpf_pkt_saved = bpf_pkt;
2750 		struct bpf_packet bpf_pkt_tmp = {};
2751 		struct pktap_header_buffer bpfp_header_tmp = {};
2752 
2753 		if (outbound && !d->bd_seesent) {
2754 			continue;
2755 		}
2756 
2757 		++d->bd_rcount;
2758 		slen = bpf_filter(d->bd_filter, (u_char *)bpf_pkt,
2759 		    (u_int)bpf_pkt->bpfp_total_length, 0);
2760 
2761 		if (bp->bif_ifp->if_type == IFT_PKTAP &&
2762 		    bp->bif_dlt == DLT_PKTAP) {
2763 			if (d->bd_flags & BPF_TRUNCATE) {
2764 				slen = min(slen, get_pkt_trunc_len(bpf_pkt));
2765 			}
2766 			/*
2767 			 * Need to copy the bpf_pkt because the conversion
2768 			 * to v2 pktap header modifies the content of the
2769 			 * bpfp_header
2770 			 */
2771 			if ((d->bd_flags & BPF_PKTHDRV2) &&
2772 			    bpf_pkt->bpfp_header_length <= sizeof(bpfp_header_tmp)) {
2773 				bpf_pkt_tmp = *bpf_pkt;
2774 
2775 				bpf_pkt = &bpf_pkt_tmp;
2776 
2777 				memcpy(&bpfp_header_tmp, bpf_pkt->bpfp_header,
2778 				    bpf_pkt->bpfp_header_length);
2779 
2780 				bpf_pkt->bpfp_header = &bpfp_header_tmp;
2781 
2782 				convert_to_pktap_header_to_v2(bpf_pkt,
2783 				    !!(d->bd_flags & BPF_TRUNCATE));
2784 			}
2785 		}
2786 		if (slen != 0) {
2787 			catchpacket(d, bpf_pkt, slen, outbound);
2788 		}
2789 		bpf_pkt = bpf_pkt_saved;
2790 	}
2791 
2792 done:
2793 	lck_mtx_unlock(bpf_mlock);
2794 }
2795 
2796 static inline void
bpf_tap_mbuf(ifnet_t ifp,u_int32_t dlt,mbuf_t m,void * hdr,size_t hlen,int outbound)2797 bpf_tap_mbuf(
2798 	ifnet_t         ifp,
2799 	u_int32_t       dlt,
2800 	mbuf_t          m,
2801 	void*           hdr,
2802 	size_t          hlen,
2803 	int             outbound)
2804 {
2805 	struct bpf_packet bpf_pkt;
2806 	struct mbuf *m0;
2807 
2808 	if (ifp->if_bpf == NULL) {
2809 		/* quickly check without taking lock */
2810 		return;
2811 	}
2812 	bpf_pkt.bpfp_type = BPF_PACKET_TYPE_MBUF;
2813 	bpf_pkt.bpfp_mbuf = m;
2814 	bpf_pkt.bpfp_total_length = 0;
2815 	for (m0 = m; m0 != NULL; m0 = m0->m_next) {
2816 		bpf_pkt.bpfp_total_length += m0->m_len;
2817 	}
2818 	bpf_pkt.bpfp_header = hdr;
2819 	if (hdr != NULL) {
2820 		bpf_pkt.bpfp_total_length += hlen;
2821 		bpf_pkt.bpfp_header_length = hlen;
2822 	} else {
2823 		bpf_pkt.bpfp_header_length = 0;
2824 	}
2825 	bpf_tap_imp(ifp, dlt, &bpf_pkt, outbound);
2826 }
2827 
2828 void
bpf_tap_out(ifnet_t ifp,u_int32_t dlt,mbuf_t m,void * hdr,size_t hlen)2829 bpf_tap_out(
2830 	ifnet_t         ifp,
2831 	u_int32_t       dlt,
2832 	mbuf_t          m,
2833 	void*           hdr,
2834 	size_t          hlen)
2835 {
2836 	bpf_tap_mbuf(ifp, dlt, m, hdr, hlen, 1);
2837 }
2838 
2839 void
bpf_tap_in(ifnet_t ifp,u_int32_t dlt,mbuf_t m,void * hdr,size_t hlen)2840 bpf_tap_in(
2841 	ifnet_t         ifp,
2842 	u_int32_t       dlt,
2843 	mbuf_t          m,
2844 	void*           hdr,
2845 	size_t          hlen)
2846 {
2847 	bpf_tap_mbuf(ifp, dlt, m, hdr, hlen, 0);
2848 }
2849 
/*
 * Callback registered with Ethernet driver.
 *
 * Taps m on the default link type with no extra header.  The packet is
 * treated as outbound when the mbuf has no receive interface.
 * Always returns 0.
 */
static int
bpf_tap_callback(struct ifnet *ifp, struct mbuf *m)
{
	const int outbound = (mbuf_pkthdr_rcvif(m) == NULL);

	bpf_tap_mbuf(ifp, 0, m, NULL, 0, outbound);

	return 0;
}
2858 
2859 #if SKYWALK
2860 #include <skywalk/os_skywalk_private.h>
2861 
2862 static void
bpf_pktcopy(kern_packet_t pkt,void * dst_arg,size_t len)2863 bpf_pktcopy(kern_packet_t pkt, void *dst_arg, size_t len)
2864 {
2865 	kern_buflet_t   buflet = NULL;
2866 	size_t count;
2867 	u_char *dst;
2868 
2869 	dst = dst_arg;
2870 	while (len > 0) {
2871 		uint8_t         *addr;
2872 
2873 		u_int32_t       buflet_length;
2874 
2875 		buflet = kern_packet_get_next_buflet(pkt, buflet);
2876 		VERIFY(buflet != NULL);
2877 		addr = kern_buflet_get_data_address(buflet);
2878 		VERIFY(addr != NULL);
2879 		addr += kern_buflet_get_data_offset(buflet);
2880 		buflet_length = kern_buflet_get_data_length(buflet);
2881 		count = MIN(buflet_length, len);
2882 		bcopy((void *)addr, (void *)dst, count);
2883 		dst += count;
2884 		len -= count;
2885 	}
2886 }
2887 
2888 static inline void
bpf_tap_packet(ifnet_t ifp,u_int32_t dlt,kern_packet_t pkt,void * hdr,size_t hlen,int outbound)2889 bpf_tap_packet(
2890 	ifnet_t         ifp,
2891 	u_int32_t       dlt,
2892 	kern_packet_t   pkt,
2893 	void*           hdr,
2894 	size_t          hlen,
2895 	int             outbound)
2896 {
2897 	struct bpf_packet       bpf_pkt;
2898 	struct mbuf *           m;
2899 
2900 	if (ifp->if_bpf == NULL) {
2901 		/* quickly check without taking lock */
2902 		return;
2903 	}
2904 	m = kern_packet_get_mbuf(pkt);
2905 	if (m != NULL) {
2906 		bpf_pkt.bpfp_type = BPF_PACKET_TYPE_MBUF;
2907 		bpf_pkt.bpfp_mbuf = m;
2908 		bpf_pkt.bpfp_total_length = m_length(m);
2909 	} else {
2910 		bpf_pkt.bpfp_type = BPF_PACKET_TYPE_PKT;
2911 		bpf_pkt.bpfp_pkt = pkt;
2912 		bpf_pkt.bpfp_total_length = kern_packet_get_data_length(pkt);
2913 	}
2914 	bpf_pkt.bpfp_header = hdr;
2915 	bpf_pkt.bpfp_header_length = hlen;
2916 	if (hlen != 0) {
2917 		bpf_pkt.bpfp_total_length += hlen;
2918 	}
2919 	bpf_tap_imp(ifp, dlt, &bpf_pkt, outbound);
2920 }
2921 
2922 void
bpf_tap_packet_out(ifnet_t ifp,u_int32_t dlt,kern_packet_t pkt,void * hdr,size_t hlen)2923 bpf_tap_packet_out(
2924 	ifnet_t         ifp,
2925 	u_int32_t       dlt,
2926 	kern_packet_t   pkt,
2927 	void*           hdr,
2928 	size_t          hlen)
2929 {
2930 	bpf_tap_packet(ifp, dlt, pkt, hdr, hlen, 1);
2931 }
2932 
2933 void
bpf_tap_packet_in(ifnet_t ifp,u_int32_t dlt,kern_packet_t pkt,void * hdr,size_t hlen)2934 bpf_tap_packet_in(
2935 	ifnet_t         ifp,
2936 	u_int32_t       dlt,
2937 	kern_packet_t   pkt,
2938 	void*           hdr,
2939 	size_t          hlen)
2940 {
2941 	bpf_tap_packet(ifp, dlt, pkt, hdr, hlen, 0);
2942 }
2943 
2944 #endif /* SKYWALK */
2945 
2946 static errno_t
bpf_copydata(struct bpf_packet * pkt,size_t off,size_t len,void * out_data)2947 bpf_copydata(struct bpf_packet *pkt, size_t off, size_t len, void* out_data)
2948 {
2949 	errno_t err = 0;
2950 	if (pkt->bpfp_type == BPF_PACKET_TYPE_MBUF) {
2951 		err = mbuf_copydata(pkt->bpfp_mbuf, off, len, out_data);
2952 #if SKYWALK
2953 	} else if (pkt->bpfp_type == BPF_PACKET_TYPE_PKT) {
2954 		err = kern_packet_copy_bytes(pkt->bpfp_pkt, off, len, out_data);
2955 #endif /* SKYWALK */
2956 	} else {
2957 		err = EINVAL;
2958 	}
2959 
2960 	return err;
2961 }
2962 
2963 static void
copy_bpf_packet(struct bpf_packet * pkt,void * dst,size_t len)2964 copy_bpf_packet(struct bpf_packet * pkt, void * dst, size_t len)
2965 {
2966 	/* copy the optional header */
2967 	if (pkt->bpfp_header_length != 0) {
2968 		size_t  count = MIN(len, pkt->bpfp_header_length);
2969 		bcopy(pkt->bpfp_header, dst, count);
2970 		len -= count;
2971 		dst = (void *)((uintptr_t)dst + count);
2972 	}
2973 	if (len == 0) {
2974 		/* nothing past the header */
2975 		return;
2976 	}
2977 	/* copy the packet */
2978 	switch (pkt->bpfp_type) {
2979 	case BPF_PACKET_TYPE_MBUF:
2980 		bpf_mcopy(pkt->bpfp_mbuf, dst, len);
2981 		break;
2982 #if SKYWALK
2983 	case BPF_PACKET_TYPE_PKT:
2984 		bpf_pktcopy(pkt->bpfp_pkt, dst, len);
2985 		break;
2986 #endif /* SKYWALK */
2987 	default:
2988 		break;
2989 	}
2990 }
2991 
2992 static uint32_t
get_esp_trunc_len(__unused struct bpf_packet * pkt,__unused uint32_t off,const uint32_t remaining_caplen)2993 get_esp_trunc_len(__unused struct bpf_packet *pkt, __unused uint32_t off,
2994     const uint32_t remaining_caplen)
2995 {
2996 	/*
2997 	 * For some reason tcpdump expects to have one byte beyond the ESP header
2998 	 */
2999 	uint32_t trunc_len = ESP_HDR_SIZE + 1;
3000 
3001 	if (trunc_len > remaining_caplen) {
3002 		return remaining_caplen;
3003 	}
3004 
3005 	return trunc_len;
3006 }
3007 
3008 static uint32_t
get_isakmp_trunc_len(__unused struct bpf_packet * pkt,__unused uint32_t off,const uint32_t remaining_caplen)3009 get_isakmp_trunc_len(__unused struct bpf_packet *pkt, __unused uint32_t off,
3010     const uint32_t remaining_caplen)
3011 {
3012 	/*
3013 	 * Include the payload generic header
3014 	 */
3015 	uint32_t trunc_len = ISAKMP_HDR_SIZE;
3016 
3017 	if (trunc_len > remaining_caplen) {
3018 		return remaining_caplen;
3019 	}
3020 
3021 	return trunc_len;
3022 }
3023 
/*
 * Number of bytes to keep of the UDP payload exchanged on the ISAKMP NAT
 * traversal port, starting at offset off of pkt and limited to
 * remaining_caplen.
 *
 * There are three cases:
 * - IKE: the payload starts with 4 zero bytes ahead of the ISAKMP header;
 *   keep those 4 bytes plus the ISAKMP header
 * - keep alive: 1 byte payload, kept as is
 * - otherwise it is ESP
 *
 * Only the first 4 payload bytes are needed to tell the cases apart, so
 * only those are copied, into a fixed-size buffer.  This avoids a stack
 * array sized by the packet length, which would be zero-length for an
 * empty payload and arbitrarily large for a big one.
 *
 * Returns remaining_caplen when the payload bytes cannot be read.  The
 * result never exceeds remaining_caplen.
 */
static uint32_t
get_isakmp_natt_trunc_len(struct bpf_packet *pkt, uint32_t off,
    const uint32_t remaining_caplen)
{
	uint8_t marker[4] = { 0 };
	const uint32_t peek_len = MIN(remaining_caplen, (uint32_t)sizeof(marker));
	uint32_t trunc_len = 0;

	if (bpf_copydata(pkt, off, peek_len, marker) != 0) {
		return remaining_caplen;
	}

	if (remaining_caplen >= 4 &&
	    marker[0] == 0 && marker[1] == 0 &&
	    marker[2] == 0 && marker[3] == 0) {
		trunc_len = 4 + get_isakmp_trunc_len(pkt, off + 4, remaining_caplen - 4);
	} else if (remaining_caplen == 1) {
		trunc_len = 1;
	} else {
		trunc_len = get_esp_trunc_len(pkt, off, remaining_caplen);
	}

	if (trunc_len > remaining_caplen) {
		return remaining_caplen;
	}

	return trunc_len;
}
3058 
3059 static uint32_t
get_udp_trunc_len(struct bpf_packet * pkt,uint32_t off,const uint32_t remaining_caplen)3060 get_udp_trunc_len(struct bpf_packet *pkt, uint32_t off, const uint32_t remaining_caplen)
3061 {
3062 	int err = 0;
3063 	uint32_t trunc_len = sizeof(struct udphdr); /* By default no UDP payload */
3064 
3065 	if (trunc_len >= remaining_caplen) {
3066 		return remaining_caplen;
3067 	}
3068 
3069 	struct udphdr udphdr;
3070 	err = bpf_copydata(pkt, off, sizeof(struct udphdr), &udphdr);
3071 	if (err != 0) {
3072 		return remaining_caplen;
3073 	}
3074 
3075 	u_short sport, dport;
3076 
3077 	sport = EXTRACT_SHORT(&udphdr.uh_sport);
3078 	dport = EXTRACT_SHORT(&udphdr.uh_dport);
3079 
3080 	if (dport == PORT_DNS || sport == PORT_DNS) {
3081 		/*
3082 		 * Full UDP payload for DNS
3083 		 */
3084 		trunc_len = remaining_caplen;
3085 	} else if ((sport == PORT_BOOTPS && dport == PORT_BOOTPC) ||
3086 	    (sport == PORT_BOOTPC && dport == PORT_BOOTPS)) {
3087 		/*
3088 		 * Full UDP payload for BOOTP and DHCP
3089 		 */
3090 		trunc_len = remaining_caplen;
3091 	} else if (dport == PORT_ISAKMP && sport == PORT_ISAKMP) {
3092 		/*
3093 		 * Return the ISAKMP header
3094 		 */
3095 		trunc_len += get_isakmp_trunc_len(pkt, off + sizeof(struct udphdr),
3096 		    remaining_caplen - sizeof(struct udphdr));
3097 	} else if (dport == PORT_ISAKMP_NATT && sport == PORT_ISAKMP_NATT) {
3098 		trunc_len += get_isakmp_natt_trunc_len(pkt, off + sizeof(struct udphdr),
3099 		    remaining_caplen - sizeof(struct udphdr));
3100 	}
3101 	if (trunc_len >= remaining_caplen) {
3102 		return remaining_caplen;
3103 	}
3104 
3105 	return trunc_len;
3106 }
3107 
3108 static uint32_t
get_tcp_trunc_len(struct bpf_packet * pkt,uint32_t off,const uint32_t remaining_caplen)3109 get_tcp_trunc_len(struct bpf_packet *pkt, uint32_t off, const uint32_t remaining_caplen)
3110 {
3111 	int err = 0;
3112 	uint32_t trunc_len = sizeof(struct tcphdr); /* By default no TCP payload */
3113 	if (trunc_len >= remaining_caplen) {
3114 		return remaining_caplen;
3115 	}
3116 
3117 	struct tcphdr tcphdr;
3118 	err = bpf_copydata(pkt, off, sizeof(struct tcphdr), &tcphdr);
3119 	if (err != 0) {
3120 		return remaining_caplen;
3121 	}
3122 
3123 	u_short sport, dport;
3124 	sport = EXTRACT_SHORT(&tcphdr.th_sport);
3125 	dport = EXTRACT_SHORT(&tcphdr.th_dport);
3126 
3127 	if (dport == PORT_DNS || sport == PORT_DNS) {
3128 		/*
3129 		 * Full TCP payload  for DNS
3130 		 */
3131 		trunc_len = remaining_caplen;
3132 	} else {
3133 		trunc_len = (uint16_t)(tcphdr.th_off << 2);
3134 	}
3135 	if (trunc_len >= remaining_caplen) {
3136 		return remaining_caplen;
3137 	}
3138 
3139 	return trunc_len;
3140 }
3141 
3142 static uint32_t
get_proto_trunc_len(uint8_t proto,struct bpf_packet * pkt,uint32_t off,const uint32_t remaining_caplen)3143 get_proto_trunc_len(uint8_t proto, struct bpf_packet *pkt, uint32_t off, const uint32_t remaining_caplen)
3144 {
3145 	uint32_t trunc_len;
3146 
3147 	switch (proto) {
3148 	case IPPROTO_ICMP: {
3149 		/*
3150 		 * Full IMCP payload
3151 		 */
3152 		trunc_len = remaining_caplen;
3153 		break;
3154 	}
3155 	case IPPROTO_ICMPV6: {
3156 		/*
3157 		 * Full IMCPV6 payload
3158 		 */
3159 		trunc_len = remaining_caplen;
3160 		break;
3161 	}
3162 	case IPPROTO_IGMP: {
3163 		/*
3164 		 * Full IGMP payload
3165 		 */
3166 		trunc_len = remaining_caplen;
3167 		break;
3168 	}
3169 	case IPPROTO_UDP: {
3170 		trunc_len = get_udp_trunc_len(pkt, off, remaining_caplen);
3171 		break;
3172 	}
3173 	case IPPROTO_TCP: {
3174 		trunc_len = get_tcp_trunc_len(pkt, off, remaining_caplen);
3175 		break;
3176 	}
3177 	case IPPROTO_ESP: {
3178 		trunc_len = get_esp_trunc_len(pkt, off, remaining_caplen);
3179 		break;
3180 	}
3181 	default: {
3182 		/*
3183 		 * By default we only include the IP header
3184 		 */
3185 		trunc_len = 0;
3186 		break;
3187 	}
3188 	}
3189 	if (trunc_len >= remaining_caplen) {
3190 		return remaining_caplen;
3191 	}
3192 
3193 	return trunc_len;
3194 }
3195 
3196 static uint32_t
get_ip_trunc_len(struct bpf_packet * pkt,uint32_t off,const uint32_t remaining_caplen)3197 get_ip_trunc_len(struct bpf_packet *pkt, uint32_t off, const uint32_t remaining_caplen)
3198 {
3199 	int err = 0;
3200 	uint32_t iplen = sizeof(struct ip);
3201 	if (iplen >= remaining_caplen) {
3202 		return remaining_caplen;
3203 	}
3204 
3205 	struct ip iphdr;
3206 	err =  bpf_copydata(pkt, off, sizeof(struct ip), &iphdr);
3207 	if (err != 0) {
3208 		return remaining_caplen;
3209 	}
3210 
3211 	uint8_t proto = 0;
3212 
3213 	iplen = (uint16_t)(iphdr.ip_hl << 2);
3214 	if (iplen >= remaining_caplen) {
3215 		return remaining_caplen;
3216 	}
3217 
3218 	proto = iphdr.ip_p;
3219 	iplen += get_proto_trunc_len(proto, pkt, off + iplen, remaining_caplen - iplen);
3220 
3221 	if (iplen >= remaining_caplen) {
3222 		return remaining_caplen;
3223 	}
3224 
3225 	return iplen;
3226 }
3227 
3228 static uint32_t
get_ip6_trunc_len(struct bpf_packet * pkt,uint32_t off,const uint32_t remaining_caplen)3229 get_ip6_trunc_len(struct bpf_packet *pkt, uint32_t off, const uint32_t remaining_caplen)
3230 {
3231 	int err = 0;
3232 	uint32_t iplen = sizeof(struct ip6_hdr);
3233 	if (iplen >= remaining_caplen) {
3234 		return remaining_caplen;
3235 	}
3236 
3237 	struct ip6_hdr ip6hdr;
3238 	err = bpf_copydata(pkt, off, sizeof(struct ip6_hdr), &ip6hdr);
3239 	if (err != 0) {
3240 		return remaining_caplen;
3241 	}
3242 
3243 	uint8_t proto = 0;
3244 
3245 	/*
3246 	 * TBD: process the extension headers
3247 	 */
3248 	proto = ip6hdr.ip6_nxt;
3249 	iplen += get_proto_trunc_len(proto, pkt, off + iplen, remaining_caplen - iplen);
3250 
3251 	if (iplen >= remaining_caplen) {
3252 		return remaining_caplen;
3253 	}
3254 
3255 	return iplen;
3256 }
3257 
3258 static uint32_t
get_ether_trunc_len(struct bpf_packet * pkt,uint32_t off,const uint32_t remaining_caplen)3259 get_ether_trunc_len(struct bpf_packet *pkt, uint32_t off, const uint32_t remaining_caplen)
3260 {
3261 	int err = 0;
3262 	uint32_t ethlen = sizeof(struct ether_header);
3263 	if (ethlen >= remaining_caplen) {
3264 		return remaining_caplen;
3265 	}
3266 
3267 	struct ether_header eh = {};
3268 	err = bpf_copydata(pkt, off, sizeof(struct ether_header), &eh);
3269 	if (err != 0) {
3270 		return remaining_caplen;
3271 	}
3272 
3273 	u_short type = EXTRACT_SHORT(&eh.ether_type);
3274 	/* Include full ARP */
3275 	if (type == ETHERTYPE_ARP) {
3276 		ethlen = remaining_caplen;
3277 	} else if (type == ETHERTYPE_IP) {
3278 		ethlen += get_ip_trunc_len(pkt, off + sizeof(struct ether_header),
3279 		    remaining_caplen - ethlen);
3280 	} else if (type == ETHERTYPE_IPV6) {
3281 		ethlen += get_ip6_trunc_len(pkt, off + sizeof(struct ether_header),
3282 		    remaining_caplen - ethlen);
3283 	} else {
3284 		ethlen = MIN(BPF_MIN_PKT_SIZE, remaining_caplen);
3285 	}
3286 	return ethlen;
3287 }
3288 
3289 #include <kern/assert.h>
3290 
3291 static uint32_t
get_pkt_trunc_len(struct bpf_packet * pkt)3292 get_pkt_trunc_len(struct bpf_packet *pkt)
3293 {
3294 	struct pktap_header *pktap = (struct pktap_header *) (pkt->bpfp_header);
3295 	uint32_t in_pkt_len = 0;
3296 	uint32_t out_pkt_len = 0;
3297 	uint32_t tlen = 0;
3298 	uint32_t pre_adjust;    // L2 header not in mbuf or kern_packet
3299 
3300 	// bpfp_total_length must contain the BPF packet header
3301 	assert3u(pkt->bpfp_total_length, >=, pkt->bpfp_header_length);
3302 
3303 	// The BPF packet header must contain the pktap header
3304 	assert3u(pkt->bpfp_header_length, >=, pktap->pth_length);
3305 
3306 	// The pre frame length (L2 header) must be contained in the packet
3307 	assert3u(pkt->bpfp_total_length, >=, pktap->pth_length + pktap->pth_frame_pre_length);
3308 
3309 	/*
3310 	 * pktap->pth_frame_pre_length is the L2 header length and accounts
3311 	 * for both L2 header in the packet payload and pre_adjust.
3312 	 *
3313 	 * pre_adjust represents an adjustment for a pseudo L2 header that is not
3314 	 * part of packet payload -- not in the mbuf or kern_packet -- and comes
3315 	 * just after the pktap header.
3316 	 *
3317 	 * pktap->pth_length is the size of the pktap header (exclude pre_adjust)
3318 	 *
3319 	 * pkt->bpfp_header_length is (pktap->pth_length + pre_adjust)
3320 	 */
3321 	pre_adjust = (uint32_t)(pkt->bpfp_header_length - pktap->pth_length);
3322 
3323 	if (pktap->pth_iftype == IFT_ETHER) {
3324 		/*
3325 		 * We need to parse the Ethernet header to find the network layer
3326 		 * protocol
3327 		 */
3328 		in_pkt_len = (uint32_t)(pkt->bpfp_total_length - pktap->pth_length - pre_adjust);
3329 
3330 		out_pkt_len = get_ether_trunc_len(pkt, 0, in_pkt_len);
3331 
3332 		tlen = pktap->pth_length + pre_adjust + out_pkt_len;
3333 	} else {
3334 		/*
3335 		 * For other interface types, we only know to parse IPv4 and IPv6.
3336 		 *
3337 		 * To get to the beginning of the IPv4 or IPv6 packet, we need to to skip
3338 		 * over the L2 header that is the actual packet payload (mbuf or kern_packet)
3339 		 */
3340 		uint32_t off;   // offset past the L2 header in the actual packet payload
3341 
3342 		off = pktap->pth_frame_pre_length - pre_adjust;
3343 
3344 		in_pkt_len = (uint32_t)(pkt->bpfp_total_length - pktap->pth_length - pktap->pth_frame_pre_length);
3345 
3346 		if (pktap->pth_protocol_family == AF_INET) {
3347 			out_pkt_len = get_ip_trunc_len(pkt, off, in_pkt_len);
3348 		} else if (pktap->pth_protocol_family == AF_INET6) {
3349 			out_pkt_len = get_ip6_trunc_len(pkt, off, in_pkt_len);
3350 		} else {
3351 			out_pkt_len = MIN(BPF_MIN_PKT_SIZE, in_pkt_len);
3352 		}
3353 		tlen = pktap->pth_length + pktap->pth_frame_pre_length + out_pkt_len;
3354 	}
3355 
3356 	// Verify we do not overflow the buffer
3357 	if (__improbable(tlen > pkt->bpfp_total_length)) {
3358 		bool do_panic = bpf_debug != 0 ? true : false;
3359 
3360 #if DEBUG
3361 		do_panic = true;
3362 #endif /* DEBUG */
3363 		if (do_panic) {
3364 			panic("%s:%d tlen %u > bpfp_total_length %lu bpfp_header_length %lu pth_frame_pre_length %u pre_adjust %u in_pkt_len %u out_pkt_len %u\n",
3365 			    __func__, __LINE__,
3366 			    tlen, pkt->bpfp_total_length, pkt->bpfp_header_length, pktap->pth_frame_pre_length, pre_adjust, in_pkt_len, out_pkt_len);
3367 		} else {
3368 			os_log(OS_LOG_DEFAULT,
3369 			    "%s:%d tlen %u > bpfp_total_length %lu bpfp_header_length %lu pth_frame_pre_length %u pre_adjust %u in_pkt_len %u out_pkt_len %u",
3370 			    __func__, __LINE__,
3371 			    tlen, pkt->bpfp_total_length, pkt->bpfp_header_length, pktap->pth_frame_pre_length, pre_adjust, in_pkt_len, out_pkt_len);
3372 		}
3373 		bpf_trunc_overflow += 1;
3374 		tlen = (uint32_t)pkt->bpfp_total_length;
3375 	}
3376 
3377 	return tlen;
3378 }
3379 
3380 /*
3381  * Move the packet data from interface memory (pkt) into the
3382  * store buffer.  Return 1 if it's time to wakeup a listener (buffer full),
3383  * otherwise 0.
3384  */
3385 static void
catchpacket(struct bpf_d * d,struct bpf_packet * pkt,u_int snaplen,int outbound)3386 catchpacket(struct bpf_d *d, struct bpf_packet * pkt,
3387     u_int snaplen, int outbound)
3388 {
3389 	struct bpf_hdr *hp;
3390 	struct bpf_hdr_ext *ehp;
3391 	int totlen, curlen;
3392 	int hdrlen, caplen;
3393 	int do_wakeup = 0;
3394 	u_char *payload;
3395 	struct timeval tv;
3396 
3397 	hdrlen = (d->bd_flags & BPF_EXTENDED_HDR) ? d->bd_bif->bif_exthdrlen :
3398 	    d->bd_bif->bif_hdrlen;
3399 	/*
3400 	 * Figure out how many bytes to move.  If the packet is
3401 	 * greater or equal to the snapshot length, transfer that
3402 	 * much.  Otherwise, transfer the whole packet (unless
3403 	 * we hit the buffer size limit).
3404 	 */
3405 	totlen = hdrlen + MIN(snaplen, (int)pkt->bpfp_total_length);
3406 	if (totlen > d->bd_bufsize) {
3407 		totlen = d->bd_bufsize;
3408 	}
3409 
3410 	if (hdrlen > totlen) {
3411 		return;
3412 	}
3413 
3414 	/*
3415 	 * Round up the end of the previous packet to the next longword.
3416 	 */
3417 	curlen = BPF_WORDALIGN(d->bd_slen);
3418 	if (curlen + totlen > d->bd_bufsize) {
3419 		/*
3420 		 * This packet will overflow the storage buffer.
3421 		 * Rotate the buffers if we can, then wakeup any
3422 		 * pending reads.
3423 		 *
3424 		 * We cannot rotate buffers if a read is in progress
3425 		 * so drop the packet
3426 		 */
3427 		if (d->bd_hbuf_read != 0) {
3428 			++d->bd_dcount;
3429 			return;
3430 		}
3431 
3432 		if (d->bd_fbuf == NULL) {
3433 			if (d->bd_headdrop == 0) {
3434 				/*
3435 				 * We haven't completed the previous read yet,
3436 				 * so drop the packet.
3437 				 */
3438 				++d->bd_dcount;
3439 				return;
3440 			}
3441 			/*
3442 			 * Drop the hold buffer as it contains older packets
3443 			 */
3444 			d->bd_dcount += d->bd_hcnt;
3445 			d->bd_fbuf = d->bd_hbuf;
3446 			ROTATE_BUFFERS(d);
3447 		} else {
3448 			ROTATE_BUFFERS(d);
3449 		}
3450 		do_wakeup = 1;
3451 		curlen = 0;
3452 	} else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT) {
3453 		/*
3454 		 * Immediate mode is set, or the read timeout has
3455 		 * already expired during a select call. A packet
3456 		 * arrived, so the reader should be woken up.
3457 		 */
3458 		do_wakeup = 1;
3459 	}
3460 
3461 	/*
3462 	 * Append the bpf header.
3463 	 */
3464 	microtime(&tv);
3465 	if (d->bd_flags & BPF_EXTENDED_HDR) {
3466 		ehp = (struct bpf_hdr_ext *)(void *)(d->bd_sbuf + curlen);
3467 		memset(ehp, 0, sizeof(*ehp));
3468 		ehp->bh_tstamp.tv_sec = (int)tv.tv_sec;
3469 		ehp->bh_tstamp.tv_usec = tv.tv_usec;
3470 
3471 		ehp->bh_datalen = (bpf_u_int32)pkt->bpfp_total_length;
3472 		ehp->bh_hdrlen = (u_short)hdrlen;
3473 		caplen = ehp->bh_caplen = totlen - hdrlen;
3474 		payload = (u_char *)ehp + hdrlen;
3475 
3476 		if (outbound) {
3477 			ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_OUT;
3478 		} else {
3479 			ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_IN;
3480 		}
3481 
3482 		if (pkt->bpfp_type == BPF_PACKET_TYPE_MBUF) {
3483 			struct mbuf *m = pkt->bpfp_mbuf;
3484 
3485 			if (outbound) {
3486 				/* only do lookups on non-raw INPCB */
3487 				if ((m->m_pkthdr.pkt_flags & (PKTF_FLOW_ID |
3488 				    PKTF_FLOW_LOCALSRC | PKTF_FLOW_RAWSOCK)) ==
3489 				    (PKTF_FLOW_ID | PKTF_FLOW_LOCALSRC) &&
3490 				    m->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
3491 					ehp->bh_flowid = m->m_pkthdr.pkt_flowid;
3492 					ehp->bh_proto = m->m_pkthdr.pkt_proto;
3493 				}
3494 				ehp->bh_svc = so_svc2tc(m->m_pkthdr.pkt_svc);
3495 				if (m->m_pkthdr.pkt_flags & PKTF_TCP_REXMT) {
3496 					ehp->bh_pktflags |= BPF_PKTFLAGS_TCP_REXMT;
3497 				}
3498 				if (m->m_pkthdr.pkt_flags & PKTF_START_SEQ) {
3499 					ehp->bh_pktflags |= BPF_PKTFLAGS_START_SEQ;
3500 				}
3501 				if (m->m_pkthdr.pkt_flags & PKTF_LAST_PKT) {
3502 					ehp->bh_pktflags |= BPF_PKTFLAGS_LAST_PKT;
3503 				}
3504 				if (m->m_pkthdr.pkt_flags & PKTF_VALID_UNSENT_DATA) {
3505 					ehp->bh_unsent_bytes =
3506 					    m->m_pkthdr.bufstatus_if;
3507 					ehp->bh_unsent_snd =
3508 					    m->m_pkthdr.bufstatus_sndbuf;
3509 				}
3510 			} else {
3511 				if (m->m_pkthdr.pkt_flags & PKTF_WAKE_PKT) {
3512 					ehp->bh_pktflags |= BPF_PKTFLAGS_WAKE_PKT;
3513 				}
3514 			}
3515 #if SKYWALK
3516 		} else {
3517 			kern_packet_t kern_pkt = pkt->bpfp_pkt;
3518 
3519 			if (outbound) {
3520 				/*
3521 				 * Note: pp_init() asserts that kern_packet_svc_class_t is equivalent
3522 				 * to mbuf_svc_class_t
3523 				 */
3524 				ehp->bh_svc = so_svc2tc((mbuf_svc_class_t)kern_packet_get_service_class(kern_pkt));
3525 				if (kern_packet_get_transport_retransmit(kern_pkt)) {
3526 					ehp->bh_pktflags |= BPF_PKTFLAGS_TCP_REXMT;
3527 				}
3528 				if (kern_packet_get_transport_last_packet(kern_pkt)) {
3529 					ehp->bh_pktflags |= BPF_PKTFLAGS_LAST_PKT;
3530 				}
3531 			} else {
3532 				if (kern_packet_get_wake_flag(kern_pkt)) {
3533 					ehp->bh_pktflags |= BPF_PKTFLAGS_WAKE_PKT;
3534 				}
3535 			}
3536 #endif /* SKYWALK */
3537 		}
3538 	} else {
3539 		hp = (struct bpf_hdr *)(void *)(d->bd_sbuf + curlen);
3540 		hp->bh_tstamp.tv_sec = (int)tv.tv_sec;
3541 		hp->bh_tstamp.tv_usec = tv.tv_usec;
3542 		hp->bh_datalen = (bpf_u_int32)pkt->bpfp_total_length;
3543 		hp->bh_hdrlen = (u_short)hdrlen;
3544 		caplen = hp->bh_caplen = totlen - hdrlen;
3545 		payload = (u_char *)hp + hdrlen;
3546 	}
3547 	/*
3548 	 * Copy the packet data into the store buffer and update its length.
3549 	 */
3550 	copy_bpf_packet(pkt, payload, caplen);
3551 	d->bd_slen = curlen + totlen;
3552 	d->bd_scnt += 1;
3553 
3554 	if (do_wakeup) {
3555 		bpf_wakeup(d);
3556 	}
3557 }
3558 
3559 /*
3560  * Initialize all nonzero fields of a descriptor.
3561  */
3562 static int
bpf_allocbufs(struct bpf_d * d)3563 bpf_allocbufs(struct bpf_d *d)
3564 {
3565 	if (d->bd_sbuf != NULL) {
3566 		kfree_data_addr(d->bd_sbuf);
3567 		d->bd_sbuf = NULL;
3568 	}
3569 	if (d->bd_hbuf != NULL) {
3570 		kfree_data_addr(d->bd_hbuf);
3571 		d->bd_hbuf = NULL;
3572 	}
3573 	if (d->bd_fbuf != NULL) {
3574 		kfree_data_addr(d->bd_fbuf);
3575 		d->bd_fbuf = NULL;
3576 	}
3577 
3578 	d->bd_fbuf = (caddr_t) kalloc_data(d->bd_bufsize, M_WAIT);
3579 	if (d->bd_fbuf == NULL) {
3580 		return ENOBUFS;
3581 	}
3582 
3583 	d->bd_sbuf = (caddr_t) kalloc_data(d->bd_bufsize, M_WAIT);
3584 	if (d->bd_sbuf == NULL) {
3585 		kfree_data(d->bd_fbuf, d->bd_bufsize);
3586 		d->bd_fbuf = NULL;
3587 		return ENOBUFS;
3588 	}
3589 	d->bd_slen = 0;
3590 	d->bd_hlen = 0;
3591 	d->bd_scnt = 0;
3592 	d->bd_hcnt = 0;
3593 	return 0;
3594 }
3595 
3596 /*
3597  * Free buffers currently in use by a descriptor.
3598  * Called on close.
3599  */
3600 static void
bpf_freed(struct bpf_d * d)3601 bpf_freed(struct bpf_d *d)
3602 {
3603 	/*
3604 	 * We don't need to lock out interrupts since this descriptor has
3605 	 * been detached from its interface and it yet hasn't been marked
3606 	 * free.
3607 	 */
3608 	if (d->bd_hbuf_read != 0) {
3609 		panic("bpf buffer freed during read");
3610 	}
3611 
3612 	if (d->bd_sbuf != 0) {
3613 		kfree_data_addr(d->bd_sbuf);
3614 		if (d->bd_hbuf != 0) {
3615 			kfree_data_addr(d->bd_hbuf);
3616 		}
3617 		if (d->bd_fbuf != 0) {
3618 			kfree_data_addr(d->bd_fbuf);
3619 		}
3620 	}
3621 	if (d->bd_filter) {
3622 		kfree_data_addr(d->bd_filter);
3623 	}
3624 }
3625 
3626 /*
3627  * Attach an interface to bpf.	driverp is a pointer to a (struct bpf_if *)
3628  * in the driver's softc; dlt is the link layer type; hdrlen is the fixed
3629  * size of the link header (variable length headers not yet supported).
3630  */
3631 void
bpfattach(struct ifnet * ifp,u_int dlt,u_int hdrlen)3632 bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
3633 {
3634 	bpf_attach(ifp, dlt, hdrlen, NULL, NULL);
3635 }
3636 
3637 errno_t
bpf_attach(ifnet_t ifp,u_int32_t dlt,u_int32_t hdrlen,bpf_send_func send,bpf_tap_func tap)3638 bpf_attach(
3639 	ifnet_t                 ifp,
3640 	u_int32_t               dlt,
3641 	u_int32_t               hdrlen,
3642 	bpf_send_func   send,
3643 	bpf_tap_func    tap)
3644 {
3645 	struct bpf_if *bp;
3646 	struct bpf_if *bp_new;
3647 	struct bpf_if *bp_before_first = NULL;
3648 	struct bpf_if *bp_first = NULL;
3649 	struct bpf_if *bp_last = NULL;
3650 	boolean_t found;
3651 
3652 	bp_new = kalloc_type(struct bpf_if, M_WAIT | Z_ZERO);
3653 	if (bp_new == 0) {
3654 		panic("bpfattach");
3655 	}
3656 
3657 	lck_mtx_lock(bpf_mlock);
3658 
3659 	/*
3660 	 * Check if this interface/dlt is already attached. Remember the
3661 	 * first and last attachment for this interface, as well as the
3662 	 * element before the first attachment.
3663 	 */
3664 	found = FALSE;
3665 	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
3666 		if (bp->bif_ifp != ifp) {
3667 			if (bp_first != NULL) {
3668 				/* no more elements for this interface */
3669 				break;
3670 			}
3671 			bp_before_first = bp;
3672 		} else {
3673 			if (bp->bif_dlt == dlt) {
3674 				found = TRUE;
3675 				break;
3676 			}
3677 			if (bp_first == NULL) {
3678 				bp_first = bp;
3679 			}
3680 			bp_last = bp;
3681 		}
3682 	}
3683 	if (found) {
3684 		lck_mtx_unlock(bpf_mlock);
3685 		printf("bpfattach - %s with dlt %d is already attached\n",
3686 		    if_name(ifp), dlt);
3687 		kfree_type(struct bpf_if, bp_new);
3688 		return EEXIST;
3689 	}
3690 
3691 	bp_new->bif_ifp = ifp;
3692 	bp_new->bif_dlt = dlt;
3693 	bp_new->bif_send = send;
3694 	bp_new->bif_tap = tap;
3695 
3696 	if (bp_first == NULL) {
3697 		/* No other entries for this ifp */
3698 		bp_new->bif_next = bpf_iflist;
3699 		bpf_iflist = bp_new;
3700 	} else {
3701 		if (ifnet_type(ifp) == IFT_ETHER && dlt == DLT_EN10MB) {
3702 			/* Make this the first entry for this interface */
3703 			if (bp_before_first != NULL) {
3704 				/*  point the previous to us */
3705 				bp_before_first->bif_next = bp_new;
3706 			} else {
3707 				/* we're the new head */
3708 				bpf_iflist = bp_new;
3709 			}
3710 			bp_new->bif_next = bp_first;
3711 		} else {
3712 			/* Add this after the last entry for this interface */
3713 			bp_new->bif_next = bp_last->bif_next;
3714 			bp_last->bif_next = bp_new;
3715 		}
3716 	}
3717 
3718 	/*
3719 	 * Compute the length of the bpf header.  This is not necessarily
3720 	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
3721 	 * that the network layer header begins on a longword boundary (for
3722 	 * performance reasons and to alleviate alignment restrictions).
3723 	 */
3724 	bp_new->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;
3725 	bp_new->bif_exthdrlen = BPF_WORDALIGN(hdrlen +
3726 	    sizeof(struct bpf_hdr_ext)) - hdrlen;
3727 
3728 	/* Take a reference on the interface */
3729 	ifnet_reference(ifp);
3730 
3731 	lck_mtx_unlock(bpf_mlock);
3732 
3733 #ifndef __APPLE__
3734 	if (bootverbose) {
3735 		printf("bpf: %s attached\n", if_name(ifp));
3736 	}
3737 #endif
3738 
3739 	return 0;
3740 }
3741 
3742 /*
3743  * Detach bpf from an interface.  This involves detaching each descriptor
3744  * associated with the interface, and leaving bd_bif NULL.  Notify each
3745  * descriptor as it's detached so that any sleepers wake up and get
3746  * ENXIO.
3747  */
3748 void
bpfdetach(struct ifnet * ifp)3749 bpfdetach(struct ifnet *ifp)
3750 {
3751 	struct bpf_if   *bp, *bp_prev, *bp_next;
3752 	struct bpf_d    *d;
3753 
3754 	if (bpf_debug != 0) {
3755 		printf("%s: %s\n", __func__, if_name(ifp));
3756 	}
3757 
3758 	lck_mtx_lock(bpf_mlock);
3759 
3760 	/*
3761 	 * Build the list of devices attached to that interface
3762 	 * that we need to free while keeping the lock to maintain
3763 	 * the integrity of the interface list
3764 	 */
3765 	bp_prev = NULL;
3766 	for (bp = bpf_iflist; bp != NULL; bp = bp_next) {
3767 		bp_next = bp->bif_next;
3768 
3769 		if (ifp != bp->bif_ifp) {
3770 			bp_prev = bp;
3771 			continue;
3772 		}
3773 		/* Unlink from the interface list */
3774 		if (bp_prev) {
3775 			bp_prev->bif_next = bp->bif_next;
3776 		} else {
3777 			bpf_iflist = bp->bif_next;
3778 		}
3779 
3780 		/* Detach the devices attached to the interface */
3781 		while ((d = bp->bif_dlist) != NULL) {
3782 			/*
3783 			 * Take an extra reference to prevent the device
3784 			 * from being freed when bpf_detachd() releases
3785 			 * the reference for the interface list
3786 			 */
3787 			bpf_acquire_d(d);
3788 			bpf_detachd(d, 0);
3789 			bpf_wakeup(d);
3790 			bpf_release_d(d);
3791 		}
3792 		ifnet_release(ifp);
3793 	}
3794 
3795 	lck_mtx_unlock(bpf_mlock);
3796 }
3797 
3798 void
bpf_init(__unused void * unused)3799 bpf_init(__unused void *unused)
3800 {
3801 #ifdef __APPLE__
3802 	int     maj;
3803 
3804 	if (bpf_devsw_installed == 0) {
3805 		bpf_devsw_installed = 1;
3806 		maj = cdevsw_add(CDEV_MAJOR, &bpf_cdevsw);
3807 		if (maj == -1) {
3808 			bpf_devsw_installed = 0;
3809 			printf("bpf_init: failed to allocate a major number\n");
3810 			return;
3811 		}
3812 
3813 		for (int i = 0; i < NBPFILTER; i++) {
3814 			bpf_make_dev_t(maj);
3815 		}
3816 	}
3817 #else
3818 	cdevsw_add(&bpf_cdevsw);
3819 #endif
3820 }
3821 
3822 #ifndef __APPLE__
3823 SYSINIT(bpfdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE + CDEV_MAJOR, bpf_drvinit, NULL);
3824 #endif
3825 
3826 static int
3827 sysctl_bpf_maxbufsize SYSCTL_HANDLER_ARGS
3828 {
3829 #pragma unused(arg1, arg2)
3830 	int i, err;
3831 
3832 	i = bpf_maxbufsize;
3833 
3834 	err = sysctl_handle_int(oidp, &i, 0, req);
3835 	if (err != 0 || req->newptr == USER_ADDR_NULL) {
3836 		return err;
3837 	}
3838 
3839 	if (i < 0 || i > BPF_MAXSIZE_CAP) {
3840 		i = BPF_MAXSIZE_CAP;
3841 	}
3842 
3843 	bpf_maxbufsize = i;
3844 	return err;
3845 }
3846