xref: /xnu-10063.141.1/bsd/net/bpf.c (revision d8b80295118ef25ac3a784134bcf95cd8e88109f)
1 /*
2  * Copyright (c) 2000-2022 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * Copyright (c) 1990, 1991, 1993
30  *	The Regents of the University of California.  All rights reserved.
31  *
32  * This code is derived from the Stanford/CMU enet packet filter,
33  * (net/enet.c) distributed as part of 4.3BSD, and code contributed
34  * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
35  * Berkeley Laboratory.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. All advertising materials mentioning features or use of this software
46  *    must display the following acknowledgement:
47  *	This product includes software developed by the University of
48  *	California, Berkeley and its contributors.
49  * 4. Neither the name of the University nor the names of its contributors
50  *    may be used to endorse or promote products derived from this software
51  *    without specific prior written permission.
52  *
53  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63  * SUCH DAMAGE.
64  *
65  *	@(#)bpf.c	8.2 (Berkeley) 3/28/94
66  *
67  * $FreeBSD: src/sys/net/bpf.c,v 1.59.2.5 2001/01/05 04:49:09 jdp Exp $
68  */
69 /*
70  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
71  * support for mandatory and extensible security protections.  This notice
72  * is included in support of clause 2.2 (b) of the Apple Public License,
73  * Version 2.0.
74  */
75 
76 #include "bpf.h"
77 
78 #ifndef __GNUC__
79 #define inline
80 #else
81 #define inline __inline
82 #endif
83 
84 #include <sys/param.h>
85 #include <sys/systm.h>
86 #include <sys/conf.h>
87 #include <sys/malloc.h>
88 #include <sys/mbuf.h>
89 #include <sys/time.h>
90 #include <sys/proc.h>
91 #include <sys/signalvar.h>
92 #include <sys/filio.h>
93 #include <sys/sockio.h>
94 #include <sys/ttycom.h>
95 #include <sys/filedesc.h>
96 #include <sys/uio_internal.h>
97 #include <sys/file_internal.h>
98 #include <sys/event.h>
99 
100 #include <sys/poll.h>
101 
102 #include <sys/socket.h>
103 #include <sys/socketvar.h>
104 #include <sys/vnode.h>
105 
106 #include <net/if.h>
107 #include <net/bpf.h>
108 #include <net/bpfdesc.h>
109 
110 #include <netinet/in.h>
111 #include <netinet/ip.h>
112 #include <netinet/ip6.h>
113 #include <netinet/in_pcb.h>
114 #include <netinet/in_var.h>
115 #include <netinet/ip_var.h>
116 #include <netinet/tcp.h>
117 #include <netinet/tcp_var.h>
118 #include <netinet/udp.h>
119 #include <netinet/udp_var.h>
120 #include <netinet/if_ether.h>
121 #include <netinet/isakmp.h>
122 #include <netinet6/esp.h>
123 #include <sys/kernel.h>
124 #include <sys/sysctl.h>
125 #include <net/firewire.h>
126 
127 #include <miscfs/devfs/devfs.h>
128 #include <net/dlil.h>
129 #include <net/pktap.h>
130 
131 #include <net/sockaddr_utils.h>
132 
133 #include <kern/assert.h>
134 #include <kern/locks.h>
135 #include <kern/thread_call.h>
136 #include <libkern/section_keywords.h>
137 
138 #include <os/log.h>
139 
140 #include <IOKit/IOBSD.h>
141 
142 
143 extern int tvtohz(struct timeval *);
144 extern char *proc_name_address(void *p);
145 
146 #define BPF_BUFSIZE 4096
147 
148 #define PRINET  26                      /* interruptible */
149 
150 #define ISAKMP_HDR_SIZE (sizeof(struct isakmp) + sizeof(struct isakmp_gen))
151 #define ESP_HDR_SIZE sizeof(struct newesp)
152 
153 #define BPF_WRITE_LEEWAY 18     /* space for link layer header */
154 
155 #define BPF_WRITE_MAX 0x1000000 /* 16 MB arbitrary value */
156 
157 typedef void (*pktcopyfunc_t)(const void *, void *, size_t);
158 
159 /*
160  * The default read buffer size is patchable.
161  */
162 static unsigned int bpf_bufsize = BPF_BUFSIZE;
163 SYSCTL_INT(_debug, OID_AUTO, bpf_bufsize, CTLFLAG_RW | CTLFLAG_LOCKED,
164     &bpf_bufsize, 0, "");
165 
166 __private_extern__ unsigned int bpf_maxbufsize = BPF_MAXBUFSIZE;
167 static int sysctl_bpf_maxbufsize SYSCTL_HANDLER_ARGS;
168 SYSCTL_PROC(_debug, OID_AUTO, bpf_maxbufsize, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
169     &bpf_maxbufsize, 0,
170     sysctl_bpf_maxbufsize, "I", "Default BPF max buffer size");
171 
172 extern const int copysize_limit_panic;
173 #define BPF_BUFSIZE_CAP (copysize_limit_panic >> 1)
174 static int sysctl_bpf_bufsize_cap SYSCTL_HANDLER_ARGS;
175 SYSCTL_PROC(_debug, OID_AUTO, bpf_bufsize_cap, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
176     0, 0,
177     sysctl_bpf_bufsize_cap, "I", "Upper limit on BPF max buffer size");
178 
179 #define BPF_MAX_DEVICES 256
180 static unsigned int bpf_maxdevices = BPF_MAX_DEVICES;
181 SYSCTL_UINT(_debug, OID_AUTO, bpf_maxdevices, CTLFLAG_RD | CTLFLAG_LOCKED,
182     &bpf_maxdevices, 0, "");
183 
184 /*
185  * bpf_wantpktap controls the default visibility of DLT_PKTAP
186  * On OS X it is off by default, so a process needs to use the ioctl BPF_WANT_PKTAP
187  * explicitly to be able to use DLT_PKTAP.
188  */
189 #if !XNU_TARGET_OS_OSX
190 static unsigned int bpf_wantpktap = 1;
191 #else /* XNU_TARGET_OS_OSX */
192 static unsigned int bpf_wantpktap = 0;
193 #endif /* XNU_TARGET_OS_OSX */
194 SYSCTL_UINT(_debug, OID_AUTO, bpf_wantpktap, CTLFLAG_RW | CTLFLAG_LOCKED,
195     &bpf_wantpktap, 0, "");
196 
197 static int bpf_debug = 0;
198 SYSCTL_INT(_debug, OID_AUTO, bpf_debug, CTLFLAG_RW | CTLFLAG_LOCKED,
199     &bpf_debug, 0, "");
200 
201 static unsigned long bpf_trunc_overflow = 0;
202 SYSCTL_ULONG(_debug, OID_AUTO, bpf_trunc_overflow, CTLFLAG_RD | CTLFLAG_LOCKED,
203     &bpf_trunc_overflow, "");
204 
205 static int bpf_hdr_comp_enable = 1;
206 SYSCTL_INT(_debug, OID_AUTO, bpf_hdr_comp_enable, CTLFLAG_RW | CTLFLAG_LOCKED,
207     &bpf_hdr_comp_enable, 1, "");
208 
209 static int sysctl_bpf_stats SYSCTL_HANDLER_ARGS;
210 SYSCTL_PROC(_debug, OID_AUTO, bpf_stats, CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
211     0, 0,
212     sysctl_bpf_stats, "S", "BPF statistics");
213 
214 /*
215  *  bpf_iflist is the list of interfaces; each corresponds to an ifnet
216  *  bpf_dtab holds pointer to the descriptors, indexed by minor device #
217  */
218 static struct bpf_if    *bpf_iflist;
219 /*
220  * BSD now stores the bpf_d in the dev_t which is a struct
221  * on their system. Our dev_t is an int, so we still store
222  * the bpf_d in a separate table indexed by minor device #.
223  *
224  * The value stored in bpf_dtab[n] represent three states:
225  *  NULL: device not opened
226  *  BPF_DEV_RESERVED: device opening or closing
227  *  other: device <n> opened with pointer to storage
228  */
229 #define BPF_DEV_RESERVED ((struct bpf_d *)(uintptr_t)1)
230 static struct bpf_d **bpf_dtab = NULL;
231 static unsigned int bpf_dtab_size = 0;
232 static unsigned int nbpfilter = 0;
233 static unsigned bpf_bpfd_cnt = 0;
234 
235 static LCK_GRP_DECLARE(bpf_mlock_grp, "bpf");
236 static LCK_MTX_DECLARE(bpf_mlock_data, &bpf_mlock_grp);
237 static lck_mtx_t *const bpf_mlock = &bpf_mlock_data;
238 
239 static int      bpf_allocbufs(struct bpf_d *);
240 static errno_t  bpf_attachd(struct bpf_d *d, struct bpf_if *bp);
241 static int      bpf_detachd(struct bpf_d *d);
242 static void     bpf_freed(struct bpf_d *);
243 static int      bpf_setif(struct bpf_d *, ifnet_t ifp, bool, bool, bool);
244 static void     bpf_timed_out(void *, void *);
245 static void     bpf_wakeup(struct bpf_d *);
246 static uint32_t get_pkt_trunc_len(struct bpf_packet *);
247 static void     catchpacket(struct bpf_d *, struct bpf_packet *, u_int, int);
248 static void     reset_d(struct bpf_d *);
249 static int      bpf_setf(struct bpf_d *, u_int, user_addr_t, u_long);
250 static int      bpf_getdltlist(struct bpf_d *, caddr_t, struct proc *);
251 static int      bpf_setdlt(struct bpf_d *, u_int);
252 static int      bpf_set_traffic_class(struct bpf_d *, int);
253 static void     bpf_set_packet_service_class(struct mbuf *, int);
254 
255 static void     bpf_acquire_d(struct bpf_d *);
256 static void     bpf_release_d(struct bpf_d *);
257 
258 static  int bpf_devsw_installed;
259 
260 void bpf_init(void *unused);
261 static int bpf_tap_callback(struct ifnet *ifp, struct mbuf *m);
262 
263 /*
264  * Darwin differs from BSD here, the following are static
265  * on BSD and not static on Darwin.
266  */
267 d_open_t            bpfopen;
268 d_close_t           bpfclose;
269 d_read_t            bpfread;
270 d_write_t           bpfwrite;
271 ioctl_fcn_t         bpfioctl;
272 select_fcn_t        bpfselect;
273 
274 /* Darwin's cdevsw struct differs slightly from BSDs */
275 #define CDEV_MAJOR 23
276 static const struct cdevsw bpf_cdevsw = {
277 	.d_open       = bpfopen,
278 	.d_close      = bpfclose,
279 	.d_read       = bpfread,
280 	.d_write      = bpfwrite,
281 	.d_ioctl      = bpfioctl,
282 	.d_stop       = eno_stop,
283 	.d_reset      = eno_reset,
284 	.d_ttys       = NULL,
285 	.d_select     = bpfselect,
286 	.d_mmap       = eno_mmap,
287 	.d_strategy   = eno_strat,
288 	.d_reserved_1 = eno_getc,
289 	.d_reserved_2 = eno_putc,
290 	.d_type       = 0
291 };
292 
293 #define SOCKADDR_HDR_LEN           offsetof(struct sockaddr, sa_data)
294 
295 static int
bpf_copy_uio_to_mbuf_packet(struct uio * auio,int bytes_to_copy,struct mbuf * top)296 bpf_copy_uio_to_mbuf_packet(struct uio *auio, int bytes_to_copy, struct mbuf *top)
297 {
298 	int error = 0;
299 
300 	for (struct mbuf *m = top; m != NULL; m = m->m_next) {
301 		int mlen;
302 
303 		if (m->m_flags & M_EXT) {
304 			mlen = m->m_ext.ext_size - (int)M_LEADINGSPACE(m);
305 		} else if (m->m_flags & M_PKTHDR) {
306 			mlen = MHLEN - (int)M_LEADINGSPACE(m);
307 		} else {
308 			mlen = MLEN - (int)M_LEADINGSPACE(m);
309 		}
310 		int copy_len = imin((int)mlen, bytes_to_copy);
311 
312 		error = uiomove(mtod(m, caddr_t), (int)copy_len, auio);
313 		if (error != 0) {
314 			os_log(OS_LOG_DEFAULT, "bpf_copy_uio_to_mbuf_packet: len %d error %d",
315 			    copy_len, error);
316 			goto done;
317 		}
318 		m->m_len = copy_len;
319 		top->m_pkthdr.len += copy_len;
320 
321 		if (bytes_to_copy > copy_len) {
322 			bytes_to_copy -= copy_len;
323 		} else {
324 			break;
325 		}
326 	}
327 done:
328 	return error;
329 }
330 
331 static inline void
bpf_set_bcast_mcast(mbuf_t m,struct ether_header * eh)332 bpf_set_bcast_mcast(mbuf_t m, struct ether_header * eh)
333 {
334 	if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
335 		if (_ether_cmp(etherbroadcastaddr, eh->ether_dhost) == 0) {
336 			m->m_flags |= M_BCAST;
337 		} else {
338 			m->m_flags |= M_MCAST;
339 		}
340 	}
341 }
342 
#if DEBUG || DEVELOPMENT
/*
 * Debug helper: log whether a packet written through BPF was tagged
 * broadcast, multicast or unicast.
 *
 * 'flags' are the mbuf flags (M_BCAST/M_MCAST); 'hdrcmplt' reports which
 * write path tagged the packet (callers pass false for the sockaddr-built
 * header path and true when the header was part of the user data).
 *
 * Guard normalized to `DEBUG || DEVELOPMENT` for consistency with the
 * call sites; the previous bitwise `|` form had the same truth value.
 */
static void
bpf_log_bcast(const char * func, const char * ifname, uint16_t flags,
    bool hdrcmplt)
{
	const char *    type;

	if ((flags & M_BCAST) != 0) {
		type = "broadcast";
	} else if ((flags & M_MCAST) != 0) {
		type = "multicast";
	} else {
		type = "unicast";
	}
	os_log(OS_LOG_DEFAULT, "%s %s %s hdrcmplt=%s", func, ifname, type,
	    hdrcmplt ? "true" : "false");
}
#endif /* DEBUG || DEVELOPMENT */
361 
/*
 * Copy one packet worth of user data ('copy_len' bytes) from 'uio' into a
 * newly allocated mbuf chain returned through '*mp'.
 *
 * When 'sockp' is non-NULL, the link-layer header (size chosen from the
 * descriptor's DLT) is copied into sockp->sa_data instead of the mbuf data
 * and sockp->sa_family is set for the link type, leaving leading space in
 * the mbuf for the interface to prepend the header.  When 'sockp' is NULL
 * the user bytes are sent verbatim and 'hlen' is forced to 0.
 *
 * Called with bpf_mlock held; the lock is dropped around the mbuf
 * allocation and the copyin, then re-acquired before returning.
 *
 * Returns 0 on success; EIO for an unsupported DLT or an undersized
 * sockaddr; EMSGSIZE for inconsistent or oversized lengths; or an error
 * from mbuf_allocpacket()/uiomove()/bpf_copy_uio_to_mbuf_packet().
 */
static int
bpf_movein(struct uio *uio, int copy_len, struct bpf_d *d, struct mbuf **mp,
    struct sockaddr *sockp)
{
	struct mbuf *m = NULL;
	int error;
	int len;
	uint8_t sa_family;
	int hlen = 0;
	struct ifnet *ifp = d->bd_bif->bif_ifp;
	int linktype = (int)d->bd_bif->bif_dlt;

	/*
	 * Derive the address family and link-layer header length from the
	 * data link type of the attached interface.
	 */
	switch (linktype) {
#if SLIP
	case DLT_SLIP:
		sa_family = AF_INET;
		hlen = 0;
		break;
#endif /* SLIP */

	case DLT_EN10MB:
		sa_family = AF_UNSPEC;
		/* XXX Would MAXLINKHDR be better? */
		hlen = sizeof(struct ether_header);
		break;

#if FDDI
	case DLT_FDDI:
#if defined(__FreeBSD__) || defined(__bsdi__)
		sa_family = AF_IMPLINK;
		hlen = 0;
#else
		sa_family = AF_UNSPEC;
		/* XXX 4(FORMAC)+6(dst)+6(src)+3(LLC)+5(SNAP) */
		hlen = 24;
#endif
		break;
#endif /* FDDI */

	case DLT_RAW:
	case DLT_NULL:
		sa_family = AF_UNSPEC;
		hlen = 0;
		break;

#ifdef __FreeBSD__
	case DLT_ATM_RFC1483:
		/*
		 * en atm driver requires 4-byte atm pseudo header.
		 * though it isn't standard, vpi:vci needs to be
		 * specified anyway.
		 */
		sa_family = AF_UNSPEC;
		hlen = 12;      /* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
		break;
#endif

	case DLT_PPP:
		sa_family = AF_UNSPEC;
		hlen = 4;       /* This should match PPP_HDRLEN */
		break;

	case DLT_APPLE_IP_OVER_IEEE1394:
		sa_family = AF_UNSPEC;
		hlen = sizeof(struct firewire_header);
		break;

	case DLT_IEEE802_11:            /* IEEE 802.11 wireless */
		sa_family = AF_IEEE80211;
		hlen = 0;
		break;

	case DLT_IEEE802_11_RADIO:
		sa_family = AF_IEEE80211;
		hlen = 0;
		break;

	default:
		return EIO;
	}

	if (sockp) {
		/*
		 * Build a sockaddr based on the data link layer type.
		 * We do this at this level because the ethernet header
		 * is copied directly into the data field of the sockaddr.
		 * In the case of SLIP, there is no header and the packet
		 * is forwarded as is.
		 * Also, we are careful to leave room at the front of the mbuf
		 * for the link level header.
		 */
		if ((hlen + SOCKADDR_HDR_LEN) > sockp->sa_len) {
			return EIO;
		}
		sockp->sa_family = sa_family;
	} else {
		/*
		 * We're directly sending the packet data supplied by
		 * the user; we don't need to make room for the link
		 * header, and don't need the header length value any
		 * more, so set it to 0.
		 */
		hlen = 0;
	}

	/* The requested length must actually be available in the uio */
	len = (int)uio_resid(uio);
	if (len < copy_len) {
		os_log(OS_LOG_DEFAULT, "bpfwrite: len %d if %s less than copy_len %d",
		    (unsigned)len, ifp->if_xname, copy_len);
		return EMSGSIZE;
	}
	len = copy_len;
	if (len < hlen || (unsigned)len > BPF_WRITE_MAX) {
		os_log(OS_LOG_DEFAULT, "bpfwrite: bad len %d if %s",
		    (unsigned)len, ifp->if_xname);
		return EMSGSIZE;
	}
	if (d->bd_write_size_max != 0) {
		/*
		 * NOTE(review): unlike the MTU branch below, this branch only
		 * logs an oversized write and does not return EMSGSIZE --
		 * presumably intentional leniency; confirm against callers.
		 */
		if ((len - hlen) > (d->bd_write_size_max + BPF_WRITE_LEEWAY)) {
			os_log(OS_LOG_DEFAULT, "bpfwrite: len %u - hlen %u too big if %s write_size_max %u",
			    (unsigned)len, (unsigned)hlen, ifp->if_xname, d->bd_write_size_max);
		}
	} else if ((len - hlen) > (ifp->if_mtu + BPF_WRITE_LEEWAY)) {
		os_log(OS_LOG_DEFAULT, "bpfwrite: len %u - hlen %u too big if %s mtu %u",
		    (unsigned)len, (unsigned)hlen, ifp->if_xname, ifp->if_mtu);
		return EMSGSIZE;
	}

	/* drop lock while allocating mbuf and copying data */
	lck_mtx_unlock(bpf_mlock);

	error = mbuf_allocpacket(MBUF_WAITOK, len, NULL, &m);
	if (error != 0) {
		os_log(OS_LOG_DEFAULT,
		    "bpfwrite mbuf_allocpacket len %d error %d", len, error);
		goto bad;
	}
	/*
	 * Make room for link header -- the packet length is 0 at this stage
	 */
	if (hlen != 0) {
		m->m_data += hlen; /* leading space */
		error = uiomove((caddr_t)sockp->sa_data, hlen, uio);
		if (error) {
			os_log(OS_LOG_DEFAULT,
			    "bpfwrite uiomove hlen %d error %d", hlen, error);
			goto bad;
		}
		len -= hlen;
		if (linktype == DLT_EN10MB) {
			struct ether_header * eh;

			/* Tag bcast/mcast from the header copied into sockp */
			eh = (struct ether_header *)(void *)sockp->sa_data;
			bpf_set_bcast_mcast(m, eh);
#if DEBUG || DEVELOPMENT
			if (__improbable(bpf_debug != 0)) {
				bpf_log_bcast(__func__, ifp->if_xname,
				    m->m_flags, false);
			}
#endif /* DEBUG || DEVELOPMENT */
		}
	}
	/*
	 * bpf_copy_uio_to_mbuf_packet() does set the length of each mbuf and adds it to
	 * the total packet length
	 */
	error = bpf_copy_uio_to_mbuf_packet(uio, len, m);
	if (error != 0) {
		os_log(OS_LOG_DEFAULT,
		    "bpfwrite bpf_copy_uio_to_mbuf_packet error %d", error);
		goto bad;
	}

	/* Check for multicast destination */
	if (hlen == 0 && linktype == DLT_EN10MB) {
		struct ether_header *eh;

		/* Header is at the front of the packet data in this path */
		eh = mtod(m, struct ether_header *);
		bpf_set_bcast_mcast(m, eh);
#if DEBUG || DEVELOPMENT
		if (__improbable(bpf_debug != 0)) {
			bpf_log_bcast(__func__, ifp->if_xname,
			    m->m_flags, true);
		}
#endif /* DEBUG || DEVELOPMENT */
	}
	*mp = m;

	lck_mtx_lock(bpf_mlock);
	return 0;
bad:
	if (m != NULL) {
		m_freem(m);
	}
	lck_mtx_lock(bpf_mlock);
	return error;
}
559 
/*
 * Parse a batch of BPF records from 'uio' and build a packet chain.
 *
 * Each record is a struct bpf_hdr followed by bh_caplen bytes of packet
 * data, with the whole record padded with BPF_WORDALIGN.  The resulting
 * packets are linked through m_nextpkt and returned via '*mp'.
 *
 * Per-record validation: bh_tstamp is ignored; bh_hdrlen must cover at
 * least the fixed fields and must fit in what remains of the uio;
 * bh_caplen must equal bh_datalen (no truncated packets on write).
 *
 * On error the partially built chain is freed and '*mp' is reset to NULL.
 * Called with bpf_mlock held (bpf_movein() drops/retakes it internally).
 */
static int
bpf_movein_batch(struct uio *uio, struct bpf_d *d, struct mbuf **mp,
    struct sockaddr *sockp)
{
	int error = 0;
	user_ssize_t resid;
	int count = 0;   /* number of packets chained so far */
	struct mbuf *last = NULL;

	*mp = NULL;
	while ((resid = uio_resid(uio)) >= sizeof(struct bpf_hdr)) {
		struct bpf_hdr bpfhdr = {};
		/* Only the fields up to and including bh_hdrlen are read */
		int bpf_hdr_min_len = offsetof(struct bpf_hdr, bh_hdrlen) + sizeof(bpfhdr.bh_hdrlen);
		int padding_len;

		error = uiomove((caddr_t)&bpfhdr, bpf_hdr_min_len, uio);
		if (error != 0) {
			os_log(OS_LOG_DEFAULT, "bpf_movein_batch uiomove error %d", error);
			break;
		}
		/*
		 * Buffer validation:
		 * - ignore bh_tstamp
		 * - bh_hdrlen must fit
		 * - bh_caplen and bh_datalen must be equal
		 */
		if (bpfhdr.bh_hdrlen < bpf_hdr_min_len) {
			error = EINVAL;
			os_log(OS_LOG_DEFAULT, "bpf_movein_batch bh_hdrlen %u too small",
			    bpfhdr.bh_hdrlen);
			break;
		}
		if (bpfhdr.bh_caplen != bpfhdr.bh_datalen) {
			error = EINVAL;
			os_log(OS_LOG_DEFAULT, "bpf_movein_batch bh_caplen %u != bh_datalen %u",
			    bpfhdr.bh_caplen, bpfhdr.bh_datalen);
			break;
		}
		if (bpfhdr.bh_hdrlen > resid) {
			error = EINVAL;
			os_log(OS_LOG_DEFAULT, "bpf_movein_batch bh_hdrlen %u too large",
			    bpfhdr.bh_hdrlen);
			break;
		}

		/*
		 * Ignore additional bytes in the header
		 */
		padding_len = bpfhdr.bh_hdrlen - bpf_hdr_min_len;
		if (padding_len > 0) {
			uio_update(uio, padding_len);
		}

		/* skip empty packets */
		if (bpfhdr.bh_caplen > 0) {
			struct mbuf *m;

			/*
			 * For time being assume all packets have same destination
			 */
			error = bpf_movein(uio, bpfhdr.bh_caplen, d, &m, sockp);
			if (error != 0) {
				os_log(OS_LOG_DEFAULT, "bpf_movein_batch bpf_movein error %d",
				    error);
				break;
			}
			count += 1;

			/* Append the new packet to the chain via m_nextpkt */
			if (last == NULL) {
				*mp = m;
			} else {
				last->m_nextpkt = m;
			}
			last = m;
		}

		/*
		 * Each BPF packet is padded for alignment
		 */
		padding_len = BPF_WORDALIGN(bpfhdr.bh_hdrlen + bpfhdr.bh_caplen) - (bpfhdr.bh_hdrlen + bpfhdr.bh_caplen);
		if (padding_len > 0) {
			uio_update(uio, padding_len);
		}
	}

	if (error != 0) {
		/* Release any packets already chained before failing */
		if (*mp != NULL) {
			m_freem_list(*mp);
			*mp = NULL;
		}
	}
	return error;
}
653 
/*
 * The dynamic addition of a new device node must block all processes that
 * are opening the last device so that no process will get an unexpected
 * ENOENT
 *
 * Grows bpf_dtab by NBPFILTER entries when full, then creates the next
 * /dev/bpf<n> node.  Serialized via the static 'bpf_growing' flag:
 * concurrent callers sleep on its address until the grower wakes them.
 */
static void
bpf_make_dev_t(int maj)
{
	static int              bpf_growing = 0;
	unsigned int    cur_size = nbpfilter, i;

	/* Hard cap on the number of bpf devices */
	if (nbpfilter >= BPF_MAX_DEVICES) {
		return;
	}

	while (bpf_growing) {
		/* Wait until new device has been created */
		(void) tsleep((caddr_t)&bpf_growing, PZERO, "bpf_growing", 0);
	}
	if (nbpfilter > cur_size) {
		/* other thread grew it already */
		return;
	}
	bpf_growing = 1;

	/* need to grow bpf_dtab first */
	if (nbpfilter == bpf_dtab_size) {
		unsigned int new_dtab_size;
		struct bpf_d **new_dtab = NULL;

		new_dtab_size = bpf_dtab_size + NBPFILTER;
		new_dtab = krealloc_type(struct bpf_d *,
		    bpf_dtab_size, new_dtab_size, bpf_dtab, Z_WAITOK | Z_ZERO);
		if (new_dtab == 0) {
			os_log_error(OS_LOG_DEFAULT, "bpf_make_dev_t: malloc bpf_dtab failed");
			goto done;
		}
		bpf_dtab = new_dtab;
		bpf_dtab_size = new_dtab_size;
	}
	/* Claim the next minor and publish its device node */
	i = nbpfilter++;
	(void) devfs_make_node(makedev(maj, i),
	    DEVFS_CHAR, UID_ROOT, GID_WHEEL, 0600,
	    "bpf%d", i);
done:
	bpf_growing = 0;
	wakeup((caddr_t)&bpf_growing);
}
702 
/*
 * Attach file to the bpf interface, i.e. make d listen on bp.
 *
 * Called with bpf_mlock held.  Takes a reference on 'd' (paired with the
 * release in bpf_detachd()) even when an error is returned, because 'd'
 * remains on the interface's listener list.  When 'd' is the first
 * listener for 'bp', the interface tap is enabled: dlil_set_bpf_tap()
 * only for the primary DLT, plus the per-DLT bif_tap callback if set.
 */
static errno_t
bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
{
	int first = bp->bif_dlist == NULL;
	int     error = 0;

	/*
	 * Point d at bp, and add d to the interface's list of listeners.
	 * Finally, point the driver's bpf cookie at the interface so
	 * it will divert packets to bpf.
	 */
	d->bd_bif = bp;
	d->bd_next = bp->bif_dlist;
	bp->bif_dlist = d;
	bpf_bpfd_cnt++;

	/*
	 * Take a reference on the device even if an error is returned
	 * because we keep the device in the interface's list of listeners
	 */
	bpf_acquire_d(d);

	if (first) {
		/* Find the default bpf entry for this ifp */
		if (bp->bif_ifp->if_bpf == NULL) {
			struct bpf_if   *tmp, *primary = NULL;

			/* First bpf_if on the list for this ifp is primary */
			for (tmp = bpf_iflist; tmp; tmp = tmp->bif_next) {
				if (tmp->bif_ifp == bp->bif_ifp) {
					primary = tmp;
					break;
				}
			}
			bp->bif_ifp->if_bpf = primary;
		}
		/* Only call dlil_set_bpf_tap for primary dlt */
		if (bp->bif_ifp->if_bpf == bp) {
			dlil_set_bpf_tap(bp->bif_ifp, BPF_TAP_INPUT_OUTPUT,
			    bpf_tap_callback);
		}

		if (bp->bif_tap != NULL) {
			error = bp->bif_tap(bp->bif_ifp, bp->bif_dlt,
			    BPF_TAP_INPUT_OUTPUT);
		}
	}

	/*
	 * Reset the detach flags in case we previously detached an interface
	 */
	d->bd_flags &= ~(BPF_DETACHING | BPF_DETACHED);

	/* Remember whether this descriptor is tapping DLT_PKTAP */
	if (bp->bif_dlt == DLT_PKTAP) {
		d->bd_flags |= BPF_FINALIZE_PKTAP;
	} else {
		d->bd_flags &= ~BPF_FINALIZE_PKTAP;
	}
	return error;
}
765 
/*
 * Detach a file from its interface.
 *
 * Return 1 if was closed by some thread, 0 otherwise
 *
 * Called with bpf_mlock held.  The lock is dropped while turning off
 * promiscuous mode and disabling the tap, then re-taken; 'd' is kept
 * alive across that window by the reference taken in bpf_attachd(),
 * which is released at the end of the detach.  The BPF_DETACHING flag
 * makes the detach idempotent against concurrent callers.
 */
static int
bpf_detachd(struct bpf_d *d)
{
	struct bpf_d **p;
	struct bpf_if *bp;
	struct ifnet  *ifp;
	uint32_t dlt;
	bpf_tap_func disable_tap;
	uint8_t bd_promisc;

	int bpf_closed = d->bd_flags & BPF_CLOSING;
	/*
	 * Some other thread already detached
	 */
	if ((d->bd_flags & (BPF_DETACHED | BPF_DETACHING)) != 0) {
		goto done;
	}
	/*
	 * This thread is doing the detach
	 */
	d->bd_flags |= BPF_DETACHING;

	ifp = d->bd_bif->bif_ifp;
	bp = d->bd_bif;

	/* Remove d from the interface's descriptor list. */
	p = &bp->bif_dlist;
	while (*p != d) {
		p = &(*p)->bd_next;
		if (*p == 0) {
			panic("bpf_detachd: descriptor not in list");
		}
	}
	*p = (*p)->bd_next;
	bpf_bpfd_cnt--;
	disable_tap = NULL;
	if (bp->bif_dlist == 0) {
		/*
		 * Let the driver know that there are no more listeners.
		 */
		/* Only call dlil_set_bpf_tap for primary dlt */
		if (bp->bif_ifp->if_bpf == bp) {
			dlil_set_bpf_tap(ifp, BPF_TAP_DISABLE, NULL);
		}

		/* Defer the per-DLT tap disable until the lock is dropped */
		disable_tap = bp->bif_tap;
		if (disable_tap) {
			dlt = bp->bif_dlt;
		}

		/* If no other DLT on this ifp has listeners, clear if_bpf */
		for (bp = bpf_iflist; bp; bp = bp->bif_next) {
			if (bp->bif_ifp == ifp && bp->bif_dlist != 0) {
				break;
			}
		}
		if (bp == NULL) {
			ifp->if_bpf = NULL;
		}
	}
	d->bd_bif = NULL;
	/*
	 * Check if this descriptor had requested promiscuous mode.
	 * If so, turn it off.
	 */
	bd_promisc = d->bd_promisc;
	d->bd_promisc = 0;

	lck_mtx_unlock(bpf_mlock);
	if (bd_promisc) {
		if (ifnet_set_promiscuous(ifp, 0)) {
			/*
			 * Something is really wrong if we were able to put
			 * the driver into promiscuous mode, but can't
			 * take it out.
			 * Most likely the network interface is gone.
			 */
			os_log_error(OS_LOG_DEFAULT,
			    "%s: bpf%d ifnet_set_promiscuous %s failed",
			    __func__, d->bd_dev_minor, if_name(ifp));
		}
	}

	if (disable_tap) {
		disable_tap(ifp, dlt, BPF_TAP_DISABLE);
	}
	lck_mtx_lock(bpf_mlock);

	/*
	 * Wake up other thread that are waiting for this thread to finish
	 * detaching
	 */
	d->bd_flags &= ~BPF_DETACHING;
	d->bd_flags |= BPF_DETACHED;

	/* Refresh the local variable as d could have been modified */
	bpf_closed = d->bd_flags & BPF_CLOSING;

	os_log(OS_LOG_DEFAULT, "bpf%d%s detached from %s fcount %llu dcount %llu",
	    d->bd_dev_minor, bpf_closed ? " closed and" : "", if_name(ifp),
	    d->bd_fcount, d->bd_dcount);

	/*
	 * Note that We've kept the reference because we may have dropped
	 * the lock when turning off promiscuous mode
	 */
	bpf_release_d(d);
done:
	/*
	 * Let the caller know the bpf_d is closed
	 */
	if (bpf_closed) {
		return 1;
	} else {
		return 0;
	}
}
887 
/*
 * Start asynchronous timer, if necessary.
 * Must be called with bpf_mlock held.
 *
 * Arms the descriptor's thread call to fire after the bd_rtout timeout
 * (expressed in ticks) and moves the state from BPF_IDLE to BPF_WAITING.
 * No-op when no timeout is configured or the timer is already pending.
 */
static void
bpf_start_timer(struct bpf_d *d)
{
	uint64_t deadline;
	struct timeval tv;

	if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
		/* Convert the tick-based timeout to sec/usec */
		tv.tv_sec = d->bd_rtout / hz;
		tv.tv_usec = (d->bd_rtout % hz) * tick;

		/*
		 * NOTE(review): the 32-bit microsecond multiply below can
		 * overflow for timeouts beyond roughly 71 minutes --
		 * presumably bd_rtout is always far smaller; confirm range.
		 */
		clock_interval_to_deadline(
			(uint32_t)tv.tv_sec * USEC_PER_SEC + tv.tv_usec,
			NSEC_PER_USEC, &deadline);
		/*
		 * The state is BPF_IDLE, so the timer hasn't
		 * been started yet, and hasn't gone off yet;
		 * there is no thread call scheduled, so this
		 * won't change the schedule.
		 *
		 * XXX - what if, by the time it gets entered,
		 * the deadline has already passed?
		 */
		thread_call_enter_delayed(d->bd_thread_call, deadline);
		d->bd_state = BPF_WAITING;
	}
}
918 
/*
 * Cancel asynchronous timer.
 * Must be called with bpf_mlock held.
 *
 * Returns the result of thread_call_cancel() (per that API, whether a
 * pending call was canceled before it fired).
 */
static boolean_t
bpf_stop_timer(struct bpf_d *d)
{
	/*
	 * If the timer has already gone off, this does nothing.
	 * Our caller is expected to set d->bd_state to BPF_IDLE,
	 * with the bpf_mlock, after we are called. bpf_timed_out()
	 * also grabs bpf_mlock, so, if the timer has gone off and
	 * bpf_timed_out() hasn't finished, it's waiting for the
	 * lock; when this thread releases the lock, it will
	 * find the state is BPF_IDLE, and just release the
	 * lock and return.
	 */
	return thread_call_cancel(d->bd_thread_call);
}
938 
939 void
bpf_acquire_d(struct bpf_d * d)940 bpf_acquire_d(struct bpf_d *d)
941 {
942 	void *lr_saved =  __builtin_return_address(0);
943 
944 	LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);
945 
946 	d->bd_refcnt += 1;
947 
948 	d->bd_ref_lr[d->bd_next_ref_lr] = lr_saved;
949 	d->bd_next_ref_lr = (d->bd_next_ref_lr + 1) % BPF_REF_HIST;
950 }
951 
952 void
bpf_release_d(struct bpf_d * d)953 bpf_release_d(struct bpf_d *d)
954 {
955 	void *lr_saved =  __builtin_return_address(0);
956 
957 	LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);
958 
959 	if (d->bd_refcnt <= 0) {
960 		panic("%s: %p refcnt <= 0", __func__, d);
961 	}
962 
963 	d->bd_refcnt -= 1;
964 
965 	d->bd_unref_lr[d->bd_next_unref_lr] = lr_saved;
966 	d->bd_next_unref_lr = (d->bd_next_unref_lr + 1) % BPF_REF_HIST;
967 
968 	if (d->bd_refcnt == 0) {
969 		/* Assert the device is detached */
970 		if ((d->bd_flags & BPF_DETACHED) == 0) {
971 			panic("%s: %p BPF_DETACHED not set", __func__, d);
972 		}
973 
974 		kfree_type(struct bpf_d, d);
975 	}
976 }
977 
978 /*
979  * Open ethernet device.  Returns ENXIO for illegal minor device number,
980  * EBUSY if file is open by another process.
981  */
982 /* ARGSUSED */
983 int
bpfopen(dev_t dev,int flags,__unused int fmt,struct proc * p)984 bpfopen(dev_t dev, int flags, __unused int fmt,
985     struct proc *p)
986 {
987 	struct bpf_d *d;
988 
989 	lck_mtx_lock(bpf_mlock);
990 	if ((unsigned int) minor(dev) >= nbpfilter) {
991 		lck_mtx_unlock(bpf_mlock);
992 		return ENXIO;
993 	}
994 	/*
995 	 * New device nodes are created on demand when opening the last one.
996 	 * The programming model is for processes to loop on the minor starting
997 	 * at 0 as long as EBUSY is returned. The loop stops when either the
998 	 * open succeeds or an error other that EBUSY is returned. That means
999 	 * that bpf_make_dev_t() must block all processes that are opening the
1000 	 * last  node. If not all processes are blocked, they could unexpectedly
1001 	 * get ENOENT and abort their opening loop.
1002 	 */
1003 	if ((unsigned int) minor(dev) == (nbpfilter - 1)) {
1004 		bpf_make_dev_t(major(dev));
1005 	}
1006 
1007 	/*
1008 	 * Each minor can be opened by only one process.  If the requested
1009 	 * minor is in use, return EBUSY.
1010 	 *
1011 	 * Important: bpfopen() and bpfclose() have to check and set the status
1012 	 * of a device in the same lockin context otherwise the device may be
1013 	 * leaked because the vnode use count will be unpextectly greater than 1
1014 	 * when close() is called.
1015 	 */
1016 	if (bpf_dtab[minor(dev)] == NULL) {
1017 		/* Reserve while opening */
1018 		bpf_dtab[minor(dev)] = BPF_DEV_RESERVED;
1019 	} else {
1020 		lck_mtx_unlock(bpf_mlock);
1021 		return EBUSY;
1022 	}
1023 	d = kalloc_type(struct bpf_d, Z_WAITOK | Z_ZERO);
1024 	if (d == NULL) {
1025 		/* this really is a catastrophic failure */
1026 		os_log_error(OS_LOG_DEFAULT,
1027 		    "bpfopen: bpf%d kalloc_type bpf_d failed", minor(dev));
1028 		bpf_dtab[minor(dev)] = NULL;
1029 		lck_mtx_unlock(bpf_mlock);
1030 		return ENOMEM;
1031 	}
1032 
1033 	/* Mark "in use" and do most initialization. */
1034 	bpf_acquire_d(d);
1035 	d->bd_bufsize = bpf_bufsize;
1036 	d->bd_sig = SIGIO;
1037 	d->bd_direction = BPF_D_INOUT;
1038 	d->bd_oflags = flags;
1039 	d->bd_state = BPF_IDLE;
1040 	d->bd_traffic_class = SO_TC_BE;
1041 	d->bd_flags |= BPF_DETACHED;
1042 	if (bpf_wantpktap) {
1043 		d->bd_flags |= BPF_WANT_PKTAP;
1044 	} else {
1045 		d->bd_flags &= ~BPF_WANT_PKTAP;
1046 	}
1047 
1048 	d->bd_thread_call = thread_call_allocate(bpf_timed_out, d);
1049 	if (d->bd_thread_call == NULL) {
1050 		os_log_error(OS_LOG_DEFAULT, "bpfopen: bpf%d malloc thread call failed",
1051 		    minor(dev));
1052 		bpf_dtab[minor(dev)] = NULL;
1053 		bpf_release_d(d);
1054 		lck_mtx_unlock(bpf_mlock);
1055 
1056 		return ENOMEM;
1057 	}
1058 	d->bd_opened_by = p;
1059 	uuid_generate(d->bd_uuid);
1060 	d->bd_pid = proc_pid(p);
1061 
1062 	d->bd_dev_minor = minor(dev);
1063 	bpf_dtab[minor(dev)] = d;         /* Mark opened */
1064 	lck_mtx_unlock(bpf_mlock);
1065 
1066 	if (bpf_debug) {
1067 		os_log(OS_LOG_DEFAULT, "bpf%u opened by %s.%u",
1068 		    d->bd_dev_minor, proc_name_address(p), d->bd_pid);
1069 	}
1070 	return 0;
1071 }
1072 
/*
 * Close the descriptor by detaching it from its interface,
 * deallocating its buffers, and marking it free.
 *
 * Holds bpf_mlock for the whole teardown; drops it only implicitly
 * inside msleep() while draining an in-flight timer callout or an
 * in-flight read/write.
 */
/* ARGSUSED */
int
bpfclose(dev_t dev, __unused int flags, __unused int fmt,
    __unused struct proc *p)
{
	struct bpf_d *d;

	/* Take BPF lock to ensure no other thread is using the device */
	lck_mtx_lock(bpf_mlock);

	d = bpf_dtab[minor(dev)];
	if (d == NULL || d == BPF_DEV_RESERVED) {
		lck_mtx_unlock(bpf_mlock);
		return ENXIO;
	}

	/*
	 * Other threads may call bpf_detachd() if we drop the bpf_mlock
	 */
	d->bd_flags |= BPF_CLOSING;

	if (bpf_debug != 0) {
		os_log(OS_LOG_DEFAULT, "%s: bpf%d",
		    __func__, d->bd_dev_minor);
	}

	bpf_dtab[minor(dev)] = BPF_DEV_RESERVED;         /* Reserve while closing */

	/*
	 * Deal with any in-progress timeouts.
	 */
	switch (d->bd_state) {
	case BPF_IDLE:
		/*
		 * Not waiting for a timeout, and no timeout happened.
		 */
		break;

	case BPF_WAITING:
		/*
		 * Waiting for a timeout.
		 * Cancel any timer that has yet to go off,
		 * and mark the state as "closing".
		 * Then drop the lock to allow any timers that
		 * *have* gone off to run to completion, and wait
		 * for them to finish.
		 */
		if (!bpf_stop_timer(d)) {
			/*
			 * There was no pending call, so the call must
			 * have been in progress. Wait for the call to
			 * complete; we have to drop the lock while
			 * waiting, to let the in-progress call complete.
			 * bpf_timed_out() resets the state to BPF_IDLE
			 * and wakes us up.
			 */
			d->bd_state = BPF_DRAINING;
			while (d->bd_state == BPF_DRAINING) {
				msleep((caddr_t)d, bpf_mlock, PRINET,
				    "bpfdraining", NULL);
			}
		}
		d->bd_state = BPF_IDLE;
		break;

	case BPF_TIMED_OUT:
		/*
		 * Timer went off, and the timeout routine finished.
		 */
		d->bd_state = BPF_IDLE;
		break;

	case BPF_DRAINING:
		/*
		 * Another thread is blocked on a close waiting for
		 * a timeout to finish.
		 * This "shouldn't happen", as the first thread to enter
		 * bpfclose() will set bpf_dtab[minor(dev)] to
		 * BPF_DEV_RESERVED, and all subsequent threads should
		 * see that and fail with ENXIO.
		 */
		panic("Two threads blocked in a BPF close");
		break;
	}

	if (d->bd_bif) {
		bpf_detachd(d);
	}
	selthreadclear(&d->bd_sel);
	thread_call_free(d->bd_thread_call);

	/* Wait for any in-flight read or write on this descriptor to finish */
	while (d->bd_hbuf_read || d->bd_hbuf_write) {
		msleep((caddr_t)d, bpf_mlock, PRINET, "bpfclose", NULL);
	}

	if (bpf_debug) {
		os_log(OS_LOG_DEFAULT,
		    "bpf%u closed by %s.%u dcount %llu fcount %llu ccount %llu",
		    d->bd_dev_minor, proc_name_address(p), d->bd_pid,
		    d->bd_dcount, d->bd_fcount, d->bd_bcs.bcs_count_compressed_prefix);
	}

	bpf_freed(d);

	/* Mark free in same context as bpfopen comes to check */
	bpf_dtab[minor(dev)] = NULL;                    /* Mark closed */

	/* Drop the open reference; frees d once all other refs are gone */
	bpf_release_d(d);

	lck_mtx_unlock(bpf_mlock);

	return 0;
}
1188 
1189 #define BPF_SLEEP bpf_sleep
1190 
1191 static int
bpf_sleep(struct bpf_d * d,int pri,const char * wmesg,int timo)1192 bpf_sleep(struct bpf_d *d, int pri, const char *wmesg, int timo)
1193 {
1194 	u_int64_t abstime = 0;
1195 
1196 	if (timo != 0) {
1197 		clock_interval_to_deadline(timo, NSEC_PER_SEC / hz, &abstime);
1198 	}
1199 
1200 	return msleep1((caddr_t)d, bpf_mlock, pri, wmesg, abstime);
1201 }
1202 
1203 static void
bpf_finalize_pktap(struct bpf_hdr * hp,struct pktap_header * pktaphdr)1204 bpf_finalize_pktap(struct bpf_hdr *hp, struct pktap_header *pktaphdr)
1205 {
1206 	if (pktaphdr->pth_flags & PTH_FLAG_V2_HDR) {
1207 		struct pktap_v2_hdr *pktap_v2_hdr;
1208 
1209 		pktap_v2_hdr = (struct pktap_v2_hdr *)pktaphdr;
1210 
1211 		if (pktap_v2_hdr->pth_flags & PTH_FLAG_DELAY_PKTAP) {
1212 			pktap_v2_finalize_proc_info(pktap_v2_hdr);
1213 		}
1214 	} else {
1215 		if (pktaphdr->pth_flags & PTH_FLAG_DELAY_PKTAP) {
1216 			pktap_finalize_proc_info(pktaphdr);
1217 		}
1218 
1219 		if (pktaphdr->pth_flags & PTH_FLAG_TSTAMP) {
1220 			hp->bh_tstamp.tv_sec = pktaphdr->pth_tstamp.tv_sec;
1221 			hp->bh_tstamp.tv_usec = pktaphdr->pth_tstamp.tv_usec;
1222 		}
1223 	}
1224 }
1225 
/*
 * Rotate the packet buffers in descriptor d.  Move the store buffer
 * into the hold slot, and the free buffer into the store slot.
 * Zero the length of the new store buffer.
 *
 * Note: in head drop mode, the hold buffer can be dropped so the first packet
 * of the store buffer cannot be compressed as it otherwise would refer to
 * deleted data in a dropped hold buffer that the reader process does not know
 * about; resetting bd_prev_slen disables prefix compression for that packet.
 *
 * Must not be invoked while a read holds the hold buffer (bd_hbuf_read).
 */
#define ROTATE_BUFFERS(d) do { \
	if (d->bd_hbuf_read) \
	        panic("rotating bpf buffers during read"); \
	(d)->bd_hbuf = (d)->bd_sbuf; \
	(d)->bd_hlen = (d)->bd_slen; \
	(d)->bd_hcnt = (d)->bd_scnt; \
	(d)->bd_sbuf = (d)->bd_fbuf; \
	(d)->bd_slen = 0; \
	(d)->bd_scnt = 0; \
	(d)->bd_fbuf = NULL; \
	if ((d)->bd_headdrop != 0) \
	        (d)->bd_prev_slen = 0; \
} while(false)
1248 
1249 /*
1250  *  bpfread - read next chunk of packets from buffers
1251  */
1252 int
bpfread(dev_t dev,struct uio * uio,int ioflag)1253 bpfread(dev_t dev, struct uio *uio, int ioflag)
1254 {
1255 	struct bpf_d *d;
1256 	caddr_t hbuf;
1257 	int timed_out, hbuf_len;
1258 	int error;
1259 	int flags;
1260 
1261 	lck_mtx_lock(bpf_mlock);
1262 
1263 	d = bpf_dtab[minor(dev)];
1264 	if (d == NULL || d == BPF_DEV_RESERVED ||
1265 	    (d->bd_flags & BPF_CLOSING) != 0) {
1266 		lck_mtx_unlock(bpf_mlock);
1267 		return ENXIO;
1268 	}
1269 
1270 	bpf_acquire_d(d);
1271 
1272 	/*
1273 	 * Restrict application to use a buffer the same size as
1274 	 * as kernel buffers.
1275 	 */
1276 	if (uio_resid(uio) != d->bd_bufsize) {
1277 		bpf_release_d(d);
1278 		lck_mtx_unlock(bpf_mlock);
1279 		return EINVAL;
1280 	}
1281 
1282 	if (d->bd_state == BPF_WAITING) {
1283 		bpf_stop_timer(d);
1284 	}
1285 
1286 	timed_out = (d->bd_state == BPF_TIMED_OUT);
1287 	d->bd_state = BPF_IDLE;
1288 
1289 	while (d->bd_hbuf_read) {
1290 		msleep((caddr_t)d, bpf_mlock, PRINET, "bpfread", NULL);
1291 	}
1292 
1293 	if ((d->bd_flags & BPF_CLOSING) != 0) {
1294 		bpf_release_d(d);
1295 		lck_mtx_unlock(bpf_mlock);
1296 		return ENXIO;
1297 	}
1298 	/*
1299 	 * If the hold buffer is empty, then do a timed sleep, which
1300 	 * ends when the timeout expires or when enough packets
1301 	 * have arrived to fill the store buffer.
1302 	 */
1303 	while (d->bd_hbuf == 0) {
1304 		if ((d->bd_immediate || timed_out || (ioflag & IO_NDELAY)) &&
1305 		    d->bd_slen != 0) {
1306 			/*
1307 			 * We're in immediate mode, or are reading
1308 			 * in non-blocking mode, or a timer was
1309 			 * started before the read (e.g., by select()
1310 			 * or poll()) and has expired and a packet(s)
1311 			 * either arrived since the previous
1312 			 * read or arrived while we were asleep.
1313 			 * Rotate the buffers and return what's here.
1314 			 */
1315 			ROTATE_BUFFERS(d);
1316 			break;
1317 		}
1318 
1319 		/*
1320 		 * No data is available, check to see if the bpf device
1321 		 * is still pointed at a real interface.  If not, return
1322 		 * ENXIO so that the userland process knows to rebind
1323 		 * it before using it again.
1324 		 */
1325 		if (d->bd_bif == NULL) {
1326 			bpf_release_d(d);
1327 			lck_mtx_unlock(bpf_mlock);
1328 			return ENXIO;
1329 		}
1330 		if (ioflag & IO_NDELAY) {
1331 			bpf_release_d(d);
1332 			lck_mtx_unlock(bpf_mlock);
1333 			return EWOULDBLOCK;
1334 		}
1335 		error = BPF_SLEEP(d, PRINET | PCATCH, "bpf", d->bd_rtout);
1336 		/*
1337 		 * Make sure device is still opened
1338 		 */
1339 		if ((d->bd_flags & BPF_CLOSING) != 0) {
1340 			bpf_release_d(d);
1341 			lck_mtx_unlock(bpf_mlock);
1342 			return ENXIO;
1343 		}
1344 
1345 		while (d->bd_hbuf_read) {
1346 			msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_read",
1347 			    NULL);
1348 		}
1349 
1350 		if ((d->bd_flags & BPF_CLOSING) != 0) {
1351 			bpf_release_d(d);
1352 			lck_mtx_unlock(bpf_mlock);
1353 			return ENXIO;
1354 		}
1355 
1356 		if (error == EINTR || error == ERESTART) {
1357 			if (d->bd_hbuf != NULL) {
1358 				/*
1359 				 * Because we msleep, the hold buffer might
1360 				 * be filled when we wake up.  Avoid rotating
1361 				 * in this case.
1362 				 */
1363 				break;
1364 			}
1365 			if (d->bd_slen != 0) {
1366 				/*
1367 				 * Sometimes we may be interrupted often and
1368 				 * the sleep above will not timeout.
1369 				 * Regardless, we should rotate the buffers
1370 				 * if there's any new data pending and
1371 				 * return it.
1372 				 */
1373 				ROTATE_BUFFERS(d);
1374 				break;
1375 			}
1376 			bpf_release_d(d);
1377 			lck_mtx_unlock(bpf_mlock);
1378 			if (error == ERESTART) {
1379 				os_log(OS_LOG_DEFAULT, "%s: bpf%d ERESTART to EINTR",
1380 				    __func__, d->bd_dev_minor);
1381 				error = EINTR;
1382 			}
1383 			return error;
1384 		}
1385 		if (error == EWOULDBLOCK) {
1386 			/*
1387 			 * On a timeout, return what's in the buffer,
1388 			 * which may be nothing.  If there is something
1389 			 * in the store buffer, we can rotate the buffers.
1390 			 */
1391 			if (d->bd_hbuf) {
1392 				/*
1393 				 * We filled up the buffer in between
1394 				 * getting the timeout and arriving
1395 				 * here, so we don't need to rotate.
1396 				 */
1397 				break;
1398 			}
1399 
1400 			if (d->bd_slen == 0) {
1401 				bpf_release_d(d);
1402 				lck_mtx_unlock(bpf_mlock);
1403 				return 0;
1404 			}
1405 			ROTATE_BUFFERS(d);
1406 			break;
1407 		}
1408 	}
1409 	/*
1410 	 * At this point, we know we have something in the hold slot.
1411 	 */
1412 
1413 	/*
1414 	 * Set the hold buffer read. So we do not
1415 	 * rotate the buffers until the hold buffer
1416 	 * read is complete. Also to avoid issues resulting
1417 	 * from page faults during disk sleep (<rdar://problem/13436396>).
1418 	 */
1419 	d->bd_hbuf_read = true;
1420 	hbuf = d->bd_hbuf;
1421 	hbuf_len = d->bd_hlen;
1422 	flags = d->bd_flags;
1423 	d->bd_bcs.bcs_total_read += d->bd_hcnt;
1424 	lck_mtx_unlock(bpf_mlock);
1425 
1426 	/*
1427 	 * Before we move data to userland, we fill out the extended
1428 	 * header fields.
1429 	 */
1430 	if (flags & BPF_EXTENDED_HDR) {
1431 		char *p;
1432 
1433 		p = hbuf;
1434 		while (p < hbuf + hbuf_len) {
1435 			struct bpf_hdr_ext *ehp;
1436 			uint32_t flowid;
1437 			struct so_procinfo soprocinfo;
1438 			int found = 0;
1439 
1440 			ehp = (struct bpf_hdr_ext *)(void *)p;
1441 			if ((flowid = ehp->bh_flowid) != 0) {
1442 				if (ehp->bh_flags & BPF_HDR_EXT_FLAGS_TCP) {
1443 					ehp->bh_flags &= ~BPF_HDR_EXT_FLAGS_TCP;
1444 					found = inp_findinpcb_procinfo(&tcbinfo,
1445 					    flowid, &soprocinfo);
1446 				} else if (ehp->bh_flags == BPF_HDR_EXT_FLAGS_UDP) {
1447 					ehp->bh_flags &= ~BPF_HDR_EXT_FLAGS_UDP;
1448 					found = inp_findinpcb_procinfo(&udbinfo,
1449 					    flowid, &soprocinfo);
1450 				}
1451 				if (found == 1) {
1452 					ehp->bh_pid = soprocinfo.spi_pid;
1453 					strlcpy(&ehp->bh_comm[0], &soprocinfo.spi_proc_name[0], sizeof(ehp->bh_comm));
1454 				}
1455 				ehp->bh_flowid = 0;
1456 			}
1457 
1458 			if ((flags & BPF_FINALIZE_PKTAP) != 0 && ehp->bh_complen == 0) {
1459 				struct pktap_header *pktaphdr;
1460 
1461 				pktaphdr = (struct pktap_header *)(void *)
1462 				    (p + BPF_WORDALIGN(ehp->bh_hdrlen));
1463 
1464 				bpf_finalize_pktap((struct bpf_hdr *) ehp,
1465 				    pktaphdr);
1466 			}
1467 			p += BPF_WORDALIGN(ehp->bh_hdrlen + ehp->bh_caplen);
1468 		}
1469 	} else if (flags & BPF_FINALIZE_PKTAP) {
1470 		char *p;
1471 
1472 		p = hbuf;
1473 
1474 		while (p < hbuf + hbuf_len) {
1475 			struct bpf_hdr *hp;
1476 			struct pktap_header *pktaphdr;
1477 
1478 			hp = (struct bpf_hdr *)(void *)p;
1479 
1480 			/*
1481 			 * Cannot finalize a compressed pktap header as we may not have
1482 			 * all the fields present
1483 			 */
1484 			if (d->bd_flags & BPF_COMP_ENABLED) {
1485 				struct bpf_comp_hdr *hcp;
1486 
1487 				hcp = (struct bpf_comp_hdr *)(void *)p;
1488 
1489 				if (hcp->bh_complen != 0) {
1490 					p += BPF_WORDALIGN(hcp->bh_hdrlen + hcp->bh_caplen);
1491 					continue;
1492 				}
1493 			}
1494 
1495 			pktaphdr = (struct pktap_header *)(void *)
1496 			    (p + BPF_WORDALIGN(hp->bh_hdrlen));
1497 
1498 			bpf_finalize_pktap(hp, pktaphdr);
1499 
1500 			p += BPF_WORDALIGN(hp->bh_hdrlen + hp->bh_caplen);
1501 		}
1502 	}
1503 
1504 	/*
1505 	 * Move data from hold buffer into user space.
1506 	 * We know the entire buffer is transferred since
1507 	 * we checked above that the read buffer is bpf_bufsize bytes.
1508 	 */
1509 	error = uiomove(hbuf, hbuf_len, uio);
1510 
1511 	lck_mtx_lock(bpf_mlock);
1512 	/*
1513 	 * Make sure device is still opened
1514 	 */
1515 	if ((d->bd_flags & BPF_CLOSING) != 0) {
1516 		bpf_release_d(d);
1517 		lck_mtx_unlock(bpf_mlock);
1518 		return ENXIO;
1519 	}
1520 
1521 	d->bd_hbuf_read = false;
1522 	d->bd_fbuf = d->bd_hbuf;
1523 	d->bd_hbuf = NULL;
1524 	d->bd_hlen = 0;
1525 	d->bd_hcnt = 0;
1526 	wakeup((caddr_t)d);
1527 
1528 	bpf_release_d(d);
1529 	lck_mtx_unlock(bpf_mlock);
1530 	return error;
1531 }
1532 
1533 /*
1534  * If there are processes sleeping on this descriptor, wake them up.
1535  */
1536 static void
bpf_wakeup(struct bpf_d * d)1537 bpf_wakeup(struct bpf_d *d)
1538 {
1539 	if (d->bd_state == BPF_WAITING) {
1540 		bpf_stop_timer(d);
1541 		d->bd_state = BPF_IDLE;
1542 	}
1543 	wakeup((caddr_t)d);
1544 	if (d->bd_async && d->bd_sig && d->bd_sigio) {
1545 		pgsigio(d->bd_sigio, d->bd_sig);
1546 	}
1547 
1548 	selwakeup(&d->bd_sel);
1549 	if ((d->bd_flags & BPF_KNOTE)) {
1550 		KNOTE(&d->bd_sel.si_note, 1);
1551 	}
1552 }
1553 
1554 static void
bpf_timed_out(void * arg,__unused void * dummy)1555 bpf_timed_out(void *arg, __unused void *dummy)
1556 {
1557 	struct bpf_d *d = (struct bpf_d *)arg;
1558 
1559 	lck_mtx_lock(bpf_mlock);
1560 	if (d->bd_state == BPF_WAITING) {
1561 		/*
1562 		 * There's a select or kqueue waiting for this; if there's
1563 		 * now stuff to read, wake it up.
1564 		 */
1565 		d->bd_state = BPF_TIMED_OUT;
1566 		if (d->bd_slen != 0) {
1567 			bpf_wakeup(d);
1568 		}
1569 	} else if (d->bd_state == BPF_DRAINING) {
1570 		/*
1571 		 * A close is waiting for this to finish.
1572 		 * Mark it as finished, and wake the close up.
1573 		 */
1574 		d->bd_state = BPF_IDLE;
1575 		bpf_wakeup(d);
1576 	}
1577 	lck_mtx_unlock(bpf_mlock);
1578 }
1579 
/*
 * Largest link-layer header bpfwrite() may need room for in dst_buf;
 * keep in sync with bpf_movein above:
 */
#define MAX_DATALINK_HDR_LEN    (sizeof(struct firewire_header))
1582 
1583 int
bpfwrite(dev_t dev,struct uio * uio,__unused int ioflag)1584 bpfwrite(dev_t dev, struct uio *uio, __unused int ioflag)
1585 {
1586 	struct bpf_d *d;
1587 	struct ifnet *ifp;
1588 	struct mbuf *m = NULL;
1589 	int error = 0;
1590 	char              dst_buf[SOCKADDR_HDR_LEN + MAX_DATALINK_HDR_LEN];
1591 	int bif_dlt;
1592 	int bd_hdrcmplt;
1593 	bpf_send_func bif_send;
1594 
1595 	lck_mtx_lock(bpf_mlock);
1596 
1597 	while (true) {
1598 		d = bpf_dtab[minor(dev)];
1599 		if (d == NULL || d == BPF_DEV_RESERVED ||
1600 		    (d->bd_flags & BPF_CLOSING) != 0) {
1601 			lck_mtx_unlock(bpf_mlock);
1602 			return ENXIO;
1603 		}
1604 
1605 		if (d->bd_hbuf_write) {
1606 			msleep((caddr_t)d, bpf_mlock, PRINET, "bpfwrite",
1607 			    NULL);
1608 		} else {
1609 			break;
1610 		}
1611 	}
1612 	d->bd_hbuf_write = true;
1613 
1614 	bpf_acquire_d(d);
1615 
1616 	++d->bd_wcount;
1617 
1618 	if (d->bd_bif == NULL) {
1619 		error = ENXIO;
1620 		goto done;
1621 	}
1622 
1623 	ifp = d->bd_bif->bif_ifp;
1624 
1625 	if (IFNET_IS_MANAGEMENT(ifp) &&
1626 	    IOCurrentTaskHasEntitlement(MANAGEMENT_DATA_ENTITLEMENT) == false) {
1627 		++d->bd_wdcount;
1628 		bpf_release_d(d);
1629 		lck_mtx_unlock(bpf_mlock);
1630 		return ENETDOWN;
1631 	}
1632 
1633 	if ((ifp->if_flags & IFF_UP) == 0) {
1634 		error = ENETDOWN;
1635 		goto done;
1636 	}
1637 	int resid = (int)uio_resid(uio);
1638 	if (resid <= 0) {
1639 		error = resid == 0 ? 0 : EINVAL;
1640 		os_log(OS_LOG_DEFAULT, "bpfwrite: resid %d error %d", resid, error);
1641 		goto done;
1642 	}
1643 	SA(dst_buf)->sa_len = sizeof(dst_buf);
1644 
1645 	/*
1646 	 * geting variables onto stack before dropping the lock
1647 	 */
1648 	bif_dlt = (int)d->bd_bif->bif_dlt;
1649 	bd_hdrcmplt  = d->bd_hdrcmplt;
1650 	bool batch_write = (d->bd_flags & BPF_BATCH_WRITE) ? true : false;
1651 
1652 	if (batch_write) {
1653 		error = bpf_movein_batch(uio, d, &m, bd_hdrcmplt ? NULL : SA(dst_buf));
1654 		if (error != 0) {
1655 			goto done;
1656 		}
1657 	} else {
1658 		error = bpf_movein(uio, resid, d, &m, bd_hdrcmplt ? NULL : SA(dst_buf));
1659 		if (error != 0) {
1660 			goto done;
1661 		}
1662 		bpf_set_packet_service_class(m, d->bd_traffic_class);
1663 	}
1664 
1665 	/* verify the device is still open */
1666 	if ((d->bd_flags & BPF_CLOSING) != 0) {
1667 		error = ENXIO;
1668 		goto done;
1669 	}
1670 
1671 	if (d->bd_bif == NULL || d->bd_bif->bif_ifp != ifp) {
1672 		error = ENXIO;
1673 		goto done;
1674 	}
1675 
1676 	bif_send = d->bd_bif->bif_send;
1677 
1678 	lck_mtx_unlock(bpf_mlock);
1679 
1680 	if (bd_hdrcmplt) {
1681 		if (bif_send) {
1682 			/*
1683 			 * Send one packet at a time, the driver frees the mbuf
1684 			 * but we need to take care of the leftover
1685 			 */
1686 			while (m != NULL && error == 0) {
1687 				struct mbuf *next = m->m_nextpkt;
1688 
1689 				m->m_nextpkt = NULL;
1690 				error = bif_send(ifp, bif_dlt, m);
1691 				m = next;
1692 			}
1693 		} else {
1694 			error = dlil_output(ifp, 0, m, NULL, NULL, 1, NULL);
1695 			/* Make sure we do not double free */
1696 			m = NULL;
1697 		}
1698 	} else {
1699 		error = dlil_output(ifp, PF_INET, m, NULL,
1700 		    SA(dst_buf), 0, NULL);
1701 		/* Make sure we do not double free */
1702 		m = NULL;
1703 	}
1704 
1705 	lck_mtx_lock(bpf_mlock);
1706 done:
1707 	if (error != 0 && m != NULL) {
1708 		++d->bd_wdcount;
1709 	}
1710 	if (m != NULL) {
1711 		m_freem_list(m);
1712 	}
1713 	d->bd_hbuf_write = false;
1714 	wakeup((caddr_t)d);
1715 	bpf_release_d(d);
1716 	lck_mtx_unlock(bpf_mlock);
1717 
1718 	return error;
1719 }
1720 
1721 /*
1722  * Reset a descriptor by flushing its packet buffer and clearing the
1723  * receive and drop counts.
1724  */
1725 static void
reset_d(struct bpf_d * d)1726 reset_d(struct bpf_d *d)
1727 {
1728 	if (d->bd_hbuf_read) {
1729 		panic("resetting buffers during read");
1730 	}
1731 
1732 	if (d->bd_hbuf) {
1733 		/* Free the hold buffer. */
1734 		d->bd_fbuf = d->bd_hbuf;
1735 		d->bd_hbuf = NULL;
1736 	}
1737 	d->bd_slen = 0;
1738 	d->bd_hlen = 0;
1739 	d->bd_scnt = 0;
1740 	d->bd_hcnt = 0;
1741 	d->bd_rcount = 0;
1742 	d->bd_dcount = 0;
1743 	d->bd_fcount = 0;
1744 	d->bd_wcount = 0;
1745 	d->bd_wdcount = 0;
1746 
1747 	d->bd_prev_slen = 0;
1748 }
1749 
1750 static struct bpf_d *
bpf_get_device_from_uuid(uuid_t uuid)1751 bpf_get_device_from_uuid(uuid_t uuid)
1752 {
1753 	unsigned int i;
1754 
1755 	for (i = 0; i < nbpfilter; i++) {
1756 		struct bpf_d *d = bpf_dtab[i];
1757 
1758 		if (d == NULL || d == BPF_DEV_RESERVED ||
1759 		    (d->bd_flags & BPF_CLOSING) != 0) {
1760 			continue;
1761 		}
1762 		if (uuid_compare(uuid, d->bd_uuid) == 0) {
1763 			return d;
1764 		}
1765 	}
1766 
1767 	return NULL;
1768 }
1769 
/*
 * The BIOCSETUP command "atomically" attaches to the interface and
 * copies the buffer from another bpf device. This minimizes the risk
 * of missing packets because this is done while holding
 * the BPF global lock.
 *
 * Both descriptors are marked as having a read and a write in progress
 * while the copy happens, which blocks concurrent bpfread()/bpfwrite()
 * on either device.
 */
static int
bpf_setup(struct bpf_d *d_to, uuid_t uuid_from, ifnet_t ifp)
{
	struct bpf_d *d_from;
	int error = 0;

	LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);

	/*
	 * Sanity checks
	 */
	d_from = bpf_get_device_from_uuid(uuid_from);
	if (d_from == NULL) {
		error = ENOENT;
		os_log_error(OS_LOG_DEFAULT,
		    "%s: uuids not found error %d",
		    __func__, error);
		return error;
	}
	/* Both devices must have been opened by the same process */
	if (d_from->bd_opened_by != d_to->bd_opened_by) {
		error = EACCES;
		os_log_error(OS_LOG_DEFAULT,
		    "%s: processes not matching error %d",
		    __func__, error);
		return error;
	}

	/*
	 * Prevent any read or write while copying
	 */
	while (d_to->bd_hbuf_read || d_to->bd_hbuf_write) {
		msleep((caddr_t)d_to, bpf_mlock, PRINET, __func__, NULL);
	}
	d_to->bd_hbuf_read = true;
	d_to->bd_hbuf_write = true;

	while (d_from->bd_hbuf_read || d_from->bd_hbuf_write) {
		msleep((caddr_t)d_from, bpf_mlock, PRINET, __func__, NULL);
	}
	d_from->bd_hbuf_read = true;
	d_from->bd_hbuf_write = true;

	/*
	 * Verify the devices have not been closed
	 * (the msleep() calls above drop bpf_mlock while sleeping)
	 */
	if (d_to->bd_flags & BPF_CLOSING) {
		error = ENXIO;
		os_log_error(OS_LOG_DEFAULT,
		    "%s: d_to is closing error %d",
		    __func__, error);
		goto done;
	}
	if (d_from->bd_flags & BPF_CLOSING) {
		error = ENXIO;
		os_log_error(OS_LOG_DEFAULT,
		    "%s: d_from is closing error %d",
		    __func__, error);
		goto done;
	}

	/*
	 * For now require the same buffer size
	 */
	if (d_from->bd_bufsize != d_to->bd_bufsize) {
		error = EINVAL;
		os_log_error(OS_LOG_DEFAULT,
		    "%s: bufsizes not matching error %d",
		    __func__, error);
		goto done;
	}

	/*
	 * Copy relevant options and flags
	 */
	d_to->bd_flags = d_from->bd_flags & (BPF_EXTENDED_HDR | BPF_WANT_PKTAP |
	    BPF_FINALIZE_PKTAP | BPF_TRUNCATE | BPF_PKTHDRV2 |
	    BPF_COMP_REQ | BPF_COMP_ENABLED);

	d_to->bd_headdrop = d_from->bd_headdrop;

	/*
	 * Allocate and copy the buffers
	 */
	error = bpf_allocbufs(d_to);
	if (error != 0) {
		goto done;
	}

	/*
	 * Make sure the buffers are setup as expected by bpf_setif()
	 */
	ASSERT(d_to->bd_hbuf == NULL);
	ASSERT(d_to->bd_sbuf != NULL);
	ASSERT(d_to->bd_fbuf != NULL);

	/*
	 * Copy the buffers and update the pointers and counts
	 */
	memcpy(d_to->bd_sbuf, d_from->bd_sbuf, d_from->bd_slen);
	d_to->bd_slen = d_from->bd_slen;
	d_to->bd_scnt = d_from->bd_scnt;

	if (d_from->bd_hbuf != NULL) {
		/* The freshly allocated free buffer becomes the hold buffer */
		d_to->bd_hbuf = d_to->bd_fbuf;
		d_to->bd_fbuf = NULL;
		memcpy(d_to->bd_hbuf, d_from->bd_hbuf, d_from->bd_hlen);
	}
	d_to->bd_hlen = d_from->bd_hlen;
	d_to->bd_hcnt = d_from->bd_hcnt;

	if (d_to->bd_flags & BPF_COMP_REQ) {
		ASSERT(d_to->bd_prev_sbuf != NULL);
		ASSERT(d_to->bd_prev_fbuf != NULL);

		/* Carry over the compression state of the previous packet */
		d_to->bd_prev_slen = d_from->bd_prev_slen;
		ASSERT(d_to->bd_prev_slen <= BPF_HDR_COMP_LEN_MAX);
		memcpy(d_to->bd_prev_sbuf, d_from->bd_prev_sbuf, BPF_HDR_COMP_LEN_MAX);
	}

	d_to->bd_bcs = d_from->bd_bcs;

	/*
	 * Attach to the interface:
	 * - don't reset the buffers
	 * - we already prevent reads and writes
	 * - the buffers are already allocated
	 */
	error = bpf_setif(d_to, ifp, false, true, true);
	if (error != 0) {
		os_log_error(OS_LOG_DEFAULT,
		    "%s: bpf_setif() failed error %d",
		    __func__, error);
		goto done;
	}
done:
	/* Release both devices' read/write holds and wake any waiter */
	d_from->bd_hbuf_read = false;
	d_from->bd_hbuf_write = false;
	wakeup((caddr_t)d_from);

	d_to->bd_hbuf_read = false;
	d_to->bd_hbuf_write = false;
	wakeup((caddr_t)d_to);

	return error;
}
1921 
#if DEVELOPMENT || DEBUG
/*
 * X-macro table of every BPF ioctl we can name; expanded by
 * log_bpf_ioctl_str() below to map a command value to its symbol.
 */
#define BPF_IOC_LIST \
	X(FIONREAD) \
	X(SIOCGIFADDR) \
	X(BIOCGBLEN) \
	X(BIOCSBLEN) \
	X(BIOCSETF32) \
	X(BIOCSETFNR32) \
	X(BIOCSETF64) \
	X(BIOCSETFNR64) \
	X(BIOCFLUSH) \
	X(BIOCPROMISC) \
	X(BIOCGDLT) \
	X(BIOCGDLTLIST) \
	X(BIOCSDLT) \
	X(BIOCGETIF) \
	X(BIOCSETIF) \
	X(BIOCSRTIMEOUT32) \
	X(BIOCSRTIMEOUT64) \
	X(BIOCGRTIMEOUT32) \
	X(BIOCGRTIMEOUT64) \
	X(BIOCGSTATS) \
	X(BIOCIMMEDIATE) \
	X(BIOCVERSION) \
	X(BIOCGHDRCMPLT) \
	X(BIOCSHDRCMPLT) \
	X(BIOCGSEESENT) \
	X(BIOCSSEESENT) \
	X(BIOCSETTC) \
	X(BIOCGETTC) \
	X(FIONBIO) \
	X(FIOASYNC) \
	X(BIOCSRSIG) \
	X(BIOCGRSIG) \
	X(BIOCSEXTHDR) \
	X(BIOCGIFATTACHCOUNT) \
	X(BIOCGWANTPKTAP) \
	X(BIOCSWANTPKTAP) \
	X(BIOCSHEADDROP) \
	X(BIOCGHEADDROP) \
	X(BIOCSTRUNCATE) \
	X(BIOCGETUUID) \
	X(BIOCSETUP) \
	X(BIOCSPKTHDRV2) \
	X(BIOCGHDRCOMP) \
	X(BIOCSHDRCOMP) \
	X(BIOCGHDRCOMPSTATS) \
	X(BIOCGHDRCOMPON) \
	X(BIOCGDIRECTION) \
	X(BIOCSDIRECTION) \
	X(BIOCSWRITEMAX) \
	X(BIOCGWRITEMAX) \
	X(BIOCGBATCHWRITE) \
	X(BIOCSBATCHWRITE)

/*
 * Log the symbolic name of an ioctl issued on descriptor d (debug builds
 * only).  Unknown commands are logged with their raw hex value.
 */
static void
log_bpf_ioctl_str(struct bpf_d *d, u_long cmd)
{
	const char *p = NULL;
	char str[32];

/* Expand each known ioctl into a case that records (and prints) its name */
#define X(x) case x: { p = #x ; printf("%s\n", p); break; }
	switch (cmd) {
		BPF_IOC_LIST
	}
#undef X
	if (p == NULL) {
		/* Not in the table: fall back to the numeric command value */
		snprintf(str, sizeof(str), "0x%08x", (unsigned int)cmd);
		p = str;
	}
	os_log(OS_LOG_DEFAULT, "bpfioctl bpf%u %s",
	    d->bd_dev_minor, p);
}
#endif /* DEVELOPMENT || DEBUG */
1996 
1997 /*
1998  *  FIONREAD		Check for read packet available.
1999  *  SIOCGIFADDR		Get interface address - convenient hook to driver.
2000  *  BIOCGBLEN		Get buffer len [for read()].
2001  *  BIOCSETF		Set ethernet read filter.
2002  *  BIOCFLUSH		Flush read packet buffer.
2003  *  BIOCPROMISC		Put interface into promiscuous mode.
2004  *  BIOCGDLT		Get link layer type.
2005  *  BIOCGETIF		Get interface name.
2006  *  BIOCSETIF		Set interface.
2007  *  BIOCSRTIMEOUT	Set read timeout.
2008  *  BIOCGRTIMEOUT	Get read timeout.
2009  *  BIOCGSTATS		Get packet stats.
2010  *  BIOCIMMEDIATE	Set immediate mode.
2011  *  BIOCVERSION		Get filter language version.
2012  *  BIOCGHDRCMPLT	Get "header already complete" flag
2013  *  BIOCSHDRCMPLT	Set "header already complete" flag
2014  *  BIOCGSEESENT	Get "see packets sent" flag
2015  *  BIOCSSEESENT	Set "see packets sent" flag
2016  *  BIOCSETTC		Set traffic class.
2017  *  BIOCGETTC		Get traffic class.
2018  *  BIOCSEXTHDR		Set "extended header" flag
2019  *  BIOCSHEADDROP	Drop head of the buffer if user is not reading
2020  *  BIOCGHEADDROP	Get "head-drop" flag
2021  */
2022 /* ARGSUSED */
2023 int
bpfioctl(dev_t dev,u_long cmd,caddr_t addr,__unused int flags,struct proc * p)2024 bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags,
2025     struct proc *p)
2026 {
2027 	struct bpf_d *d;
2028 	int error = 0;
2029 	u_int int_arg;
2030 	struct ifreq ifr = {};
2031 
2032 	lck_mtx_lock(bpf_mlock);
2033 
2034 	d = bpf_dtab[minor(dev)];
2035 	if (d == NULL || d == BPF_DEV_RESERVED ||
2036 	    (d->bd_flags & BPF_CLOSING) != 0) {
2037 		lck_mtx_unlock(bpf_mlock);
2038 		return ENXIO;
2039 	}
2040 
2041 	bpf_acquire_d(d);
2042 
2043 	if (d->bd_state == BPF_WAITING) {
2044 		bpf_stop_timer(d);
2045 	}
2046 	d->bd_state = BPF_IDLE;
2047 
2048 #if DEVELOPMENT || DEBUG
2049 	if (bpf_debug > 0) {
2050 		log_bpf_ioctl_str(d, cmd);
2051 	}
2052 #endif /* DEVELOPMENT || DEBUG */
2053 
2054 	switch (cmd) {
2055 	default:
2056 		error = EINVAL;
2057 		break;
2058 
2059 	/*
2060 	 * Check for read packet available.
2061 	 */
2062 	case FIONREAD:                  /* int */
2063 	{
2064 		int n;
2065 
2066 		n = d->bd_slen;
2067 		if (d->bd_hbuf && d->bd_hbuf_read) {
2068 			n += d->bd_hlen;
2069 		}
2070 
2071 		bcopy(&n, addr, sizeof(n));
2072 		break;
2073 	}
2074 
2075 	case SIOCGIFADDR:               /* struct ifreq */
2076 	{
2077 		struct ifnet *ifp;
2078 
2079 		if (d->bd_bif == 0) {
2080 			error = EINVAL;
2081 		} else {
2082 			ifp = d->bd_bif->bif_ifp;
2083 			error = ifnet_ioctl(ifp, 0, cmd, addr);
2084 		}
2085 		break;
2086 	}
2087 
2088 	/*
2089 	 * Get buffer len [for read()].
2090 	 */
2091 	case BIOCGBLEN:                 /* u_int */
2092 		bcopy(&d->bd_bufsize, addr, sizeof(u_int));
2093 		break;
2094 
2095 	/*
2096 	 * Set buffer length.
2097 	 */
2098 	case BIOCSBLEN: {               /* u_int */
2099 		u_int size;
2100 
2101 		if (d->bd_bif != 0 || (d->bd_flags & BPF_DETACHING)) {
2102 			/*
2103 			 * Interface already attached, unable to change buffers
2104 			 */
2105 			error = EINVAL;
2106 			break;
2107 		}
2108 		bcopy(addr, &size, sizeof(size));
2109 
2110 		if (size > BPF_BUFSIZE_CAP) {
2111 			d->bd_bufsize = BPF_BUFSIZE_CAP;
2112 
2113 			os_log_info(OS_LOG_DEFAULT,
2114 			    "bpf%d BIOCSBLEN capped to %u from %u",
2115 			    minor(dev), d->bd_bufsize, size);
2116 		} else if (size < BPF_MINBUFSIZE) {
2117 			d->bd_bufsize = BPF_MINBUFSIZE;
2118 
2119 			os_log_info(OS_LOG_DEFAULT,
2120 			    "bpf%d BIOCSBLEN bumped to %u from %u",
2121 			    minor(dev), d->bd_bufsize, size);
2122 		} else {
2123 			d->bd_bufsize = size;
2124 
2125 			os_log_info(OS_LOG_DEFAULT,
2126 			    "bpf%d BIOCSBLEN %u",
2127 			    minor(dev), d->bd_bufsize);
2128 		}
2129 
2130 		/* It's a read/write ioctl */
2131 		bcopy(&d->bd_bufsize, addr, sizeof(u_int));
2132 		break;
2133 	}
2134 	/*
2135 	 * Set link layer read filter.
2136 	 */
2137 	case BIOCSETF32:
2138 	case BIOCSETFNR32: {            /* struct bpf_program32 */
2139 		struct bpf_program32 prg32;
2140 
2141 		bcopy(addr, &prg32, sizeof(prg32));
2142 		error = bpf_setf(d, prg32.bf_len,
2143 		    CAST_USER_ADDR_T(prg32.bf_insns), cmd);
2144 		break;
2145 	}
2146 
2147 	case BIOCSETF64:
2148 	case BIOCSETFNR64: {            /* struct bpf_program64 */
2149 		struct bpf_program64 prg64;
2150 
2151 		bcopy(addr, &prg64, sizeof(prg64));
2152 		error = bpf_setf(d, prg64.bf_len, CAST_USER_ADDR_T(prg64.bf_insns), cmd);
2153 		break;
2154 	}
2155 
2156 	/*
2157 	 * Flush read packet buffer.
2158 	 */
2159 	case BIOCFLUSH:
2160 		while (d->bd_hbuf_read) {
2161 			msleep((caddr_t)d, bpf_mlock, PRINET, "BIOCFLUSH",
2162 			    NULL);
2163 		}
2164 		if ((d->bd_flags & BPF_CLOSING) != 0) {
2165 			error = ENXIO;
2166 			break;
2167 		}
2168 		reset_d(d);
2169 		break;
2170 
2171 	/*
2172 	 * Put interface into promiscuous mode.
2173 	 */
2174 	case BIOCPROMISC:
2175 		if (d->bd_bif == 0) {
2176 			/*
2177 			 * No interface attached yet.
2178 			 */
2179 			error = EINVAL;
2180 			break;
2181 		}
2182 		if (d->bd_promisc == 0) {
2183 			lck_mtx_unlock(bpf_mlock);
2184 			error = ifnet_set_promiscuous(d->bd_bif->bif_ifp, 1);
2185 			lck_mtx_lock(bpf_mlock);
2186 			if (error == 0) {
2187 				d->bd_promisc = 1;
2188 			}
2189 		}
2190 		break;
2191 
2192 	/*
2193 	 * Get device parameters.
2194 	 */
2195 	case BIOCGDLT:                  /* u_int */
2196 		if (d->bd_bif == 0) {
2197 			error = EINVAL;
2198 		} else {
2199 			bcopy(&d->bd_bif->bif_dlt, addr, sizeof(u_int));
2200 		}
2201 		break;
2202 
2203 	/*
2204 	 * Get a list of supported data link types.
2205 	 */
2206 	case BIOCGDLTLIST:              /* struct bpf_dltlist */
2207 		if (d->bd_bif == NULL) {
2208 			error = EINVAL;
2209 		} else {
2210 			error = bpf_getdltlist(d, addr, p);
2211 		}
2212 		break;
2213 
2214 	/*
2215 	 * Set data link type.
2216 	 */
2217 	case BIOCSDLT:                  /* u_int */
2218 		if (d->bd_bif == NULL) {
2219 			error = EINVAL;
2220 		} else {
2221 			u_int dlt;
2222 
2223 			bcopy(addr, &dlt, sizeof(dlt));
2224 
2225 			if (dlt == DLT_PKTAP &&
2226 			    !(d->bd_flags & BPF_WANT_PKTAP)) {
2227 				dlt = DLT_RAW;
2228 			}
2229 			error = bpf_setdlt(d, dlt);
2230 		}
2231 		break;
2232 
2233 	/*
2234 	 * Get interface name.
2235 	 */
2236 	case BIOCGETIF:                 /* struct ifreq */
2237 		if (d->bd_bif == 0) {
2238 			error = EINVAL;
2239 		} else {
2240 			struct ifnet *const ifp = d->bd_bif->bif_ifp;
2241 
2242 			snprintf(((struct ifreq *)(void *)addr)->ifr_name,
2243 			    sizeof(ifr.ifr_name), "%s", if_name(ifp));
2244 		}
2245 		break;
2246 
2247 	/*
2248 	 * Set interface.
2249 	 */
2250 	case BIOCSETIF: {               /* struct ifreq */
2251 		ifnet_t ifp;
2252 
2253 		bcopy(addr, &ifr, sizeof(ifr));
2254 		ifr.ifr_name[IFNAMSIZ - 1] = '\0';
2255 		ifp = ifunit(ifr.ifr_name);
2256 		if (ifp == NULL) {
2257 			error = ENXIO;
2258 		} else {
2259 			error = bpf_setif(d, ifp, true, false, false);
2260 		}
2261 		break;
2262 	}
2263 
2264 	/*
2265 	 * Set read timeout.
2266 	 */
2267 	case BIOCSRTIMEOUT32: {         /* struct user32_timeval */
2268 		struct user32_timeval _tv;
2269 		struct timeval tv;
2270 
2271 		bcopy(addr, &_tv, sizeof(_tv));
2272 		tv.tv_sec  = _tv.tv_sec;
2273 		tv.tv_usec = _tv.tv_usec;
2274 
2275 		/*
2276 		 * Subtract 1 tick from tvtohz() since this isn't
2277 		 * a one-shot timer.
2278 		 */
2279 		if ((error = itimerfix(&tv)) == 0) {
2280 			d->bd_rtout = tvtohz(&tv) - 1;
2281 		}
2282 		break;
2283 	}
2284 
2285 	case BIOCSRTIMEOUT64: {         /* struct user64_timeval */
2286 		struct user64_timeval _tv;
2287 		struct timeval tv;
2288 
2289 		bcopy(addr, &_tv, sizeof(_tv));
2290 		tv.tv_sec  = (__darwin_time_t)_tv.tv_sec;
2291 		tv.tv_usec = _tv.tv_usec;
2292 
2293 		/*
2294 		 * Subtract 1 tick from tvtohz() since this isn't
2295 		 * a one-shot timer.
2296 		 */
2297 		if ((error = itimerfix(&tv)) == 0) {
2298 			d->bd_rtout = tvtohz(&tv) - 1;
2299 		}
2300 		break;
2301 	}
2302 
2303 	/*
2304 	 * Get read timeout.
2305 	 */
2306 	case BIOCGRTIMEOUT32: {         /* struct user32_timeval */
2307 		struct user32_timeval tv;
2308 
2309 		bzero(&tv, sizeof(tv));
2310 		tv.tv_sec = d->bd_rtout / hz;
2311 		tv.tv_usec = (d->bd_rtout % hz) * tick;
2312 		bcopy(&tv, addr, sizeof(tv));
2313 		break;
2314 	}
2315 
2316 	case BIOCGRTIMEOUT64: {         /* struct user64_timeval */
2317 		struct user64_timeval tv;
2318 
2319 		bzero(&tv, sizeof(tv));
2320 		tv.tv_sec = d->bd_rtout / hz;
2321 		tv.tv_usec = (d->bd_rtout % hz) * tick;
2322 		bcopy(&tv, addr, sizeof(tv));
2323 		break;
2324 	}
2325 
2326 	/*
2327 	 * Get packet stats.
2328 	 */
2329 	case BIOCGSTATS: {              /* struct bpf_stat */
2330 		struct bpf_stat bs;
2331 
2332 		bzero(&bs, sizeof(bs));
2333 		bs.bs_recv = (u_int)d->bd_rcount;
2334 		bs.bs_drop = (u_int)d->bd_dcount;
2335 		bcopy(&bs, addr, sizeof(bs));
2336 		break;
2337 	}
2338 
2339 	/*
2340 	 * Set immediate mode.
2341 	 */
2342 	case BIOCIMMEDIATE:             /* u_int */
2343 		d->bd_immediate = *(u_char *)(void *)addr;
2344 		break;
2345 
2346 	case BIOCVERSION: {             /* struct bpf_version */
2347 		struct bpf_version bv;
2348 
2349 		bzero(&bv, sizeof(bv));
2350 		bv.bv_major = BPF_MAJOR_VERSION;
2351 		bv.bv_minor = BPF_MINOR_VERSION;
2352 		bcopy(&bv, addr, sizeof(bv));
2353 		break;
2354 	}
2355 
2356 	/*
2357 	 * Get "header already complete" flag
2358 	 */
2359 	case BIOCGHDRCMPLT:             /* u_int */
2360 		bcopy(&d->bd_hdrcmplt, addr, sizeof(u_int));
2361 		break;
2362 
2363 	/*
2364 	 * Set "header already complete" flag
2365 	 */
2366 	case BIOCSHDRCMPLT:             /* u_int */
2367 		bcopy(addr, &int_arg, sizeof(int_arg));
2368 		if (int_arg == 0 && (d->bd_flags & BPF_BATCH_WRITE)) {
2369 			os_log(OS_LOG_DEFAULT,
2370 			    "bpf%u cannot set BIOCSHDRCMPLT when BIOCSBATCHWRITE is set",
2371 			    d->bd_dev_minor);
2372 			error = EINVAL;
2373 			break;
2374 		}
2375 		d->bd_hdrcmplt = int_arg ? 1 : 0;
2376 		break;
2377 
2378 	/*
2379 	 * Get "see sent packets" flag
2380 	 */
2381 	case BIOCGSEESENT: {            /* u_int */
2382 		int_arg = 0;
2383 
2384 		if (d->bd_direction & BPF_D_OUT) {
2385 			int_arg = 1;
2386 		}
2387 		bcopy(&int_arg, addr, sizeof(u_int));
2388 		break;
2389 	}
2390 	/*
2391 	 * Set "see sent packets" flag
2392 	 */
2393 	case BIOCSSEESENT: {            /* u_int */
2394 		bcopy(addr, &int_arg, sizeof(u_int));
2395 
2396 		if (int_arg == 0) {
2397 			d->bd_direction = BPF_D_IN;
2398 		} else {
2399 			d->bd_direction = BPF_D_INOUT;
2400 		}
2401 		break;
2402 	}
2403 	/*
2404 	 * Get direction of tapped packets that can be seen for reading
2405 	 */
2406 	case BIOCGDIRECTION: {          /* u_int */
2407 		int_arg = d->bd_direction;
2408 
2409 		bcopy(&int_arg, addr, sizeof(u_int));
2410 		break;
2411 	}
2412 	/*
2413 	 * Set direction of tapped packets that can be seen for reading
2414 	 */
2415 	case BIOCSDIRECTION: {          /* u_int */
2416 		bcopy(addr, &int_arg, sizeof(u_int));
2417 
2418 		switch (int_arg) {
2419 		case BPF_D_NONE:
2420 		case BPF_D_IN:
2421 		case BPF_D_OUT:
2422 		case BPF_D_INOUT:
2423 			d->bd_direction = int_arg;
2424 			break;
2425 		default:
2426 			error = EINVAL;
2427 			break;
2428 		}
2429 		break;
2430 	}
2431 	/*
2432 	 * Set traffic service class
2433 	 */
2434 	case BIOCSETTC: {               /* int */
2435 		int tc;
2436 
2437 		bcopy(addr, &tc, sizeof(int));
2438 		if (tc != 0 && (d->bd_flags & BPF_BATCH_WRITE)) {
2439 			os_log(OS_LOG_DEFAULT,
2440 			    "bpf%u cannot set BIOCSETTC when BIOCSBATCHWRITE is set",
2441 			    d->bd_dev_minor);
2442 			error = EINVAL;
2443 			break;
2444 		}
2445 		error = bpf_set_traffic_class(d, tc);
2446 		break;
2447 	}
2448 
2449 	/*
2450 	 * Get traffic service class
2451 	 */
2452 	case BIOCGETTC:                 /* int */
2453 		bcopy(&d->bd_traffic_class, addr, sizeof(int));
2454 		break;
2455 
2456 	case FIONBIO:           /* Non-blocking I/O; int */
2457 		break;
2458 
2459 	case FIOASYNC:          /* Send signal on receive packets; int */
2460 		bcopy(addr, &d->bd_async, sizeof(int));
2461 		break;
2462 
2463 	case BIOCSRSIG: {         /* Set receive signal; u_int */
2464 		u_int sig;
2465 
2466 		bcopy(addr, &sig, sizeof(u_int));
2467 
2468 		if (sig >= NSIG) {
2469 			error = EINVAL;
2470 		} else {
2471 			d->bd_sig = sig;
2472 		}
2473 		break;
2474 	}
2475 	case BIOCGRSIG:                 /* u_int */
2476 		bcopy(&d->bd_sig, addr, sizeof(u_int));
2477 		break;
2478 
2479 	case BIOCSEXTHDR:               /* u_int */
2480 		bcopy(addr, &int_arg, sizeof(int_arg));
2481 		if (int_arg) {
2482 			d->bd_flags |= BPF_EXTENDED_HDR;
2483 		} else {
2484 			d->bd_flags &= ~BPF_EXTENDED_HDR;
2485 		}
2486 		break;
2487 
2488 	case BIOCGIFATTACHCOUNT: {              /* struct ifreq */
2489 		ifnet_t ifp;
2490 		struct bpf_if *bp;
2491 
2492 		bcopy(addr, &ifr, sizeof(ifr));
2493 		ifr.ifr_name[IFNAMSIZ - 1] = '\0';
2494 		ifp = ifunit(ifr.ifr_name);
2495 		if (ifp == NULL) {
2496 			error = ENXIO;
2497 			break;
2498 		}
2499 		ifr.ifr_intval = 0;
2500 		for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
2501 			struct bpf_d *bpf_d;
2502 
2503 			if (bp->bif_ifp == NULL || bp->bif_ifp != ifp) {
2504 				continue;
2505 			}
2506 			for (bpf_d = bp->bif_dlist; bpf_d;
2507 			    bpf_d = bpf_d->bd_next) {
2508 				ifr.ifr_intval += 1;
2509 			}
2510 		}
2511 		bcopy(&ifr, addr, sizeof(ifr));
2512 		break;
2513 	}
2514 	case BIOCGWANTPKTAP:                    /* u_int */
2515 		int_arg = d->bd_flags & BPF_WANT_PKTAP ? 1 : 0;
2516 		bcopy(&int_arg, addr, sizeof(int_arg));
2517 		break;
2518 
2519 	case BIOCSWANTPKTAP:                    /* u_int */
2520 		bcopy(addr, &int_arg, sizeof(int_arg));
2521 		if (int_arg) {
2522 			d->bd_flags |= BPF_WANT_PKTAP;
2523 		} else {
2524 			d->bd_flags &= ~BPF_WANT_PKTAP;
2525 		}
2526 		break;
2527 
2528 	case BIOCSHEADDROP:
2529 		bcopy(addr, &int_arg, sizeof(int_arg));
2530 		d->bd_headdrop = int_arg ? 1 : 0;
2531 		break;
2532 
2533 	case BIOCGHEADDROP:
2534 		bcopy(&d->bd_headdrop, addr, sizeof(int));
2535 		break;
2536 
2537 	case BIOCSTRUNCATE:
2538 		bcopy(addr, &int_arg, sizeof(int_arg));
2539 		if (int_arg) {
2540 			d->bd_flags |=  BPF_TRUNCATE;
2541 		} else {
2542 			d->bd_flags &= ~BPF_TRUNCATE;
2543 		}
2544 		break;
2545 
2546 	case BIOCGETUUID:
2547 		bcopy(&d->bd_uuid, addr, sizeof(uuid_t));
2548 		break;
2549 
2550 	case BIOCSETUP: {
2551 		struct bpf_setup_args bsa;
2552 		ifnet_t ifp;
2553 
2554 		bcopy(addr, &bsa, sizeof(struct bpf_setup_args));
2555 		bsa.bsa_ifname[IFNAMSIZ - 1] = 0;
2556 		ifp = ifunit(bsa.bsa_ifname);
2557 		if (ifp == NULL) {
2558 			error = ENXIO;
2559 			os_log_error(OS_LOG_DEFAULT,
2560 			    "%s: ifnet not found for %s error %d",
2561 			    __func__, bsa.bsa_ifname, error);
2562 			break;
2563 		}
2564 
2565 		error = bpf_setup(d, bsa.bsa_uuid, ifp);
2566 		break;
2567 	}
2568 	case BIOCSPKTHDRV2:
2569 		bcopy(addr, &int_arg, sizeof(int_arg));
2570 		if (int_arg != 0) {
2571 			d->bd_flags |= BPF_PKTHDRV2;
2572 		} else {
2573 			d->bd_flags &= ~BPF_PKTHDRV2;
2574 		}
2575 		break;
2576 
2577 	case BIOCGPKTHDRV2:
2578 		int_arg = d->bd_flags & BPF_PKTHDRV2 ? 1 : 0;
2579 		bcopy(&int_arg, addr, sizeof(int_arg));
2580 		break;
2581 
2582 	case BIOCGHDRCOMP:
2583 		int_arg = d->bd_flags & BPF_COMP_REQ ? 1 : 0;
2584 		bcopy(&int_arg, addr, sizeof(int_arg));
2585 		break;
2586 
2587 	case BIOCSHDRCOMP:
2588 		bcopy(addr, &int_arg, sizeof(int_arg));
2589 		if (int_arg != 0 && int_arg != 1) {
2590 			return EINVAL;
2591 		}
2592 		if (d->bd_bif != 0 || (d->bd_flags & BPF_DETACHING)) {
2593 			/*
2594 			 * Interface already attached, unable to change buffers
2595 			 */
2596 			error = EINVAL;
2597 			break;
2598 		}
2599 		if (int_arg != 0) {
2600 			d->bd_flags |= BPF_COMP_REQ;
2601 			if (bpf_hdr_comp_enable != 0) {
2602 				d->bd_flags |= BPF_COMP_ENABLED;
2603 			}
2604 		} else {
2605 			d->bd_flags &= ~(BPF_COMP_REQ | BPF_COMP_ENABLED);
2606 		}
2607 		break;
2608 
2609 	case BIOCGHDRCOMPON:
2610 		int_arg = d->bd_flags & BPF_COMP_ENABLED ? 1 : 0;
2611 		bcopy(&int_arg, addr, sizeof(int_arg));
2612 		break;
2613 
2614 	case BIOCGHDRCOMPSTATS: {
2615 		struct bpf_comp_stats bcs = {};
2616 
2617 		bcs = d->bd_bcs;
2618 
2619 		bcopy(&bcs, addr, sizeof(bcs));
2620 		break;
2621 	}
2622 	case BIOCSWRITEMAX:
2623 		bcopy(addr, &int_arg, sizeof(int_arg));
2624 		if (int_arg > BPF_WRITE_MAX) {
2625 			os_log(OS_LOG_DEFAULT, "bpf%u bd_write_size_max %u too big",
2626 			    d->bd_dev_minor, d->bd_write_size_max);
2627 			error = EINVAL;
2628 			break;
2629 		}
2630 		d->bd_write_size_max = int_arg;
2631 		break;
2632 
2633 	case BIOCGWRITEMAX:
2634 		int_arg = d->bd_write_size_max;
2635 		bcopy(&int_arg, addr, sizeof(int_arg));
2636 		break;
2637 
2638 	case BIOCGBATCHWRITE:                    /* int */
2639 		int_arg = d->bd_flags & BPF_BATCH_WRITE ? 1 : 0;
2640 		bcopy(&int_arg, addr, sizeof(int_arg));
2641 		break;
2642 
2643 	case BIOCSBATCHWRITE:                    /* int */
2644 		bcopy(addr, &int_arg, sizeof(int_arg));
2645 		if (int_arg != 0) {
2646 			if (d->bd_hdrcmplt == 0) {
2647 				os_log(OS_LOG_DEFAULT,
2648 				    "bpf%u cannot set BIOCSBATCHWRITE when BIOCSHDRCMPLT is not set",
2649 				    d->bd_dev_minor);
2650 				error = EINVAL;
2651 				break;
2652 			}
2653 			if (d->bd_traffic_class != 0) {
2654 				os_log(OS_LOG_DEFAULT,
2655 				    "bpf%u cannot set BIOCSBATCHWRITE when BIOCSETTC is set",
2656 				    d->bd_dev_minor);
2657 				error = EINVAL;
2658 				break;
2659 			}
2660 			d->bd_flags |= BPF_BATCH_WRITE;
2661 		} else {
2662 			d->bd_flags &= ~BPF_BATCH_WRITE;
2663 		}
2664 		break;
2665 	}
2666 
2667 	bpf_release_d(d);
2668 	lck_mtx_unlock(bpf_mlock);
2669 
2670 	return error;
2671 }
2672 
2673 /*
2674  * Set d's packet filter program to fp.  If this file already has a filter,
2675  * free it and replace it.  Returns EINVAL for bogus requests.
2676  */
2677 static int
bpf_setf(struct bpf_d * d,u_int bf_len,user_addr_t bf_insns,u_long cmd)2678 bpf_setf(struct bpf_d *d, u_int bf_len, user_addr_t bf_insns,
2679     u_long cmd)
2680 {
2681 	struct bpf_insn *fcode, *old;
2682 	u_int flen, size;
2683 
2684 	while (d->bd_hbuf_read) {
2685 		msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_setf", NULL);
2686 	}
2687 
2688 	if ((d->bd_flags & BPF_CLOSING) != 0) {
2689 		return ENXIO;
2690 	}
2691 
2692 	old = d->bd_filter;
2693 	if (bf_insns == USER_ADDR_NULL) {
2694 		if (bf_len != 0) {
2695 			return EINVAL;
2696 		}
2697 		d->bd_filter = NULL;
2698 		reset_d(d);
2699 		if (old != 0) {
2700 			kfree_data_addr(old);
2701 		}
2702 		return 0;
2703 	}
2704 	flen = bf_len;
2705 	if (flen > BPF_MAXINSNS) {
2706 		return EINVAL;
2707 	}
2708 
2709 	size = flen * sizeof(struct bpf_insn);
2710 	fcode = (struct bpf_insn *) kalloc_data(size, Z_WAITOK | Z_ZERO);
2711 	if (fcode == NULL) {
2712 		return ENOMEM;
2713 	}
2714 	if (copyin(bf_insns, (caddr_t)fcode, size) == 0 &&
2715 	    bpf_validate(fcode, (int)flen)) {
2716 		d->bd_filter = fcode;
2717 
2718 		if (cmd == BIOCSETF32 || cmd == BIOCSETF64) {
2719 			reset_d(d);
2720 		}
2721 
2722 		if (old != 0) {
2723 			kfree_data_addr(old);
2724 		}
2725 
2726 		return 0;
2727 	}
2728 	kfree_data(fcode, size);
2729 	return EINVAL;
2730 }
2731 
2732 /*
2733  * Detach a file from its current interface (if attached at all) and attach
2734  * to the interface indicated by the name stored in ifr.
2735  * Return an errno or 0.
2736  */
2737 static int
bpf_setif(struct bpf_d * d,ifnet_t theywant,bool do_reset,bool has_hbuf_read_write,bool has_bufs_allocated)2738 bpf_setif(struct bpf_d *d, ifnet_t theywant, bool do_reset, bool has_hbuf_read_write,
2739     bool has_bufs_allocated)
2740 {
2741 	struct bpf_if *bp;
2742 	int error;
2743 
2744 	while (!has_hbuf_read_write && (d->bd_hbuf_read || d->bd_hbuf_write)) {
2745 		msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_setif", NULL);
2746 	}
2747 
2748 	if ((d->bd_flags & BPF_CLOSING) != 0) {
2749 		return ENXIO;
2750 	}
2751 
2752 	/*
2753 	 * Look through attached interfaces for the named one.
2754 	 */
2755 	for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
2756 		struct ifnet *ifp = bp->bif_ifp;
2757 
2758 		if (ifp == 0 || ifp != theywant) {
2759 			continue;
2760 		}
2761 		/*
2762 		 * Do not use DLT_PKTAP, unless requested explicitly
2763 		 */
2764 		if (bp->bif_dlt == DLT_PKTAP && !(d->bd_flags & BPF_WANT_PKTAP)) {
2765 			continue;
2766 		}
2767 		/*
2768 		 * Skip the coprocessor interface
2769 		 */
2770 		if (!intcoproc_unrestricted && IFNET_IS_INTCOPROC(ifp)) {
2771 			continue;
2772 		}
2773 		/*
2774 		 * We found the requested interface.
2775 		 * Allocate the packet buffers.
2776 		 */
2777 		if (has_bufs_allocated == false) {
2778 			error = bpf_allocbufs(d);
2779 			if (error != 0) {
2780 				return error;
2781 			}
2782 		}
2783 		/*
2784 		 * Detach if attached to something else.
2785 		 */
2786 		if (bp != d->bd_bif) {
2787 			if (d->bd_bif != NULL) {
2788 				if (bpf_detachd(d) != 0) {
2789 					return ENXIO;
2790 				}
2791 			}
2792 			if (bpf_attachd(d, bp) != 0) {
2793 				return ENXIO;
2794 			}
2795 		}
2796 		if (do_reset) {
2797 			reset_d(d);
2798 		}
2799 		os_log(OS_LOG_DEFAULT, "bpf%u attached to %s",
2800 		    d->bd_dev_minor, if_name(theywant));
2801 		return 0;
2802 	}
2803 	/* Not found. */
2804 	return ENXIO;
2805 }
2806 
2807 /*
2808  * Get a list of available data link type of the interface.
2809  */
2810 static int
bpf_getdltlist(struct bpf_d * d,caddr_t addr,struct proc * p)2811 bpf_getdltlist(struct bpf_d *d, caddr_t addr, struct proc *p)
2812 {
2813 	u_int           n;
2814 	int             error;
2815 	struct ifnet    *ifp;
2816 	struct bpf_if   *bp;
2817 	user_addr_t     dlist;
2818 	struct bpf_dltlist bfl;
2819 
2820 	bcopy(addr, &bfl, sizeof(bfl));
2821 	if (proc_is64bit(p)) {
2822 		dlist = (user_addr_t)bfl.bfl_u.bflu_pad;
2823 	} else {
2824 		dlist = CAST_USER_ADDR_T(bfl.bfl_u.bflu_list);
2825 	}
2826 
2827 	ifp = d->bd_bif->bif_ifp;
2828 	n = 0;
2829 	error = 0;
2830 
2831 	for (bp = bpf_iflist; bp; bp = bp->bif_next) {
2832 		if (bp->bif_ifp != ifp) {
2833 			continue;
2834 		}
2835 		/*
2836 		 * Do not use DLT_PKTAP, unless requested explicitly
2837 		 */
2838 		if (bp->bif_dlt == DLT_PKTAP && !(d->bd_flags & BPF_WANT_PKTAP)) {
2839 			continue;
2840 		}
2841 		if (dlist != USER_ADDR_NULL) {
2842 			if (n >= bfl.bfl_len) {
2843 				return ENOMEM;
2844 			}
2845 			error = copyout(&bp->bif_dlt, dlist,
2846 			    sizeof(bp->bif_dlt));
2847 			if (error != 0) {
2848 				break;
2849 			}
2850 			dlist += sizeof(bp->bif_dlt);
2851 		}
2852 		n++;
2853 	}
2854 	bfl.bfl_len = n;
2855 	bcopy(&bfl, addr, sizeof(bfl));
2856 
2857 	return error;
2858 }
2859 
/*
 * Set the data link type of a BPF instance.
 *
 * Finds the bpf_if entry on the currently-attached interface that
 * carries `dlt`, re-attaches the descriptor to it, and re-enables
 * promiscuous mode if it was previously on.  Called with bpf_mlock
 * held; may sleep.
 *
 * Returns 0 on success (or if already using `dlt`), EINVAL if the
 * interface does not offer the requested DLT, ENXIO if the descriptor
 * is closing or detach fails, or an error from bpf_attachd().
 */
static int
bpf_setdlt(struct bpf_d *d, uint32_t dlt)
{
	int error, opromisc;
	struct ifnet *ifp;
	struct bpf_if *bp;

	/* Nothing to do if the requested DLT is already in effect. */
	if (d->bd_bif->bif_dlt == dlt) {
		return 0;
	}

	/* Wait until no read is borrowing the hold buffer. */
	while (d->bd_hbuf_read) {
		msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_setdlt", NULL);
	}

	/* The descriptor may have started closing while we slept. */
	if ((d->bd_flags & BPF_CLOSING) != 0) {
		return ENXIO;
	}

	/* Look for the same interface with the requested DLT. */
	ifp = d->bd_bif->bif_ifp;
	for (bp = bpf_iflist; bp; bp = bp->bif_next) {
		if (bp->bif_ifp == ifp && bp->bif_dlt == dlt) {
			/*
			 * Do not use DLT_PKTAP, unless requested explicitly
			 */
			if (bp->bif_dlt == DLT_PKTAP &&
			    !(d->bd_flags & BPF_WANT_PKTAP)) {
				continue;
			}
			break;
		}
	}
	if (bp != NULL) {
		opromisc = d->bd_promisc;
		if (bpf_detachd(d) != 0) {
			return ENXIO;
		}
		error = bpf_attachd(d, bp);
		if (error != 0) {
			os_log_error(OS_LOG_DEFAULT,
			    "bpf_setdlt: bpf%d bpf_attachd %s error %d",
			    d->bd_dev_minor, if_name(bp->bif_ifp),
			    error);
			return error;
		}
		reset_d(d);
		if (opromisc) {
			/*
			 * Drop bpf_mlock around ifnet_set_promiscuous to
			 * preserve lock ordering with the driver layer (see
			 * the lock-ordering note in bpf_tap_imp).
			 */
			lck_mtx_unlock(bpf_mlock);
			error = ifnet_set_promiscuous(bp->bif_ifp, 1);
			lck_mtx_lock(bpf_mlock);
			if (error != 0) {
				os_log_error(OS_LOG_DEFAULT,
				    "bpf_setdlt: bpf%d ifpromisc %s error %d",
				    d->bd_dev_minor, if_name(bp->bif_ifp), error);
			} else {
				d->bd_promisc = 1;
			}
		}
	}
	return bp == NULL ? EINVAL : 0;
}
2924 
2925 static int
bpf_set_traffic_class(struct bpf_d * d,int tc)2926 bpf_set_traffic_class(struct bpf_d *d, int tc)
2927 {
2928 	int error = 0;
2929 
2930 	if (!SO_VALID_TC(tc)) {
2931 		error = EINVAL;
2932 	} else {
2933 		d->bd_traffic_class = tc;
2934 	}
2935 
2936 	return error;
2937 }
2938 
2939 static void
bpf_set_packet_service_class(struct mbuf * m,int tc)2940 bpf_set_packet_service_class(struct mbuf *m, int tc)
2941 {
2942 	if (!(m->m_flags & M_PKTHDR)) {
2943 		return;
2944 	}
2945 
2946 	VERIFY(SO_VALID_TC(tc));
2947 	(void) m_set_service_class(m, so_tc2msc(tc));
2948 }
2949 
/*
 * Support for select()
 *
 * Return true iff the specific operation will not block indefinitely.
 * Otherwise, return false but make a note that a selwakeup() must be done.
 */
int
bpfselect(dev_t dev, int which, void * wql, struct proc *p)
{
	struct bpf_d *d;
	int ret = 0;

	lck_mtx_lock(bpf_mlock);

	/* Validate the minor device and take a reference on it. */
	d = bpf_dtab[minor(dev)];
	if (d == NULL || d == BPF_DEV_RESERVED ||
	    (d->bd_flags & BPF_CLOSING) != 0) {
		lck_mtx_unlock(bpf_mlock);
		return ENXIO;
	}

	bpf_acquire_d(d);

	/* select() is meaningless until an interface is attached. */
	if (d->bd_bif == NULL) {
		bpf_release_d(d);
		lck_mtx_unlock(bpf_mlock);
		return ENXIO;
	}

	/* Wait until no read is borrowing the hold buffer. */
	while (d->bd_hbuf_read) {
		msleep((caddr_t)d, bpf_mlock, PRINET, "bpfselect", NULL);
	}

	/* The descriptor may have started closing while we slept. */
	if ((d->bd_flags & BPF_CLOSING) != 0) {
		bpf_release_d(d);
		lck_mtx_unlock(bpf_mlock);
		return ENXIO;
	}

	switch (which) {
	case FREAD:
		if (d->bd_hlen != 0 ||
		    ((d->bd_immediate ||
		    d->bd_state == BPF_TIMED_OUT) && d->bd_slen != 0)) {
			ret = 1;         /* read has data to return */
		} else {
			/*
			 * Read has no data to return.
			 * Make the select wait, and start a timer if
			 * necessary.
			 */
			selrecord(p, &d->bd_sel, wql);
			bpf_start_timer(d);
		}
		break;

	case FWRITE:
		/* can't determine whether a write would block */
		ret = 1;
		break;
	}

	bpf_release_d(d);
	lck_mtx_unlock(bpf_mlock);

	return ret;
}
3017 
/*
 * Support for kevent() system call.  Register EVFILT_READ filters and
 * reject all others.
 */
int bpfkqfilter(dev_t dev, struct knote *kn);
static void filt_bpfdetach(struct knote *);
static int filt_bpfread(struct knote *, long);
static int filt_bpftouch(struct knote *kn, struct kevent_qos_s *kev);
static int filt_bpfprocess(struct knote *kn, struct kevent_qos_s *kev);

/* Filter operations vector for EVFILT_READ knotes on bpf devices. */
SECURITY_READ_ONLY_EARLY(struct filterops) bpfread_filtops = {
	.f_isfd = 1,
	.f_detach = filt_bpfdetach,
	.f_event = filt_bpfread,
	.f_touch = filt_bpftouch,
	.f_process = filt_bpfprocess,
};
3035 
/*
 * Common readiness check for EVFILT_READ knotes.
 *
 * Computes the number of bytes a read would return: in immediate mode
 * readiness is data >= the knote's low watermark (clamped to the
 * buffer size); otherwise the store buffer only counts once the read
 * timer has expired.  Starts the read timer when not ready; when ready
 * and `kev` is non-NULL, fills in the kevent with the byte count.
 * Called with bpf_mlock held.
 */
static int
filt_bpfread_common(struct knote *kn, struct kevent_qos_s *kev, struct bpf_d *d)
{
	int ready = 0;
	int64_t data = 0;

	if (d->bd_immediate) {
		/*
		 * If there's data in the hold buffer, it's the
		 * amount of data a read will return.
		 *
		 * If there's no data in the hold buffer, but
		 * there's data in the store buffer, a read will
		 * immediately rotate the store buffer to the
		 * hold buffer, the amount of data in the store
		 * buffer is the amount of data a read will
		 * return.
		 *
		 * If there's no data in either buffer, we're not
		 * ready to read.
		 */
		data = (d->bd_hlen == 0 || d->bd_hbuf_read ?
		    d->bd_slen : d->bd_hlen);
		int64_t lowwat = knote_low_watermark(kn);
		if (lowwat > d->bd_bufsize) {
			lowwat = d->bd_bufsize;
		}
		ready = (data >= lowwat);
	} else {
		/*
		 * If there's data in the hold buffer, it's the
		 * amount of data a read will return.
		 *
		 * If there's no data in the hold buffer, but
		 * there's data in the store buffer, if the
		 * timer has expired a read will immediately
		 * rotate the store buffer to the hold buffer,
		 * so the amount of data in the store buffer is
		 * the amount of data a read will return.
		 *
		 * If there's no data in either buffer, or there's
		 * no data in the hold buffer and the timer hasn't
		 * expired, we're not ready to read.
		 */
		data = ((d->bd_hlen == 0 || d->bd_hbuf_read) &&
		    d->bd_state == BPF_TIMED_OUT ? d->bd_slen : d->bd_hlen);
		ready = (data > 0);
	}
	if (!ready) {
		bpf_start_timer(d);
	} else if (kev) {
		knote_fill_kevent(kn, kev, data);
	}

	return ready;
}
3092 
/*
 * kqueue attach entry point for bpf device nodes.
 *
 * Validates that the device is a bpf character device and the filter
 * is EVFILT_READ, links the knote to the descriptor's selinfo, and
 * returns whether data is already available.  On error the knote is
 * flagged with knote_set_error() and 0 is returned.
 */
int
bpfkqfilter(dev_t dev, struct knote *kn)
{
	struct bpf_d *d;
	int res;

	/*
	 * Is this device a bpf?
	 */
	if (major(dev) != CDEV_MAJOR || kn->kn_filter != EVFILT_READ) {
		knote_set_error(kn, EINVAL);
		return 0;
	}

	lck_mtx_lock(bpf_mlock);

	d = bpf_dtab[minor(dev)];

	/* Reject reserved, closing, or unattached descriptors. */
	if (d == NULL || d == BPF_DEV_RESERVED ||
	    (d->bd_flags & BPF_CLOSING) != 0 ||
	    d->bd_bif == NULL) {
		lck_mtx_unlock(bpf_mlock);
		knote_set_error(kn, ENXIO);
		return 0;
	}

	kn->kn_filtid = EVFILTID_BPFREAD;
	knote_kn_hook_set_raw(kn, d);
	KNOTE_ATTACH(&d->bd_sel.si_note, kn);
	d->bd_flags |= BPF_KNOTE;

	/* capture the current state */
	res = filt_bpfread_common(kn, NULL, d);

	lck_mtx_unlock(bpf_mlock);

	return res;
}
3131 
3132 static void
filt_bpfdetach(struct knote * kn)3133 filt_bpfdetach(struct knote *kn)
3134 {
3135 	struct bpf_d *d = (struct bpf_d *)knote_kn_hook_get_raw(kn);
3136 
3137 	lck_mtx_lock(bpf_mlock);
3138 	if (d->bd_flags & BPF_KNOTE) {
3139 		KNOTE_DETACH(&d->bd_sel.si_note, kn);
3140 		d->bd_flags &= ~BPF_KNOTE;
3141 	}
3142 	lck_mtx_unlock(bpf_mlock);
3143 }
3144 
3145 static int
filt_bpfread(struct knote * kn,long hint)3146 filt_bpfread(struct knote *kn, long hint)
3147 {
3148 #pragma unused(hint)
3149 	struct bpf_d *d = (struct bpf_d *)knote_kn_hook_get_raw(kn);
3150 
3151 	return filt_bpfread_common(kn, NULL, d);
3152 }
3153 
3154 static int
filt_bpftouch(struct knote * kn,struct kevent_qos_s * kev)3155 filt_bpftouch(struct knote *kn, struct kevent_qos_s *kev)
3156 {
3157 	struct bpf_d *d = (struct bpf_d *)knote_kn_hook_get_raw(kn);
3158 	int res;
3159 
3160 	lck_mtx_lock(bpf_mlock);
3161 
3162 	/* save off the lowat threshold and flag */
3163 	kn->kn_sdata = kev->data;
3164 	kn->kn_sfflags = kev->fflags;
3165 
3166 	/* output data will be re-generated here */
3167 	res = filt_bpfread_common(kn, NULL, d);
3168 
3169 	lck_mtx_unlock(bpf_mlock);
3170 
3171 	return res;
3172 }
3173 
3174 static int
filt_bpfprocess(struct knote * kn,struct kevent_qos_s * kev)3175 filt_bpfprocess(struct knote *kn, struct kevent_qos_s *kev)
3176 {
3177 	struct bpf_d *d = (struct bpf_d *)knote_kn_hook_get_raw(kn);
3178 	int res;
3179 
3180 	lck_mtx_lock(bpf_mlock);
3181 	res = filt_bpfread_common(kn, kev, d);
3182 	lck_mtx_unlock(bpf_mlock);
3183 
3184 	return res;
3185 }
3186 
3187 /*
3188  * Copy data from an mbuf chain into a buffer.	This code is derived
3189  * from m_copydata in kern/uipc_mbuf.c.
3190  */
3191 static void
bpf_mcopy(struct mbuf * m,void * dst_arg,size_t len,size_t offset)3192 bpf_mcopy(struct mbuf *m, void *dst_arg, size_t len, size_t offset)
3193 {
3194 	u_int count;
3195 	u_char *dst;
3196 
3197 	dst = dst_arg;
3198 
3199 	while (offset >= m->m_len) {
3200 		offset -= m->m_len;
3201 		m = m->m_next;
3202 		if (m == NULL) {
3203 			panic("bpf_mcopy");
3204 		}
3205 		continue;
3206 	}
3207 
3208 	while (len > 0) {
3209 		if (m == NULL) {
3210 			panic("bpf_mcopy");
3211 		}
3212 		count = MIN(m->m_len - (u_int)offset, (u_int)len);
3213 		bcopy((u_char *)mbuf_data(m) + offset, dst, count);
3214 		m = m->m_next;
3215 		dst += count;
3216 		len -= count;
3217 		offset = 0;
3218 	}
3219 }
3220 
/*
 * Core tap entry point: run each attached descriptor's filter over
 * `bpf_pkt` and deliver matching packets via catchpacket().
 */
static inline void
bpf_tap_imp(
	ifnet_t         ifp,
	u_int32_t       dlt,
	struct bpf_packet *bpf_pkt,
	int             outbound)
{
	struct bpf_d    *d;
	u_int slen;
	struct bpf_if *bp;

	/*
	 * It's possible that we get here after the bpf descriptor has been
	 * detached from the interface; in such a case we simply return.
	 * Lock ordering is important since we can be called asynchronously
	 * (from IOKit) to process an inbound packet; when that happens
	 * we would have been holding its "gateLock" and will be acquiring
	 * "bpf_mlock" upon entering this routine.  Due to that, we release
	 * "bpf_mlock" prior to calling ifnet_set_promiscuous (which will
	 * acquire "gateLock" in the IOKit), in order to avoid a deadlock
	 * when a ifnet_set_promiscuous request simultaneously collides with
	 * an inbound packet being passed into the tap callback.
	 */
	lck_mtx_lock(bpf_mlock);
	if (ifp->if_bpf == NULL) {
		lck_mtx_unlock(bpf_mlock);
		return;
	}
	/* Find the attachment entry matching the requested DLT. */
	for (bp = ifp->if_bpf; bp != NULL; bp = bp->bif_next) {
		if (bp->bif_ifp != ifp) {
			/* wrong interface */
			bp = NULL;
			break;
		}
		if (dlt == 0 || bp->bif_dlt == dlt) {
			/* tapping default DLT or DLT matches */
			break;
		}
	}
	if (bp == NULL) {
		goto done;
	}
	/* Offer the packet to every descriptor attached to this entry. */
	for (d = bp->bif_dlist; d != NULL; d = d->bd_next) {
		struct bpf_packet *bpf_pkt_saved = bpf_pkt;
		struct bpf_packet bpf_pkt_tmp = {};
		struct pktap_header_buffer bpfp_header_tmp = {};

		/* Respect the descriptor's direction filter. */
		if (outbound && (d->bd_direction & BPF_D_OUT) == 0) {
			continue;
		}
		if (!outbound && (d->bd_direction & BPF_D_IN) == 0) {
			continue;
		}

		++d->bd_rcount;
		slen = bpf_filter(d->bd_filter, (u_char *)bpf_pkt,
		    (u_int)bpf_pkt->bpfp_total_length, 0);

		if (slen != 0) {
			if (bp->bif_ifp->if_type == IFT_PKTAP &&
			    bp->bif_dlt == DLT_PKTAP) {
				if (d->bd_flags & BPF_TRUNCATE) {
					slen = min(slen, get_pkt_trunc_len(bpf_pkt));
				}
				/*
				 * Need to copy the bpf_pkt because the conversion
				 * to v2 pktap header modifies the content of the
				 * bpfp_header
				 */
				if ((d->bd_flags & BPF_PKTHDRV2) &&
				    bpf_pkt->bpfp_header_length <= sizeof(bpfp_header_tmp)) {
					bpf_pkt_tmp = *bpf_pkt;

					bpf_pkt = &bpf_pkt_tmp;

					memcpy(&bpfp_header_tmp, bpf_pkt->bpfp_header,
					    bpf_pkt->bpfp_header_length);

					bpf_pkt->bpfp_header = &bpfp_header_tmp;

					convert_to_pktap_header_to_v2(bpf_pkt,
					    !!(d->bd_flags & BPF_TRUNCATE));
				}
			}
			++d->bd_fcount;
			catchpacket(d, bpf_pkt, slen, outbound);
		}
		/* Undo any per-descriptor substitution of the packet. */
		bpf_pkt = bpf_pkt_saved;
	}

done:
	lck_mtx_unlock(bpf_mlock);
}
3314 
3315 static inline void
bpf_tap_mbuf(ifnet_t ifp,u_int32_t dlt,mbuf_t m,void * hdr,size_t hlen,int outbound)3316 bpf_tap_mbuf(
3317 	ifnet_t         ifp,
3318 	u_int32_t       dlt,
3319 	mbuf_t          m,
3320 	void*           hdr,
3321 	size_t          hlen,
3322 	int             outbound)
3323 {
3324 	struct bpf_packet bpf_pkt;
3325 	struct mbuf *m0;
3326 
3327 	if (ifp->if_bpf == NULL) {
3328 		/* quickly check without taking lock */
3329 		return;
3330 	}
3331 	bpf_pkt.bpfp_type = BPF_PACKET_TYPE_MBUF;
3332 	bpf_pkt.bpfp_mbuf = m;
3333 	bpf_pkt.bpfp_total_length = 0;
3334 	for (m0 = m; m0 != NULL; m0 = m0->m_next) {
3335 		bpf_pkt.bpfp_total_length += m0->m_len;
3336 	}
3337 	bpf_pkt.bpfp_header = hdr;
3338 	if (hdr != NULL) {
3339 		bpf_pkt.bpfp_total_length += hlen;
3340 		bpf_pkt.bpfp_header_length = hlen;
3341 	} else {
3342 		bpf_pkt.bpfp_header_length = 0;
3343 	}
3344 	bpf_tap_imp(ifp, dlt, &bpf_pkt, outbound);
3345 }
3346 
3347 void
bpf_tap_out(ifnet_t ifp,u_int32_t dlt,mbuf_t m,void * hdr,size_t hlen)3348 bpf_tap_out(
3349 	ifnet_t         ifp,
3350 	u_int32_t       dlt,
3351 	mbuf_t          m,
3352 	void*           hdr,
3353 	size_t          hlen)
3354 {
3355 	bpf_tap_mbuf(ifp, dlt, m, hdr, hlen, 1);
3356 }
3357 
3358 void
bpf_tap_in(ifnet_t ifp,u_int32_t dlt,mbuf_t m,void * hdr,size_t hlen)3359 bpf_tap_in(
3360 	ifnet_t         ifp,
3361 	u_int32_t       dlt,
3362 	mbuf_t          m,
3363 	void*           hdr,
3364 	size_t          hlen)
3365 {
3366 	bpf_tap_mbuf(ifp, dlt, m, hdr, hlen, 0);
3367 }
3368 
3369 /* Callback registered with Ethernet driver. */
3370 static int
bpf_tap_callback(struct ifnet * ifp,struct mbuf * m)3371 bpf_tap_callback(struct ifnet *ifp, struct mbuf *m)
3372 {
3373 	bpf_tap_mbuf(ifp, 0, m, NULL, 0, mbuf_pkthdr_rcvif(m) == NULL);
3374 
3375 	return 0;
3376 }
3377 
3378 #if SKYWALK
3379 #include <skywalk/os_skywalk_private.h>
3380 
/*
 * Copy "len" bytes of packet data, starting "offset" bytes in, from the
 * buflet chain of a skywalk packet into the flat buffer "dst_arg".
 * The caller must guarantee the packet holds at least offset + len
 * bytes of data; the VERIFYs enforce that assumption.
 */
static void
bpf_pktcopy(kern_packet_t pkt, void *dst_arg, size_t len, size_t offset)
{
	kern_buflet_t   buflet = NULL;
	size_t count;
	u_char *dst;

	dst = dst_arg;
	while (len > 0) {
		uint8_t         *addr;

		u_int32_t       buflet_length;

		/* NULL buflet argument yields the first buflet of the chain */
		buflet = kern_packet_get_next_buflet(pkt, buflet);
		VERIFY(buflet != NULL);
		addr = kern_buflet_get_data_address(buflet);
		VERIFY(addr != NULL);
		addr += kern_buflet_get_data_offset(buflet);
		buflet_length = kern_buflet_get_data_length(buflet);
		/* skip whole buflets that lie before the requested offset */
		if (offset >= buflet_length) {
			offset -= buflet_length;
			continue;
		}
		count = MIN(buflet_length - offset, len);
		bcopy((void *)(addr + offset), (void *)dst, count);
		dst += count;
		len -= count;
		/* only the first buflet copied from can start mid-buffer */
		offset = 0;
	}
}
3411 
3412 static inline void
bpf_tap_packet(ifnet_t ifp,u_int32_t dlt,kern_packet_t pkt,void * hdr,size_t hlen,int outbound)3413 bpf_tap_packet(
3414 	ifnet_t         ifp,
3415 	u_int32_t       dlt,
3416 	kern_packet_t   pkt,
3417 	void*           hdr,
3418 	size_t          hlen,
3419 	int             outbound)
3420 {
3421 	struct bpf_packet       bpf_pkt;
3422 	struct mbuf *           m;
3423 
3424 	if (ifp->if_bpf == NULL) {
3425 		/* quickly check without taking lock */
3426 		return;
3427 	}
3428 	m = kern_packet_get_mbuf(pkt);
3429 	if (m != NULL) {
3430 		bpf_pkt.bpfp_type = BPF_PACKET_TYPE_MBUF;
3431 		bpf_pkt.bpfp_mbuf = m;
3432 		bpf_pkt.bpfp_total_length = m_length(m);
3433 	} else {
3434 		bpf_pkt.bpfp_type = BPF_PACKET_TYPE_PKT;
3435 		bpf_pkt.bpfp_pkt = pkt;
3436 		bpf_pkt.bpfp_total_length = kern_packet_get_data_length(pkt);
3437 	}
3438 	bpf_pkt.bpfp_header = hdr;
3439 	bpf_pkt.bpfp_header_length = hlen;
3440 	if (hlen != 0) {
3441 		bpf_pkt.bpfp_total_length += hlen;
3442 	}
3443 	bpf_tap_imp(ifp, dlt, &bpf_pkt, outbound);
3444 }
3445 
3446 void
bpf_tap_packet_out(ifnet_t ifp,u_int32_t dlt,kern_packet_t pkt,void * hdr,size_t hlen)3447 bpf_tap_packet_out(
3448 	ifnet_t         ifp,
3449 	u_int32_t       dlt,
3450 	kern_packet_t   pkt,
3451 	void*           hdr,
3452 	size_t          hlen)
3453 {
3454 	bpf_tap_packet(ifp, dlt, pkt, hdr, hlen, 1);
3455 }
3456 
3457 void
bpf_tap_packet_in(ifnet_t ifp,u_int32_t dlt,kern_packet_t pkt,void * hdr,size_t hlen)3458 bpf_tap_packet_in(
3459 	ifnet_t         ifp,
3460 	u_int32_t       dlt,
3461 	kern_packet_t   pkt,
3462 	void*           hdr,
3463 	size_t          hlen)
3464 {
3465 	bpf_tap_packet(ifp, dlt, pkt, hdr, hlen, 0);
3466 }
3467 
3468 #endif /* SKYWALK */
3469 
3470 static errno_t
bpf_copydata(struct bpf_packet * pkt,size_t off,size_t len,void * out_data)3471 bpf_copydata(struct bpf_packet *pkt, size_t off, size_t len, void* out_data)
3472 {
3473 	errno_t err = 0;
3474 	if (pkt->bpfp_type == BPF_PACKET_TYPE_MBUF) {
3475 		err = mbuf_copydata(pkt->bpfp_mbuf, off, len, out_data);
3476 #if SKYWALK
3477 	} else if (pkt->bpfp_type == BPF_PACKET_TYPE_PKT) {
3478 		err = kern_packet_copy_bytes(pkt->bpfp_pkt, off, len, out_data);
3479 #endif /* SKYWALK */
3480 	} else {
3481 		err = EINVAL;
3482 	}
3483 
3484 	return err;
3485 }
3486 
/*
 * Copy "len" bytes of the logical capture (the optional BPF/pktap
 * header followed by the packet payload) into "dst", starting "offset"
 * bytes into that logical stream.  The caller guarantees that
 * offset + len does not exceed bpfp_total_length.
 */
static void
copy_bpf_packet_offset(struct bpf_packet * pkt, void * dst, size_t len, size_t offset)
{
	/* copy the optional header */
	if (offset < pkt->bpfp_header_length) {
		size_t  count = MIN(len, pkt->bpfp_header_length - offset);
		caddr_t src = (caddr_t)pkt->bpfp_header;
		bcopy(src + offset, dst, count);
		len -= count;
		dst = (void *)((uintptr_t)dst + count);
		/* the remaining bytes come from the start of the payload */
		offset = 0;
	} else {
		/* offset is past the header: make it payload-relative */
		offset -= pkt->bpfp_header_length;
	}

	if (len == 0) {
		/* nothing past the header */
		return;
	}
	/* copy the packet */
	switch (pkt->bpfp_type) {
	case BPF_PACKET_TYPE_MBUF:
		bpf_mcopy(pkt->bpfp_mbuf, dst, len, offset);
		break;
#if SKYWALK
	case BPF_PACKET_TYPE_PKT:
		bpf_pktcopy(pkt->bpfp_pkt, dst, len, offset);
		break;
#endif /* SKYWALK */
	default:
		break;
	}
}
3520 
3521 static void
copy_bpf_packet(struct bpf_packet * pkt,void * dst,size_t len)3522 copy_bpf_packet(struct bpf_packet * pkt, void * dst, size_t len)
3523 {
3524 	copy_bpf_packet_offset(pkt, dst, len, 0);
3525 }
3526 
3527 static uint32_t
get_esp_trunc_len(__unused struct bpf_packet * pkt,__unused uint32_t off,const uint32_t remaining_caplen)3528 get_esp_trunc_len(__unused struct bpf_packet *pkt, __unused uint32_t off,
3529     const uint32_t remaining_caplen)
3530 {
3531 	/*
3532 	 * For some reason tcpdump expects to have one byte beyond the ESP header
3533 	 */
3534 	uint32_t trunc_len = ESP_HDR_SIZE + 1;
3535 
3536 	if (trunc_len > remaining_caplen) {
3537 		return remaining_caplen;
3538 	}
3539 
3540 	return trunc_len;
3541 }
3542 
3543 static uint32_t
get_isakmp_trunc_len(__unused struct bpf_packet * pkt,__unused uint32_t off,const uint32_t remaining_caplen)3544 get_isakmp_trunc_len(__unused struct bpf_packet *pkt, __unused uint32_t off,
3545     const uint32_t remaining_caplen)
3546 {
3547 	/*
3548 	 * Include the payload generic header
3549 	 */
3550 	uint32_t trunc_len = ISAKMP_HDR_SIZE;
3551 
3552 	if (trunc_len > remaining_caplen) {
3553 		return remaining_caplen;
3554 	}
3555 
3556 	return trunc_len;
3557 }
3558 
3559 static uint32_t
get_isakmp_natt_trunc_len(struct bpf_packet * pkt,uint32_t off,const uint32_t remaining_caplen)3560 get_isakmp_natt_trunc_len(struct bpf_packet *pkt, uint32_t off,
3561     const uint32_t remaining_caplen)
3562 {
3563 	int err = 0;
3564 	uint32_t trunc_len = 0;
3565 	char payload[remaining_caplen];
3566 
3567 	err = bpf_copydata(pkt, off, remaining_caplen, payload);
3568 	if (err != 0) {
3569 		return remaining_caplen;
3570 	}
3571 	/*
3572 	 * They are three cases:
3573 	 * - IKE: payload start with 4 bytes header set to zero before ISAKMP header
3574 	 * - keep alive: 1 byte payload
3575 	 * - otherwise it's ESP
3576 	 */
3577 	if (remaining_caplen >= 4 &&
3578 	    payload[0] == 0 && payload[1] == 0 &&
3579 	    payload[2] == 0 && payload[3] == 0) {
3580 		trunc_len = 4 + get_isakmp_trunc_len(pkt, off + 4, remaining_caplen - 4);
3581 	} else if (remaining_caplen == 1) {
3582 		trunc_len = 1;
3583 	} else {
3584 		trunc_len = get_esp_trunc_len(pkt, off, remaining_caplen);
3585 	}
3586 
3587 	if (trunc_len > remaining_caplen) {
3588 		return remaining_caplen;
3589 	}
3590 
3591 	return trunc_len;
3592 }
3593 
/*
 * Truncation length for a UDP datagram at offset "off": the UDP header
 * plus a port-dependent amount of payload (full payload for DNS and
 * BOOTP/DHCP, protocol headers only for ISAKMP and NAT-T), clamped to
 * remaining_caplen.
 */
static uint32_t
get_udp_trunc_len(struct bpf_packet *pkt, uint32_t off, const uint32_t remaining_caplen)
{
	int err = 0;
	uint32_t trunc_len = sizeof(struct udphdr);         /* By default no UDP payload */

	if (trunc_len >= remaining_caplen) {
		return remaining_caplen;
	}

	struct udphdr udphdr;
	err = bpf_copydata(pkt, off, sizeof(struct udphdr), &udphdr);
	if (err != 0) {
		/* cannot read the header: keep whatever was captured */
		return remaining_caplen;
	}

	u_short sport, dport;

	/* ports are in network byte order within the packet */
	sport = EXTRACT_SHORT(&udphdr.uh_sport);
	dport = EXTRACT_SHORT(&udphdr.uh_dport);

	if (dport == PORT_DNS || sport == PORT_DNS) {
		/*
		 * Full UDP payload for DNS
		 */
		trunc_len = remaining_caplen;
	} else if ((sport == PORT_BOOTPS && dport == PORT_BOOTPC) ||
	    (sport == PORT_BOOTPC && dport == PORT_BOOTPS)) {
		/*
		 * Full UDP payload for BOOTP and DHCP
		 */
		trunc_len = remaining_caplen;
	} else if (dport == PORT_ISAKMP && sport == PORT_ISAKMP) {
		/*
		 * Return the ISAKMP header
		 */
		trunc_len += get_isakmp_trunc_len(pkt, off + sizeof(struct udphdr),
		    remaining_caplen - sizeof(struct udphdr));
	} else if (dport == PORT_ISAKMP_NATT && sport == PORT_ISAKMP_NATT) {
		/* UDP-encapsulated IKE/ESP (NAT traversal) */
		trunc_len += get_isakmp_natt_trunc_len(pkt, off + sizeof(struct udphdr),
		    remaining_caplen - sizeof(struct udphdr));
	}
	if (trunc_len >= remaining_caplen) {
		return remaining_caplen;
	}

	return trunc_len;
}
3642 
/*
 * Truncation length for a TCP segment at offset "off": the TCP header
 * including options (taken from th_off), or the full payload for DNS,
 * clamped to remaining_caplen.
 */
static uint32_t
get_tcp_trunc_len(struct bpf_packet *pkt, uint32_t off, const uint32_t remaining_caplen)
{
	int err = 0;
	uint32_t trunc_len = sizeof(struct tcphdr);         /* By default no TCP payload */
	if (trunc_len >= remaining_caplen) {
		return remaining_caplen;
	}

	struct tcphdr tcphdr;
	err = bpf_copydata(pkt, off, sizeof(struct tcphdr), &tcphdr);
	if (err != 0) {
		/* cannot read the header: keep whatever was captured */
		return remaining_caplen;
	}

	u_short sport, dport;
	/* ports are in network byte order within the packet */
	sport = EXTRACT_SHORT(&tcphdr.th_sport);
	dport = EXTRACT_SHORT(&tcphdr.th_dport);

	if (dport == PORT_DNS || sport == PORT_DNS) {
		/*
		 * Full TCP payload  for DNS
		 */
		trunc_len = remaining_caplen;
	} else {
		/* th_off is the header length in 32-bit words */
		trunc_len = (uint16_t)(tcphdr.th_off << 2);
	}
	if (trunc_len >= remaining_caplen) {
		return remaining_caplen;
	}

	return trunc_len;
}
3676 
3677 static uint32_t
get_proto_trunc_len(uint8_t proto,struct bpf_packet * pkt,uint32_t off,const uint32_t remaining_caplen)3678 get_proto_trunc_len(uint8_t proto, struct bpf_packet *pkt, uint32_t off, const uint32_t remaining_caplen)
3679 {
3680 	uint32_t trunc_len;
3681 
3682 	switch (proto) {
3683 	case IPPROTO_ICMP: {
3684 		/*
3685 		 * Full IMCP payload
3686 		 */
3687 		trunc_len = remaining_caplen;
3688 		break;
3689 	}
3690 	case IPPROTO_ICMPV6: {
3691 		/*
3692 		 * Full IMCPV6 payload
3693 		 */
3694 		trunc_len = remaining_caplen;
3695 		break;
3696 	}
3697 	case IPPROTO_IGMP: {
3698 		/*
3699 		 * Full IGMP payload
3700 		 */
3701 		trunc_len = remaining_caplen;
3702 		break;
3703 	}
3704 	case IPPROTO_UDP: {
3705 		trunc_len = get_udp_trunc_len(pkt, off, remaining_caplen);
3706 		break;
3707 	}
3708 	case IPPROTO_TCP: {
3709 		trunc_len = get_tcp_trunc_len(pkt, off, remaining_caplen);
3710 		break;
3711 	}
3712 	case IPPROTO_ESP: {
3713 		trunc_len = get_esp_trunc_len(pkt, off, remaining_caplen);
3714 		break;
3715 	}
3716 	default: {
3717 		/*
3718 		 * By default we only include the IP header
3719 		 */
3720 		trunc_len = 0;
3721 		break;
3722 	}
3723 	}
3724 	if (trunc_len >= remaining_caplen) {
3725 		return remaining_caplen;
3726 	}
3727 
3728 	return trunc_len;
3729 }
3730 
/*
 * Truncation length for an IPv4 packet at offset "off": the IP header
 * (including options, from ip_hl) plus the protocol-specific payload
 * amount, clamped to remaining_caplen.
 */
static uint32_t
get_ip_trunc_len(struct bpf_packet *pkt, uint32_t off, const uint32_t remaining_caplen)
{
	int err = 0;
	uint32_t iplen = sizeof(struct ip);
	if (iplen >= remaining_caplen) {
		return remaining_caplen;
	}

	struct ip iphdr;
	err =  bpf_copydata(pkt, off, sizeof(struct ip), &iphdr);
	if (err != 0) {
		/* cannot read the header: keep whatever was captured */
		return remaining_caplen;
	}

	uint8_t proto = 0;

	/* ip_hl counts 32-bit words; convert to bytes (covers options) */
	iplen = (uint16_t)(iphdr.ip_hl << 2);
	if (iplen >= remaining_caplen) {
		return remaining_caplen;
	}

	proto = iphdr.ip_p;
	iplen += get_proto_trunc_len(proto, pkt, off + iplen, remaining_caplen - iplen);

	if (iplen >= remaining_caplen) {
		return remaining_caplen;
	}

	return iplen;
}
3762 
/*
 * Truncation length for an IPv6 packet at offset "off": the fixed IPv6
 * header plus the payload amount for the next-header protocol, clamped
 * to remaining_caplen.  Extension headers are not parsed yet (see TBD).
 */
static uint32_t
get_ip6_trunc_len(struct bpf_packet *pkt, uint32_t off, const uint32_t remaining_caplen)
{
	int err = 0;
	uint32_t iplen = sizeof(struct ip6_hdr);
	if (iplen >= remaining_caplen) {
		return remaining_caplen;
	}

	struct ip6_hdr ip6hdr;
	err = bpf_copydata(pkt, off, sizeof(struct ip6_hdr), &ip6hdr);
	if (err != 0) {
		/* cannot read the header: keep whatever was captured */
		return remaining_caplen;
	}

	uint8_t proto = 0;

	/*
	 * TBD: process the extension headers
	 */
	proto = ip6hdr.ip6_nxt;
	iplen += get_proto_trunc_len(proto, pkt, off + iplen, remaining_caplen - iplen);

	if (iplen >= remaining_caplen) {
		return remaining_caplen;
	}

	return iplen;
}
3792 
/*
 * Truncation length for an Ethernet frame at offset "off": the
 * Ethernet header plus an EtherType-dependent amount (full frame for
 * ARP, parsed headers for IPv4/IPv6, a minimal slice otherwise),
 * clamped to remaining_caplen.
 */
static uint32_t
get_ether_trunc_len(struct bpf_packet *pkt, uint32_t off, const uint32_t remaining_caplen)
{
	int err = 0;
	uint32_t ethlen = sizeof(struct ether_header);
	if (ethlen >= remaining_caplen) {
		return remaining_caplen;
	}

	struct ether_header eh = {};
	err = bpf_copydata(pkt, off, sizeof(struct ether_header), &eh);
	if (err != 0) {
		/* cannot read the header: keep whatever was captured */
		return remaining_caplen;
	}

	/* EtherType is in network byte order within the frame */
	u_short type = EXTRACT_SHORT(&eh.ether_type);
	/* Include full ARP */
	if (type == ETHERTYPE_ARP) {
		ethlen = remaining_caplen;
	} else if (type == ETHERTYPE_IP) {
		ethlen += get_ip_trunc_len(pkt, off + sizeof(struct ether_header),
		    remaining_caplen - ethlen);
	} else if (type == ETHERTYPE_IPV6) {
		ethlen += get_ip6_trunc_len(pkt, off + sizeof(struct ether_header),
		    remaining_caplen - ethlen);
	} else {
		/* unknown EtherType: keep only a minimal slice */
		ethlen = MIN(BPF_MIN_PKT_SIZE, remaining_caplen);
	}
	return ethlen;
}
3823 
/*
 * Compute the truncated capture length for a packet tapped via pktap
 * (DLT_PKTAP) when BPF_TRUNCATE is requested: the pktap header, the L2
 * header, and just enough of the L3/L4 headers (plus selected payloads
 * such as DNS/DHCP) for debugging.  Never exceeds bpfp_total_length.
 */
static uint32_t
get_pkt_trunc_len(struct bpf_packet *pkt)
{
	struct pktap_header *pktap = (struct pktap_header *) (pkt->bpfp_header);
	uint32_t in_pkt_len = 0;
	uint32_t out_pkt_len = 0;
	uint32_t tlen = 0;
	uint32_t pre_adjust;         // L2 header not in mbuf or kern_packet

	// bpfp_total_length must contain the BPF packet header
	assert3u(pkt->bpfp_total_length, >=, pkt->bpfp_header_length);

	// The BPF packet header must contain the pktap header
	assert3u(pkt->bpfp_header_length, >=, pktap->pth_length);

	// The pre frame length (L2 header) must be contained in the packet
	assert3u(pkt->bpfp_total_length, >=, pktap->pth_length + pktap->pth_frame_pre_length);

	/*
	 * pktap->pth_frame_pre_length is the L2 header length and accounts
	 * for both L2 header in the packet payload and pre_adjust.
	 *
	 * pre_adjust represents an adjustment for a pseudo L2 header that is not
	 * part of packet payload -- not in the mbuf or kern_packet -- and comes
	 * just after the pktap header.
	 *
	 * pktap->pth_length is the size of the pktap header (exclude pre_adjust)
	 *
	 * pkt->bpfp_header_length is (pktap->pth_length + pre_adjust)
	 */
	pre_adjust = (uint32_t)(pkt->bpfp_header_length - pktap->pth_length);

	if (pktap->pth_iftype == IFT_ETHER) {
		/*
		 * We need to parse the Ethernet header to find the network layer
		 * protocol
		 */
		in_pkt_len = (uint32_t)(pkt->bpfp_total_length - pktap->pth_length - pre_adjust);

		out_pkt_len = get_ether_trunc_len(pkt, 0, in_pkt_len);

		tlen = pktap->pth_length + pre_adjust + out_pkt_len;
	} else {
		/*
		 * For other interface types, we only know to parse IPv4 and IPv6.
		 *
		 * To get to the beginning of the IPv4 or IPv6 packet, we need to to skip
		 * over the L2 header that is the actual packet payload (mbuf or kern_packet)
		 */
		uint32_t off;         // offset past the L2 header in the actual packet payload

		off = pktap->pth_frame_pre_length - pre_adjust;

		in_pkt_len = (uint32_t)(pkt->bpfp_total_length - pktap->pth_length - pktap->pth_frame_pre_length);

		if (pktap->pth_protocol_family == AF_INET) {
			out_pkt_len = get_ip_trunc_len(pkt, off, in_pkt_len);
		} else if (pktap->pth_protocol_family == AF_INET6) {
			out_pkt_len = get_ip6_trunc_len(pkt, off, in_pkt_len);
		} else {
			/* unknown protocol family: keep only a minimal slice */
			out_pkt_len = MIN(BPF_MIN_PKT_SIZE, in_pkt_len);
		}
		tlen = pktap->pth_length + pktap->pth_frame_pre_length + out_pkt_len;
	}

	// Verify we do not overflow the buffer
	if (__improbable(tlen > pkt->bpfp_total_length)) {
		bool do_panic = bpf_debug != 0 ? true : false;

#if DEBUG
		do_panic = true;
#endif /* DEBUG */
		if (do_panic) {
			panic("%s:%d tlen %u > bpfp_total_length %lu bpfp_header_length %lu pth_frame_pre_length %u pre_adjust %u in_pkt_len %u out_pkt_len %u",
			    __func__, __LINE__,
			    tlen, pkt->bpfp_total_length, pkt->bpfp_header_length, pktap->pth_frame_pre_length, pre_adjust, in_pkt_len, out_pkt_len);
		} else {
			os_log(OS_LOG_DEFAULT,
			    "%s:%d tlen %u > bpfp_total_length %lu bpfp_header_length %lu pth_frame_pre_length %u pre_adjust %u in_pkt_len %u out_pkt_len %u",
			    __func__, __LINE__,
			    tlen, pkt->bpfp_total_length, pkt->bpfp_header_length, pktap->pth_frame_pre_length, pre_adjust, in_pkt_len, out_pkt_len);
		}
		/* clamp instead of overflowing the capture buffer */
		bpf_trunc_overflow += 1;
		tlen = (uint32_t)pkt->bpfp_total_length;
	}

	return tlen;
}
3912 
/*
 * Length, in bytes, of the common prefix of buffers a and b, compared
 * one 32-bit word at a time.  Granularity is 4 bytes: max_bytes is
 * rounded down to a whole number of words and partial-word matches do
 * not count.  Assumes 4-byte-aligned inputs (callers pass kalloc'd
 * buffers) — confirm if new callers are added.
 */
static uint8_t
get_common_prefix_size(const void *a, const void *b, uint8_t max_bytes)
{
	const uint32_t *wa = (const uint32_t *)a;
	const uint32_t *wb = (const uint32_t *)b;
	uint8_t nwords = max_bytes >> 2;
	uint8_t matched = 0;

	while (matched < nwords && wa[matched] == wb[matched]) {
		matched++;
	}
	return (uint8_t)(matched << 2);
}
3928 
/*
 * Move the packet data from interface memory (pkt) into the store
 * buffer of descriptor d, prefixed by a BPF header.  Rotates the
 * buffers when the current one fills up and wakes a pending reader
 * when the buffers rotate, immediate mode is set, or a read timeout
 * has already expired.
 */
static void
catchpacket(struct bpf_d *d, struct bpf_packet * pkt,
    u_int snaplen, int outbound)
{
	struct bpf_hdr *hp;
	struct bpf_hdr_ext *ehp;
	uint32_t totlen, curlen;
	uint32_t hdrlen, caplen;
	int do_wakeup = 0;
	u_char *payload;
	struct timeval tv;

	/* per-record header size depends on the extended/compressed mode */
	hdrlen = (d->bd_flags & BPF_EXTENDED_HDR) ? d->bd_bif->bif_exthdrlen :
	    (d->bd_flags & BPF_COMP_REQ) ? d->bd_bif->bif_comphdrlen:
	    d->bd_bif->bif_hdrlen;
	/*
	 * Figure out how many bytes to move.  If the packet is
	 * greater or equal to the snapshot length, transfer that
	 * much.  Otherwise, transfer the whole packet (unless
	 * we hit the buffer size limit).
	 */
	totlen = hdrlen + MIN(snaplen, (int)pkt->bpfp_total_length);
	if (totlen > d->bd_bufsize) {
		totlen = d->bd_bufsize;
	}

	if (hdrlen > totlen) {
		return;
	}

	/*
	 * Round up the end of the previous packet to the next longword.
	 */
	curlen = BPF_WORDALIGN(d->bd_slen);
	if (curlen + totlen > d->bd_bufsize) {
		/*
		 * This packet will overflow the storage buffer.
		 * Rotate the buffers if we can, then wakeup any
		 * pending reads.
		 *
		 * We cannot rotate buffers if a read is in progress
		 * so drop the packet
		 */
		if (d->bd_hbuf_read) {
			++d->bd_dcount;
			return;
		}

		if (d->bd_fbuf == NULL) {
			if (d->bd_headdrop == 0) {
				/*
				 * We haven't completed the previous read yet,
				 * so drop the packet.
				 */
				++d->bd_dcount;
				return;
			}
			/*
			 * Drop the hold buffer as it contains older packets
			 */
			d->bd_dcount += d->bd_hcnt;
			d->bd_fbuf = d->bd_hbuf;
			ROTATE_BUFFERS(d);
		} else {
			ROTATE_BUFFERS(d);
		}
		do_wakeup = 1;
		curlen = 0;
	} else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT) {
		/*
		 * Immediate mode is set, or the read timeout has
		 * already expired during a select call. A packet
		 * arrived, so the reader should be woken up.
		 */
		do_wakeup = 1;
	}

	/*
	 * Append the bpf header.
	 */
	microtime(&tv);
	if (d->bd_flags & BPF_EXTENDED_HDR) {
		/* extended header: direction, flow and service metadata */
		ehp = (struct bpf_hdr_ext *)(void *)(d->bd_sbuf + curlen);
		memset(ehp, 0, sizeof(*ehp));
		ehp->bh_tstamp.tv_sec = (int)tv.tv_sec;
		ehp->bh_tstamp.tv_usec = tv.tv_usec;

		ehp->bh_datalen = (bpf_u_int32)pkt->bpfp_total_length;
		ehp->bh_hdrlen = (u_short)hdrlen;
		caplen = ehp->bh_caplen = totlen - hdrlen;
		payload = (u_char *)ehp + hdrlen;

		if (outbound) {
			ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_OUT;
		} else {
			ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_IN;
		}

		if (pkt->bpfp_type == BPF_PACKET_TYPE_MBUF) {
			struct mbuf *m = pkt->bpfp_mbuf;

			if (outbound) {
				/* only do lookups on non-raw INPCB */
				if ((m->m_pkthdr.pkt_flags & (PKTF_FLOW_ID |
				    PKTF_FLOW_LOCALSRC | PKTF_FLOW_RAWSOCK)) ==
				    (PKTF_FLOW_ID | PKTF_FLOW_LOCALSRC) &&
				    m->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
					ehp->bh_flowid = m->m_pkthdr.pkt_flowid;
					if (m->m_pkthdr.pkt_proto == IPPROTO_TCP) {
						ehp->bh_flags |= BPF_HDR_EXT_FLAGS_TCP;
					} else if (m->m_pkthdr.pkt_proto == IPPROTO_UDP) {
						ehp->bh_flags |= BPF_HDR_EXT_FLAGS_UDP;
					}
				}
				ehp->bh_svc = so_svc2tc(m->m_pkthdr.pkt_svc);
				if (m->m_pkthdr.pkt_flags & PKTF_TCP_REXMT) {
					ehp->bh_pktflags |= BPF_PKTFLAGS_TCP_REXMT;
				}
				if (m->m_pkthdr.pkt_flags & PKTF_START_SEQ) {
					ehp->bh_pktflags |= BPF_PKTFLAGS_START_SEQ;
				}
				if (m->m_pkthdr.pkt_flags & PKTF_LAST_PKT) {
					ehp->bh_pktflags |= BPF_PKTFLAGS_LAST_PKT;
				}
				if (m->m_pkthdr.pkt_flags & PKTF_VALID_UNSENT_DATA) {
					ehp->bh_unsent_bytes =
					    m->m_pkthdr.bufstatus_if;
					ehp->bh_unsent_snd =
					    m->m_pkthdr.bufstatus_sndbuf;
				}
			} else {
				if (m->m_pkthdr.pkt_flags & PKTF_WAKE_PKT) {
					ehp->bh_pktflags |= BPF_PKTFLAGS_WAKE_PKT;
				}
			}
#if SKYWALK
		} else {
			kern_packet_t kern_pkt = pkt->bpfp_pkt;
			packet_flowid_t flowid = 0;

			if (outbound) {
				/*
				 * Note: pp_init() asserts that kern_packet_svc_class_t is equivalent
				 * to mbuf_svc_class_t
				 */
				ehp->bh_svc = so_svc2tc((mbuf_svc_class_t)kern_packet_get_service_class(kern_pkt));
				if (kern_packet_get_transport_retransmit(kern_pkt)) {
					ehp->bh_pktflags |= BPF_PKTFLAGS_TCP_REXMT;
				}
				if (kern_packet_get_transport_last_packet(kern_pkt)) {
					ehp->bh_pktflags |= BPF_PKTFLAGS_LAST_PKT;
				}
			} else {
				if (kern_packet_get_wake_flag(kern_pkt)) {
					ehp->bh_pktflags |= BPF_PKTFLAGS_WAKE_PKT;
				}
			}
			ehp->bh_trace_tag = kern_packet_get_trace_tag(kern_pkt);
			if (kern_packet_get_flowid(kern_pkt, &flowid) == 0) {
				ehp->bh_flowid = flowid;
			}
#endif /* SKYWALK */
		}
	} else {
		/* classic (or compressed) header */
		hp = (struct bpf_hdr *)(void *)(d->bd_sbuf + curlen);
		memset(hp, 0, BPF_WORDALIGN(sizeof(*hp)));
		hp->bh_tstamp.tv_sec = (int)tv.tv_sec;
		hp->bh_tstamp.tv_usec = tv.tv_usec;
		hp->bh_datalen = (bpf_u_int32)pkt->bpfp_total_length;
		hp->bh_hdrlen = (u_short)hdrlen;
		caplen = hp->bh_caplen = totlen - hdrlen;
		payload = (u_char *)hp + hdrlen;
	}
	if (d->bd_flags & BPF_COMP_REQ) {
		/*
		 * Header compression: stage a bounded copy of this packet,
		 * compare it against the previously staged packet, and elide
		 * the common prefix when compression is enabled.
		 */
		uint8_t common_prefix_size = 0;
		uint8_t copy_len = MIN((uint8_t)caplen, BPF_HDR_COMP_LEN_MAX);

		copy_bpf_packet(pkt, d->bd_prev_fbuf, copy_len);

		if (d->bd_prev_slen != 0) {
			common_prefix_size = get_common_prefix_size(d->bd_prev_fbuf,
			    d->bd_prev_sbuf, MIN(copy_len, d->bd_prev_slen));
		}

		if (d->bd_flags & BPF_COMP_ENABLED) {
			assert3u(caplen, >=, common_prefix_size);
			copy_bpf_packet_offset(pkt, payload, caplen - common_prefix_size,
			    common_prefix_size);
			d->bd_slen = curlen + totlen - common_prefix_size;
		} else {
			copy_bpf_packet(pkt, payload, caplen);
			d->bd_slen = curlen + totlen;
		}

		/*
		 * Update the caplen only if compression is enabled -- the caller
		 * must pay attention to bpf_hdr_comp_enable
		 */
		if (d->bd_flags & BPF_EXTENDED_HDR) {
			ehp->bh_complen = common_prefix_size;
			if (d->bd_flags & BPF_COMP_ENABLED) {
				ehp->bh_caplen -= common_prefix_size;
			}
		} else {
			struct bpf_comp_hdr *hcp;

			hcp = (struct bpf_comp_hdr *)(void *)(d->bd_sbuf + curlen);
			hcp->bh_complen = common_prefix_size;
			if (d->bd_flags & BPF_COMP_ENABLED) {
				hcp->bh_caplen -= common_prefix_size;
			}
		}

		/* compression statistics */
		if (common_prefix_size > 0) {
			d->bd_bcs.bcs_total_compressed_prefix_size += common_prefix_size;
			if (common_prefix_size > d->bd_bcs.bcs_max_compressed_prefix_size) {
				d->bd_bcs.bcs_max_compressed_prefix_size = common_prefix_size;
			}
			d->bd_bcs.bcs_count_compressed_prefix += 1;
		} else {
			d->bd_bcs.bcs_count_no_common_prefix += 1;
		}

		/* The current compression buffer becomes the previous one */
		caddr_t tmp = d->bd_prev_sbuf;
		d->bd_prev_sbuf = d->bd_prev_fbuf;
		d->bd_prev_slen = copy_len;
		d->bd_prev_fbuf = tmp;
	} else {
		/*
		 * Copy the packet data into the store buffer and update its length.
		 */
		copy_bpf_packet(pkt, payload, caplen);
		d->bd_slen = curlen + totlen;
	}
	d->bd_scnt += 1;
	d->bd_bcs.bcs_total_hdr_size += pkt->bpfp_header_length;
	d->bd_bcs.bcs_total_size += caplen;

	if (do_wakeup) {
		bpf_wakeup(d);
	}
}
4177 
/*
 * Release every capture buffer (store, hold, free) and both
 * compression staging buffers of a descriptor.
 * NOTE(review): safe to call repeatedly only because kfree_data_addr()
 * clears the pointer it frees — confirm if the allocator changes.
 */
static void
bpf_freebufs(struct bpf_d *d)
{
	if (d->bd_sbuf != NULL) {
		kfree_data_addr(d->bd_sbuf);
	}
	if (d->bd_hbuf != NULL) {
		kfree_data_addr(d->bd_hbuf);
	}
	if (d->bd_fbuf != NULL) {
		kfree_data_addr(d->bd_fbuf);
	}

	if (d->bd_prev_sbuf != NULL) {
		kfree_data_addr(d->bd_prev_sbuf);
	}
	if (d->bd_prev_fbuf != NULL) {
		kfree_data_addr(d->bd_prev_fbuf);
	}
}
/*
 * Allocate (or re-allocate) the store and free capture buffers of a
 * descriptor, plus the compression staging buffers when BPF_COMP_REQ
 * is set.  Any previously attached buffers are released first.
 * Returns 0 on success, ENOMEM if any allocation fails (in which case
 * everything allocated so far is freed again).
 */
static int
bpf_allocbufs(struct bpf_d *d)
{
	/* release any existing buffers before re-allocating */
	bpf_freebufs(d);

	d->bd_fbuf = (caddr_t) kalloc_data(d->bd_bufsize, Z_WAITOK | Z_ZERO);
	if (d->bd_fbuf == NULL) {
		goto nobufs;
	}

	d->bd_sbuf = (caddr_t) kalloc_data(d->bd_bufsize, Z_WAITOK | Z_ZERO);
	if (d->bd_sbuf == NULL) {
		goto nobufs;
	}
	d->bd_slen = 0;
	d->bd_hlen = 0;
	d->bd_scnt = 0;
	d->bd_hcnt = 0;

	d->bd_prev_slen = 0;
	if (d->bd_flags & BPF_COMP_REQ) {
		/* staging buffers hold at most BPF_HDR_COMP_LEN_MAX bytes */
		d->bd_prev_sbuf = (caddr_t) kalloc_data(BPF_HDR_COMP_LEN_MAX, Z_WAITOK | Z_ZERO);
		if (d->bd_prev_sbuf == NULL) {
			goto nobufs;
		}
		d->bd_prev_fbuf = (caddr_t) kalloc_data(BPF_HDR_COMP_LEN_MAX, Z_WAITOK | Z_ZERO);
		if (d->bd_prev_fbuf == NULL) {
			goto nobufs;
		}
	}
	return 0;
nobufs:
	/* partial cleanup; see the re-entrancy note on bpf_freebufs() */
	bpf_freebufs(d);
	return ENOMEM;
}
4236 
/*
 * Free buffers and the installed filter program of a descriptor.
 * Called on close, after the descriptor has been detached from its
 * interface.
 */
static void
bpf_freed(struct bpf_d *d)
{
	/*
	 * We don't need to lock out interrupts since this descriptor has
	 * been detached from its interface and it yet hasn't been marked
	 * free.
	 */
	if (d->bd_hbuf_read || d->bd_hbuf_write) {
		/* a read/write still references the buffers: fatal bug */
		panic("bpf buffer freed during read/write");
	}

	bpf_freebufs(d);

	if (d->bd_filter) {
		kfree_data_addr(d->bd_filter);
	}
}
4259 
4260 /*
4261  * Attach an interface to bpf.	driverp is a pointer to a (struct bpf_if *)
4262  * in the driver's softc; dlt is the link layer type; hdrlen is the fixed
4263  * size of the link header (variable length headers not yet supported).
4264  */
4265 void
bpfattach(struct ifnet * ifp,u_int dlt,u_int hdrlen)4266 bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
4267 {
4268 	bpf_attach(ifp, dlt, hdrlen, NULL, NULL);
4269 }
4270 
/*
 * Register an (interface, dlt) pair with bpf, optionally with
 * driver-supplied send and tap callbacks.
 *
 * All entries for a given interface are kept adjacent in bpf_iflist;
 * for Ethernet interfaces, the DLT_EN10MB entry is inserted at the
 * head of that interface's run of entries (see insertion logic below).
 *
 * Returns 0 on success, or EEXIST if the pair is already attached.
 */
errno_t
bpf_attach(
	ifnet_t ifp,
	u_int32_t dlt,
	u_int32_t hdrlen,
	bpf_send_func send,
	bpf_tap_func tap)
{
	struct bpf_if *bp;
	struct bpf_if *bp_new;
	struct bpf_if *bp_before_first = NULL;
	struct bpf_if *bp_first = NULL;
	struct bpf_if *bp_last = NULL;
	boolean_t found;

	/*
	 * Z_NOFAIL will cause a panic if the allocation fails.
	 * Allocate before taking bpf_mlock so we never block for
	 * memory while holding the lock.
	 */
	bp_new = kalloc_type(struct bpf_if, Z_WAITOK | Z_NOFAIL | Z_ZERO);

	lck_mtx_lock(bpf_mlock);

	/*
	 * Check if this interface/dlt is already attached. Remember the
	 * first and last attachment for this interface, as well as the
	 * element before the first attachment.
	 */
	found = FALSE;
	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		if (bp->bif_ifp != ifp) {
			if (bp_first != NULL) {
				/* no more elements for this interface */
				break;
			}
			bp_before_first = bp;
		} else {
			if (bp->bif_dlt == dlt) {
				found = TRUE;
				break;
			}
			if (bp_first == NULL) {
				bp_first = bp;
			}
			bp_last = bp;
		}
	}
	if (found) {
		/* Duplicate attachment: give back the unused element */
		lck_mtx_unlock(bpf_mlock);
		os_log_error(OS_LOG_DEFAULT,
		    "bpfattach - %s with dlt %d is already attached",
		    if_name(ifp), dlt);
		kfree_type(struct bpf_if, bp_new);
		return EEXIST;
	}

	bp_new->bif_ifp = ifp;
	bp_new->bif_dlt = dlt;
	bp_new->bif_send = send;
	bp_new->bif_tap = tap;

	if (bp_first == NULL) {
		/* No other entries for this ifp */
		bp_new->bif_next = bpf_iflist;
		bpf_iflist = bp_new;
	} else {
		if (ifnet_type(ifp) == IFT_ETHER && dlt == DLT_EN10MB) {
			/* Make this the first entry for this interface */
			if (bp_before_first != NULL) {
				/*  point the previous to us */
				bp_before_first->bif_next = bp_new;
			} else {
				/* we're the new head */
				bpf_iflist = bp_new;
			}
			bp_new->bif_next = bp_first;
		} else {
			/* Add this after the last entry for this interface */
			bp_new->bif_next = bp_last->bif_next;
			bp_last->bif_next = bp_new;
		}
	}

	/*
	 * Compute the length of the bpf header.  This is not necessarily
	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
	 * that the network layer header begins on a longword boundary (for
	 * performance reasons and to alleviate alignment restrictions).
	 */
	bp_new->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;
	bp_new->bif_exthdrlen = BPF_WORDALIGN(hdrlen +
	    sizeof(struct bpf_hdr_ext)) - hdrlen;
	bp_new->bif_comphdrlen = BPF_WORDALIGN(hdrlen +
	    sizeof(struct bpf_comp_hdr)) - hdrlen;

	/* Take a reference on the interface */
	ifnet_reference(ifp);

	lck_mtx_unlock(bpf_mlock);

	return 0;
}
4372 
4373 /*
4374  * Detach bpf from an interface.  This involves detaching each descriptor
4375  * associated with the interface, and leaving bd_bif NULL.  Notify each
4376  * descriptor as it's detached so that any sleepers wake up and get
4377  * ENXIO.
4378  */
4379 void
bpfdetach(struct ifnet * ifp)4380 bpfdetach(struct ifnet *ifp)
4381 {
4382 	struct bpf_if   *bp, *bp_prev, *bp_next;
4383 	struct bpf_d    *d;
4384 
4385 	if (bpf_debug != 0) {
4386 		os_log(OS_LOG_DEFAULT, "%s: %s", __func__, if_name(ifp));
4387 	}
4388 
4389 	lck_mtx_lock(bpf_mlock);
4390 
4391 	/*
4392 	 * Build the list of devices attached to that interface
4393 	 * that we need to free while keeping the lock to maintain
4394 	 * the integrity of the interface list
4395 	 */
4396 	bp_prev = NULL;
4397 	for (bp = bpf_iflist; bp != NULL; bp = bp_next) {
4398 		bp_next = bp->bif_next;
4399 
4400 		if (ifp != bp->bif_ifp) {
4401 			bp_prev = bp;
4402 			continue;
4403 		}
4404 		/* Unlink from the interface list */
4405 		if (bp_prev) {
4406 			bp_prev->bif_next = bp->bif_next;
4407 		} else {
4408 			bpf_iflist = bp->bif_next;
4409 		}
4410 
4411 		/* Detach the devices attached to the interface */
4412 		while ((d = bp->bif_dlist) != NULL) {
4413 			/*
4414 			 * Take an extra reference to prevent the device
4415 			 * from being freed when bpf_detachd() releases
4416 			 * the reference for the interface list
4417 			 */
4418 			bpf_acquire_d(d);
4419 
4420 			/*
4421 			 * Wait for active read and writes to complete
4422 			 */
4423 			while (d->bd_hbuf_read || d->bd_hbuf_write) {
4424 				msleep((caddr_t)d, bpf_mlock, PRINET, "bpfdetach", NULL);
4425 			}
4426 
4427 			bpf_detachd(d);
4428 			bpf_wakeup(d);
4429 			bpf_release_d(d);
4430 		}
4431 		ifnet_release(ifp);
4432 	}
4433 
4434 	lck_mtx_unlock(bpf_mlock);
4435 }
4436 
/*
 * One-time bpf subsystem initialization: sanity-check header layouts,
 * read the header-compression boot-arg, install the bpf character
 * device switch entry and create the bpf device nodes.
 */
void
bpf_init(__unused void *unused)
{
	int     maj;

	/* bpf_comp_hdr is an overlay of bpf_hdr */
	_CASSERT(BPF_WORDALIGN(sizeof(struct bpf_hdr)) ==
	    BPF_WORDALIGN(sizeof(struct bpf_comp_hdr)));

	/* compression length must fit in a byte */
	_CASSERT(BPF_HDR_COMP_LEN_MAX <= UCHAR_MAX );

	/* allow header compression to be toggled via boot-arg */
	(void) PE_parse_boot_argn("bpf_hdr_comp", &bpf_hdr_comp_enable,
	    sizeof(bpf_hdr_comp_enable));

	if (bpf_devsw_installed == 0) {
		bpf_devsw_installed = 1;
		maj = cdevsw_add(CDEV_MAJOR, &bpf_cdevsw);
		if (maj == -1) {
			/* roll back the flag so a later retry is possible */
			bpf_devsw_installed = 0;
			os_log_error(OS_LOG_DEFAULT,
			    "bpf_init: failed to allocate a major number");
			return;
		}

		for (int i = 0; i < NBPFILTER; i++) {
			bpf_make_dev_t(maj);
		}
	}
}
4467 
4468 static int
4469 sysctl_bpf_maxbufsize SYSCTL_HANDLER_ARGS
4470 {
4471 #pragma unused(arg1, arg2)
4472 	int i, err;
4473 
4474 	i = bpf_maxbufsize;
4475 
4476 	err = sysctl_handle_int(oidp, &i, 0, req);
4477 	if (err != 0 || req->newptr == USER_ADDR_NULL) {
4478 		return err;
4479 	}
4480 
4481 	if (i < 0 || i > BPF_BUFSIZE_CAP) {
4482 		i = BPF_BUFSIZE_CAP;
4483 	}
4484 
4485 	bpf_maxbufsize = i;
4486 	return err;
4487 }
4488 
4489 static int
4490 sysctl_bpf_bufsize_cap SYSCTL_HANDLER_ARGS
4491 {
4492 #pragma unused(arg1, arg2)
4493 	int i, err;
4494 
4495 	i = BPF_BUFSIZE_CAP;
4496 
4497 	err = sysctl_handle_int(oidp, &i, 0, req);
4498 	if (err != 0 || req->newptr == USER_ADDR_NULL) {
4499 		return err;
4500 	}
4501 
4502 	return err;
4503 }
4504 
4505 /*
4506  * Fill filter statistics
4507  */
4508 static void
bpfstats_fill_xbpf(struct xbpf_d * d,struct bpf_d * bd)4509 bpfstats_fill_xbpf(struct xbpf_d *d, struct bpf_d *bd)
4510 {
4511 	LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);
4512 
4513 	d->bd_structsize = sizeof(struct xbpf_d);
4514 	d->bd_promisc = bd->bd_promisc != 0 ? 1 : 0;
4515 	d->bd_immediate = d->bd_immediate != 0 ? 1 : 0;
4516 	d->bd_hdrcmplt = bd->bd_hdrcmplt != 0 ? 1 : 0;
4517 	d->bd_async = bd->bd_async != 0 ? 1 : 0;
4518 	d->bd_headdrop = bd->bd_headdrop != 0 ? 1 : 0;
4519 	d->bd_direction = (uint8_t)bd->bd_direction;
4520 	d->bh_compreq = bd->bd_flags & BPF_COMP_REQ ? 1 : 0;
4521 	d->bh_compenabled = bd->bd_flags & BPF_COMP_ENABLED ? 1 : 0;
4522 	d->bd_exthdr = bd->bd_flags & BPF_EXTENDED_HDR ? 1 : 0;
4523 	d->bd_trunc = bd->bd_flags & BPF_TRUNCATE ? 1 : 0;
4524 	d->bd_pkthdrv2 = bd->bd_flags & BPF_PKTHDRV2 ? 1 : 0;
4525 
4526 	d->bd_dev_minor = (uint8_t)bd->bd_dev_minor;
4527 
4528 	d->bd_sig = bd->bd_sig;
4529 
4530 	d->bd_rcount = bd->bd_rcount;
4531 	d->bd_dcount = bd->bd_dcount;
4532 	d->bd_fcount = bd->bd_fcount;
4533 	d->bd_wcount = bd->bd_wcount;
4534 	d->bd_wdcount = bd->bd_wdcount;
4535 	d->bd_slen = bd->bd_slen;
4536 	d->bd_hlen = bd->bd_hlen;
4537 	d->bd_bufsize = bd->bd_bufsize;
4538 	d->bd_pid = bd->bd_pid;
4539 	if (bd->bd_bif != NULL && bd->bd_bif->bif_ifp != NULL) {
4540 		strlcpy(d->bd_ifname,
4541 		    bd->bd_bif->bif_ifp->if_xname, IFNAMSIZ);
4542 	}
4543 
4544 	d->bd_comp_count = bd->bd_bcs.bcs_count_compressed_prefix;
4545 	d->bd_comp_size = bd->bd_bcs.bcs_total_compressed_prefix_size;
4546 
4547 	d->bd_scnt = bd->bd_scnt;
4548 	d->bd_hcnt = bd->bd_hcnt;
4549 
4550 	d->bd_read_count = bd->bd_bcs.bcs_total_read;
4551 	d->bd_fsize = bd->bd_bcs.bcs_total_size;
4552 }
4553 
4554 /*
4555  * Handle `netstat -B' stats request
4556  */
4557 static int
4558 sysctl_bpf_stats SYSCTL_HANDLER_ARGS
4559 {
4560 	int error;
4561 	struct xbpf_d *xbdbuf;
4562 	unsigned int x_cnt;
4563 	vm_size_t buf_size;
4564 
4565 	if (req->oldptr == USER_ADDR_NULL) {
4566 		return SYSCTL_OUT(req, 0, nbpfilter * sizeof(struct xbpf_d));
4567 	}
4568 	if (nbpfilter == 0) {
4569 		return SYSCTL_OUT(req, 0, 0);
4570 	}
4571 	buf_size = req->oldlen;
4572 	if (buf_size > BPF_MAX_DEVICES * sizeof(struct xbpf_d)) {
4573 		buf_size = BPF_MAX_DEVICES * sizeof(struct xbpf_d);
4574 	}
4575 	xbdbuf = kalloc_data(buf_size, Z_WAITOK | Z_ZERO);
4576 
4577 	lck_mtx_lock(bpf_mlock);
4578 	if (buf_size < (nbpfilter * sizeof(struct xbpf_d))) {
4579 		lck_mtx_unlock(bpf_mlock);
4580 		kfree_data(xbdbuf, buf_size);
4581 		return ENOMEM;
4582 	}
4583 	x_cnt = 0;
4584 	unsigned int i;
4585 
4586 	for (i = 0; i < nbpfilter; i++) {
4587 		struct bpf_d *bd = bpf_dtab[i];
4588 		struct xbpf_d *xbd;
4589 
4590 		if (bd == NULL || bd == BPF_DEV_RESERVED ||
4591 		    (bd->bd_flags & BPF_CLOSING) != 0) {
4592 			continue;
4593 		}
4594 		VERIFY(x_cnt < nbpfilter);
4595 
4596 		xbd = &xbdbuf[x_cnt++];
4597 		bpfstats_fill_xbpf(xbd, bd);
4598 	}
4599 	lck_mtx_unlock(bpf_mlock);
4600 
4601 	error = SYSCTL_OUT(req, xbdbuf, x_cnt * sizeof(struct xbpf_d));
4602 	kfree_data(xbdbuf, buf_size);
4603 	return error;
4604 }
4605