1 /*
2 * Copyright (c) 1999-2024 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* Copyright (c) 1998, 1999 Apple Computer, Inc. All Rights Reserved */
29 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
30 /*
31 * Mach Operating System
32 * Copyright (c) 1987 Carnegie-Mellon University
33 * All rights reserved. The CMU software License Agreement specifies
34 * the terms and conditions for use and redistribution.
35 */
36 /*
37 * Copyright (c) 1994 NeXT Computer, Inc. All rights reserved.
38 *
39 * Copyright (c) 1982, 1986, 1988 Regents of the University of California.
40 * All rights reserved.
41 *
42 * Redistribution and use in source and binary forms, with or without
43 * modification, are permitted provided that the following conditions
44 * are met:
45 * 1. Redistributions of source code must retain the above copyright
46 * notice, this list of conditions and the following disclaimer.
47 * 2. Redistributions in binary form must reproduce the above copyright
48 * notice, this list of conditions and the following disclaimer in the
49 * documentation and/or other materials provided with the distribution.
50 * 3. All advertising materials mentioning features or use of this software
51 * must display the following acknowledgement:
52 * This product includes software developed by the University of
53 * California, Berkeley and its contributors.
54 * 4. Neither the name of the University nor the names of its contributors
55 * may be used to endorse or promote products derived from this software
56 * without specific prior written permission.
57 *
58 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
59 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
60 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
61 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
62 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
63 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
64 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
65 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
66 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
67 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
68 * SUCH DAMAGE.
69 *
70 * @(#)mbuf.h 8.3 (Berkeley) 1/21/94
71 */
72 /*
73 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
74 * support for mandatory and extensible security protections. This notice
75 * is included in support of clause 2.2 (b) of the Apple Public License,
76 * Version 2.0.
77 */
78
79 #ifndef _SYS_MBUF_H_
80 #define _SYS_MBUF_H_
81
82 #include <sys/appleapiopts.h>
83 #include <sys/cdefs.h>
84 #include <sys/_types/_u_int32_t.h> /* u_int32_t */
85 #include <sys/_types/_u_int64_t.h> /* u_int64_t */
86 #include <sys/_types/_u_short.h> /* u_short */
87
88 #ifdef KERNEL
89 #include <sys/kpi_mbuf.h>
90 #endif
91
92 #ifdef XNU_KERNEL_PRIVATE
93 #include <sys/lock.h>
94 #include <sys/queue.h>
95 #include <machine/endian.h>
/*
 * All mbufs have one fixed size, _MSIZE, and that size includes the header
 * overhead.  Instead of using its internal data area, an mbuf may carry a
 * single "mbuf cluster" of MCLBYTES/MBIGCLBYTES/M16KCLBYTES bytes (see also
 * machine/param.h).  A cluster has no additional overhead and is used when
 * at least MINCLSIZE of data must be stored.
 */
#if !CONFIG_MBUF_MCACHE
#define _MSIZE          512                     /* size of an mbuf */
#else /* CONFIG_MBUF_MCACHE */
#include <sys/mcache.h>
#define _MSIZESHIFT     8                       /* 256 */
#define _MSIZE          (1 << _MSIZESHIFT)      /* size of an mbuf */
#endif /* !CONFIG_MBUF_MCACHE */
110
/* number of clusters per page */
#define NCLPGSHIFT      (PAGE_SHIFT - MCLSHIFT)
#define NCLPG           (1 << NCLPGSHIFT)

/* number of big clusters per page */
#define NBCLPGSHIFT     (PAGE_SHIFT - MBIGCLSHIFT)
#define NBCLPG          (1 << NBCLPGSHIFT)

/* number of mbufs that fit in one cluster */
#define NMBPCL          (MCLBYTES / _MSIZE)

/* number of clusters per jumbo cluster */
#define NCLPJCLSHIFT    (M16KCLSHIFT - MCLSHIFT)
#define NCLPJCL         (1 << NCLPJCLSHIFT)

/* number of clusters per big cluster */
#define NCLPBGSHIFT     (MBIGCLSHIFT - MCLSHIFT)
#define NCLPBG          (1 << NCLPBGSHIFT)
124
/*
 * Type conversion helpers.
 *
 * mtod(mb, type)  - yields the data pointer of mbuf 'mb' (as returned by
 *                   m_mtod_current()), cast to 'type'.
 * mtodo(mb, off)  - same data pointer advanced by 'off' bytes, returned
 *                   as a void pointer.
 */
#define mtod(mb, type)  ((type)(void *)m_mtod_current(mb))
#define mtodo(mb, off)  ((void *)(mtod(mb, uint8_t *) + (off)))
132
/*
 * Header located at the beginning of each mbuf.
 */
struct m_hdr {
	struct mbuf     *mh_next;       /* next buffer in chain */
	struct mbuf     *mh_nextpkt;    /* next chain in queue/record */
	uintptr_t       mh_data;        /* location of data */
	int32_t         mh_len;         /* amount of data in this mbuf */
	uint16_t        mh_type;        /* type of data in this mbuf */
	uint16_t        mh_flags;       /* flags; see below */
}
#if __arm__ && (__BIGGEST_ALIGNMENT__ > 4)
/*
 * The explicit alignment is required because of the way _MLEN is defined
 * and used.  Ideally _MLEN would be computed with offsetof(struct mbuf,
 * M_dat), because nothing guarantees that mbuf.M_dat begins where
 * mbuf.m_hdr ends: the compiler may (and, for armv7k, does) insert padding
 * between m_hdr and M_dat.  offsetof cannot easily be used, however, since
 * _MLEN is itself referenced by the definition of struct mbuf.
 */
__attribute__((aligned(8)))
#endif
;
153
154 /*
155 * Packet tag structure (see below for details).
156 */
157 struct m_tag {
158 uint64_t m_tag_cookie; /* Error checking */
159 SLIST_ENTRY(m_tag) m_tag_link; /* List of packet tags */
160 void *__sized_by(m_tag_len) m_tag_data;
161 uint16_t m_tag_type; /* Module specific type */
162 uint16_t m_tag_len; /* Length of data */
163 uint32_t m_tag_id; /* Module ID */
164 void *m_tag_mb_cl; /* pointer to mbuf or cluster container */
165 #ifndef __LP64__
166 u_int32_t m_tag_pad;
167 #endif /* !__LP64__ */
168 };
169
/*
 * Space needed for a tag carrying 'len' bytes of data: 'len' rounded up to
 * a multiple of 8 bytes, plus the size of the struct m_tag header itself.
 */
#define M_TAG_ALIGN(len)                                                \
	(P2ROUNDUP((len), sizeof(uint64_t)) + sizeof(struct m_tag))
172
/*
 * M_TAG_INIT - initialize the fields of a packet tag.
 *
 *   tag    pointer to the struct m_tag to fill in; verified to be aligned
 *          to 8 bytes
 *   id     stored in m_tag_id
 *   type   stored in m_tag_type
 *   len    stored in m_tag_len after truncation to uint16_t
 *   data   stored in m_tag_data
 *   mb_cl  stored in m_tag_mb_cl
 *
 * The cookie is set last through m_tag_create_cookie().
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and "M_TAG_INIT(...);" can be used safely as the body of
 * an unbraced if/else.
 */
#define M_TAG_INIT(tag, id, type, len, data, mb_cl) do {                \
	VERIFY(IS_P2ALIGNED((tag), sizeof(uint64_t)));                  \
	(tag)->m_tag_type = (type);                                     \
	(tag)->m_tag_len = (uint16_t)(len);                             \
	(tag)->m_tag_id = (id);                                         \
	(tag)->m_tag_data = (data);                                     \
	(tag)->m_tag_mb_cl = (mb_cl);                                   \
	m_tag_create_cookie(tag);                                       \
} while (0)
182
/* 64-bit patterns for tags marked valid and free, respectively */
#define M_TAG_VALID_PATTERN     0xFEEDFACEFEEDFACEULL
#define M_TAG_FREE_PATTERN      0xDEADBEEFDEADBEEFULL
185
/*
 * Packet tag header placed at the top of an mbuf when mbufs are used for
 * m_tag storage.  Pointers are 32-bit in ILP32, while m_tag needs 64-bit
 * alignment, hence the padding.
 */
struct m_taghdr {
#ifndef __LP64__
	uint32_t        pad;            /* for structure alignment */
#endif /* !__LP64__ */
	uint64_t        mth_refcnt;     /* number of tags in this mbuf */
};
196
/*
 * Tag carrying driver auxiliary metadata (KERNEL_TAG_TYPE_DRVAUX).
 */
struct m_drvaux_tag {
	uint32_t        da_family;      /* IFNET_FAMILY values */
	uint32_t        da_subfamily;   /* IFNET_SUBFAMILY values */
	uint32_t        da_reserved;    /* for future */
	uint32_t        da_length;      /* length of following data */
};
206
/* Bit values for pftag_flags, which is 16 bits wide */
#define PF_TAG_GENERATED                0x0001  /* pkt generated by PF */
#define PF_TAG_FRAGCACHE                0x0002
#define PF_TAG_TRANSLATE_LOCALHOST      0x0004
#if PF_ECN
#define PF_TAG_HDR_INET                 0x0008  /* hdr points to IPv4 */
#define PF_TAG_HDR_INET6                0x0010  /* hdr points to IPv6 */
#endif /* PF_ECN */
#define PF_TAG_REASSEMBLED              0x0020  /* pkt reassembled by PF */
#define PF_TAG_REFRAGMENTED             0x0040  /* pkt refragmented by PF */
/*
 * mbuf tag used by PF
 */
struct pf_mtag {
	uint16_t        pftag_flags;    /* PF_TAG flags */
	uint16_t        pftag_rtableid; /* alternate routing table id */
	uint16_t        pftag_tag;
	uint16_t        pftag_routed;
#if PF_ECN
	void            *pftag_hdr;     /* saved hdr pos in mbuf, for ECN */
#endif /* PF_ECN */
};
229
/* PF tag IDs reserved for the system */
#define PF_TAG_ID_SYSTEM_SERVICE        0xFF00
#define PF_TAG_ID_STACK_DROP            0xFF01
233
/*
 * Fragment tag used by PF
 */
struct pf_fragment_tag {
	uint32_t        ft_id;                  /* fragment id */
	uint16_t        ft_hdrlen;              /* header length of reassembled pkt */
	uint16_t        ft_unfragpartlen;       /* length of the per-fragment headers */
	uint16_t        ft_extoff;              /* last extension header offset or 0 */
	uint16_t        ft_maxlen;              /* maximum fragment payload length */
};
244
/*
 * mbuf tag used by TCP; the transmit and receive views share storage.
 */
struct tcp_pktinfo {
	union {
		struct {
			uint16_t        seg_size;       /* segment size (actual MSS) */
			uint16_t        hdr_len;        /* size of IP+TCP header, might be zero */
			uint32_t        start_seq;      /* start seq of this packet */
			pid_t           pid;
			pid_t           e_pid;
		} __tx;
		struct {
			uint8_t         seg_cnt;        /* # of coalesced TCP pkts */
		} __rx;
	} __offload;
};

/* Field shorthands, to be applied to a struct pkthdr */
#define tx_seg_size     proto_mtag.__pr_u.tcp.tm_tcp.__offload.__tx.seg_size
#define tso_segsz       tx_seg_size
#define tx_hdr_len      proto_mtag.__pr_u.tcp.tm_tcp.__offload.__tx.hdr_len
#define tx_start_seq    proto_mtag.__pr_u.tcp.tm_tcp.__offload.__tx.start_seq
#define tx_tcp_pid      proto_mtag.__pr_u.tcp.tm_tcp.__offload.__tx.pid
#define tx_tcp_e_pid    proto_mtag.__pr_u.tcp.tm_tcp.__offload.__tx.e_pid

#define rx_seg_cnt      proto_mtag.__pr_u.tcp.tm_tcp.__offload.__rx.seg_cnt
270
/*
 * mbuf tag used by MPTCP
 */
struct mptcp_pktinfo {
	uint64_t        mtpi_dsn;       /* MPTCP Data Sequence Number */
	uint32_t        mtpi_rel_seq;   /* Relative Seq Number */
	uint16_t        mtpi_length;    /* Length of mapping */
	uint16_t        mtpi_csum;
};

/* Field shorthands, to be applied to a struct pkthdr */
#define mp_dsn          proto_mtag.__pr_u.tcp.tm_mptcp.mtpi_dsn
#define mp_rseq         proto_mtag.__pr_u.tcp.tm_mptcp.mtpi_rel_seq
#define mp_rlen         proto_mtag.__pr_u.tcp.tm_mptcp.mtpi_length
#define mp_csum         proto_mtag.__pr_u.tcp.tm_mptcp.mtpi_csum
284
285 /*
286 * TCP specific mbuf tag. Note that the current implementation uses
287 * MPTCP metadata strictly between MPTCP and the TCP subflow layers,
288 * hence tm_tcp and tm_mptcp are mutually exclusive. This also means
289 * that TCP messages functionality is currently incompatible with MPTCP.
290 */
291 struct tcp_mtag {
292 union {
293 struct tcp_pktinfo tm_tcp; /* TCP and below */
294 struct mptcp_pktinfo tm_mptcp; /* MPTCP-TCP only */
295 };
296 };
297
/* mbuf tag specific to UDP */
struct udp_mtag {
	pid_t   _pid;
	pid_t   _e_pid;
};

/* Field shorthands, to be applied to a struct pkthdr */
#define tx_udp_pid      proto_mtag.__pr_u.udp._pid
#define tx_udp_e_pid    proto_mtag.__pr_u.udp._e_pid
304
/* mbuf tag specific to raw IP */
struct rawip_mtag {
	pid_t   _pid;
	pid_t   _e_pid;
};

/* Field shorthands, to be applied to a struct pkthdr */
#define tx_rawip_pid    proto_mtag.__pr_u.rawip._pid
#define tx_rawip_e_pid  proto_mtag.__pr_u.rawip._e_pid
311
312 struct driver_mtag_ {
313 uintptr_t _drv_tx_compl_arg;
314 uintptr_t _drv_tx_compl_data;
315 kern_return_t _drv_tx_status;
316 uint16_t _drv_flowid;
317 #define drv_tx_compl_arg builtin_mtag._drv_mtag._drv_tx_compl_arg
318 #define drv_tx_compl_data builtin_mtag._drv_mtag._drv_tx_compl_data
319 #define drv_tx_status builtin_mtag._drv_mtag._drv_tx_status
320 #define drv_flowid builtin_mtag._drv_mtag._drv_flowid
321 };
322
323 /*
324 * Protocol specific mbuf tag (at most one protocol metadata per mbuf).
325 *
326 * Care must be taken to ensure that they are mutually exclusive, e.g.
327 * IPsec policy ID implies no TCP segment offload (which is fine given
328 * that the former is used on the virtual ipsec interface that does
329 * not advertise the TSO capability.)
330 */
331 struct proto_mtag_ {
332 union {
333 struct tcp_mtag tcp; /* TCP specific */
334 struct udp_mtag udp; /* UDP specific */
335 struct rawip_mtag rawip; /* raw IPv4/IPv6 specific */
336 } __pr_u;
337 };
338
/*
 * mbuf tag specific to NECP
 */
struct necp_mtag_ {
	uint32_t        necp_policy_id;
	uint32_t        necp_skip_policy_id;
	uint32_t        necp_route_rule_id;
	uint16_t        necp_last_interface_index;
	uint16_t        necp_app_id;
};
349
350 union builtin_mtag {
351 struct {
352 struct proto_mtag_ _proto_mtag; /* built-in protocol-specific tag */
353 struct pf_mtag _pf_mtag; /* built-in PF tag */
354 struct necp_mtag_ _necp_mtag; /* built-in NECP tag */
355 } _net_mtag;
356 struct driver_mtag_ _drv_mtag;
357 #define necp_mtag builtin_mtag._net_mtag._necp_mtag
358 #define proto_mtag builtin_mtag._net_mtag._proto_mtag
359 #define driver_mtag builtin_mtag._drv_mtag
360 };
361
362 /*
363 * Record/packet header in first mbuf of chain; valid only if M_PKTHDR set.
364 */
365 struct pkthdr {
366 struct ifnet *rcvif; /* rcv interface */
367 /* variables for ip and tcp reassembly */
368 void *pkt_hdr; /* pointer to packet header */
369 int32_t len; /* total packet length */
370 /* variables for hardware checksum */
371 /* Note: csum_flags is used for hardware checksum and VLAN */
372 u_int32_t csum_flags; /* flags regarding checksum */
373 union {
374 struct {
375 u_int16_t val; /* checksum value */
376 u_int16_t start; /* checksum start offset */
377 } _csum_rx;
378 #define csum_rx_val _csum_rx.val
379 #define csum_rx_start _csum_rx.start
380 struct {
381 u_int16_t start; /* checksum start offset */
382 u_int16_t stuff; /* checksum stuff offset */
383 } _csum_tx;
384 #define csum_tx_start _csum_tx.start
385 #define csum_tx_stuff _csum_tx.stuff
386 /*
387 * Generic data field used by csum routines.
388 * It gets used differently in different contexts.
389 */
390 u_int32_t csum_data;
391 };
392 u_int16_t vlan_tag; /* VLAN tag, host byte order */
393 /*
394 * Packet classifier info
395 *
396 * PKTF_FLOW_ID set means valid flow ID. A non-zero flow ID value
397 * means the packet has been classified by one of the flow sources.
398 * It is also a prerequisite for flow control advisory, which is
399 * enabled by additionally setting PKTF_FLOW_ADV.
400 *
401 * The protocol value is a best-effort representation of the payload.
402 * It is opportunistically updated and used only for optimization.
403 * It is not a substitute for parsing the protocol header(s); use it
404 * only as a hint.
405 *
406 * If PKTF_IFAINFO is set, pkt_ifainfo contains one or both of the
407 * indices of interfaces which own the source and/or destination
408 * addresses of the packet. For the local/loopback case (PKTF_LOOP),
409 * both should be valid, and thus allows for the receiving end to
410 * quickly determine the actual interfaces used by the the addresses;
411 * they may not necessarily be the same or refer to the loopback
412 * interface. Otherwise, in the non-local/loopback case, the indices
413 * are opportunistically set, and because of that only one may be set
414 * (0 means the index has not been determined.) In addition, the
415 * interface address flags are also recorded. This allows us to avoid
416 * storing the corresponding {in,in6}_ifaddr in an mbuf tag. Ideally
417 * this would be a superset of {ia,ia6}_flags, but the namespaces are
418 * overlapping at present, so we'll need a new set of values in future
419 * to achieve this. For now, we will just rely on the address family
420 * related code paths examining this mbuf to interpret the flags.
421 */
422 u_int8_t pkt_proto; /* IPPROTO value */
423 u_int8_t pkt_flowsrc; /* FLOWSRC values */
424 u_int32_t pkt_flowid; /* flow ID */
425 u_int32_t pkt_flags; /* PKTF flags (see below) */
426 u_int32_t pkt_svc; /* MBUF_SVC value */
427
428 u_int32_t pkt_compl_context; /* Packet completion context */
429
430 union {
431 struct {
432 u_int16_t src; /* ifindex of src addr i/f */
433 u_int16_t src_flags; /* src PKT_IFAIFF flags */
434 u_int16_t dst; /* ifindex of dst addr i/f */
435 u_int16_t dst_flags; /* dst PKT_IFAIFF flags */
436 } _pkt_iaif;
437 #define src_ifindex _pkt_iaif.src
438 #define src_iff _pkt_iaif.src_flags
439 #define dst_ifindex _pkt_iaif.dst
440 #define dst_iff _pkt_iaif.dst_flags
441 u_int64_t pkt_ifainfo; /* data field used by ifainfo */
442 struct {
443 u_int32_t if_data; /* bytes in interface queue */
444 u_int32_t sndbuf_data; /* bytes in socket buffer */
445 } _pkt_bsr; /* Buffer status report used by cellular interface */
446 #define bufstatus_if _pkt_bsr.if_data
447 #define bufstatus_sndbuf _pkt_bsr.sndbuf_data
448 };
449 u_int64_t pkt_timestamp; /* TX: enqueue time, RX: receive timestamp */
450 u_int64_t pkt_deadline; /* In Mach time. */
451
452 /*
453 * Tags (external and built-in)
454 */
455 SLIST_HEAD(packet_tags, m_tag) tags; /* list of external tags */
456 union builtin_mtag builtin_mtag;
457
458 uint32_t comp_gencnt;
459 uint32_t pkt_crumbs:16,
460 pkt_compl_callbacks:8,
461 pkt_ext_flags:6,
462 pkt_unused:2; /* Currently unused - feel free to grab those 2 bits */
463 /*
464 * Module private scratch space (32-bit aligned), currently 16-bytes
465 * large. Anything stored here is not guaranteed to survive across
466 * modules. The AQM layer (outbound) uses all 16-bytes for both
467 * packet scheduling and flow advisory information.
468 */
469 struct {
470 union {
471 u_int8_t __mpriv8[16];
472 u_int16_t __mpriv16[8];
473 struct {
474 union {
475 u_int8_t __val8[4];
476 u_int16_t __val16[2];
477 u_int32_t __val32;
478 } __mpriv32_u;
479 } __mpriv32[4];
480 u_int64_t __mpriv64[2];
481 } __mpriv_u;
482 } pkt_mpriv __attribute__((aligned(4)));
483 /*
484 * While qset_id takes 64 bits here, as upper 32 bits of qset_id are reserved
485 * currently, there is a scope to limit to 32 bits if other use cases need
486 * pkt_mpriv
487 */
488 #define pkt_mpriv_qsetid pkt_mpriv.__mpriv_u.__mpriv64[0]
489 #define pkt_mpriv_srcid pkt_mpriv.__mpriv_u.__mpriv32[2].__mpriv32_u.__val32
490 #define pkt_mpriv_fidx pkt_mpriv.__mpriv_u.__mpriv32[3].__mpriv32_u.__val32
491 };
492
/*
 * Types of flow data source.  A data source module generates a unique flow
 * ID and associates it with each data flow as pkt_flowid.  Flow
 * control/advisory requires this, because it lets the output queue
 * identify the data source object and tell it that it may resume
 * transmitting (in the event it was flow controlled.)
 */
#define FLOWSRC_INPCB           1       /* flow ID generated by INPCB */
#define FLOWSRC_IFNET           2       /* flow ID generated by interface */
#define FLOWSRC_PF              3       /* flow ID generated by PF */
#define FLOWSRC_CHANNEL         4       /* flow ID generated by channel */
504
/*
 * FLOWSRC_MPKL_INPUT is not a true flow data source; it is used for
 * multi-layer packet logging. We're usurping the pkt_flowsrc field because
 * the mbuf packet header ran out of space, and since pkt_flowsrc is normally
 * used on output we assume we can safely override the normal semantics of
 * the field.
 * This value is meant to be used on incoming packets from a lower level
 * protocol to pass information to some upper level protocol. When
 * FLOWSRC_MPKL_INPUT is set, the following fields are used:
 * - pkt_proto: the IP protocol ID of the lower level protocol
 * - pkt_flowid: the identifier of the packet at the lower protocol.
 * For example, ESP would set pkt_proto to IPPROTO_ESP and pkt_flowid to the
 * SPI.
 */
518
/*
 * Packet flags.  In contrast to m_flags, every packet flag is copied along
 * when m_pkthdr is copied, i.e. there is no equivalent of M_COPYFLAGS
 * here.  These flags (and other classifier info) will be cleared during
 * DLIL input.
 *
 * Notes on M_LOOP versus PKTF_LOOP:
 *
 *    - M_LOOP is overloaded, and its use is discouraged.  Historically,
 *	that flag was used by the KAME implementation for allowing certain
 *	exceptions to be made in the IP6_EXTHDR_CHECK() logic; it was
 *	originally meant to be set as the packet is looped back to the
 *	system, and in some circumstances temporarily set in ip6_output().
 *	Over time, the pre-output routines began using it to indicate to
 *	the DLIL frameout and output routines that the packet may be looped
 *	back to the system under the right conditions.  In addition, it is
 *	an mbuf flag rather than an mbuf packet header flag.
 *
 *    - PKTF_LOOP is an mbuf packet header flag, set if and only if the
 *	packet was looped back to the system.  Newer code should use this
 *	flag instead.
 */
#define PKTF_FLOW_ID            0x00000001      /* pkt has valid flowid value */
#define PKTF_FLOW_ADV           0x00000002      /* pkt triggers local flow advisory */
#define PKTF_FLOW_LOCALSRC      0x00000004      /* pkt is locally originated */
#define PKTF_FLOW_RAWSOCK       0x00000008      /* pkt locally generated by raw sock */
#define PKTF_PRIO_PRIVILEGED    0x00000010      /* packet priority is privileged */
#define PKTF_PROXY_DST          0x00000020      /* processed but not locally destined */
#define PKTF_INET_RESOLVE       0x00000040      /* IPv4 resolver packet */
#define PKTF_INET6_RESOLVE      0x00000080      /* IPv6 resolver packet */
#define PKTF_RESOLVE_RTR        0x00000100      /* pkt is for resolving router */
#define PKTF_SKIP_PKTAP         0x00000200      /* pkt has already passed through pktap */
#define PKTF_WAKE_PKT           0x00000400      /* packet caused system to wake from sleep */
#define PKTF_MPTCP              0x00000800      /* TCP with MPTCP metadata */
#define PKTF_MPSO               0x00001000      /* MPTCP socket meta data */
#define PKTF_LOOP               0x00002000      /* loopbacked packet */
#define PKTF_IFAINFO            0x00004000      /* pkt has valid interface addr info */
#define PKTF_SO_BACKGROUND      0x00008000      /* data is from background source */
#define PKTF_FORWARDED          0x00010000      /* pkt was forwarded from another i/f */
#define PKTF_PRIV_GUARDED       0x00020000      /* pkt_mpriv area guard enabled */
#define PKTF_KEEPALIVE          0x00040000      /* pkt is kernel-generated keepalive */
#define PKTF_SO_REALTIME        0x00080000      /* data is realtime traffic */
#define PKTF_VALID_UNSENT_DATA  0x00100000      /* unsent data is valid */
#define PKTF_TCP_REXMT          0x00200000      /* packet is TCP retransmission */
#define PKTF_REASSEMBLED        0x00400000      /* Packet was reassembled */
#define PKTF_TX_COMPL_TS_REQ    0x00800000      /* tx completion timestamp requested */
#define PKTF_TS_VALID           0x01000000      /* pkt timestamp is valid */
#define PKTF_DRIVER_MTAG        0x02000000      /* driver mbuf tags fields inited */
#define PKTF_NEW_FLOW           0x04000000      /* Data from a new flow */
#define PKTF_START_SEQ          0x08000000      /* valid start sequence */
#define PKTF_LAST_PKT           0x10000000      /* last packet in the flow */
#define PKTF_MPTCP_REINJ        0x20000000      /* Packet has been reinjected for MPTCP */
#define PKTF_MPTCP_DFIN         0x40000000      /* Packet is a data-fin */
#define PKTF_HBH_CHKED          0x80000000      /* HBH option is checked */
572
/* Extended packet flags */
#define PKTF_EXT_OUTPUT_SCOPE   0x01    /* outgoing packet has ipv6 address scope id */
#define PKTF_EXT_L4S            0x02    /* pkt is from an L4S connection */
#define PKTF_EXT_QUIC           0x04    /* flag to denote a QUIC packet */
#define PKTF_EXT_QSET_ID_VALID  0x08    /* flag to denote if traffic rules are run */
#define PKTF_EXT_ULPN           0x10    /* packet transitted coprocessor */
#define PKTF_EXT_LPW            0x20    /* packet received in low power wake */
579
/* Packet crumb bits */
#define PKT_CRUMB_TS_COMP_REQ   0x0001  /* timestamp completion requested */
#define PKT_CRUMB_TS_COMP_CB    0x0002  /* timestamp callback called */
#define PKT_CRUMB_DLIL_OUTPUT   0x0004  /* dlil_output called */
#define PKT_CRUMB_FLOW_TX       0x0008  /* dp_flow_tx_process called */
#define PKT_CRUMB_FQ_ENQUEUE    0x0010  /* fq_enqueue called */
#define PKT_CRUMB_FQ_DEQUEUE    0x0020  /* fq_dequeue called */
#define PKT_CRUMB_SK_PKT_COPY   0x0040  /* copy from mbuf to skywalk packet */
#define PKT_CRUMB_TCP_OUTPUT    0x0080
#define PKT_CRUMB_UDP_OUTPUT    0x0100
#define PKT_CRUMB_SOSEND        0x0200
#define PKT_CRUMB_DLIL_INPUT    0x0400
#define PKT_CRUMB_IP_INPUT      0x0800
#define PKT_CRUMB_TCP_INPUT     0x1000
#define PKT_CRUMB_UDP_INPUT     0x2000

/* Flags for the m_hdr_common crumbs */
#define CRUMB_INPUT_FLAG        0x0000000000010000
#define CRUMB_INTERFACE_FLAG    0x000000000001ffff
598
/* all flags concerning flow control/advisory and identification */
#define PKTF_FLOW_MASK          \
	(PKTF_FLOW_ID |         \
	PKTF_FLOW_ADV |         \
	PKTF_FLOW_LOCALSRC |    \
	PKTF_FLOW_RAWSOCK)
602
603 /*
604 * Description of external storage mapped into mbuf, valid only if M_EXT set.
605 */
606 typedef void (*__single m_ext_free_func_t)(caddr_t, u_int, caddr_t);
607 struct m_ext {
608 caddr_t __counted_by(ext_size) ext_buf; /* start of buffer */
609 m_ext_free_func_t ext_free; /* free routine (plain-text), if not the usual */
610 u_int ext_size; /* size of the external buffer */
611 caddr_t ext_arg; /* additional ext_free argument (plain-text) */
612 struct ext_ref {
613 struct mbuf *paired;
614 u_int16_t minref;
615 u_int16_t refcnt;
616 u_int16_t prefcnt;
617 u_int16_t flags;
618 u_int32_t priv;
619 } *ext_refflags;
620 };
621
622 /* define m_ext to a type since it gets redefined below */
623 typedef struct m_ext _m_ext_t;
624
625 #if CONFIG_MBUF_MCACHE
/*
 * _MLEN and _MHLEN below are private to xnu.  The sizes of the structures
 * depend on specific xnu configs, so private code outside of xnu must use
 * the mbuf_get_{mlen,mhlen} routines instead.
 */
#define _MLEN           (_MSIZE - sizeof(struct m_hdr)) /* normal data len */
#define _MHLEN          (_MLEN - sizeof(struct pkthdr)) /* data len w/pkthdr */

/* number of mbufs per page */
#define NMBPGSHIFT      (PAGE_SHIFT - _MSIZESHIFT)
#define NMBPG           (1 << NMBPGSHIFT)

#define NMBPCLSHIFT     (MCLSHIFT - _MSIZESHIFT)
638
639 /*
640 * The mbuf object
641 */
642 struct mbuf {
643 struct m_hdr m_hdr;
644 union {
645 struct {
646 struct pkthdr MH_pkthdr; /* M_PKTHDR set */
647 union {
648 struct m_ext MH_ext; /* M_EXT set */
649 char MH_databuf[_MHLEN];
650 } MH_dat;
651 } MH;
652 char M_databuf[_MLEN]; /* !M_PKTHDR, !M_EXT */
653 } M_dat;
654 };
655
/* shorthands for the mbuf header fields */
#define m_next          m_hdr.mh_next
#define m_nextpkt       m_hdr.mh_nextpkt
#define m_act           m_nextpkt
#define m_data          m_hdr.mh_data
#define m_len           m_hdr.mh_len
#define m_type          m_hdr.mh_type
#define m_flags         m_hdr.mh_flags

/* shorthands for the members of the data union */
#define m_pkthdr        M_dat.MH.MH_pkthdr
#define m_ext           M_dat.MH.MH_dat.MH_ext
#define m_pktdat        M_dat.MH.MH_dat.MH_databuf
667
668 #else /* !CONFIG_MBUF_MCACHE */
/*
 * _MLEN and _MHLEN below are private to xnu.  The sizes of the structures
 * depend on specific xnu configs, so private code outside of xnu must use
 * the mbuf_get_{mlen,mhlen} routines instead.
 */
#define _MLEN           (_MSIZE - sizeof(struct m_hdr_common))  /* normal data len */
#define _MHLEN          (_MLEN)                                 /* data len w/pkthdr */
676
677 struct m_hdr_common {
678 struct m_hdr M_hdr;
679 struct m_ext M_ext __attribute__((aligned(16))); /* M_EXT set */
680 #if defined(__arm64__)
681 uint64_t m_hdr_crumbs;
682 #endif
683 struct pkthdr M_pkthdr __attribute__((aligned(16))); /* M_PKTHDR set */
684 };
685
686 _Static_assert(sizeof(struct m_hdr_common) == 224, "Crumbs effecting size of struct");
687 #if defined(__arm64__)
688 _Static_assert(sizeof(struct m_hdr_common) == 224, "Crumbs effecting size of struct");
689 #endif
690
691 /*
692 * The mbuf object
693 */
694 struct mbuf {
695 struct m_hdr_common M_hdr_common;
696 union {
697 char MH_databuf[_MHLEN];
698 char M_databuf[_MLEN]; /* !M_PKTHDR, !M_EXT */
699 } M_dat __attribute__((aligned(16)));
700 };
701
/* shorthands for the mbuf header fields */
#define m_next          M_hdr_common.M_hdr.mh_next
#define m_nextpkt       M_hdr_common.M_hdr.mh_nextpkt
#define m_data          M_hdr_common.M_hdr.mh_data
#define m_len           M_hdr_common.M_hdr.mh_len
#define m_type          M_hdr_common.M_hdr.mh_type
#define m_flags         M_hdr_common.M_hdr.mh_flags

/* shorthands for the remaining parts of the mbuf */
#define m_pkthdr        M_hdr_common.M_pkthdr
#define m_ext           M_hdr_common.M_ext
#define m_pktdat        M_dat.MH_databuf
#if defined(__arm64__)
#define m_mhdrcommon_crumbs     M_hdr_common.m_hdr_crumbs
#endif /* __arm64__ */
715 #endif /* CONFIG_MBUF_MCACHE */
716
/* field shorthands shared by both mbuf layouts */
#define m_act           m_nextpkt
#define m_dat           M_dat.M_databuf

/* total packet length recorded in the packet header of mbuf 'mb' */
#define m_pktlen(mb)    ((mb)->m_pkthdr.len)
/* pointers to the built-in PF and NECP tags of mbuf 'mb' */
#define m_pftag(mb)     (&(mb)->m_pkthdr.builtin_mtag._net_mtag._pf_mtag)
#define m_necptag(mb)   (&(mb)->m_pkthdr.builtin_mtag._net_mtag._necp_mtag)
722
/* private mbuf flags */
#define M_EXT           0x0001  /* has associated external storage */
#define M_PKTHDR        0x0002  /* start of record */
#define M_EOR           0x0004  /* end of record */
#define M_PROTO1        0x0008  /* protocol-specific */
#define M_PROTO2        0x0010  /* protocol-specific */
#define M_PROTO3        0x0020  /* protocol-specific */
#define M_LOOP          0x0040  /* packet is looped back (also see PKTF_LOOP) */
#define M_PROTO5        0x0080  /* protocol-specific */

/* private mbuf pkthdr flags; these live in m_flags as well */
#define M_BCAST         0x0100  /* send/received as link-level broadcast */
#define M_MCAST         0x0200  /* send/received as link-level multicast */
#define M_FRAG          0x0400  /* packet is a fragment of a larger packet */
#define M_FIRSTFRAG     0x0800  /* packet is first fragment */
#define M_LASTFRAG      0x1000  /* packet is last fragment */
#define M_PROMISC       0x2000  /* packet is promiscuous (shouldn't go to stack) */
#define M_HASFCS        0x4000  /* packet has FCS */
#define M_TAGHDR        0x8000  /* m_tag hdr structure at top of mbuf data */

/*
 * Flags that are purged when crossing layers.
 */
#define M_PROTOFLAGS    (M_PROTO1 | M_PROTO2 | M_PROTO3 | M_PROTO5)

/* flags that are copied when m_pkthdr is copied */
#define M_COPYFLAGS                                                     \
	(M_PKTHDR | M_EOR |                                             \
	M_PROTO1 | M_PROTO2 | M_PROTO3 | M_LOOP | M_PROTO5 |            \
	M_BCAST | M_MCAST |                                             \
	M_FRAG | M_FIRSTFRAG | M_LASTFRAG |                             \
	M_PROMISC | M_HASFCS)
754
/* flags describing hw checksum support and sw checksum requirements */
#define CSUM_IP                 0x00000001      /* will csum IP */
#define CSUM_TCP                0x00000002      /* will csum TCP */
#define CSUM_UDP                0x00000004      /* will csum UDP */
#define CSUM_IP_FRAGS           0x00000008      /* will csum IP fragments */
#define CSUM_FRAGMENT           0x00000010      /* will do IP fragmentation */
#define CSUM_TCPIPV6            0x00000020      /* will csum TCP for IPv6 */
#define CSUM_UDPIPV6            0x00000040      /* will csum UDP for IPv6 */
#define CSUM_FRAGMENT_IPV6      0x00000080      /* will do IPv6 fragmentation */

#define CSUM_IP_CHECKED         0x00000100      /* did csum IP */
#define CSUM_IP_VALID           0x00000200      /*   ... the csum is valid */
#define CSUM_DATA_VALID         0x00000400      /* csum_data field is valid */
#define CSUM_PSEUDO_HDR         0x00000800      /* csum_data has pseudo hdr */
#define CSUM_PARTIAL            0x00001000      /* simple Sum16 computation */
#define CSUM_ZERO_INVERT        0x00002000      /* invert 0 to -0 (0xffff) */

/* combinations */
#define CSUM_DELAY_DATA         (CSUM_TCP | CSUM_UDP)
#define CSUM_DELAY_IP           (CSUM_IP)       /* IPv4 only: no IPv6 IP cksum */
#define CSUM_DELAY_IPV6_DATA    (CSUM_TCPIPV6 | CSUM_UDPIPV6)
#define CSUM_DATA_IPV6_VALID    CSUM_DATA_VALID /* csum_data field is valid */

#define CSUM_TX_FLAGS                                                   \
	(CSUM_DELAY_IP | CSUM_DELAY_DATA | CSUM_DELAY_IPV6_DATA |       \
	CSUM_DATA_VALID | CSUM_PARTIAL | CSUM_ZERO_INVERT)

#define CSUM_RX_FULL_FLAGS                                              \
	(CSUM_IP_CHECKED | CSUM_IP_VALID |                              \
	CSUM_PSEUDO_HDR | CSUM_DATA_VALID)

#define CSUM_RX_FLAGS           (CSUM_RX_FULL_FLAGS | CSUM_PARTIAL)
787
788
789
/*
 * Note: see also IF_HWASSIST_CSUM defined in <net/if_var.h>
 */

/* The vlan_tag field holds a valid VLAN tag */
#define CSUM_VLAN_TAG_VALID     0x00010000

/* Checksum start adjustment has already been performed */
#define CSUM_ADJUST_DONE        0x00020000

/* The mbuf carries VLAN encapsulation */
#define CSUM_VLAN_ENCAP_PRESENT 0x00040000

/* TCP Segment Offloading requested: the NIC must segment this mbuf */
#define CSUM_TSO_IPV4           0x00100000      /* TSO over IPv4 */
#define CSUM_TSO_IPV6           0x00200000      /* TSO over IPv6 */
806
/*
 * TSO capability checks.
 *
 * TSO_IPVx_OK(_ifp, _m) is non-zero when the interface advertises
 * IFNET_TSO_IPVx in if_hwassist AND the mbuf's packet header requests
 * CSUM_TSO_IPVx.
 *
 * TSO_IPVx_NOTOK(_ifp, _m) is non-zero when the mbuf requests CSUM_TSO_IPVx
 * but the interface does NOT advertise IFNET_TSO_IPVx.
 *
 * Both arguments are evaluated exactly once.  The definitions deliberately
 * do not end in a line-continuation, so whatever follows them can never be
 * spliced into the macro body.
 */
#define TSO_IPV4_OK(_ifp, _m) \
	(((_ifp)->if_hwassist & IFNET_TSO_IPV4) && \
	((_m)->m_pkthdr.csum_flags & CSUM_TSO_IPV4))

#define TSO_IPV4_NOTOK(_ifp, _m) \
	(!((_ifp)->if_hwassist & IFNET_TSO_IPV4) && \
	((_m)->m_pkthdr.csum_flags & CSUM_TSO_IPV4))

#define TSO_IPV6_OK(_ifp, _m) \
	(((_ifp)->if_hwassist & IFNET_TSO_IPV6) && \
	((_m)->m_pkthdr.csum_flags & CSUM_TSO_IPV6))

#define TSO_IPV6_NOTOK(_ifp, _m) \
	(!((_ifp)->if_hwassist & IFNET_TSO_IPV6) && \
	((_m)->m_pkthdr.csum_flags & CSUM_TSO_IPV6))
822
823 #endif /* XNU_KERNEL_PRIVATE */
824
/* mbuf type codes (value 9 is not assigned here) */
#define MT_FREE         0       /* should be on free list */
#define MT_DATA         1       /* dynamic (data) allocation */
#define MT_HEADER       2       /* packet header */
#define MT_SOCKET       3       /* socket structure */
#define MT_PCB          4       /* protocol control block */
#define MT_RTABLE       5       /* routing tables */
#define MT_HTABLE       6       /* IMP host tables */
#define MT_ATABLE       7       /* address resolution tables */
#define MT_SONAME       8       /* socket name */
#define MT_SOOPTS       10      /* socket options */
#define MT_FTABLE       11      /* fragment reassembly header */
#define MT_RIGHTS       12      /* access rights */
#define MT_IFADDR       13      /* interface address */
#define MT_CONTROL      14      /* extra-data protocol message */
#define MT_OOBDATA      15      /* expedited data */
#define MT_TAG          16      /* volatile metadata associated to pkts */
#define MT_MAX          32      /* enough? */
843
844 enum {
845 MTF_FREE = (1 << MT_FREE),
846 MTF_DATA = (1 << MT_DATA),
847 MTF_HEADER = (1 << MT_HEADER),
848 MTF_SOCKET = (1 << MT_SOCKET),
849 MTF_PCB = (1 << MT_PCB),
850 MTF_RTABLE = (1 << MT_RTABLE),
851 MTF_HTABLE = (1 << MT_HTABLE),
852 MTF_ATABLE = (1 << MT_ATABLE),
853 MTF_SONAME = (1 << MT_SONAME),
854 MTF_SOOPTS = (1 << MT_SOOPTS),
855 MTF_FTABLE = (1 << MT_FTABLE),
856 MTF_RIGHTS = (1 << MT_RIGHTS),
857 MTF_IFADDR = (1 << MT_IFADDR),
858 MTF_CONTROL = (1 << MT_CONTROL),
859 MTF_OOBDATA = (1 << MT_OOBDATA),
860 MTF_TAG = (1 << MT_TAG),
861 };
862
863 #ifdef XNU_KERNEL_PRIVATE
/*
 * mbuf allocation macros (thin wrappers that store the result in `m'):
 *
 * MGET(struct mbuf *m, int how, int type)
 *	assigns m_get(how, type) to m: an mbuf initialized to contain
 *	internal data.
 *
 * MGETHDR(struct mbuf *m, int how, int type)
 *	assigns m_gethdr(how, type) to m: an mbuf initialized to contain
 *	a packet header and internal data.
 */

#define MGET(m, how, type)      ((m) = m_get((how), (type)))

#define MGETHDR(m, how, type)   ((m) = m_gethdr((how), (type)))
878
879 /*
880 * Mbuf cluster macros.
881 * MCLALLOC(caddr_t p, int how) allocates an mbuf cluster.
882 * MCLGET adds such clusters to a normal mbuf;
883 * the flag M_EXT is set upon success.
884 * MCLFREE releases a reference to a cluster allocated by MCLALLOC,
885 * freeing the cluster if the reference count has reached 0.
886 *
887 * Normal mbuf clusters are normally treated as character arrays
888 * after allocation, but use the first word of the buffer as a free list
889 * pointer while on the free list.
890 */
891 union mcluster {
892 union mcluster *mcl_next;
893 char mcl_buf[MCLBYTES];
894 };
895
896 #define MCLGET(m, how) ((m) = m_mclget(m, how))
897
898 /*
899 * Mbuf big cluster
900 */
901 union mbigcluster {
902 union mbigcluster *mbc_next;
903 char mbc_buf[MBIGCLBYTES];
904 };
905
906 /*
907 * Mbuf jumbo cluster
908 */
909 union m16kcluster {
910 union m16kcluster *m16kcl_next;
911 char m16kcl_buf[M16KCLBYTES];
912 };
913
/* Macro spelling of m_mclhasreference(m). */
#define MCLHASREFERENCE(m)      m_mclhasreference(m)
915
/*
 * MFREE(struct mbuf *m, struct mbuf *n)
 * Free a single mbuf and associated external storage.
 * The value returned by m_free(m) (the successor, if any) is stored in n.
 */

#define MFREE(m, n)     ((n) = m_free(m))
923
/*
 * Copy the mbuf pkthdr from `from' to `to'.
 * `from' must have M_PKTHDR set, and `to' must be empty.
 * The aux pointer will be moved to `to'.
 */
#define M_COPY_PKTHDR(to, from)         m_copy_pkthdr(to, from)

/* Macro spellings of the corresponding m_copy_*() routines. */
#define M_COPY_PFTAG(to, from)          m_copy_pftag(to, from)

#define M_COPY_NECPTAG(to, from)        m_copy_necptag(to, from)

#define M_COPY_CLASSIFIER(to, from)     m_copy_classifier(to, from)
936
/*
 * True when it is safe to write to the data region of mbuf m.  That region
 * is either the local data payload or an external buffer, depending on
 * whether M_EXT is set: an mbuf without M_EXT is always writable, one with
 * M_EXT is writable only if MCLHASREFERENCE(m) is false.
 */
#define M_WRITABLE(m)   (!((m)->m_flags & M_EXT) || !MCLHASREFERENCE(m))
943
/*
 * These macros are mapped to the appropriate KPIs, so that private code
 * can be simply recompiled in order to be forward-compatible with future
 * changes toward the structure sizes.
 *
 * NOTE(review): this conditional appears to sit inside an enclosing
 * XNU_KERNEL_PRIVATE region, in which case the #else arm is never
 * compiled here -- confirm before relying on it.
 */
#ifdef XNU_KERNEL_PRIVATE
#define MLEN            _MLEN
#define MHLEN           _MHLEN
#define MINCLSIZE       (MLEN + MHLEN)
#else
#define MLEN            mbuf_get_mlen()         /* normal mbuf data len */
#define MHLEN           mbuf_get_mhlen()        /* data len in an mbuf w/pkthdr */
#define MINCLSIZE       mbuf_get_minclsize()    /* cluster usage threshold */
#endif
/*
 * Address of the start of the buffer that backs mbuf m:
 *   - M_EXT set:      the external buffer (m_ext.ext_buf)
 *   - M_PKTHDR set:   the packet-header data area (m_pktdat)
 *   - otherwise:      the plain data area (m_dat)
 */
#define M_START(m) \
	((((m)->m_flags & M_EXT) != 0) ? (caddr_t)(m)->m_ext.ext_buf : \
	((((m)->m_flags & M_PKTHDR) != 0) ? &(m)->m_pktdat[0] : \
	&(m)->m_dat[0]))
966
/*
 * Size of the buffer that backs mbuf m:
 *   - M_EXT set:      m_ext.ext_size
 *   - M_PKTHDR set:   MHLEN
 *   - otherwise:      MLEN
 */
#define M_SIZE(m) \
	((((m)->m_flags & M_EXT) != 0) ? (m)->m_ext.ext_size : \
	((((m)->m_flags & M_PKTHDR) != 0) ? MHLEN : \
	MLEN))
975
/* All three alignment macros expand to the same m_align(m, len) call. */
#define M_ALIGN(m, len)         m_align(m, len)
#define MH_ALIGN(m, len)        m_align(m, len)
#define MEXT_ALIGN(m, len)      m_align(m, len)
979
/*
 * Amount of space available in front of the current start of data in an
 * mbuf: the distance from M_START(m) to m_data, or 0 when the mbuf is not
 * writable.
 *
 * The M_WRITABLE() is a temporary, conservative safety measure: the burden
 * of checking writability of the mbuf data area rests solely with the caller.
 */
#define M_LEADINGSPACE(m) \
	(!M_WRITABLE(m) ? 0 : ((m)->m_data - (uintptr_t)M_START(m)))
989
/*
 * Amount of space available after the end of data in an mbuf: the distance
 * from (data start + m_len) to (M_START(m) + M_SIZE(m)), or 0 when the mbuf
 * is not writable.
 *
 * The M_WRITABLE() is a temporary, conservative safety measure: the burden
 * of checking writability of the mbuf data area rests solely with the caller.
 */
#define M_TRAILINGSPACE(m) \
	(!M_WRITABLE(m) ? 0 : \
	((M_START(m) + M_SIZE(m)) - (mtod(m, caddr_t) + (m)->m_len)))
999
/*
 * Arrange to prepend space of size plen to mbuf m; m is replaced by the
 * value returned from m_prepend_2().
 * If a new mbuf must be allocated, how specifies whether to wait.
 * If how is M_DONTWAIT and allocation fails, the original mbuf chain
 * is freed and m is set to NULL.
 */
#define M_PREPEND(m, plen, how, align) \
	((m) = m_prepend_2((m), (plen), (how), (align)))
1008
/* change mbuf m to the new type t */
#define MCHTYPE(m, t)           m_mchtype(m, t)

/* compatibility with 4.3: m_copym() with M_DONTWAIT */
#define m_copy(m, o, l)         m_copym((m), (o), (l), M_DONTWAIT)
1014
/* Byte-size helpers: shift counts and the corresponding sizes. */
#define MBSHIFT         20                      /* log2 of 1MB */
#define MBSIZE          (1 << MBSHIFT)          /* 1MB */
#define GBSHIFT         30                      /* log2 of 1GB */
#define GBSIZE          (1 << GBSHIFT)          /* 1GB */
1019
/*
 * M_STRUCT_GET ensures that intermediate protocol header (from "off" to
 * "off+len") is located in single mbuf, on contiguous memory region.
 * The pointer to the region will be returned to pointer variable "val",
 * with type "typ".
 *
 * If the first mbuf already holds off+len bytes, val points straight into
 * it.  Otherwise m_pulldown() is used; if it returns NULL, both val and m
 * are set to NULL.
 *
 * M_STRUCT_GET0 does the same, except that it aligns the structure at
 * very top of mbuf.  GET0 is more likely to make a memory copy than GET.
 */
#define M_STRUCT_GET(val, typ, m, off, len) \
do { \
	struct mbuf *t; \
	int tmp; \
	\
	if ((m)->m_len < (off) + (len)) { \
		/* not contiguous in the first mbuf */ \
		t = m_pulldown((m), (off), (len), &tmp); \
		if (t == NULL) { \
			(val) = (typ)NULL; \
			(m) = NULL; \
		} else { \
			if (t->m_len < tmp + (len)) { \
				panic("m_pulldown malfunction"); \
			} \
			(val) = (typ)(mtod(t, caddr_t) + tmp); \
		} \
	} else { \
		(val) = (typ)(mtod((m), caddr_t) + (off)); \
	} \
} while (0)
1048
/*
 * Like M_STRUCT_GET, but the region must start at the very top of an mbuf:
 * val points at the start of m's data when off is 0 and m already holds len
 * bytes; otherwise m_pulldown() is called with a NULL offset pointer.  If
 * it returns NULL, both val and m are set to NULL.
 */
#define M_STRUCT_GET0(val, typ, m, off, len) \
do { \
	struct mbuf *t; \
	\
	if ((off) != 0 || ((m)->m_len < (len))) { \
		t = m_pulldown((m), (off), (len), NULL); \
		if (t == NULL) { \
			(val) = (typ)NULL; \
			(m) = NULL; \
		} else { \
			if (t->m_len < (len)) { \
				panic("m_pulldown malfunction"); \
			} \
			(val) = (typ)(void *)mtod(t, caddr_t); \
		} \
	} else { \
		(val) = (typ)(void *)mtod(m, caddr_t); \
	} \
} while (0)
1067
/*
 * Sanity-check an input mbuf and panic if it is malformed.
 *
 * The check fails (and panic_plain() is called) when any of these hold:
 *   - MBUF_PKTHDR is not set in m_flags
 *   - m_len is negative or larger than njclbytes
 *   - m_type is MT_FREE
 *   - M_EXT is set but m_ext.ext_buf is NULL
 *
 * `rcvif' is only used to build the panic message (name and low 16 bits of
 * if_flags).  Both parameters are parenthesized so that arbitrary
 * expressions (e.g. `*mp') can be passed; note that they are evaluated more
 * than once, so they must be free of side effects.
 */
#define MBUF_INPUT_CHECK(m, rcvif) \
do { \
	if (!((m)->m_flags & MBUF_PKTHDR) || \
	    (m)->m_len < 0 || \
	    (m)->m_len > njclbytes || \
	    (m)->m_type == MT_FREE || \
	    (((m)->m_flags & M_EXT) != 0 && (m)->m_ext.ext_buf == NULL)) { \
		panic_plain("Failed mbuf validity check: mbuf %p len %d " \
		    "type %d flags 0x%x data %p rcvif %s ifflags 0x%x", \
		    (m), (m)->m_len, (m)->m_type, (m)->m_flags, \
		    (((m)->m_flags & M_EXT) \
		    ? (m)->m_ext.ext_buf \
		    : (caddr_t __unsafe_indexable)(m)->m_data), \
		    if_name(rcvif), \
		    ((rcvif)->if_flags & 0xffff)); \
	} \
} while (0)
1085
/*
 * Simple mbuf queueing system
 *
 * This is basically a SIMPLEQ adapted to mbuf use (i.e. using
 * m_nextpkt instead of field.sqe_next).
 *
 * m_next is ignored, so queueing chains of mbufs is possible
 */

/* Declare a queue head structure with the given tag name. */
#define MBUFQ_HEAD(name) \
struct name { \
	struct mbuf *mq_first;  /* first packet */ \
	struct mbuf **mq_last;  /* addr of last next packet */ \
}
1099
/*
 * Reset queue q to the empty state: no first packet, and mq_last pointing
 * back at the head's own mq_first slot.
 */
#define MBUFQ_INIT(q) do { \
	(q)->mq_last = &MBUFQ_FIRST(q); \
	*(q)->mq_last = NULL; \
} while (0)
1104
/*
 * Insert packet m at the front of queue q.  If the queue was empty, m also
 * becomes the tail, so mq_last is moved to m's m_nextpkt slot.
 */
#define MBUFQ_PREPEND(q, m) do { \
	MBUFQ_NEXT(m) = MBUFQ_FIRST(q); \
	if (MBUFQ_NEXT(m) == NULL) { \
		(q)->mq_last = &MBUFQ_NEXT(m); \
	} \
	MBUFQ_FIRST(q) = (m); \
} while (0)
1110
/*
 * Append the single packet m to the tail of queue q.
 * The statement order matters: m is terminated first, then linked through
 * the current tail slot, then becomes the new tail.
 */
#define MBUFQ_ENQUEUE(q, m) do { \
	MBUFQ_NEXT(m) = NULL;                   /* m ends the list */ \
	*(q)->mq_last = (m);                    /* link after old tail */ \
	(q)->mq_last = &MBUFQ_NEXT(m);          /* m is the new tail */ \
} while (0)
1116
/*
 * Append a pre-linked run of packets to the tail of queue q: m is the
 * first packet of the run and n the last.  n is terminated and becomes the
 * new tail; the links between m and n are left untouched.
 */
#define MBUFQ_ENQUEUE_MULTI(q, m, n) do { \
	MBUFQ_NEXT(n) = NULL;                   /* terminate the run */ \
	*(q)->mq_last = (m);                    /* link after old tail */ \
	(q)->mq_last = &MBUFQ_NEXT(n);          /* n is the new tail */ \
} while (0)
1122
/*
 * Remove the first packet of queue q and store it in m (NULL when the
 * queue is empty).  If the queue becomes empty, mq_last is reset to the
 * head; otherwise the removed packet's m_nextpkt is cleared.
 */
#define MBUFQ_DEQUEUE(q, m) do { \
	(m) = MBUFQ_FIRST(q); \
	if ((m) != NULL) { \
		MBUFQ_FIRST(q) = MBUFQ_NEXT(m); \
		if (MBUFQ_FIRST(q) != NULL) { \
			MBUFQ_NEXT(m) = NULL; \
		} else { \
			(q)->mq_last = &MBUFQ_FIRST(q); \
		} \
	} \
} while (0)
1131
/*
 * Unlink packet m from queue q.
 * When m is the first packet this is MBUFQ_DEQUEUE(q, m).  Otherwise the
 * queue is walked until the predecessor of m is found and m is spliced
 * out; mq_last is fixed up if m was the tail.  The walk does not check for
 * the end of the list, so m must be on the queue.
 */
#define MBUFQ_REMOVE(q, m) do { \
	if (MBUFQ_FIRST(q) == (m)) { \
		MBUFQ_DEQUEUE(q, m); \
	} else { \
		struct mbuf *_m = MBUFQ_FIRST(q); \
		while (MBUFQ_NEXT(_m) != (m)) { \
			_m = MBUFQ_NEXT(_m); \
		} \
		MBUFQ_NEXT(_m) = MBUFQ_NEXT(MBUFQ_NEXT(_m)); \
		if (MBUFQ_NEXT(_m) == NULL) { \
			(q)->mq_last = &MBUFQ_NEXT(_m); \
		} \
	} \
} while (0)
1144
/*
 * Empty queue q: each packet is unlinked from the head, has its m_nextpkt
 * cleared and is passed to m_freem().  The queue is left in the empty
 * state.
 */
#define MBUFQ_DRAIN(q) do { \
	struct mbuf *__m0 = MBUFQ_FIRST(q); \
	while (__m0 != NULL) { \
		MBUFQ_FIRST(q) = MBUFQ_NEXT(__m0); \
		MBUFQ_NEXT(__m0) = NULL; \
		m_freem(__m0); \
		__m0 = MBUFQ_FIRST(q); \
	} \
	(q)->mq_last = &MBUFQ_FIRST(q); \
} while (0)
1154
/*
 * Empty queue q like MBUFQ_DRAIN, but hand each packet to m_drop() instead
 * of m_freem(): d is OR-ed with DROPTAP_FLAG_L2_MISSING and passed along
 * with r; the remaining m_drop() arguments are NULL and 0.
 */
#define MBUFQ_DROP_AND_DRAIN(q, d, r) do { \
	struct mbuf *__m0 = MBUFQ_FIRST(q); \
	while (__m0 != NULL) { \
		MBUFQ_FIRST(q) = MBUFQ_NEXT(__m0); \
		MBUFQ_NEXT(__m0) = NULL; \
		m_drop(__m0, (d) | DROPTAP_FLAG_L2_MISSING, (r), NULL, 0); \
		__m0 = MBUFQ_FIRST(q); \
	} \
	(q)->mq_last = &MBUFQ_FIRST(q); \
} while (0)
1164
/* Iterate m over every packet of queue q, following m_nextpkt. */
#define MBUFQ_FOREACH(m, q) \
	for ((m) = MBUFQ_FIRST(q); \
	    (m) != NULL; \
	    (m) = MBUFQ_NEXT(m))

/*
 * Same as MBUFQ_FOREACH, but the successor is latched in tvar before the
 * loop body runs, so the body may unlink or free m.
 */
#define MBUFQ_FOREACH_SAFE(m, q, tvar) \
	for ((m) = MBUFQ_FIRST(q); \
	    (m) != NULL && (((tvar) = MBUFQ_NEXT(m)), 1); \
	    (m) = (tvar))
1174
/* Accessors: emptiness test, first packet of a queue, successor of a packet. */
#define MBUFQ_EMPTY(q)          (MBUFQ_FIRST(q) == NULL)
#define MBUFQ_FIRST(q)          ((q)->mq_first)
#define MBUFQ_NEXT(m)           ((m)->m_nextpkt)
/*
 * mq_last is initialized to point to mq_first, so check if they're
 * equal and return NULL when the list is empty.  Otherwise, we need
 * to subtract the offset of MBUFQ_NEXT (i.e. m_nextpkt field) to get
 * to the base mbuf address to return to caller.
 */
#define MBUFQ_LAST(head) \
	(((head)->mq_last != &MBUFQ_FIRST(head)) ? \
	__container_of((head)->mq_last, struct mbuf, m_nextpkt) : NULL)
1187
/*
 * Crumb helpers (DEBUG / DEVELOPMENT builds only; they expand to nothing
 * otherwise).
 *
 * MBUFQ_ADD_CRUMB_MULTI(_q, _h, _t, _f)
 *	calls m_add_crumb(pkt, _f) on every packet from _h through _t.
 *	The chain is temporarily terminated at _t so the walk cannot run
 *	past the tail into packets that are not part of the run; the
 *	original successor of _t is restored afterwards.
 *
 * MBUFQ_ADD_CRUMB(_q, _m, _f)
 *	calls m_add_crumb(_m, _f) on a single packet.
 *
 * _q is accepted but not used by either macro.
 */
#if (DEBUG || DEVELOPMENT)
#define MBUFQ_ADD_CRUMB_MULTI(_q, _h, _t, _f) do { \
	struct mbuf * _saved = (_t)->m_nextpkt; \
	struct mbuf * _m; \
	(_t)->m_nextpkt = NULL;         /* bound the walk at the tail */ \
	for (_m = (_h); _m != NULL; _m = MBUFQ_NEXT(_m)) { \
		m_add_crumb((_m), (_f)); \
	} \
	(_t)->m_nextpkt = _saved;       /* put the successor back */ \
} while (0)

#define MBUFQ_ADD_CRUMB(_q, _m, _f) do { \
	m_add_crumb((_m), (_f)); \
} while (0)
#else
#define MBUFQ_ADD_CRUMB_MULTI(_q, _h, _t, _f)
#define MBUFQ_ADD_CRUMB(_q, _m, _f)
#endif /* (DEBUG || DEVELOPMENT) */
1205
1206 struct mbufq {
1207 MBUFQ_HEAD(counted_mbufq) mq;
1208 uint32_t count;
1209 uint32_t bytes;
1210 };
1211
1212 inline void
mbufq_init(struct mbufq * q)1213 mbufq_init(struct mbufq *q)
1214 {
1215 MBUFQ_INIT(&q->mq);
1216 q->bytes = q->count = 0;
1217 }
1218
1219 inline void
mbufq_enqueue(struct mbufq * q,struct mbuf * head,struct mbuf * tail,uint32_t cnt,uint32_t bytes)1220 mbufq_enqueue(struct mbufq *q, struct mbuf *head, struct mbuf *tail,
1221 uint32_t cnt, uint32_t bytes)
1222 {
1223 MBUFQ_ENQUEUE_MULTI(&q->mq, head, tail);
1224 q->count += cnt;
1225 q->bytes += bytes;
1226 }
1227
1228 inline boolean_t
mbufq_empty(struct mbufq * q)1229 mbufq_empty(struct mbufq *q)
1230 {
1231 return q->count == 0;
1232 }
1233
1234 inline struct mbuf*
mbufq_first(struct mbufq * q)1235 mbufq_first(struct mbufq *q)
1236 {
1237 return MBUFQ_FIRST(&q->mq);
1238 }
1239
1240 inline struct mbuf*
mbufq_last(struct mbufq * q)1241 mbufq_last(struct mbufq *q)
1242 {
1243 return MBUFQ_LAST(&q->mq);
1244 }
1245
1246 #endif /* XNU_KERNEL_PRIVATE */
1247
1248 /*
1249 * Mbuf statistics (legacy).
1250 */
1251 struct mbstat {
1252 u_int32_t m_mbufs; /* mbufs obtained from page pool */
1253 u_int32_t m_clusters; /* clusters obtained from page pool */
1254 u_int32_t m_spare; /* spare field */
1255 u_int32_t m_clfree; /* free clusters */
1256 u_int32_t m_drops; /* times failed to find space */
1257 u_int32_t m_wait; /* times waited for space */
1258 u_int32_t m_drain; /* times drained protocols for space */
1259 u_short m_mtypes[256]; /* type specific mbuf allocations */
1260 u_int32_t m_mcfail; /* times m_copym failed */
1261 u_int32_t m_mpfail; /* times m_pullup failed */
1262 u_int32_t m_msize; /* length of an mbuf */
1263 u_int32_t m_mclbytes; /* length of an mbuf cluster */
1264 u_int32_t m_minclsize; /* min length of data to allocate a cluster */
1265 u_int32_t m_mlen; /* length of data in an mbuf */
1266 u_int32_t m_mhlen; /* length of data in a header mbuf */
1267 u_int32_t m_bigclusters; /* clusters obtained from page pool */
1268 u_int32_t m_bigclfree; /* free clusters */
1269 u_int32_t m_bigmclbytes; /* length of an mbuf cluster */
1270 u_int32_t m_forcedefunct; /* times we force defunct'ed an app's sockets */
1271 };
1272
1273 /* Compatibillity with 10.3 */
1274 struct ombstat {
1275 u_int32_t m_mbufs; /* mbufs obtained from page pool */
1276 u_int32_t m_clusters; /* clusters obtained from page pool */
1277 u_int32_t m_spare; /* spare field */
1278 u_int32_t m_clfree; /* free clusters */
1279 u_int32_t m_drops; /* times failed to find space */
1280 u_int32_t m_wait; /* times waited for space */
1281 u_int32_t m_drain; /* times drained protocols for space */
1282 u_short m_mtypes[256]; /* type specific mbuf allocations */
1283 u_int32_t m_mcfail; /* times m_copym failed */
1284 u_int32_t m_mpfail; /* times m_pullup failed */
1285 u_int32_t m_msize; /* length of an mbuf */
1286 u_int32_t m_mclbytes; /* length of an mbuf cluster */
1287 u_int32_t m_minclsize; /* min length of data to allocate a cluster */
1288 u_int32_t m_mlen; /* length of data in an mbuf */
1289 u_int32_t m_mhlen; /* length of data in a header mbuf */
1290 };
1291
/*
 * mbuf class statistics.
 *
 * MAX_MBUF_CNAME is the class-name length; the name buffers in the class
 * statistics structures are declared with MAX_MBUF_CNAME + 1 bytes.
 */
#define MAX_MBUF_CNAME  15
1296
#if defined(XNU_KERNEL_PRIVATE)
/*
 * For backwards compatibility with 32-bit userland process.
 * Packed; mbcl_pad is always present in this variant.
 */
struct omb_class_stat {
	char            mbcl_cname[MAX_MBUF_CNAME + 1]; /* class name */
	u_int32_t       mbcl_size;              /* buffer size */
	u_int32_t       mbcl_total;             /* # of buffers created */
	u_int32_t       mbcl_active;            /* # of active buffers */
	u_int32_t       mbcl_infree;            /* # of available buffers */
	u_int32_t       mbcl_slab_cnt;          /* # of available slabs */
	u_int32_t       mbcl_pad;               /* padding */
	u_int64_t       mbcl_alloc_cnt;         /* # of times alloc is called */
	u_int64_t       mbcl_free_cnt;          /* # of times free is called */
	u_int64_t       mbcl_notified;          /* # of notified wakeups */
	u_int64_t       mbcl_purge_cnt;         /* # of purges so far */
	u_int64_t       mbcl_fail_cnt;          /* # of allocation failures */
	u_int32_t       mbcl_ctotal;            /* total only for this class */
	u_int32_t       mbcl_release_cnt;       /* amount of memory returned */
	/*
	 * Cache layer statistics
	 */
	u_int32_t       mbcl_mc_state;          /* cache state (see below) */
	u_int32_t       mbcl_mc_cached;         /* # of cached buffers */
	u_int32_t       mbcl_mc_waiter_cnt;     /* # waiters on the cache */
	u_int32_t       mbcl_mc_wretry_cnt;     /* # of wait retries */
	u_int32_t       mbcl_mc_nwretry_cnt;    /* # of no-wait retry attempts */
	u_int32_t       mbcl_reserved[7];       /* for future use */
} __attribute__((__packed__));
#endif /* XNU_KERNEL_PRIVATE */
1325
1326 typedef struct mb_class_stat {
1327 char mbcl_cname[MAX_MBUF_CNAME + 1]; /* class name */
1328 u_int32_t mbcl_size; /* buffer size */
1329 u_int32_t mbcl_total; /* # of buffers created */
1330 u_int32_t mbcl_active; /* # of active buffers */
1331 u_int32_t mbcl_infree; /* # of available buffers */
1332 u_int32_t mbcl_slab_cnt; /* # of available slabs */
1333 #if defined(KERNEL) || defined(__LP64__)
1334 u_int32_t mbcl_pad; /* padding */
1335 #endif /* KERNEL || __LP64__ */
1336 u_int64_t mbcl_alloc_cnt; /* # of times alloc is called */
1337 u_int64_t mbcl_free_cnt; /* # of times free is called */
1338 u_int64_t mbcl_notified; /* # of notified wakeups */
1339 u_int64_t mbcl_purge_cnt; /* # of purges so far */
1340 u_int64_t mbcl_fail_cnt; /* # of allocation failures */
1341 u_int32_t mbcl_ctotal; /* total only for this class */
1342 u_int32_t mbcl_release_cnt; /* amount of memory returned */
1343 /*
1344 * Cache layer statistics
1345 */
1346 u_int32_t mbcl_mc_state; /* cache state (see below) */
1347 u_int32_t mbcl_mc_cached; /* # of cached buffers */
1348 u_int32_t mbcl_mc_waiter_cnt; /* # waiters on the cache */
1349 u_int32_t mbcl_mc_wretry_cnt; /* # of wait retries */
1350 u_int32_t mbcl_mc_nwretry_cnt; /* # of no-wait retry attempts */
1351 u_int32_t mbcl_reserved[7]; /* for future use */
1352 } mb_class_stat_t;
1353
/* Cache states */
#define MCS_DISABLED    0       /* cache is permanently disabled */
#define MCS_ONLINE      1       /* cache is online */
#define MCS_PURGING     2       /* cache is being purged */
#define MCS_OFFLINE     3       /* cache is offline (resizing) */
1358
#if defined(XNU_KERNEL_PRIVATE)
/* Max number of distinct Mbuf classes; sizes the mbs_class arrays. */
#define MB_STAT_MAX_MB_CLASSES  8
#endif /* XNU_KERNEL_PRIVATE */
1362
#if defined(XNU_KERNEL_PRIVATE)
/*
 * For backwards compatibility with 32-bit userland process.
 * Packed; holds MB_STAT_MAX_MB_CLASSES omb_class_stat entries.
 */
struct omb_stat {
	u_int32_t               mbs_cnt;        /* number of classes */
	u_int32_t               mbs_pad;        /* padding */
	struct omb_class_stat   mbs_class[MB_STAT_MAX_MB_CLASSES]; /* class array */
} __attribute__((__packed__));
#endif /* XNU_KERNEL_PRIVATE */
1371
1372 typedef struct mb_stat {
1373 u_int32_t mbs_cnt; /* number of classes */
1374 #if defined(KERNEL) || defined(__LP64__)
1375 u_int32_t mbs_pad; /* padding */
1376 #endif /* KERNEL || __LP64__ */
1377 #if defined(XNU_KERNEL_PRIVATE)
1378 mb_class_stat_t mbs_class[MB_STAT_MAX_MB_CLASSES];
1379 #else /* XNU_KERNEL_PRIVATE */
1380 mb_class_stat_t mbs_class[1]; /* class array */
1381 #endif /* XNU_KERNEL_PRIVATE */
1382 } mb_stat_t;
1383
1384 #ifdef PRIVATE
#define MLEAK_STACK_DEPTH       16      /* Max PC stack depth */

/*
 * One mleak trace record: four 64-bit counters followed by
 * MLEAK_STACK_DEPTH 64-bit address slots.
 */
typedef struct mleak_trace_stat {
	u_int64_t       mltr_collisions;
	u_int64_t       mltr_hitcount;
	u_int64_t       mltr_allocs;
	u_int64_t       mltr_depth;
	u_int64_t       mltr_addr[MLEAK_STACK_DEPTH];
} mleak_trace_stat_t;
1394
1395 typedef struct mleak_stat {
1396 u_int32_t ml_isaddr64; /* 64-bit KVA? */
1397 u_int32_t ml_cnt; /* number of traces */
1398 mleak_trace_stat_t ml_trace[1]; /* trace array */
1399 } mleak_stat_t;
1400
/* Counters describing the mleak allocation/trace hash tables. */
struct mleak_table {
	u_int32_t       mleak_capture;          /* sampling capture counter */
	u_int32_t       mleak_sample_factor;    /* sample factor */

	/* Times two active records want to occupy the same spot */
	u_int64_t       alloc_collisions;
	u_int64_t       trace_collisions;

	/* Times new record lands on spot previously occupied by freed alloc */
	u_int64_t       alloc_overwrites;
	u_int64_t       trace_overwrites;

	/* Times a new alloc or trace is put into the hash table */
	u_int64_t       alloc_recorded;
	u_int64_t       trace_recorded;

	/* Total number of outstanding allocs */
	u_int64_t       outstanding_allocs;

	/* Times mleak_log returned false because couldn't acquire the lock */
	u_int64_t       total_conflicts;
};
1423
/* Feature-test macro: struct m_tag_stats is available. */
#define HAS_M_TAG_STATS 1

/* Per m_tag id/type allocation counters. */
struct m_tag_stats {
	u_int32_t       mts_id;
	u_int16_t       mts_type;
	u_int16_t       mts_len;
	u_int64_t       mts_alloc_count;
	u_int64_t       mts_alloc_failed;
	u_int64_t       mts_free_count;
};
1434
1435
/* Comma-separated list of m_tag type names, as a single string literal. */
#define M_TAG_TYPE_NAMES \
	"other,dummynet,ipfilt,encap,inet6,ipsec,cfil_udp,pf_reass,aqm,drvaux"
1438
1439 #endif /* PRIVATE */
1440
1441 #ifdef KERNEL_PRIVATE
1442 __BEGIN_DECLS
1443
/*
 * Exported (private)
 */

extern struct mbstat mbstat;    /* the legacy mbuf statistics object */
1449
1450 __END_DECLS
1451 #endif /* KERNEL_PRIVATE */
1452
1453 #ifdef XNU_KERNEL_PRIVATE
1454 __BEGIN_DECLS
1455
1456 /*
1457 * Not exported (xnu private)
1458 */
1459
1460 /* flags to m_get/MGET */
1461 /* Need to include malloc.h to get right options for malloc */
1462 #include <sys/malloc.h>
1463
1464 struct mbuf;
1465
/* length value passed to m_copy to request a copy of everything */
#define M_COPYALL       1000000000

/* mbuf spellings of the malloc wait flags */
#define M_DONTWAIT      M_NOWAIT
#define M_WAIT          M_WAITOK
1471
/* pkthdr handling modes for m_copym and variants */
#define M_COPYM_NOOP_HDR        0       /* don't copy/move pkthdr contents */
#define M_COPYM_COPY_HDR        1       /* copy pkthdr from old to new */
#define M_COPYM_MOVE_HDR        2       /* move pkthdr from old to new */
#define M_COPYM_MUST_COPY_HDR   3       /* MUST copy pkthdr from old to new */
#define M_COPYM_MUST_MOVE_HDR   4       /* MUST move pkthdr from old to new */
1478
1479 extern void m_freem(struct mbuf *) __XNU_INTERNAL(m_freem);
1480 extern void m_drop(mbuf_t, uint16_t, uint32_t, const char *, uint16_t);
1481 extern void m_drop_if(mbuf_t, struct ifnet *, uint16_t, uint32_t, const char *, uint16_t);
1482 extern void m_drop_list(mbuf_t, struct ifnet *, uint16_t, uint32_t, const char *, uint16_t);
1483 extern void m_drop_extended(mbuf_t, struct ifnet *, char *,
1484 uint16_t, uint32_t, const char *, uint16_t);
1485
1486 extern u_int64_t mcl_to_paddr(char *);
1487 extern void m_adj(struct mbuf *, int);
1488 extern void m_cat(struct mbuf *, struct mbuf *);
1489 extern void m_copydata(struct mbuf *, int, int len, void * __sized_by(len));
1490 extern struct mbuf *m_copym(struct mbuf *, int, int, int);
1491 extern struct mbuf *m_copym_mode(struct mbuf *, int, int, int, struct mbuf **, int *, uint32_t);
1492 extern struct mbuf *m_get(int, int);
1493 extern struct mbuf *m_gethdr(int, int);
1494 extern struct mbuf *m_getpacket(void);
1495 extern struct mbuf *m_getpackets(int, int, int);
1496 extern struct mbuf *m_mclget(struct mbuf *, int);
1497 extern void *__unsafe_indexable m_mtod(struct mbuf *);
1498 extern struct mbuf *m_prepend_2(struct mbuf *, int, int, int);
1499 extern struct mbuf *m_pullup(struct mbuf *, int);
1500 extern struct mbuf *m_split(struct mbuf *, int, int);
1501 extern void m_mclfree(caddr_t p);
1502 extern bool mbuf_class_under_pressure(struct mbuf *m);
1503 extern int m_chain_capacity(const struct mbuf *m);
1504
1505 /*
1506 * Accessors for the mbuf data range.
1507 * The "lower bound" is the start of the memory range that m->m_data is allowed
1508 * to point into. The "start" is where m->m_data points to; equivalent to the
1509 * late m_mtod. The end is where m->m_data + m->m_len points to. The upper bound
1510 * is the end of the memory range that m->m_data + m->m_len is allowed to point
1511 * into.
1512 * In a well-formed range, lower bound <= start <= end <= upper bound. An
1513 * ill-formed range always means a programming error.
1514 */
1515 __stateful_pure static inline caddr_t __header_bidi_indexable
m_mtod_lower_bound(struct mbuf * m)1516 m_mtod_lower_bound(struct mbuf *m)
1517 {
1518 return M_START(m);
1519 }
1520
1521 __stateful_pure static inline caddr_t __header_bidi_indexable
m_mtod_current(struct mbuf * m)1522 m_mtod_current(struct mbuf *m)
1523 {
1524 caddr_t data = m_mtod_lower_bound(m);
1525 return data + (m->m_data - (uintptr_t)data);
1526 }
1527
1528 __stateful_pure static inline caddr_t __header_bidi_indexable
m_mtod_end(struct mbuf * m)1529 m_mtod_end(struct mbuf *m)
1530 {
1531 return m_mtod_current(m) + m->m_len;
1532 }
1533
1534 __stateful_pure static inline caddr_t __header_bidi_indexable
m_mtod_upper_bound(struct mbuf * m)1535 m_mtod_upper_bound(struct mbuf *m)
1536 {
1537 return m_mtod_lower_bound(m) + M_SIZE(m);
1538 }
1539
1540 static inline bool
m_has_mtype(const struct mbuf * m,int mtype_flags)1541 m_has_mtype(const struct mbuf *m, int mtype_flags)
1542 {
1543 return (1 << m->m_type) & mtype_flags;
1544 }
1545
1546 static inline int
m_capacity(const struct mbuf * m)1547 m_capacity(const struct mbuf *m)
1548 {
1549 return _MSIZE + ((m->m_flags & M_EXT) ? m->m_ext.ext_size : 0);
1550 }
1551
/*
 * On platforms which require strict alignment (currently for anything but
 * i386 or x86_64 or arm64), this macro checks whether the data pointer of an
 * mbuf is 32-bit aligned (this is the expected minimum alignment for protocol
 * headers), and panics otherwise.  When the mbuf has a packet header with a
 * receive interface, the interface name and its if_alignerrs counter are
 * included in the panic message.
 *
 * On i386, x86_64 and arm64 the macro expands to nothing.
 */
#if defined(__i386__) || defined(__x86_64__) || defined(__arm64__)
#define MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(_m)
#else /* !__i386__ && !__x86_64__ && !__arm64__ */
#define MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(_m) do { \
	if (!IS_P2ALIGNED((_m)->m_data, sizeof (u_int32_t))) { \
		if (((_m)->m_flags & M_PKTHDR) && \
		    (_m)->m_pkthdr.rcvif != NULL) { \
			/* receive interface known: report it as well */ \
			panic_plain("\n%s: mbuf %p data ptr %p is not " \
			    "32-bit aligned [%s: alignerrs=%lld]\n", \
			    __func__, (_m), \
			    (caddr_t __unsafe_indexable)(_m)->m_data, \
			    if_name((_m)->m_pkthdr.rcvif), \
			    (_m)->m_pkthdr.rcvif->if_alignerrs); \
		} else { \
			panic_plain("\n%s: mbuf %p data ptr %p is not " \
			    "32-bit aligned\n", \
			    __func__, (_m), \
			    (caddr_t __unsafe_indexable)(_m)->m_data); \
		} \
	} \
} while (0)
#endif /* !__i386__ && !__x86_64__ && !__arm64__ */
1580
/* Maximum number of MBUF_SC values (excluding MBUF_SC_UNSPEC) */
#define MBUF_SC_MAX_CLASSES     10
1583
/*
 * These conversion macros rely on the corresponding MBUF_SC and
 * MBUF_TC values in order to establish the following mapping:
 *
 *	MBUF_SC_BK_SYS	] ==>	MBUF_TC_BK
 *	MBUF_SC_BK	]
 *
 *	MBUF_SC_BE	] ==>	MBUF_TC_BE
 *	MBUF_SC_RD	]
 *	MBUF_SC_OAM	]
 *
 *	MBUF_SC_AV	] ==>	MBUF_TC_VI
 *	MBUF_SC_RV	]
 *	MBUF_SC_VI	]
 *	MBUF_SC_SIG	]
 *
 *	MBUF_SC_VO	] ==>	MBUF_TC_VO
 *	MBUF_SC_CTL	]
 *
 * The values assigned to each service class allows for a fast mapping to
 * the corresponding MBUF_TC traffic class values, as well as to retrieve the
 * assigned index; therefore care must be taken when comparing against these
 * values.  Use the corresponding class and index macros to retrieve the
 * corresponding portion, and never assume that a higher class corresponds
 * to a higher index.
 *
 * MBUF_SCVAL:     low 16 bits of a service class value
 * MBUF_SCIDX:     bits 16..23 of a service class value, shifted right by 3
 * MBUF_SC2TC:     MBUF_SCVAL shifted right by 7
 * MBUF_TC2SCVAL:  traffic class shifted left by 7
 */
#define MBUF_SCVAL(x)           ((x) & 0xffff)
#define MBUF_SCIDX(x)           ((((x) >> 16) & 0xff) >> 3)
#define MBUF_SC2TC(_sc)         (MBUF_SCVAL(_sc) >> 7)
#define MBUF_TC2SCVAL(_tc)      ((_tc) << 7)
/* Background: MBUF_SC_BK_SYS or MBUF_SC_BK */
#define IS_MBUF_SC_BACKGROUND(_sc) \
	((MBUF_SC_BK_SYS == (_sc)) || (MBUF_SC_BK == (_sc)))

/* Realtime: any value in the inclusive range MBUF_SC_AV .. MBUF_SC_VO */
#define IS_MBUF_SC_REALTIME(_sc) \
	(MBUF_SC_AV <= (_sc) && (_sc) <= MBUF_SC_VO)

/* Best effort: MBUF_SC_BE, MBUF_SC_RD or MBUF_SC_OAM */
#define IS_MBUF_SC_BESTEFFORT(_sc) \
	(MBUF_SC_BE == (_sc) || MBUF_SC_RD == (_sc) || MBUF_SC_OAM == (_sc))
1619
/* Index portion (MBUF_SCIDX) of each service class */
#define SCIDX_BK_SYS            MBUF_SCIDX(MBUF_SC_BK_SYS)
#define SCIDX_BK                MBUF_SCIDX(MBUF_SC_BK)
#define SCIDX_BE                MBUF_SCIDX(MBUF_SC_BE)
#define SCIDX_RD                MBUF_SCIDX(MBUF_SC_RD)
#define SCIDX_OAM               MBUF_SCIDX(MBUF_SC_OAM)
#define SCIDX_AV                MBUF_SCIDX(MBUF_SC_AV)
#define SCIDX_RV                MBUF_SCIDX(MBUF_SC_RV)
#define SCIDX_VI                MBUF_SCIDX(MBUF_SC_VI)
#define SCIDX_SIG               MBUF_SCIDX(MBUF_SC_SIG)
#define SCIDX_VO                MBUF_SCIDX(MBUF_SC_VO)
#define SCIDX_CTL               MBUF_SCIDX(MBUF_SC_CTL)
1631
/* Value portion (MBUF_SCVAL) of each service class */
#define SCVAL_BK_SYS            MBUF_SCVAL(MBUF_SC_BK_SYS)
#define SCVAL_BK                MBUF_SCVAL(MBUF_SC_BK)
#define SCVAL_BE                MBUF_SCVAL(MBUF_SC_BE)
#define SCVAL_RD                MBUF_SCVAL(MBUF_SC_RD)
#define SCVAL_OAM               MBUF_SCVAL(MBUF_SC_OAM)
#define SCVAL_AV                MBUF_SCVAL(MBUF_SC_AV)
#define SCVAL_RV                MBUF_SCVAL(MBUF_SC_RV)
#define SCVAL_VI                MBUF_SCVAL(MBUF_SC_VI)
#define SCVAL_SIG               MBUF_SCVAL(MBUF_SC_SIG)
#define SCVAL_VO                MBUF_SCVAL(MBUF_SC_VO)
#define SCVAL_CTL               MBUF_SCVAL(MBUF_SC_CTL)
1643
/*
 * True when c equals one of the defined MBUF_SC_* service class values.
 * The argument is parenthesized so that compound expressions (e.g. `a | b')
 * compare as a whole; it is evaluated more than once, so it must be free of
 * side effects.
 */
#define MBUF_VALID_SC(c) \
	((c) == MBUF_SC_BK_SYS || (c) == MBUF_SC_BK || (c) == MBUF_SC_BE || \
	(c) == MBUF_SC_RD || (c) == MBUF_SC_OAM || (c) == MBUF_SC_AV || \
	(c) == MBUF_SC_RV || (c) == MBUF_SC_VI || (c) == MBUF_SC_SIG || \
	(c) == MBUF_SC_VO || (c) == MBUF_SC_CTL)
1649
/*
 * True when c equals one of the SCIDX_* service class index values.
 * The argument is parenthesized so that compound expressions compare as a
 * whole; it is evaluated more than once, so it must be free of side effects.
 */
#define MBUF_VALID_SCIDX(c) \
	((c) == SCIDX_BK_SYS || (c) == SCIDX_BK || (c) == SCIDX_BE || \
	(c) == SCIDX_RD || (c) == SCIDX_OAM || (c) == SCIDX_AV || \
	(c) == SCIDX_RV || (c) == SCIDX_VI || (c) == SCIDX_SIG || \
	(c) == SCIDX_VO || (c) == SCIDX_CTL)
1655
/*
 * True when c equals one of the SCVAL_* service class values.
 * Every alternative, including the final SCVAL_CTL one, is an equality
 * test against c, so an unknown value yields false.  The argument is
 * parenthesized so that compound expressions compare as a whole; it is
 * evaluated more than once, so it must be free of side effects.
 */
#define MBUF_VALID_SCVAL(c) \
	((c) == SCVAL_BK_SYS || (c) == SCVAL_BK || (c) == SCVAL_BE || \
	(c) == SCVAL_RD || (c) == SCVAL_OAM || (c) == SCVAL_AV || \
	(c) == SCVAL_RV || (c) == SCVAL_VI || (c) == SCVAL_SIG || \
	(c) == SCVAL_VO || (c) == SCVAL_CTL)
1661
/* mbuf subsystem globals defined elsewhere */
extern unsigned int nmbclusters;        /* number of mapped clusters */
extern int njcl;                        /* # of jumbo clusters */
extern int njclbytes;                   /* size of a jumbo cluster */
extern int max_hdr;                     /* largest link+protocol header */
extern int max_datalen;                 /* MHLEN - max_hdr */

extern int max_linkhdr;                 /* largest link-level header */

/* Use max_protohdr instead of _max_protohdr */
extern int max_protohdr;                /* largest protocol header */

extern uint32_t high_sb_max;
1674
/*
 * Kernel-internal mbuf routines (all declared __private_extern__).  Only
 * the prototypes appear here; the definitions live outside this header.
 *
 * Bounds annotations visible in these signatures:
 *  - m_clattach(): the caddr_t buffer is __sized_by(extsize), i.e. its
 *    extent is given by the size_t extsize parameter.
 *  - m_bigalloc() / m_16kalloc(): the result is annotated
 *    __sized_by_or_null(MBIGCLBYTES) / __sized_by_or_null(M16KCLBYTES).
 *  - m_append(): the caddr_t buffer is __sized_by(len).
 *
 * m_free() is additionally tagged __XNU_INTERNAL(m_free).
 */
1675 __private_extern__ unsigned int mbuf_default_ncl(uint64_t);
1676 __private_extern__ void mbinit(void);
1677 __private_extern__ struct mbuf *m_clattach(struct mbuf *, int, caddr_t __sized_by(extsize),
1678 void (*)(caddr_t, u_int, caddr_t), size_t extsize, caddr_t, int, int);
1679 __private_extern__ char * __sized_by_or_null(MBIGCLBYTES) m_bigalloc(int);
1680 __private_extern__ void m_bigfree(caddr_t, u_int, caddr_t);
1681 __private_extern__ struct mbuf *m_mbigget(struct mbuf *, int);
1682 __private_extern__ char * __sized_by_or_null(M16KCLBYTES) m_16kalloc(int);
1683 __private_extern__ void m_16kfree(caddr_t, u_int, caddr_t);
1684 __private_extern__ struct mbuf *m_m16kget(struct mbuf *, int);
1685 __private_extern__ int m_reinit(struct mbuf *, int);
1686 __private_extern__ struct mbuf *m_free(struct mbuf *) __XNU_INTERNAL(m_free);
1687 __private_extern__ struct mbuf *m_getclr(int, int);
1688 __private_extern__ struct mbuf *m_getptr(struct mbuf *, int, int *);
1689 __private_extern__ unsigned int m_length(struct mbuf *);
1690 __private_extern__ unsigned int m_length2(struct mbuf *, struct mbuf **);
1691 __private_extern__ unsigned int m_fixhdr(struct mbuf *);
1692 __private_extern__ struct mbuf *m_defrag(struct mbuf *, int);
1693 __private_extern__ struct mbuf *m_defrag_offset(struct mbuf *, u_int32_t, int);
1694 __private_extern__ struct mbuf *m_prepend(struct mbuf *, int, int);
1695 __private_extern__ struct mbuf *m_copyup(struct mbuf *, int, int);
1696 __private_extern__ struct mbuf *m_retry(int, int);
1697 __private_extern__ struct mbuf *m_retryhdr(int, int);
1698 __private_extern__ int m_freem_list(struct mbuf *);
1699 __private_extern__ int m_append(struct mbuf *, int len, caddr_t __sized_by(len));
1700 __private_extern__ struct mbuf *m_last(struct mbuf *);
1701 __private_extern__ struct mbuf *m_pulldown(struct mbuf *, int, int, int *);
1702
/*
 * Kernel-internal prototypes for cluster-backed mbufs and for copying
 * packet-header state from one mbuf to another (m_copy_pkthdr,
 * m_dup_pkthdr, m_copy_pftag, m_copy_necptag, m_copy_classifier).
 * m_mclalloc() returns a pointer annotated __sized_by_or_null(MCLBYTES).
 * NOTE(review): which of the two struct mbuf * arguments is the source is
 * not visible from these unnamed parameters -- check the definitions.
 */
1703 __private_extern__ struct mbuf *m_getcl(int, int, int);
1704 __private_extern__ char * __sized_by_or_null(MCLBYTES) m_mclalloc(int);
1705 __private_extern__ int m_mclhasreference(struct mbuf *);
1706 __private_extern__ void m_copy_pkthdr(struct mbuf *, struct mbuf *);
1707 __private_extern__ int m_dup_pkthdr(struct mbuf *, struct mbuf *, int);
1708 __private_extern__ void m_copy_pftag(struct mbuf *, struct mbuf *);
1709 __private_extern__ void m_copy_necptag(struct mbuf *, struct mbuf *);
1710 __private_extern__ void m_copy_classifier(struct mbuf *, struct mbuf *);
1711
/*
 * Conversion helpers between raw pointers, mbufs and cluster numbers:
 * m_dtom() maps a void * to a struct mbuf *, m_mtocl() maps a void * to an
 * int, and m_cltom() maps an int to a union mcluster *.
 * NOTE(review): exact meaning of the int (presumably a cluster index) is
 * not visible here -- confirm at the definitions.
 */
1712 __private_extern__ struct mbuf *m_dtom(void *);
1713 __private_extern__ int m_mtocl(void *);
1714 __private_extern__ union mcluster *m_cltom(int);
1715
/*
 * Kernel-internal prototypes operating on a single mbuf: m_align(),
 * m_normalize() (returns a struct mbuf *) and m_mchtype() (takes the mbuf
 * and an int t).  Semantics are defined with the implementations.
 */
1716 __private_extern__ void m_align(struct mbuf *, int);
1717
1718 __private_extern__ struct mbuf *m_normalize(struct mbuf *m);
1719 __private_extern__ void m_mchtype(struct mbuf *m, int t);
1720
/*
 * Kernel-internal prototypes for copying data into mbufs, duplicating
 * mbufs and obtaining packets.
 *
 * m_copyback() and m_copyback_cow() take a const void * source buffer that
 * is annotated __sized_by(len), i.e. its extent is the int len parameter
 * that precedes it.
 */
1721 __private_extern__ void m_copyback(struct mbuf *, int, int len, const void * __sized_by(len));
1722 __private_extern__ struct mbuf *m_copyback_cow(struct mbuf *, int, int len,
1723 const void * __sized_by(len), int);
1724 __private_extern__ int m_makewritable(struct mbuf **, int, int, int);
1725 __private_extern__ struct mbuf *m_dup(struct mbuf *m, int how);
1726 __private_extern__ struct mbuf *m_copym_with_hdrs(struct mbuf *, int, int, int,
1727 struct mbuf **, int *, uint32_t);
1728 __private_extern__ struct mbuf *m_getpackethdrs(int, int);
1729 __private_extern__ struct mbuf *m_getpacket_how(int);
1730 __private_extern__ struct mbuf *m_getpackets_internal(unsigned int *, int,
1731 int, int, size_t);
1732 __private_extern__ struct mbuf *m_allocpacket_internal(unsigned int *, size_t,
1733 unsigned int *, int, int, size_t);
1734
/*
 * Kernel-internal prototypes for the external-storage ("ext") state of an
 * mbuf: a 32-bit property that can be set and read back, and queries /
 * activation for paired external storage.
 * NOTE(review): the meaning of the two uint32_t arguments of
 * m_ext_set_prop() is not visible here -- confirm at the definition.
 */
1735 __private_extern__ int m_ext_set_prop(struct mbuf *, uint32_t, uint32_t);
1736 __private_extern__ uint32_t m_ext_get_prop(struct mbuf *);
1737 __private_extern__ int m_ext_paired_is_active(struct mbuf *);
1738 __private_extern__ void m_ext_paired_activate(struct mbuf *);
1739
/*
 * "Crumb" recording prototypes.  m_add_hdr_crumb() and
 * m_add_hdr_crumb_chain() both take (mbuf, uint64_t, uint64_t); the inline
 * helpers m_add_hdr_crumb_interface_output/input in this header pass an
 * interface index in the first uint64_t and CRUMB_INTERFACE_FLAG in the
 * second.
 */
1740 __private_extern__ void m_add_crumb(struct mbuf *, uint16_t);
1741 __private_extern__ void m_add_hdr_crumb(struct mbuf *, uint64_t, uint64_t);
1742 __private_extern__ void m_add_hdr_crumb_chain(struct mbuf *, uint64_t, uint64_t);
1743
1744 static inline void
m_add_hdr_crumb_interface_output(mbuf_t m,int index,bool chain)1745 m_add_hdr_crumb_interface_output(mbuf_t m, int index, bool chain)
1746 {
1747 if (chain) {
1748 m_add_hdr_crumb_chain(m, index, CRUMB_INTERFACE_FLAG);
1749 } else {
1750 m_add_hdr_crumb(m, index, CRUMB_INTERFACE_FLAG);
1751 }
1752 }
1753
1754 static inline void
m_add_hdr_crumb_interface_input(mbuf_t m,int index,bool chain)1755 m_add_hdr_crumb_interface_input(mbuf_t m, int index, bool chain)
1756 {
1757 if (chain) {
1758 m_add_hdr_crumb_chain(m, index | CRUMB_INPUT_FLAG, CRUMB_INTERFACE_FLAG);
1759 } else {
1760 m_add_hdr_crumb(m, index | CRUMB_INPUT_FLAG, CRUMB_INTERFACE_FLAG);
1761 }
1762 }
/*
 * Kernel-internal prototype; defined outside this header.
 * NOTE(review): the meaning of the boolean_t argument is not visible here.
 */
1763 __private_extern__ void mbuf_drain(boolean_t);
1764
1765 /*
1766 * Packets may have annotations attached by affixing a list of "packet
1767 * tags" to the pkthdr structure. Packet tags are dynamically allocated
1768 * semi-opaque data structures that have a fixed header (struct m_tag)
1769 * that specifies the size of the memory block and an <id,type> pair that
1770 * identifies it. The id identifies the module and the type identifies the
1771 * type of data for that module. The id of zero is reserved for the kernel.
1772 *
1773 * By default packet tags are allocated via kalloc except on Intel that still
1774 * uses the legacy implementation of using mbufs for packet tags.
1775 *
1776 * When kalloc is used for allocation, packet tags returned by m_tag_allocate have
1777 * the default memory alignment implemented by kalloc.
1778 *
1779 * When mbufs are used for allocation, packet tags returned by m_tag_allocate have
1780 * the default memory alignment implemented by malloc.
1781 *
1782 * To reference the private data one should use a construct like:
1783 * struct m_tag *mtag = m_tag_allocate(...);
1784 * struct foo *p = (struct foo *)(mtag->m_tag_data);
1785 *
1786 * No assumption should be made about the location of the private data relative to the
1787 * 'struct m_tag'.
1788 *
1789 * When kalloc is used, packet tags that are internal to xnu use KERNEL_MODULE_TAG_ID and
1790 * they are allocated with kalloc_type using a single container data structure that has
1791 * the 'struct m_tag' followed by a data structure for the private data
1792 *
1793 * Packet tags that are allocated by KEXTs are external to xnu and the type of the private data
1794 * is unknown to xnu, so they are allocated in two chunks:
1795 * - one allocation with kalloc_type for the 'struct m_tag'
1796 * - one allocation using kheap_alloc for the private data
1797 *
1798 * Note that packet tags of type KERNEL_TAG_TYPE_DRVAUX are allocated by KEXTs with
1799 * a variable length so they are allocated in two chunks
1800 *
1801 * In all cases the 'struct m_tag' is allocated using kalloc_type to avoid type
1802 * confusion.
1803 */
1804
/*
 * Module id used by packet tags that are internal to xnu; id zero is the
 * value reserved for the kernel.
 */
1805 #define KERNEL_MODULE_TAG_ID 0
1806
/*
 * Packet tag type codes (presumably the 'type' half of the <id,type> pair
 * for tags whose id is KERNEL_MODULE_TAG_ID -- confirm against callers).
 * Values are assigned explicitly and are contiguous from 0;
 * KERNEL_TAG_TYPE_COUNT is one past the highest type and therefore equals
 * the number of entries, KERNEL_TAG_TYPE_NONE included.
 */
1807 enum {
1808 KERNEL_TAG_TYPE_NONE = 0,
1809 KERNEL_TAG_TYPE_DUMMYNET = 1,
1810 KERNEL_TAG_TYPE_IPFILT = 2,
1811 KERNEL_TAG_TYPE_ENCAP = 3,
1812 KERNEL_TAG_TYPE_INET6 = 4,
1813 KERNEL_TAG_TYPE_IPSEC = 5,
1814 KERNEL_TAG_TYPE_CFIL_UDP = 6,
1815 KERNEL_TAG_TYPE_PF_REASS = 7,
1816 KERNEL_TAG_TYPE_AQM = 8,
1817 KERNEL_TAG_TYPE_DRVAUX = 9,
1818 KERNEL_TAG_TYPE_COUNT = 10
1819 };
1820
1821 /* Packet tag routines */
/*
 * Kernel-internal prototypes for creating, freeing, linking, unlinking,
 * locating, copying and iterating over the struct m_tag entries attached
 * to an mbuf.  m_tag_create() and m_tag_locate() take a u_int32_t followed
 * by a u_int16_t (presumably the tag's <id,type> pair -- confirm at the
 * definitions).
 */
1822 __private_extern__ struct m_tag *m_tag_create(u_int32_t, u_int16_t, int, int,
1823 struct mbuf *);
1824 __private_extern__ void m_tag_free(struct m_tag *);
1825 __private_extern__ void m_tag_prepend(struct mbuf *, struct m_tag *);
1826 __private_extern__ void m_tag_unlink(struct mbuf *, struct m_tag *);
1827 __private_extern__ void m_tag_delete(struct mbuf *, struct m_tag *);
1828 __private_extern__ void m_tag_delete_chain(struct mbuf *);
1829 __private_extern__ struct m_tag *m_tag_locate(struct mbuf *, u_int32_t,
1830 u_int16_t);
1831 __private_extern__ struct m_tag *m_tag_copy(struct m_tag *, int);
1832 __private_extern__ int m_tag_copy_chain(struct mbuf *, struct mbuf *, int);
1833 __private_extern__ void m_tag_init(struct mbuf *, int);
1834 __private_extern__ struct m_tag *m_tag_first(struct mbuf *);
1835 __private_extern__ struct m_tag *m_tag_next(struct mbuf *, struct m_tag *);
1836
/*
 * Allocator / deallocator callback types for packet tags:
 *  - m_tag_kalloc_func_t receives (id, type, len, wait) and returns a
 *    struct m_tag *.
 *  - m_tag_kfree_func_t receives the struct m_tag * to release.
 */
1837 typedef struct m_tag * (*m_tag_kalloc_func_t)(u_int32_t id, u_int16_t type, uint16_t len, int wait);
1838 typedef void (*m_tag_kfree_func_t)(struct m_tag *tag);
1839
/*
 * m_register_internal_tag_type() associates a tag type and length with an
 * alloc_func / free_func pair of the callback types above.
 * NOTE(review): the meaning of the int return value is not visible here.
 */
1840 int m_register_internal_tag_type(uint16_t type, uint16_t len, m_tag_kalloc_func_t alloc_func, m_tag_kfree_func_t free_func);
1841 void m_tag_create_cookie(struct m_tag *);
1842
1843 void mbuf_tag_init(void);
1844
/*
 * Kernel-internal prototypes for the mbuf scratch area, classifier state,
 * and service / traffic class accessors.
 *
 * m_service_class_from_idx() and m_service_class_from_val() each map a
 * u_int32_t to an mbuf_svc_class_t (presumably the SCIDX_* and SCVAL_*
 * forms respectively -- confirm at the definitions).
 */
1845 __private_extern__ u_int32_t m_scratch_get(struct mbuf *, u_int8_t **);
1846
1847 __private_extern__ void m_classifier_init(struct mbuf *, uint32_t);
1848
1849 __private_extern__ int m_set_service_class(struct mbuf *, mbuf_svc_class_t);
1850 __private_extern__ mbuf_svc_class_t m_get_service_class(struct mbuf *);
1851 __private_extern__ mbuf_svc_class_t m_service_class_from_idx(u_int32_t);
1852 __private_extern__ mbuf_svc_class_t m_service_class_from_val(u_int32_t);
1853 __private_extern__ int m_set_traffic_class(struct mbuf *, mbuf_traffic_class_t);
1854 __private_extern__ mbuf_traffic_class_t m_get_traffic_class(struct mbuf *);
1855
/*
 * NOTE(review): mbuf_tag_init() is also declared a few lines earlier in
 * this header without __private_extern__; one of the two declarations is
 * redundant.
 */
1856 __private_extern__ void mbuf_tag_init(void);
1857
/*
 * ADDCARRY(_x): fold every bit above the low 16 bits of _x back into the
 * low 16 bits, repeating until nothing remains above bit 15.
 *
 * _x must be a modifiable lvalue; it is read and written several times,
 * so it must not have side effects.
 */
#define ADDCARRY(_x) do {                                               \
	while ((_x) >> 16) {                                            \
		(_x) = ((_x) & 0xffff) + ((_x) >> 16);                  \
	}                                                               \
} while (0)
1862
/*
 * Kernel-internal prototypes:
 *  - m_sum16() / m_adj_sum16() return a u_int16_t computed over an mbuf
 *    (NOTE(review): the roles of the u_int32_t arguments are not visible
 *    here -- presumably offsets/lengths/an initial sum; confirm).
 *  - mbuf_set_tx_time() stores a 64-bit tx_time for mbuf m.
 *  - m_get_rfa() returns the struct ext_ref * used by the MEXT_*() macros;
 *    m_get_ext_free() returns the m_ext_free_func_t for an mbuf.
 *  - m_do_tx_compl_callback() / m_get_tx_compl_callback() deal with
 *    mbuf_tx_compl_func callbacks.
 */
1863 __private_extern__ u_int16_t m_adj_sum16(struct mbuf *, u_int32_t,
1864 u_int32_t, u_int32_t, u_int32_t);
1865 __private_extern__ u_int16_t m_sum16(struct mbuf *, u_int32_t, u_int32_t);
1866
1867 __private_extern__ void mbuf_set_tx_time(struct mbuf *m, uint64_t tx_time);
1868
1869 __private_extern__ struct ext_ref *m_get_rfa(struct mbuf *);
1870 __private_extern__ m_ext_free_func_t m_get_ext_free(struct mbuf *);
1871
1872 __private_extern__ void m_do_tx_compl_callback(struct mbuf *, struct ifnet *);
1873 __private_extern__ mbuf_tx_compl_func m_get_tx_compl_callback(u_int32_t);
1874
1875 __END_DECLS
1876
1877 /* START - the following can be moved to uipc_mbuf.c once we get rid of CONFIG_MBUF_MCACHE */
/*
 * Buffer classes.  The enumerators are numbered consecutively from 0, so
 * MC_MAX is the number of classes.  The first four are plain buffers; the
 * MC_MBUF_* entries are an mbuf combined with a cluster of the named size
 * (see MBUF_CLASS_COMPOSITE()).
 */
1878 typedef enum {
1879 MC_MBUF = 0, /* Regular mbuf */
1880 MC_CL, /* Cluster */
1881 MC_BIGCL, /* Large (4KB) cluster */
1882 MC_16KCL, /* Jumbo (16KB) cluster */
1883 MC_MBUF_CL, /* mbuf + cluster */
1884 MC_MBUF_BIGCL, /* mbuf + large (4KB) cluster */
1885 MC_MBUF_16KCL, /* mbuf + jumbo (16KB) cluster */
1886 MC_MAX
1887 } mbuf_class_t;
1888
/*
 * Per-class bookkeeping record.  mbuf_table[] is an array of these, and
 * the m_class()/m_maxsize()/... accessor macros index into it by class.
 * The mcache pointer, slab list and composite freelist members exist only
 * when CONFIG_MBUF_MCACHE is enabled.
 */
1889 typedef struct {
1890 mbuf_class_t mtbl_class; /* class type */
1891 #if CONFIG_MBUF_MCACHE
1892 mcache_t *mtbl_cache; /* mcache for this buffer class */
1893 TAILQ_HEAD(mcl_slhead, mcl_slab) mtbl_slablist; /* slab list */
1894 mcache_obj_t *mtbl_cobjlist; /* composite objects freelist */
1895 #endif
1896 mb_class_stat_t *mtbl_stats; /* statistics fetchable via sysctl */
1897 u_int32_t mtbl_maxsize; /* maximum buffer size */
1898 int mtbl_minlimit; /* minimum allowed */
1899 int mtbl_maxlimit; /* maximum allowed */
1900 u_int32_t mtbl_wantpurge; /* purge during next reclaim */
1901 uint32_t mtbl_avgtotal; /* average total on iOS */
1902 u_int32_t mtbl_expand; /* worker should expand the class */
1903 } mbuf_table_t;
1904
1905 /*
1906 * Allocation statistics related to mbuf types (up to MT_MAX-1) are updated
1907 * atomically and stored in a per-CPU structure which is lock-free; this is
1908 * done in order to avoid writing to the global mbstat data structure which
1909 * would cause false sharing. During sysctl request for kern.ipc.mbstat,
1910 * the statistics across all CPUs will be converged into the mbstat.m_mtypes
1911 * array and returned to the application. Any updates for types greater than or
1912 * equal to MT_MAX would be done atomically to the mbstat; this slows down
1913 * performance but is okay since the kernel uses only up to MT_MAX-1 while
1914 * anything beyond that (up to type 255) is considered a corner case.
1915 */
/* Per-CPU counters, one slot per mbuf type below MT_MAX; updated by mtype_stat_add(). */
1916 typedef struct {
1917 unsigned int cpu_mtypes[MT_MAX];
1918 } mbuf_mtypes_t;
1919
/*
 * Bounds of the mbuf_class_t range:
 *  - MBUF_CLASS_MIN / MBUF_CLASS_MAX: first and last real class.
 *  - MBUF_CLASS_LAST: last non-composite class; anything greater is
 *    treated as composite by MBUF_CLASS_COMPOSITE().
 */
1920 #define MBUF_CLASS_MIN MC_MBUF
1921 #define MBUF_CLASS_MAX MC_MBUF_16KCL
1922 #define MBUF_CLASS_LAST MC_16KCL
1923
/*
 * MBUF_CLASS_COMPOSITE(c): non-zero iff class c, converted to int, lies
 * beyond MBUF_CLASS_LAST (i.e. it is one of the mbuf + cluster classes).
 */
#define MBUF_CLASS_COMPOSITE(c) \
	(MBUF_CLASS_LAST < (int)(c))
1926
/*
 * Accessors for entry c of mbuf_table[].  The first four read members of
 * the table entry itself; the last four go through its mtbl_stats pointer.
 * Each expands to an lvalue, so it can be assigned to as well as read.
 */
#define m_class(c)      mbuf_table[(c)].mtbl_class
#define m_maxsize(c)    mbuf_table[(c)].mtbl_maxsize
#define m_minlimit(c)   mbuf_table[(c)].mtbl_minlimit
#define m_maxlimit(c)   mbuf_table[(c)].mtbl_maxlimit
#define m_cname(c)      mbuf_table[(c)].mtbl_stats->mbcl_cname
#define m_size(c)       mbuf_table[(c)].mtbl_stats->mbcl_size
#define m_total(c)      mbuf_table[(c)].mtbl_stats->mbcl_total
#define m_infree(c)     mbuf_table[(c)].mtbl_stats->mbcl_infree
1935
/*
 * NELEM(a): number of elements in array a.  Only meaningful for a true
 * array object; applied to a pointer it yields an unrelated value.
 */
#define NELEM(a)                (sizeof (a) / sizeof (*(a)))
/* mbuf watchdog limit, in seconds (see trailing comment). */
1937 #define MB_WDT_MAXTIME 10 /* # of secs before watchdog panic */
1938
1939 /*
1940 * This flag is set for all mbufs that come out of and into the composite
1941 * mbuf + cluster caches, i.e. MC_MBUF_CL and MC_MBUF_BIGCL. mbufs that
1942 * are marked with such a flag have clusters attached to them, and will be
1943 * treated differently when they are freed; instead of being placed back
1944 * into the mbuf and cluster freelists, the composite mbuf + cluster objects
1945 * are placed back into the appropriate composite cache's freelist, and the
1946 * actual freeing is deferred until the composite objects are purged. At
1947 * such a time, this flag will be cleared from the mbufs and the objects
1948 * will be freed into their own separate freelists.
1949 */
1950 #define EXTF_COMPOSITE 0x1
1951
1952 /*
1953 * This flag indicates that the external cluster is read-only, i.e. it is
1954 * or was referred to by more than one mbuf. Once set, this flag is never
1955 * cleared.
1956 */
1957 #define EXTF_READONLY 0x2
1958
1959 /*
1960 * This flag indicates that the external cluster is paired with the mbuf.
1961 * Pairing implies an external free routine defined which will be invoked
1962 * when the reference count drops to the minimum at m_free time. This
1963 * flag is never cleared.
1964 */
1965 #define EXTF_PAIRED 0x4
1966
/*
 * Union of all EXTF_* bits.  MBUF_IS_COMPOSITE() and MBUF_IS_PAIRED() mask
 * MEXT_FLAGS() with it and compare against a single flag, so they match
 * only when that flag is the sole one set.
 */
1967 #define EXTF_MASK \
1968 (EXTF_COMPOSITE | EXTF_READONLY | EXTF_PAIRED)
1969
/*
 * Field accessors for the struct ext_ref returned by m_get_rfa(m):
 * minref, refcnt, prefcnt, flags, priv and paired respectively.
 * Each use calls m_get_rfa(m) again.
 */
1970 #define MEXT_MINREF(m) ((m_get_rfa(m))->minref)
1971 #define MEXT_REF(m) ((m_get_rfa(m))->refcnt)
1972 #define MEXT_PREF(m) ((m_get_rfa(m))->prefcnt)
1973 #define MEXT_FLAGS(m) ((m_get_rfa(m))->flags)
1974 #define MEXT_PRIV(m) ((m_get_rfa(m))->priv)
1975 #define MEXT_PMBUF(m) ((m_get_rfa(m))->paired)
/*
 * MBUF_IS_COMPOSITE(m): non-zero iff the external reference count of m
 * equals its minimum reference count AND, of the EXTF_* flags, exactly
 * EXTF_COMPOSITE is set.  The reference-count test is evaluated first.
 */
#define MBUF_IS_COMPOSITE(m)                                            \
	((MEXT_REF(m) == MEXT_MINREF(m)) &&                             \
	 (EXTF_COMPOSITE == (MEXT_FLAGS(m) & EXTF_MASK)))
1979
1980 /*
1981 * This macro can be used to test if the mbuf is paired to an external
1982 * cluster. The test for MEXT_PMBUF being equal to the mbuf in subject
1983 * is important, as EXTF_PAIRED alone is insufficient since it is immutable,
1984 * and thus survives calls to m_free_paired.
1985 */
/*
 * MBUF_IS_PAIRED(m): non-zero iff, in this order,
 *   1. m has M_EXT set in m_flags,
 *   2. of the EXTF_* flags exactly EXTF_PAIRED is set, and
 *   3. the paired-mbuf back pointer refers to m itself.
 * The M_EXT test comes first so the ext_ref is only consulted for mbufs
 * that carry external storage.
 */
#define MBUF_IS_PAIRED(m)                                               \
	((((m)->m_flags & M_EXT) != 0) &&                               \
	 (EXTF_PAIRED == (MEXT_FLAGS(m) & EXTF_MASK)) &&                \
	 ((m) == MEXT_PMBUF(m)))
1990
/*
 * MBUF_CL_INIT: call mext_init() for a regular cluster.  Against the
 * mext_init() prototype this passes: size = m_maxsize(MC_CL), free = NULL,
 * free_arg = NULL, min = 0, pref = 0, priv = 0, pm = NULL; m, buf, rfa,
 * ref and flag come from the caller.
 */
1991 #define MBUF_CL_INIT(m, buf, rfa, ref, flag) \
1992 mext_init(m, buf, m_maxsize(MC_CL), NULL, NULL, rfa, 0, \
1993 ref, 0, flag, 0, NULL)
1994
/*
 * MBUF_BIGCL_INIT: call mext_init() for a large cluster.  Against the
 * mext_init() prototype this passes: size = m_maxsize(MC_BIGCL),
 * free = m_bigfree, free_arg = NULL, min = 0, pref = 0, priv = 0,
 * pm = NULL; m, buf, rfa, ref and flag come from the caller.
 */
1995 #define MBUF_BIGCL_INIT(m, buf, rfa, ref, flag) \
1996 mext_init(m, buf, m_maxsize(MC_BIGCL), m_bigfree, NULL, rfa, 0, \
1997 ref, 0, flag, 0, NULL)
1998
/*
 * MBUF_16KCL_INIT: call mext_init() for a jumbo cluster.  Against the
 * mext_init() prototype this passes: size = m_maxsize(MC_16KCL),
 * free = m_16kfree, free_arg = NULL, min = 0, pref = 0, priv = 0,
 * pm = NULL; m, buf, rfa, ref and flag come from the caller.
 */
1999 #define MBUF_16KCL_INIT(m, buf, rfa, ref, flag) \
2000 mext_init(m, buf, m_maxsize(MC_16KCL), m_16kfree, NULL, rfa, 0, \
2001 ref, 0, flag, 0, NULL)
2002
/* Number of slots in the mbstat.m_mtypes[] array. */
#define MBSTAT_MTYPES_MAX \
	(sizeof (mbstat.m_mtypes) / sizeof (*mbstat.m_mtypes))
2005
/*
 * mtype_stat_add(type, n): add n to the counter for mbuf type `type`.
 *
 *  - type < MT_MAX: the counter lives in this CPU's mbuf_mtypes_t
 *    (obtained with PERCPU_GET) and is updated with a relaxed atomic add.
 *  - MT_MAX <= type < MBSTAT_MTYPES_MAX: the counter is the matching slot
 *    of the global mbstat.m_mtypes[], updated atomically through an
 *    int16_t view of the slot.
 *  - anything else: ignored.
 *
 * `type` is evaluated more than once; do not pass an expression with side
 * effects.  The body is wrapped in do { } while (0) so that the macro
 * behaves as a single statement, e.g. as the unbraced body of an if/else.
 */
#define mtype_stat_add(type, n) do {                                    \
	if ((unsigned)(type) < MT_MAX) {                                \
		mbuf_mtypes_t *mbs = PERCPU_GET(mbuf_mtypes);           \
		os_atomic_add(&mbs->cpu_mtypes[type], n, relaxed);      \
	} else if ((unsigned)(type) < (unsigned)MBSTAT_MTYPES_MAX) {    \
		os_atomic_add((int16_t *)&mbstat.m_mtypes[type], n, relaxed); \
	}                                                               \
} while (0)
2014
/*
 * Convenience wrappers around mtype_stat_add(): subtract n, add 1 and
 * subtract 1 respectively.
 */
2015 #define mtype_stat_sub(t, n) mtype_stat_add(t, -(n))
2016 #define mtype_stat_inc(t) mtype_stat_add(t, 1)
2017 #define mtype_stat_dec(t) mtype_stat_sub(t, 1)
2018 /* END - the preceding can be moved to uipc_mbuf.c once we get rid of CONFIG_MBUF_MCACHE */
2019
2020 #if CONFIG_MBUF_MCACHE
/*
 * Data shared with the mcache-based allocator; these declarations are only
 * visible when CONFIG_MBUF_MCACHE is enabled.  PERCPU_DECL declares the
 * per-CPU mbuf_mtypes_t instance that mtype_stat_add() reaches through
 * PERCPU_GET(mbuf_mtypes).
 */
2021 extern lck_mtx_t *const mbuf_mlock;
2022 extern int nclusters; /* # of clusters for non-jumbo (legacy) sizes */
2023 extern unsigned char *mbutl; /* start VA of mbuf pool */
2024 extern unsigned int mb_memory_pressure_percentage;
2025 extern struct mb_stat *mb_stat;
2026 PERCPU_DECL(mbuf_mtypes_t, mbuf_mtypes);
2027
/* Per-class table indexed by the m_class()/m_maxsize()/... accessor macros. */
2028 extern mbuf_table_t mbuf_table[];
2029
/*
 * Functions shared with the mcache-based allocator (CONFIG_MBUF_MCACHE
 * only); definitions live outside this header.
 *
 * mext_init() is the routine the MBUF_CL_INIT / MBUF_BIGCL_INIT /
 * MBUF_16KCL_INIT macros expand to.  Its parameters are, in order: the
 * mbuf, the external buffer (annotated __sized_by(size)) and its size, the
 * free routine and its argument, the ext_ref, the min / ref / pref counts,
 * the flag bits, a 32-bit priv value and the paired mbuf pm.
 */
2030 extern void mbuf_mtypes_sync(void);
2031 extern void mbuf_stat_sync(void);
2032 extern void mbuf_table_init(void);
2033 extern void m_incref(struct mbuf *m);
2034 extern uint16_t m_decref(struct mbuf *m);
2035 extern struct mbuf *m_get_common(int wait, short type, int hdr);
2036 extern int m_free_paired(struct mbuf *m);
2037 extern caddr_t m_get_ext_arg(struct mbuf *m);
2038 extern int mbuf_watchdog_defunct_iterate(proc_t p, void *arg);
2039 extern void m_set_ext(struct mbuf *m, struct ext_ref *rfa, m_ext_free_func_t ext_free,
2040 caddr_t ext_arg);
2041 extern void mext_init(struct mbuf *m, void *__sized_by(size)buf, u_int size,
2042 m_ext_free_func_t free, caddr_t free_arg, struct ext_ref *rfa,
2043 u_int16_t min, u_int16_t ref, u_int16_t pref, u_int16_t flag,
2044 u_int32_t priv, struct mbuf *pm);
2045 extern int mbuf_get_class(struct mbuf *m);
2046 extern void mbuf_init(struct mbuf *m, int pkthdr, int type);
2047 extern void mbuf_mcheck(struct mbuf *m);
2048 #endif /* CONFIG_MBUF_MCACHE */
2049
2050 #endif /* XNU_KERNEL_PRIVATE */
2051 #endif /* !_SYS_MBUF_H_ */
2052