/*
 * Copyright (c) 2016-2022 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#ifndef _SKYWALK_OS_PACKET_PRIVATE_H_
#define _SKYWALK_OS_PACKET_PRIVATE_H_

#if defined(PRIVATE) || defined(BSD_KERNEL_PRIVATE)
#include <skywalk/os_packet.h>
#include <skywalk/os_nexus_private.h>
#include <skywalk/os_channel_private.h>
#include <libkern/OSByteOrder.h>
#include <netinet/in.h>
#include <net/ethernet.h>

#if defined(BSD_KERNEL_PRIVATE)
/*
 * Flow (currently for kernel, potentially for userland one day).
 *
 * XXX: When we expose this to userland, we need to make sure to NOT
 * expose kernel pointer/address values embedded within.
 *
 * Values in flow_{l2,l3,l4} are stored in network byte order.  Pointers
 * are defined using mach_vm_address_t because it's stable across user
 * and kernel, and therefore keeps the structure size the same.
 *
 * Because this structure might be initialized on a per-packet allocation
 * basis, it as well as some of its member sub-structures are allocated
 * on a 16-bytes address boundary to allow 128-bit operations on platforms
 * that support them.
 *
 * XXX: when adding new fields, try to leverage __pad ones first.
 *
 * TODO: we should consider embedding a flow_key structure here and
 * use that to store the tuples.  That way we can leverage that for
 * flow lookups without having to copy things back-and-forth.
 */
struct __flow {
	union {
		/*
		 * The following is always zeroed out on each alloc.
		 */
		struct __flow_init {
			/*
			 * Layer 3
			 */
			struct __flow_l3 {
				union {
					/* IPv4 src/dst (network byte order) */
					struct __flow_l3_ipv4_addrs {
						struct in_addr _src;
						struct in_addr _dst;
					} _l3_ipv4;
					/* IPv6 src/dst (network byte order) */
					struct __flow_l3_ipv6_addrs {
						struct in6_addr _src;
						struct in6_addr _dst;
					} _l3_ipv6;
				};
				uint8_t _l3_ip_ver;     /* IP version */
				uint8_t _l3_proto;      /* transport protocol */
				uint8_t _l3_hlen;       /* IP header length */
				unsigned _l3_is_frag : 1;       /* pkt is fragment */
				unsigned _l3_is_first_frag : 1; /* first fragment */
				unsigned _l3_reserved_flags : 6;
				uint32_t _l3_frag_id;   /* IP fragment id */
				mach_vm_address_t _l3_ptr;      /* IP header */
			} __l3;
			/*
			 * AQM
			 */
			struct __flow_classq {
				uint32_t _fcq_hash;     /* classq-specific hash */
				uint32_t _fcq_flags;    /* classq-specific flags */
			} __classq;
			/*
			 * Misc.
			 */
			uint32_t __ulen;        /* user data length */
			uint8_t __ulp_encap;    /* e.g. IPPROTO_QUIC */
			uint8_t __pad[3];
			uint64_t __pad64[2];
			/*
			 * Flow Source.
			 */
			struct __flow_source {
				union {
					/* source identifier */
					uint64_t _fsrc_id_64[2];
					uint32_t _fsrc_id_32[4];
					uuid_t _fsrc_id;
				} __attribute__((aligned(sizeof(uint64_t))));
				flowadv_idx_t _fsrc_fidx;       /* flow adv. index */
				uint8_t _fsrc_type;     /* FLOWSRC_* mbuf.h */
				uint8_t _fsrc_pad[3];
			} __source;
			/*
			 * Policy.
			 */
			struct __flow_policy {
				uint32_t _fpc_id;       /* policy id of pkt sender */
				uint32_t _fpc_pad;
				union {
					/* process identifier */
					uint64_t _fpc_euuid_64[2];
					uint32_t _fpc_euuid_32[4];
					uuid_t _fpc_euuid;
				} __attribute__((aligned(sizeof(uint64_t))));
			} __policy;
		} flow_init;
		uint64_t flow_init_data[16];
	} __attribute((aligned(16)));
#define flow_l3                 flow_init.__l3
#define flow_classq             flow_init.__classq
#define flow_ulen               flow_init.__ulen
#define flow_ulp_encap          flow_init.__ulp_encap
#define flow_source             flow_init.__source
#define flow_policy             flow_init.__policy

#define flow_ipv4_addrs         flow_l3._l3_ipv4
#define flow_ipv4_src           flow_l3._l3_ipv4._src
#define flow_ipv4_dst           flow_l3._l3_ipv4._dst
#define flow_ipv6_addrs         flow_l3._l3_ipv6
#define flow_ipv6_src           flow_l3._l3_ipv6._src
#define flow_ipv6_dst           flow_l3._l3_ipv6._dst
#define flow_ip_ver             flow_l3._l3_ip_ver
#define flow_ip_proto           flow_l3._l3_proto
#define flow_ip_hlen            flow_l3._l3_hlen
#define flow_ip_hdr             flow_l3._l3_ptr
#define flow_ip_frag_id         flow_l3._l3_frag_id
#define flow_ip_is_frag         flow_l3._l3_is_frag
#define flow_ip_is_first_frag   flow_l3._l3_is_first_frag

#define flow_classq_hash        flow_classq._fcq_hash
#define flow_classq_flags       flow_classq._fcq_flags

#define flow_src_token          flow_source._fsrc_id_32[0]
#define flow_src_id             flow_source._fsrc_id
#define flow_src_fidx           flow_source._fsrc_fidx
#define flow_src_type           flow_source._fsrc_type

#define flow_policy_id          flow_policy._fpc_id
#define flow_policy_euuid       flow_policy._fpc_euuid

	/*
	 * Layer 4.
	 */
	union {
		struct __flow_l4 {
			union {
				struct __flow_l4_tcp {
					in_port_t _src;         /* source port */
					in_port_t _dst;         /* destination port */
					uint32_t _seq;          /* sequence number */
					uint32_t _ack;          /* ACK number */
					union {
						struct {
#if BYTE_ORDER == LITTLE_ENDIAN
							uint8_t _tcp_res:4;
							uint8_t _off:4;
#else /* BYTE_ORDER == BIG_ENDIAN */
							uint8_t _off:4;
							uint8_t _tcp_res:4;
#endif /* BYTE_ORDER == BIG_ENDIAN */
							uint8_t _flags;
							uint16_t _win;
						};
						/* off/flags/win as one word */
						uint32_t _ofw;
					};
				} _l4_tcp;
				struct __flow_l4_udp {
					in_port_t _src;         /* source port */
					in_port_t _dst;         /* destination port */
					uint32_t _ls;
				} _l4_udp;
				struct __flow_l4_esp {
					uint32_t _spi;  /* security param index */
				} _l4_esp;
			};
			uint8_t _l4_hlen;       /* L4 header length */
			uint8_t _l4_agg_fast;
			uint8_t _l4_pad[6];
			mach_vm_address_t _l4_ptr;      /* L4 header */
		} flow_l4;
		uint64_t flow_l4_data[4];
	} __attribute((aligned(sizeof(uint64_t))));
#define flow_tcp                flow_l4._l4_tcp
#define flow_tcp_src            flow_l4._l4_tcp._src
#define flow_tcp_dst            flow_l4._l4_tcp._dst
#define flow_tcp_seq            flow_l4._l4_tcp._seq
#define flow_tcp_ack            flow_l4._l4_tcp._ack
#define flow_tcp_off            flow_l4._l4_tcp._off
#define flow_tcp_flags          flow_l4._l4_tcp._flags
#define flow_tcp_win            flow_l4._l4_tcp._win
#define flow_tcp_hlen           flow_l4._l4_hlen
#define flow_tcp_hdr            flow_l4._l4_ptr
#define flow_tcp_agg_fast       flow_l4._l4_agg_fast
#define flow_udp                flow_l4._l4_udp
#define flow_udp_src            flow_l4._l4_udp._src
#define flow_udp_dst            flow_l4._l4_udp._dst
#define flow_udp_hlen           flow_l4._l4_hlen
#define flow_udp_hdr            flow_l4._l4_ptr
#define flow_esp_spi            flow_l4._l4_esp._spi
} __attribute((aligned(16)));
#endif /* BSD_KERNEL_PRIVATE */

/*
 * Maximum size of L2, L3 & L4 headers combined.
 */
#define PKT_MAX_PROTO_HEADER_SIZE       256

/* based on 2KB buflet size */
#define BUFLETS_MIN             1       /* Ethernet MTU (default) */
#define BUFLETS_9K_JUMBO        5       /* 9000 bytes MTU */
#define BUFLETS_GSO             46      /* 64KB GSO, Ethernet MTU */

/*
 * Common buflet structure shared by {__user,__kern}_buflet.
 *
 *          | boff |  doff  |  dlen  |        |        |
 *          +------+--------+--------+--------+--------+
 *      objaddr  baddr                      dlim    objlim
 */
struct __buflet {
	union {
		/* for skmem batch alloc/free */
		uint64_t __buflet_next;
		/* address of next buflet in chain */
		const mach_vm_address_t __nbft_addr;
	};
	/* buffer data address */
	const mach_vm_address_t __baddr;
	/* index of buflet object in the owning buflet region */
	const obj_idx_t __bft_idx;
	/* buffer object index in buffer region */
	const obj_idx_t __bidx;
	/* object index in buflet region of next buflet (for buflet chaining) */
	const obj_idx_t __nbft_idx;
	const uint16_t __dlim;          /* maximum length */
	uint16_t __dlen;                /* length of data in buflet */
	uint16_t __doff;                /* offset of data in buflet */
	const uint16_t __flag;          /* BUFLET_FLAG_* below */
	const uint16_t __gro_len;       /* length of each gro segment */
	/* offset of buf_addr relative to the start of the buffer object */
	const uint16_t __buf_off;
#define BUFLET_FLAG_EXTERNAL    0x0001
#define BUFLET_FLAG_LARGE_BUF   0x0002  /* buflet holds large buffer */
#define BUFLET_FLAG_RAW         0x0004  /* buflet comes from the raw bflt cache */
} __attribute((packed));

/*
 * A buflet represents the smallest buffer fragment representing
 * part of the packet.  The index refers to the position of the buflet
 * in the pool, and the data length represents the actual payload
 * size -- not the buflet size itself as it is fixed for all objects
 * in the pool.
 */
struct __user_buflet {
	/*
	 * Common area between user and kernel variants.
	 */
	struct __buflet buf_com;
#define buf_addr        buf_com.__baddr
#define buf_nbft_addr   buf_com.__nbft_addr
#define buf_idx         buf_com.__bidx
#define buf_nbft_idx    buf_com.__nbft_idx
#define buf_dlim        buf_com.__dlim
#define buf_dlen        buf_com.__dlen
#define buf_doff        buf_com.__doff
#define buf_flag        buf_com.__flag
#define buf_bft_idx_reg buf_com.__bft_idx
#define buf_grolen      buf_com.__gro_len
#define buf_boff        buf_com.__buf_off
};

#define BUFLET_HAS_LARGE_BUF(_buf)      \
	(((_buf)->buf_flag & BUFLET_FLAG_LARGE_BUF) != 0)
#define BUFLET_FROM_RAW_BFLT_CACHE(_buf)        \
	(((_buf)->buf_flag & BUFLET_FLAG_RAW) != 0)

/*
 * Setters for the const fields; __DECONST is used deliberately so that
 * only these accessors can write them after construction.
 */
#define BUF_BADDR(_buf, _addr)          \
	*__DECONST(mach_vm_address_t *, &(_buf)->buf_addr) =    \
	(mach_vm_address_t)(_addr)

#define BUF_BIDX(_buf, _idx)            \
	*__DECONST(obj_idx_t *, &(_buf)->buf_idx) = (obj_idx_t)(_idx)

#define BUF_NBFT_ADDR(_buf, _addr)      \
	*__DECONST(mach_vm_address_t *, &(_buf)->buf_nbft_addr) =       \
	(mach_vm_address_t)(_addr)

#define BUF_NBFT_IDX(_buf, _idx)        \
	*__DECONST(obj_idx_t *, &(_buf)->buf_nbft_idx) = (obj_idx_t)(_idx)

#define BUF_BFT_IDX_REG(_buf, _idx)     \
	*__DECONST(obj_idx_t *, &(_buf)->buf_bft_idx_reg) = (_idx)

/* append user buflet _ubft to the chain after _pubft */
#define UBUF_LINK(_pubft, _ubft) do {                                   \
	ASSERT((_ubft) != NULL);                                        \
	BUF_NBFT_ADDR(_pubft, _ubft);                                   \
	BUF_NBFT_IDX(_pubft, (_ubft)->buf_bft_idx_reg);                 \
} while (0)

#ifdef KERNEL
/* full buflet constructor; initializes every field including const ones */
#define BUF_CTOR(_buf, _baddr, _bidx, _dlim, _dlen, _doff, _nbaddr, _nbidx, _bflag, _boff, _grolen) do { \
	_CASSERT(sizeof ((_buf)->buf_addr) == sizeof (mach_vm_address_t)); \
	_CASSERT(sizeof ((_buf)->buf_idx) == sizeof (obj_idx_t));       \
	_CASSERT(sizeof ((_buf)->buf_dlim) == sizeof (uint16_t));       \
	_CASSERT(sizeof ((_buf)->buf_boff) == sizeof (uint16_t));       \
	_CASSERT(sizeof ((_buf)->buf_grolen) == sizeof (uint16_t));     \
	_CASSERT(sizeof ((_buf)->buf_flag) == sizeof (uint16_t));       \
	BUF_BADDR(_buf, _baddr);                                        \
	BUF_NBFT_ADDR(_buf, _nbaddr);                                   \
	BUF_BIDX(_buf, _bidx);                                          \
	BUF_NBFT_IDX(_buf, _nbidx);                                     \
	(_buf)->buf_dlen = (_dlen);                                     \
	(_buf)->buf_doff = (_doff);                                     \
	*(uint16_t *)(uintptr_t)&(_buf)->buf_dlim = (_dlim);            \
	*(uint16_t *)(uintptr_t)&(_buf)->buf_boff = (_boff);            \
	*(uint16_t *)(uintptr_t)&(_buf)->buf_grolen = (_grolen);        \
	*(uint16_t *)(uintptr_t)&(_buf)->buf_flag = (_bflag);           \
} while (0)

/* lightweight re-init of the mutable data length/offset only */
#define BUF_INIT(_buf, _dlen, _doff) do {                               \
	(_buf)->buf_dlen = (_dlen);                                     \
	(_buf)->buf_doff = (_doff);                                     \
} while (0)

#endif /* KERNEL */

#ifdef KERNEL
/* sanity check: buffer and data span lie within the owning object */
#define BUF_IN_RANGE(_buf)                                              \
	((_buf)->buf_addr >= (mach_vm_address_t)(_buf)->buf_objaddr &&  \
	((uintptr_t)(_buf)->buf_addr + (_buf)->buf_dlim) <=             \
	((uintptr_t)(_buf)->buf_objaddr + (_buf)->buf_objlim) &&        \
	((mach_vm_address_t)(_buf)->buf_objaddr + (_buf)->buf_boff == (_buf)->buf_addr) && \
	((_buf)->buf_doff + (_buf)->buf_dlen) <= (_buf)->buf_dlim &&    \
	(_buf)->buf_grolen <= (_buf)->buf_dlen)
#else /* !KERNEL */
#define BUF_IN_RANGE(_buf)                                              \
	(((_buf)->buf_doff + (_buf)->buf_dlen) <= (_buf)->buf_dlim)
#endif /* !KERNEL */

/*
 * Metadata preamble.  This structure is placed at beginning of each
 * __{user,kern}_{quantum,packet} object.  Each user metadata object has a
 * unique red zone pattern, which is an XOR of the redzone cookie and
 * offset of the metadata object in the object's region.  Due to the use
 * of tagged pointer, we need the structure size to be multiples of 16.
 * See SK_PTR_TAG() definition for details.
 */
struct __metadata_preamble {
	union {
		uint64_t _mdp_next;     /* for batch alloc/free (K) */
		uint64_t mdp_redzone;   /* red zone cookie (U) */
	};
	const obj_idx_t mdp_idx;        /* index within region (UK) */
	uint16_t mdp_type;              /* nexus_meta_type_t (UK) */
	uint16_t mdp_subtype;           /* nexus_meta_subtype_t (UK) */
};

#define METADATA_PREAMBLE_SZ    (sizeof (struct __metadata_preamble))

/* preamble sits immediately before the metadata object */
#define METADATA_PREAMBLE(_md)  \
	((struct __metadata_preamble *) \
	((mach_vm_address_t)(_md) - METADATA_PREAMBLE_SZ))

#define METADATA_IDX(_md)       \
	(METADATA_PREAMBLE(_md)->mdp_idx)

#define METADATA_TYPE(_md)      \
	(METADATA_PREAMBLE(_md)->mdp_type)

#define METADATA_SUBTYPE(_md)   \
	(METADATA_PREAMBLE(_md)->mdp_subtype)

/*
 * Common packet structure shared by {__user,__kern}_quantum.
 */
struct __quantum {
	union {
		uuid_t __uuid;          /* flow UUID */
		uint8_t __val8[16];
		uint16_t __val16[8];
		uint32_t __val32[4];
		uint64_t __val64[2];
	} __flow_id_u;
#define __q_flow_id             __flow_id_u.__uuid
#define __q_flow_id_val8        __flow_id_u.__val8
#define __q_flow_id_val16       __flow_id_u.__val16
#define __q_flow_id_val32       __flow_id_u.__val32
#define __q_flow_id_val64       __flow_id_u.__val64

	uint32_t __q_len;

	/* QoS service class, see packet_svc_class_t */
	uint32_t __q_svc_class;         /* PKT_SC_* values */

	/*
	 * See notes on _QUM_{INTERNALIZE,EXTERNALIZE}() regarding
	 * portion of this structure above __flags that gets copied.
	 * Adding more user-mutable fields after __flags would also
	 * require adjusting those macros as well.
	 */
	volatile uint16_t __q_flags;    /* QUMF_* flags */
	uint16_t __q_pad[3];
} __attribute((aligned(sizeof(uint64_t))));

/*
 * Quantum.
 *
 * This structure is aligned for efficient copy and accesses.
 * It is the user version of the __kern_quantum structure.
 *
 * XXX: Do NOT store kernel pointer/address values here.
 */
struct __user_quantum {
	/*
	 * Common area between user and kernel variants.
	 */
	struct __quantum qum_com;
#define qum_flow_id             qum_com.__q_flow_id
#define qum_flow_id_val8        qum_com.__q_flow_id_val8
#define qum_flow_id_val16       qum_com.__q_flow_id_val16
#define qum_flow_id_val32       qum_com.__q_flow_id_val32
#define qum_flow_id_val64       qum_com.__q_flow_id_val64
#define qum_len                 qum_com.__q_len
#define qum_qflags              qum_com.__q_flags
#define qum_svc_class           qum_com.__q_svc_class

	/*
	 * Userland specific.
	 */
	struct __user_buflet qum_buf[1];        /* 1 buflet */
	/*
	 * use count for packet.
	 */
	uint16_t qum_usecnt;
} __attribute((aligned(sizeof(uint64_t))));

/*
 * Valid values for (16-bit) qum_qflags.
 */
#define QUM_F_FINALIZED         0x0001  /* has been finalized */
#define QUM_F_DROPPED           0x0002  /* has been dropped */
#define QUM_F_FLOW_CLASSIFIED   0x0010  /* flow has been classified */
#ifdef KERNEL
#define QUM_F_INTERNALIZED      0x1000  /* has been internalized */
#define QUM_F_KERNEL_ONLY       0x8000  /* kernel only; no user counterpart */

/* invariant flags we want to keep */
#define QUM_F_SAVE_MASK         (QUM_F_KERNEL_ONLY)
/* kernel-only flags that are never externalized */
#define QUM_F_KERNEL_FLAGS      (QUM_F_INTERNALIZED|QUM_F_KERNEL_ONLY)
#endif /* KERNEL */

#ifdef KERNEL
/* full constructor: also stamps the region index and first buflet */
#define _KQUM_CTOR(_kqum, _flags, _len, _baddr, _bidx, _dlim, _qidx) do { \
	(_kqum)->qum_flow_id_val64[0] = 0;                              \
	(_kqum)->qum_flow_id_val64[1] = 0;                              \
	(_kqum)->qum_qflags = (_flags);                                 \
	(_kqum)->qum_len = (_len);                                      \
	_CASSERT(sizeof(METADATA_IDX(_kqum)) == sizeof(obj_idx_t));     \
	*(obj_idx_t *)(uintptr_t)&METADATA_IDX(_kqum) = (_qidx);        \
	BUF_CTOR(&(_kqum)->qum_buf[0], (_baddr), (_bidx), (_dlim), 0, 0, 0, \
	OBJ_IDX_NONE, 0, 0, 0);                                         \
} while (0)

/* lightweight re-init of an already-constructed quantum */
#define _KQUM_INIT(_kqum, _flags, _len, _qidx) do {                     \
	(_kqum)->qum_flow_id_val64[0] = 0;                              \
	(_kqum)->qum_flow_id_val64[1] = 0;                              \
	(_kqum)->qum_qflags = (_flags);                                 \
	(_kqum)->qum_len = (_len);                                      \
	BUF_INIT(&(_kqum)->qum_buf[0], 0, 0);                           \
} while (0)
#endif /* KERNEL */

/*
 * Common packet structure shared by {__user,__kern}_packet.
 */
struct __packet_com {
	/* Link layer (offset relevant to first buflet) */
	uint16_t __link_flags;          /* PKT_LINKF_* flags */

	/*
	 * Headroom/protocol header length
	 *
	 * Since the security model of the Skywalk nexus is that we don't
	 * trust packets either from above (userspace) or below
	 * (driver/firmware), the only metadata field that nexus makes use
	 * of from external is the headroom.  Based on headroom, the
	 * flowswitch starts demux routine on l2 header, if any.  The
	 * l2_len is stored in this step.  Then the flow extraction
	 * (l3+l4 flow) begins parsing from (headroom + l2_len).
	 *
	 * __headroom is the empty buffer space before any packet data,
	 * it is also the equivalent to the first header offset.
	 *
	 * __l2_len is l2 (link layer) protocol header length, if any.
	 */
	uint8_t __headroom;
	uint8_t __l2_len;

	/*
	 * Checksum offload.
	 *
	 * Partial checksum does not require any header parsing and is
	 * therefore simpler to implement both in software and hardware.
	 *
	 * On transmit, PKT_CSUMF_PARTIAL indicates that a partial one's
	 * complement checksum to be computed on the span starting from
	 * pkt_csum_tx_start_off to the end of the packet, and have the
	 * resulted checksum value written at the location specified by
	 * pkt_csum_tx_stuff_off.
	 *
	 * The PKT_CSUMF_ZERO_INVERT flag is used on transmit to indicate
	 * that the value 0xffff (negative 0 in one's complement) must be
	 * substituted for the value of 0.
	 * This is set for UDP packets,
	 * since otherwise the receiver may not validate the checksum
	 * (UDP/IPv4), or drop the packet altogether (UDP/IPv6).
	 *
	 * On receive, PKT_CSUMF_PARTIAL indicates that a partial one's
	 * complement checksum has been computed on the span beginning at
	 * pkt_csum_rx_start_off to the end of the packet, and that the
	 * computed value is now stored in pkt_csum_rx_value.
	 *
	 * All offsets are relative to the base of the first buflet.
	 */
	uint32_t __csum_flags;          /* PKT_CSUMF_* flags */
	union {
		struct {
			uint16_t __csum_start_off;      /* start offset */
			uint16_t __csum_value;          /* checksum value */
		} __csum_rx;
		struct {
			uint16_t __csum_start_off;      /* start offset */
			uint16_t __csum_stuff_off;      /* stuff offset */
		} __csum_tx;
		uint32_t __csum_data;
	};

	/* Compression generation count */
	uint32_t __comp_gencnt;

	/*
	 * Trace ID for each sampled packet.
	 * Non-zero ID indicates that the packet is being actively traced.
	 */
	packet_trace_id_t __trace_id;

	/* Aggregation type */
	uint8_t __aggr_type;            /* PKT_AGGR_* values */
	uint8_t __seg_cnt;              /* Number of LRO-packets */

	uint16_t __proto_seg_sz;        /* Protocol segment size */

	/*
	 * See notes on _PKT_{INTERNALIZE,EXTERNALIZE}() regarding portion
	 * of this structure above __p_flags that gets copied.  Adding
	 * more user-mutable fields after __p_flags would also require
	 * adjusting those macros as well.
	 */
	union {
		volatile uint32_t __flags32[2];
		volatile uint64_t __flags;      /* PKT_F_* flags */
	};
} __attribute((aligned(sizeof(uint64_t))));

struct __packet {
	union {
		uint64_t __pkt_data[4];
		struct __packet_com __pkt_com;
	};
#define __p_link_flags          __pkt_com.__link_flags
#define __p_headroom            __pkt_com.__headroom
#define __p_l2_len              __pkt_com.__l2_len
#define __p_csum_flags          __pkt_com.__csum_flags
#define __p_csum_rx             __pkt_com.__csum_rx
#define __p_csum_tx             __pkt_com.__csum_tx
#define __p_csum_data           __pkt_com.__csum_data
#define __p_comp_gencnt         __pkt_com.__comp_gencnt
#define __p_aggr_type           __pkt_com.__aggr_type
#define __p_seg_cnt             __pkt_com.__seg_cnt
#define __p_proto_seg_sz        __pkt_com.__proto_seg_sz
#define __p_trace_id            __pkt_com.__trace_id
#define __p_flags32             __pkt_com.__flags32
#define __p_flags               __pkt_com.__flags
};

/* optional packet token types */
#define PKT_OPT_TOKEN_TYPE_OPAQUE       1       /* token has opaque data */
#define PKT_OPT_TOKEN_TYPE_PACKET_ID    2       /* token has packet_id */

/* maximum token size */
#define PKT_OPT_MAX_TOKEN_SIZE          16

struct __packet_opt_com {
	union {
		uint64_t __token_data[2];
		uint8_t __token[PKT_OPT_MAX_TOKEN_SIZE];
	};
	uint64_t __expire_ts;
	uint16_t __vlan_tag;
	uint16_t __token_len;
	uint8_t __token_type;
	uint8_t __expiry_action;
	uint8_t __app_type;
	uint8_t __app_metadata;
} __attribute((aligned(sizeof(uint64_t))));

struct __packet_opt {
	union {
		uint64_t __pkt_opt_data[4];
		struct __packet_opt_com __pkt_opt_com;
	};
#define __po_token_type         __pkt_opt_com.__token_type
#define __po_token_len          __pkt_opt_com.__token_len
#define __po_vlan_tag           __pkt_opt_com.__vlan_tag
#define __po_token_data         __pkt_opt_com.__token_data
#define __po_token              __pkt_opt_com.__token
#define __po_expire_ts          __pkt_opt_com.__expire_ts
#define __po_expiry_action      __pkt_opt_com.__expiry_action
#define __po_app_type           __pkt_opt_com.__app_type
#define __po_app_metadata       __pkt_opt_com.__app_metadata
};

/*
 * Packet.
 *
 * This structure is aligned for efficient copy and accesses.
 * It is the user version of the __kern_packet structure.
 *
 * XXX: Do NOT store kernel pointer/address values here.
 */
struct __user_packet {
	struct __user_quantum pkt_qum;
/*
 * pkt_flow_id is the flow identifier used by the user space stack to
 * identify a flow.  This identifier is passed as a metadata on all packets
 * generated by the user space stack.  On RX flowswitch fills in this
 * metadata on every packet and can be used by user space stack for flow
 * classification purposes.
 */
#define pkt_flow_id             pkt_qum.qum_flow_id
#define pkt_flow_id_64          pkt_qum.qum_flow_id_val64
#define pkt_qum_qflags          pkt_qum.qum_qflags
#define pkt_length              pkt_qum.qum_len
#define pkt_qum_buf             pkt_qum.qum_buf[0]
#define pkt_svc_class           pkt_qum.qum_svc_class
#ifdef KERNEL
/*
 * pkt_flow_token is a globally unique flow identifier generated by the
 * flowswitch for each flow.  Flowswitch stamps every TX packet with this
 * identifier.  This is the flow identifier which would be visible to the
 * AQM logic and the driver.
 * pkt_flow_token uses the first 4 bytes of pkt_flow_id as the storage
 * space.  This is not a problem as pkt_flow_id is only for flowswitch
 * consumption and is not required by any other module after the
 * flowswitch TX processing stage.
 */
#define pkt_flow_token          pkt_qum.qum_flow_id_val32[0]
#endif /* KERNEL */

	/*
	 * Common area between user and kernel variants.
	 */
	struct __packet pkt_com;
#define pkt_link_flags          pkt_com.__p_link_flags
#define pkt_headroom            pkt_com.__p_headroom
#define pkt_l2_len              pkt_com.__p_l2_len
#define pkt_csum_flags          pkt_com.__p_csum_flags
#define pkt_csum_rx_start_off   pkt_com.__p_csum_rx.__csum_start_off
#define pkt_csum_rx_value       pkt_com.__p_csum_rx.__csum_value
#define pkt_csum_tx_start_off   pkt_com.__p_csum_tx.__csum_start_off
#define pkt_csum_tx_stuff_off   pkt_com.__p_csum_tx.__csum_stuff_off
#define pkt_csum_data           pkt_com.__p_csum_data
#define pkt_comp_gencnt         pkt_com.__p_comp_gencnt
#define pkt_aggr_type           pkt_com.__p_aggr_type
#define pkt_seg_cnt             pkt_com.__p_seg_cnt
#define pkt_proto_seg_sz        pkt_com.__p_proto_seg_sz
#define pkt_trace_id            pkt_com.__p_trace_id
/* pkt_pflags32 selects the 32-bit half holding the low bits of __p_flags */
#if BYTE_ORDER == LITTLE_ENDIAN
#define pkt_pflags32            pkt_com.__p_flags32[0]
#else /* BYTE_ORDER != LITTLE_ENDIAN */
#define pkt_pflags32            pkt_com.__p_flags32[1]
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
#define pkt_pflags              pkt_com.__p_flags

	/*
	 * Optional common metadata.
	 */
	struct __packet_opt pkt_com_opt;

	/*
	 * Userland specific.
	 */

	/*
	 * pkt_{bufs,max} aren't part of the common area, on purpose,
	 * since we selectively update them on internalize/externalize.
	 */
	const uint16_t pkt_bufs_max;    /* maximum size of buflet chain */
	const uint16_t pkt_bufs_cnt;    /* buflet chain size */
} __attribute((aligned(sizeof(uint64_t))));

/* the size of __user_packet structure for n total buflets */
#define _USER_PACKET_SIZE(n)    sizeof(struct __user_packet)

/*
 * Valid values for pkt_link_flags.
 */
#define PKT_LINKF_BCAST         0x0001  /* send/received as link-level bcast */
#define PKT_LINKF_MCAST         0x0002  /* send/received as link-level mcast */
#define PKT_LINKF_ETHFCS        0x0004  /* has Ethernet FCS */

/*
 * XXX IMPORTANT - READ THIS XXX
 *
 * Valid values for (64-bit) pkt_pflags.
 *
 * The lower 32-bit values are equivalent to PKTF_* flags used by mbufs,
 * hence the unused values are reserved.  Do not define any of these
 * values unless they correspond to PKTF_* flags.  Make sure to do the
 * following when adding a value in the lower 32-bit range:
 *
 * a. If the flag is kernel-only, prefix it with 2 underscore characters,
 *    then add a PKT_F_* alias under the KERNEL block conditional.  This
 *    will help ensure that the libsyscall code doesn't mistakenly use it.
 *
 * b. In pp_init(), add compile-time assertion to ensure that the PKT_F_*
 *    value matches the corresponding PKTF_* as defined in <sys/mbuf.h>.
 *
 * c. Add the new flag to PKT_F_USER_MASK depending on whether it's allowed
 *    to be used by userland.  Flags not added to this mask will only be
 *    used by the kernel.  We only internalize and externalize flags listed
 *    in PKT_F_USER_MASK.
 *
 * d. Add the new flag to PKT_F_COMMON_MASK.
 *
 * When adding an upper 32-bit value, ensure (a) and (c) above are done.
 *
 * Legend:
 *
 * (K)        - Kernel-only
 * (U+K)      - User and kernel
 * (reserved) - Only to be used for mapping with mbuf PKTF_* flags
 */
#define __PKT_F_FLOW_ID         0x0000000000000001ULL /* (K) */
#define __PKT_F_FLOW_ADV        0x0000000000000002ULL /* (K) */
/*                              0x0000000000000004ULL (reserved) */
/*                              0x0000000000000008ULL (reserved) */
/*                              0x0000000000000010ULL (reserved) */
/*                              0x0000000000000020ULL (reserved) */
/*                              0x0000000000000040ULL (reserved) */
/*                              0x0000000000000080ULL (reserved) */
/*                              0x0000000000000100ULL (reserved) */
/*                              0x0000000000000200ULL (reserved) */
#define PKT_F_WAKE_PKT          0x0000000000000400ULL /* (U+K) */
/*                              0x0000000000000800ULL (reserved) */
/*                              0x0000000000001000ULL (reserved) */
/*                              0x0000000000002000ULL (reserved) */
/*                              0x0000000000004000ULL (reserved) */
#define PKT_F_BACKGROUND        0x0000000000008000ULL /* (U+K) */
/*                              0x0000000000010000ULL (reserved) */
/*                              0x0000000000020000ULL (reserved) */
#define PKT_F_KEEPALIVE         0x0000000000040000ULL /* (U+K) */
#define PKT_F_REALTIME          0x0000000000080000ULL /* (U+K) */
/*                              0x0000000000100000ULL (reserved) */
#define PKT_F_REXMT             0x0000000000200000ULL /* (U+K) */
/*                              0x0000000000400000ULL (reserved) */
#define __PKT_F_TX_COMPL_TS_REQ 0x0000000000800000ULL /* (K) */
#define __PKT_F_TS_VALID        0x0000000001000000ULL /* (K) */
/*                              0x0000000002000000ULL (reserved) */
#define __PKT_F_NEW_FLOW        0x0000000004000000ULL /* (K) */
#define __PKT_F_START_SEQ       0x0000000008000000ULL /* (K) */
#define PKT_F_LAST_PKT          0x0000000010000000ULL /* (U+K) */
/*                              0x0000000020000000ULL (reserved) */
/*                              0x0000000040000000ULL (reserved) */
/*                              0x0000000080000000ULL (reserved) */
/* --------------------- upper 32-bit below */
#define PKT_F_OPT_GROUP_START   0x0000000100000000ULL /* (U+K) */
#define PKT_F_OPT_GROUP_END     0x0000000200000000ULL /* (U+K) */
#define PKT_F_OPT_EXPIRE_TS     0x0000000400000000ULL /* (U+K) */
#define PKT_F_OPT_TOKEN         0x0000000800000000ULL /* (U+K) */
#define __PKT_F_FLOW_DATA       0x0000001000000000ULL /* (K) */
#define __PKT_F_TX_COMPL_DATA   0x0000002000000000ULL /* (K) */
#define __PKT_F_MBUF_DATA       0x0000004000000000ULL /* (K) */
#define PKT_F_TRUNCATED         0x0000008000000000ULL /* (U+K) */
#define __PKT_F_PKT_DATA        0x0000010000000000ULL /* (K) */
#define PKT_F_PROMISC           0x0000020000000000ULL /* (U+K) */
#define PKT_F_OPT_VLTAG         0x0000040000000000ULL /* (U+K) */
#define PKT_F_OPT_VLTAG_IN_PKT  0x0000080000000000ULL /* (U+K) */
#define __PKT_F_TX_PORT_DATA    0x0000100000000000ULL /* (K) */
#define PKT_F_OPT_EXP_ACTION    0x0000200000000000ULL /* (U+K) */
#define PKT_F_OPT_APP_METADATA  0x0000400000000000ULL /* (U+K) */
#define PKT_F_L4S               0x0000800000000000ULL /* (U+K) */
/*                              0x0001000000000000ULL */
/*                              0x0002000000000000ULL */
/*                              0x0004000000000000ULL */
/*                              0x0008000000000000ULL */
/*                              0x0010000000000000ULL */
/*                              0x0020000000000000ULL */
/*                              0x0040000000000000ULL */
/*                              0x0080000000000000ULL */
#define __PKT_F_OPT_ALLOC       0x0100000000000000ULL /* (K) */
#define __PKT_F_FLOW_ALLOC      0x0200000000000000ULL /* (K) */
#define __PKT_F_TX_COMPL_ALLOC  0x0400000000000000ULL /* (K) */
/*                              0x0800000000000000ULL */
/*                              0x1000000000000000ULL */
/*                              0x2000000000000000ULL */
/*                              0x4000000000000000ULL */
/*                              0x8000000000000000ULL */

/*
 * Packet option flags.
 */
#define PKT_F_OPT_DATA                                          \
	(PKT_F_OPT_GROUP_START | PKT_F_OPT_GROUP_END |          \
	PKT_F_OPT_EXPIRE_TS | PKT_F_OPT_TOKEN |                 \
	PKT_F_OPT_VLTAG | PKT_F_OPT_VLTAG_IN_PKT | PKT_F_OPT_EXP_ACTION | \
	PKT_F_OPT_APP_METADATA)

#ifdef KERNEL
/*
 * Flags exposed to user (and kernel).  See notes above.
 */
#define PKT_F_USER_MASK                                         \
	(PKT_F_BACKGROUND | PKT_F_REALTIME | PKT_F_REXMT |      \
	PKT_F_LAST_PKT | PKT_F_OPT_DATA | PKT_F_PROMISC |       \
	PKT_F_TRUNCATED | PKT_F_WAKE_PKT | PKT_F_L4S)

/*
 * Aliases for kernel-only flags.  See notes above.  The ones marked
 * with (common) have corresponding PKTF_* definitions and are also
 * included in PKT_F_COMMON_MASK below.
 */
#define PKT_F_FLOW_ID           __PKT_F_FLOW_ID         /* (common) */
#define PKT_F_FLOW_ADV          __PKT_F_FLOW_ADV        /* (common) */
#define PKT_F_TX_COMPL_TS_REQ   __PKT_F_TX_COMPL_TS_REQ /* (common) */
#define PKT_F_TS_VALID          __PKT_F_TS_VALID        /* (common) */
#define PKT_F_NEW_FLOW          __PKT_F_NEW_FLOW        /* (common) */
#define PKT_F_START_SEQ         __PKT_F_START_SEQ       /* (common) */
#define PKT_F_FLOW_DATA         __PKT_F_FLOW_DATA
#define PKT_F_TX_COMPL_DATA     __PKT_F_TX_COMPL_DATA
#define PKT_F_MBUF_DATA         __PKT_F_MBUF_DATA
#define PKT_F_PKT_DATA          __PKT_F_PKT_DATA
#define PKT_F_OPT_ALLOC         __PKT_F_OPT_ALLOC
#define PKT_F_FLOW_ALLOC        __PKT_F_FLOW_ALLOC
#define PKT_F_TX_COMPL_ALLOC    __PKT_F_TX_COMPL_ALLOC
#define PKT_F_TX_PORT_DATA      __PKT_F_TX_PORT_DATA

/*
 * Flags related to mbuf attached to the packet.
 */
#define PKT_F_MBUF_MASK (PKT_F_MBUF_DATA | PKT_F_TRUNCATED)

/*
 * Flags related to packet attached to the packet.
 */
#define PKT_F_PKT_MASK  (PKT_F_PKT_DATA | PKT_F_TRUNCATED)

/*
 * Invariant flags kept during _PKT_COPY().  At the moment we keep
 * all except those related to the attached mbuf.
 */
#define PKT_F_COPY_MASK (~(PKT_F_MBUF_MASK | PKT_F_PKT_MASK))

/*
 * Lower 32-bit flags common to mbuf and __kern_packet.  See notes above.
 * DO NOT add flags to this mask unless they have equivalent PKTF_* flags
 * defined in <sys/mbuf.h>
 */
#define PKT_F_COMMON_MASK                                       \
	(PKT_F_BACKGROUND | PKT_F_REALTIME | PKT_F_REXMT |      \
	PKT_F_LAST_PKT | PKT_F_FLOW_ID | PKT_F_FLOW_ADV |       \
	PKT_F_TX_COMPL_TS_REQ | PKT_F_TS_VALID | PKT_F_NEW_FLOW | \
	PKT_F_START_SEQ | PKT_F_KEEPALIVE | PKT_F_WAKE_PKT)

/*
 * Flags retained across alloc/free.
 */
#define PKT_F_INIT_MASK                                         \
	(PKT_F_OPT_ALLOC | PKT_F_FLOW_ALLOC | PKT_F_TX_COMPL_ALLOC)
#endif /* KERNEL */

/*
 * 64-bit tagged pointer (limit tag to least significant byte).
 * We use 2 bits to encode type, and another 2 bits for subtype:
 * bits [1:0] hold the type, bits [3:2] hold the subtype, and the
 * remaining bits hold the (16-byte aligned) object address.
 */
#define SK_PTR_TYPE_MASK        ((uint64_t)0x3)         /* 00 11 */
#define SK_PTR_SUBTYPE_MASK     ((uint64_t)0xc)         /* 11 00 */
#define SK_PTR_TAG_MASK         ((uint64_t)0xf)         /* 11 11 */

#define SK_PTR_TAG(_p)          ((uint64_t)(_p) & SK_PTR_TAG_MASK)
#define SK_PTR_ADDR_MASK        (~SK_PTR_TAG_MASK)

#define SK_PTR_TYPE(_p)         ((uint64_t)(_p) & SK_PTR_TYPE_MASK)
#define SK_PTR_TYPE_ENC(_t)     ((uint64_t)(_t) & SK_PTR_TYPE_MASK)

#define SK_PTR_SUBTYPE(_p)      (((uint64_t)(_p) & SK_PTR_SUBTYPE_MASK) >> 2)
#define SK_PTR_SUBTYPE_ENC(_s)  (((uint64_t)(_s) << 2) & SK_PTR_SUBTYPE_MASK)

#define SK_PTR_ADDR(_p)         ((uint64_t)(_p) & SK_PTR_ADDR_MASK)
#define SK_PTR_ADDR_ENC(_p)     ((uint64_t)(_p) & SK_PTR_ADDR_MASK)

/* compose a tagged pointer from address _p, type _t and subtype _s */
#define SK_PTR_ENCODE(_p, _t, _s)       \
	(SK_PTR_ADDR_ENC(_p) | SK_PTR_TYPE_ENC(_t) | SK_PTR_SUBTYPE_ENC(_s))

#define SK_PTR_ADDR_UQUM(_ph)   ((struct __user_quantum *)SK_PTR_ADDR(_ph))
#define SK_PTR_ADDR_UPKT(_ph)   ((struct __user_packet *)SK_PTR_ADDR(_ph))

#ifdef KERNEL
__BEGIN_DECLS
/*
 * Packets.
 */
extern struct mbuf *kern_packet_get_mbuf(const kern_packet_t);
__END_DECLS
#else /* !KERNEL */
#if defined(LIBSYSCALL_INTERFACE)
__BEGIN_DECLS
extern void pkt_subtype_assert_fail(const packet_t, uint64_t, uint64_t);
extern void pkt_type_assert_fail(const packet_t, uint64_t);
__END_DECLS
#endif /* LIBSYSCALL_INTERFACE */
#endif /* !KERNEL */
#if defined(LIBSYSCALL_INTERFACE) || defined(BSD_KERNEL_PRIVATE)
#include <skywalk/packet_common.h>
#endif /* LIBSYSCALL_INTERFACE || BSD_KERNEL_PRIVATE */
#endif /* PRIVATE || BSD_KERNEL_PRIVATE */
#endif /* !_SKYWALK_OS_PACKET_PRIVATE_H_ */