1 /*
2 * Copyright (c) 2016-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #ifndef _SKYWALK_PACKET_PACKETVAR_H_
30 #define _SKYWALK_PACKET_PACKETVAR_H_
31
32 #ifdef BSD_KERNEL_PRIVATE
33 #include <skywalk/core/skywalk_var.h>
34 #include <skywalk/os_packet_private.h>
35
36 /*
37 * Kernel variant of __user_buflet.
38 *
39 * The main difference here is the support for shared buffers, where
40 * multiple buflets may point to the same buffer object at different
41 * data span within it, each holding a reference to the buffer object,
42 * i.e. the "use" count. The buf_addr therefore points to the beginning
43 * of the data span; the buf_len describes the length of the span; and
44 * the buf_doff describes the offset relative to the beginning of the
45 * span as noted by buf_addr. The buffer object is stored in buf_objaddr.
46 */
struct __kern_buflet {
	/*
	 * Common area between user and kernel variants.
	 * Must come first, so a kernel buflet can be viewed through
	 * the generic __buflet accessors.
	 */
	struct __buflet buf_com;
	/*
	 * Kernel specific.
	 */
	/*
	 * Buffer control of the buffer object.  Holds the shared
	 * buffer's "use" count (see KBUF_CTOR/KBUF_DTOR) and the
	 * backing object's address/limit, exposed via the accessor
	 * macros below.  NULL for an unused buflet.
	 */
	const struct skmem_bufctl *buf_ctl;

#define buf_objaddr buf_ctl->bc_addr
#define buf_objlim buf_ctl->bc_lim
} __attribute((packed));
61
struct __kern_buflet_ext {
	/*
	 * This is an overlay structure on __kern_buflet.
	 * Must come first; code freely casts between the two types
	 * (see KBUF_EXT_INIT and _KBUF_COPY).
	 */
	struct __kern_buflet kbe_overlay;
	/*
	 * extended variant specific.
	 */
	/* mirrored user buflet (read-only view for externalize) */
	struct __user_buflet const *kbe_buf_user;

	/* buflet user packet pool hash bucket linkage */
	SLIST_ENTRY(__kern_buflet_ext) kbe_buf_upp_link;

	/* pid of the process using the buflet; (pid_t)-1 when unused */
	pid_t kbe_buf_pid;
} __attribute((packed));
79
/*
 * KBUF_CTOR: one-time constructor for a kernel buflet.
 *
 * _kbuf:    kernel buflet being constructed
 * _baddr:   buffer address the buflet spans
 * _bidxreg: object index of the buffer within its region
 * _bc:      backing skmem_bufctl (may be NULL for unused buflets)
 * _pp:      owning packet pool, used to pick the data limit
 * _large:   nonzero to size the buflet for the pool's large buffers
 *
 * Takes a "use" count on _bc when one is supplied.
 */
#define KBUF_CTOR(_kbuf, _baddr, _bidxreg, _bc, _pp, _large) do {	\
	_CASSERT(sizeof ((_kbuf)->buf_addr) == sizeof (mach_vm_address_t));\
	/* kernel variant (deconst) */ \
	BUF_CTOR(_kbuf, _baddr, _bidxreg, (_large) ? PP_BUF_SIZE_LARGE(_pp) :\
	    PP_BUF_SIZE_DEF(_pp), 0, 0, (_kbuf)->buf_nbft_addr, \
	    (_kbuf)->buf_nbft_idx, (_kbuf)->buf_flag); \
	*(struct skmem_bufctl **)(uintptr_t)&(_kbuf)->buf_ctl = (_bc); \
	/* this may be called to initialize unused buflets */ \
	if (__probable((_bc) != NULL)) { \
		skmem_bufctl_use(_bc); \
	} \
	/* no need to construct user variant as it is done in externalize */ \
} while (0)
93
/*
 * KBUF_EXT_CTOR: one-time constructor for an external (user packet
 * pool) buflet.  Marks the buflet BUFLET_FLAG_EXTERNAL (and large if
 * requested), records the buflet region index and the mirrored user
 * buflet, then finishes construction via KBUF_CTOR.
 */
#define KBUF_EXT_CTOR(_kbuf, _ubuf, _baddr, _bidxreg, _bc,              \
    _bft_idx_reg, _pp, _large) do {                                     \
	ASSERT(_bft_idx_reg != OBJ_IDX_NONE); \
	_CASSERT(sizeof((_kbuf)->buf_flag) == sizeof(uint16_t)); \
	/* we don't set buf_nbft_addr here as during construction it */ \
	/* is used by skmem batch alloc logic */ \
	*__DECONST(uint16_t *, &(_kbuf)->buf_flag) = BUFLET_FLAG_EXTERNAL;\
	if (_large) { \
		*__DECONST(uint16_t *, &(_kbuf)->buf_flag) |= \
		    BUFLET_FLAG_LARGE_BUF; \
	} \
	BUF_NBFT_IDX(_kbuf, OBJ_IDX_NONE); \
	BUF_BFT_IDX_REG(_kbuf, _bft_idx_reg); \
	*__DECONST(struct __user_buflet **, \
	    &((struct __kern_buflet_ext *)(_kbuf))->kbe_buf_user) = (_ubuf);\
	KBUF_CTOR(_kbuf, _baddr, _bidxreg, _bc, _pp, _large); \
} while (0)
111
/*
 * KBUF_INIT: per-use (re)initialization of an already-constructed
 * kernel buflet.  Resets the data span (dlen/doff) while the buffer
 * binding (buf_ctl, buf_addr, buf_dlim) remains intact.
 */
#define KBUF_INIT(_kbuf) do { \
	ASSERT((_kbuf)->buf_ctl != NULL); \
	ASSERT((_kbuf)->buf_addr != 0); \
	ASSERT((_kbuf)->buf_dlim != 0); \
	/* kernel variant (deconst) */ \
	BUF_INIT(_kbuf, 0, 0); \
} while (0)

/*
 * KBUF_EXT_INIT: per-use (re)initialization of an external buflet.
 * Re-derives the buffer address and data limit from the buffer
 * control and pool, clears the next-buflet linkage and data span,
 * and resets the user packet pool bookkeeping fields.
 */
#define KBUF_EXT_INIT(_kbuf, _pp) do { \
	ASSERT((_kbuf)->buf_ctl != NULL); \
	ASSERT((_kbuf)->buf_flag & BUFLET_FLAG_EXTERNAL); \
	ASSERT((_kbuf)->buf_bft_idx_reg != OBJ_IDX_NONE); \
	BUF_BADDR(_kbuf, (_kbuf)->buf_ctl->bc_addr); \
	BUF_NBFT_ADDR(_kbuf, 0); \
	BUF_NBFT_IDX(_kbuf, OBJ_IDX_NONE); \
	*__DECONST(uint32_t *, &(_kbuf)->buf_dlim) = \
	    BUFLET_HAS_LARGE_BUF(_kbuf) ? PP_BUF_SIZE_LARGE((_pp)) : \
	    PP_BUF_SIZE_DEF((_pp)); \
	(_kbuf)->buf_dlen = 0; \
	(_kbuf)->buf_doff = 0; \
	((struct __kern_buflet_ext *__unsafe_indexable)(_kbuf))->kbe_buf_pid = (pid_t)-1; \
	((struct __kern_buflet_ext *__unsafe_indexable)(_kbuf))->kbe_buf_upp_link.sle_next = NULL;\
} while (0)

/*
 * initialize struct __user_buflet from struct __kern_buflet.
 * Note the user copy never carries the kernel buffer address
 * (0 is passed for the address argument).
 */
#define UBUF_INIT(_kbuf, _ubuf) do { \
	BUF_CTOR(_ubuf, 0, (_kbuf)->buf_idx, (_kbuf)->buf_dlim, \
	    (_kbuf)->buf_dlen, (_kbuf)->buf_doff, (_kbuf)->buf_nbft_addr,\
	    (_kbuf)->buf_nbft_idx, (_kbuf)->buf_flag); \
	BUF_BFT_IDX_REG(_ubuf, (_kbuf)->buf_bft_idx_reg); \
} while (0)
143
/*
 * KBUF_EXTERNALIZE: populate the mirrored user buflet from the kernel
 * buflet prior to handing the slot to user space.
 */
#define KBUF_EXTERNALIZE(_kbuf, _ubuf, _pp) do { \
	/* \
	 * The data limit must match the pool's buffer size for this \
	 * buflet's buffer class.  The conditional must be \
	 * parenthesized: "==" binds tighter than "?:", so without \
	 * the parentheses the assertion would reduce to a nonzero \
	 * pool size constant and always pass. \
	 */ \
	ASSERT((_kbuf)->buf_dlim == (BUFLET_HAS_LARGE_BUF(_kbuf) ? \
	    PP_BUF_SIZE_LARGE((_pp)) : PP_BUF_SIZE_DEF((_pp)))); \
	ASSERT((_kbuf)->buf_addr != 0); \
	/* For now, user-facing pool does not support shared */ \
	/* buffer, since otherwise the ubuf and kbuf buffer */ \
	/* indices would not match. Assert this is the case.*/ \
	ASSERT((_kbuf)->buf_addr == (mach_vm_address_t)(_kbuf)->buf_objaddr);\
	/* Initialize user buflet metadata from kernel buflet */ \
	UBUF_INIT(_kbuf, _ubuf); \
} while (0)
155
/*
 * KBUF_LINK: chain buflet _kbuf after _pkbuf.  The parent must not
 * already have a next buflet; linkage is recorded both as an address
 * and as the region index of the next buflet.
 */
#define KBUF_LINK(_pkbuf, _kbuf) do { \
	ASSERT(__DECONST(void *, (_pkbuf)->buf_nbft_addr) == NULL); \
	ASSERT(__DECONST(obj_idx_t, (_pkbuf)->buf_nbft_idx) == OBJ_IDX_NONE); \
	ASSERT((_kbuf) != NULL); \
	ASSERT((_kbuf)->buf_bft_idx_reg != OBJ_IDX_NONE); \
	BUF_NBFT_ADDR(_pkbuf, _kbuf); \
	BUF_NBFT_IDX(_pkbuf, (_kbuf)->buf_bft_idx_reg); \
} while (0)

/*
 * KBUF_DTOR: destructor for a kernel buflet.  Drops the "use" count
 * on the backing buffer (returning the resulting count in _usecnt)
 * and severs the buflet's buffer binding.
 */
#define KBUF_DTOR(_kbuf, _usecnt) do { \
	if (__probable((_kbuf)->buf_ctl != NULL)) { \
		(_usecnt) = skmem_bufctl_unuse( \
		    __DECONST(struct skmem_bufctl *, (_kbuf)->buf_ctl));\
		*(struct skmem_bufctl **) \
		    (uintptr_t)&(_kbuf)->buf_ctl = NULL; \
	} \
	BUF_BADDR(_kbuf, 0); \
	BUF_BIDX(_kbuf, OBJ_IDX_NONE); \
} while (0)
175
/*
 * Copy kernel buflet (and add reference count to buffer).
 *
 * The 50-byte __kern_buflet is copied as 40 bytes + one 64-bit word
 * (bytes 40-47) + one 16-bit word (bytes 48-49).  The source must
 * not be chained, and the destination must not be an external
 * buflet, since the copy clears BUFLET_FLAG_EXTERNAL afterwards.
 */
#define _KBUF_COPY(_skb, _dkb) do { \
	ASSERT((_skb)->buf_nbft_addr == 0); \
	ASSERT((_skb)->buf_nbft_idx == OBJ_IDX_NONE); \
	ASSERT(!((_dkb)->buf_flag & BUFLET_FLAG_EXTERNAL)); \
	_CASSERT(sizeof(struct __kern_buflet) == 50); \
	/* copy everything in the kernel buflet */ \
	sk_copy64_40((uint64_t *)(void *)(_skb), (uint64_t *)(void *)(_dkb));\
	((uint64_t *)(void *)(_dkb))[5] = ((uint64_t *)(void *)(_skb))[5]; \
	((uint16_t *)(void *)(_dkb))[24] = ((uint16_t *)(void *)(_skb))[24]; \
	ASSERT((_dkb)->buf_ctl == (_skb)->buf_ctl); \
	_CASSERT(sizeof((_dkb)->buf_flag) == sizeof(uint16_t)); \
	*__DECONST(uint16_t *, &(_dkb)->buf_flag) &= ~BUFLET_FLAG_EXTERNAL;\
	if (__probable((_dkb)->buf_ctl != NULL)) { \
		skmem_bufctl_use(__DECONST(struct skmem_bufctl *, \
		    (_dkb)->buf_ctl)); \
	} \
} while (0)
196
/*
 * Kernel variant of __user_quantum.
 */
struct __kern_quantum {
	/*
	 * Common area between user and kernel variants.
	 * Must come first (overlay with __user_quantum).
	 */
	struct __quantum qum_com;

	/*
	 * Kernel specific.
	 */
	/* user packet pool hash bucket linkage */
	SLIST_ENTRY(__kern_quantum) qum_upp_link;
	/* owning packet buffer pool */
	const struct kern_pbufpool *qum_pp;
	/* mirrored user metadata, NULL for kernel-only pools */
	const struct __user_quantum *qum_user;
	/* slot descriptor this quantum is attached to, if any */
	const struct __kern_slot_desc *qum_ksd;
	struct __kern_buflet qum_buf[1]; /* 1 buflet */
	/* pid of the process using the quantum; (pid_t)-1 when unused */
	pid_t qum_pid;
} __attribute((aligned(sizeof(uint64_t))));
216
/*
 * KQUM_CTOR: one-time constructor for a kernel quantum.  Records the
 * metadata type/subtype from the pool, the owning pool, the mirrored
 * user quantum (NULL for kernel-only pools), and the metadata index.
 */
#define KQUM_CTOR(_kqum, _midx, _uqum, _pp, _qflags) do { \
	ASSERT((uintptr_t)(_kqum) != (uintptr_t)(_uqum)); \
	_CASSERT(sizeof(METADATA_IDX(_kqum)) == sizeof(obj_idx_t)); \
	/* kernel variant (deconst) */ \
	_KQUM_CTOR(_kqum, (PP_KERNEL_ONLY(_pp) ? \
	    QUM_F_KERNEL_ONLY : 0) | _qflags, 0, 0, OBJ_IDX_NONE, \
	    PP_BUF_SIZE_DEF((_pp)), _midx); \
	_CASSERT(NEXUS_META_TYPE_MAX <= UINT16_MAX); \
	METADATA_TYPE(_kqum) = (uint16_t)(_pp)->pp_md_type; \
	_CASSERT(NEXUS_META_SUBTYPE_MAX <= UINT16_MAX); \
	METADATA_SUBTYPE(_kqum) = (uint16_t)(_pp)->pp_md_subtype; \
	*(struct kern_pbufpool **)(uintptr_t)&(_kqum)->qum_pp = (_pp); \
	*(struct __user_quantum **)(uintptr_t)&(_kqum)->qum_user = (_uqum); \
	*(obj_idx_t *)(uintptr_t)&METADATA_IDX(_kqum) = (_midx); \
	(_kqum)->qum_pid = (pid_t)-1; \
	*(struct __kern_slot_desc **)(uintptr_t)&(_kqum)->qum_ksd = NULL;\
	/* no need to construct user variant as it is done in externalize */ \
} while (0)

/*
 * KQUM_INIT: per-use (re)initialization of a constructed kernel
 * quantum.  Must not be attached to a slot or owned by a process.
 */
#define KQUM_INIT(_kqum, _flags) do { \
	ASSERT((_kqum)->qum_ksd == NULL); \
	ASSERT((_kqum)->qum_pid == (pid_t)-1); \
	/* kernel variant (deconst) */ \
	_KQUM_INIT(_kqum, (PP_KERNEL_ONLY((_kqum)->qum_pp) ? \
	    QUM_F_KERNEL_ONLY : 0) | _flags, 0, METADATA_IDX(_kqum)); \
	/* no need to initialize user variant as it is done in externalize */ \
} while (0)
244
245 __attribute__((always_inline))
246 inline boolean_t
_UUID_MATCH(uuid_t u1,uuid_t u2)247 _UUID_MATCH(uuid_t u1, uuid_t u2)
248 {
249 uint64_t *a = (uint64_t *)(void *) u1;
250 uint64_t *b = (uint64_t *)(void *) u2;
251 bool first_same = (a[0] == b[0]);
252 bool second_same = (a[1] == b[1]);
253
254 return first_same && second_same;
255 }
256
/* copy a uuid_t as two 64-bit words */
#define _UUID_COPY(_dst, _src) do { \
	_CASSERT(sizeof (uuid_t) == 16); \
	sk_copy64_16((uint64_t *)(void *)_src, (uint64_t *)(void *)_dst); \
} while (0)

/* zero a uuid_t as two 64-bit words */
#define _UUID_CLEAR(_u) do { \
	uint64_t *__dst = (uint64_t *)(void *)(_u); \
	_CASSERT(sizeof (uuid_t) == 16); \
	*(__dst++) = 0; /* qw[0] */ \
	*(__dst) = 0; /* qw[1] */ \
} while (0)
268
/*
 * _QUM_COPY only copies the user metadata portion of the quantum;
 * at the moment this is everything from the beginning down to __q_flags,
 * but no more. It preserves the destination's QUM_F_SAVE_MASK bits.
 *
 * NOTE: this needs to be adjusted if more user-mutable field is added
 * after __q_flags.
 */
#define _QUM_COPY(_skq, _dkq) do { \
	volatile uint16_t _sf = ((_dkq)->qum_qflags & QUM_F_SAVE_MASK); \
	_CASSERT(sizeof (_sf) == sizeof ((_dkq)->qum_qflags)); \
	_CASSERT(offsetof(struct __quantum, __q_flags) == 24); \
	/* copy everything above (and excluding) __q_flags */ \
	sk_copy64_24((uint64_t *)(void *)&(_skq)->qum_com, \
	    (uint64_t *)(void *)&(_dkq)->qum_com); \
	/* copy __q_flags and restore saved bits */ \
	(_dkq)->qum_qflags = ((_skq)->qum_qflags & ~QUM_F_SAVE_MASK) | _sf; \
} while (0)

/*
 * _QUM_INTERNALIZE internalizes a portion of the quantum that includes
 * user visible fields without overwriting the portion that's private to
 * the kernel; see comments on _QUM_COPY().
 */
#define _QUM_INTERNALIZE(_uq, _kq) do { \
	_QUM_COPY(_uq, _kq); \
	/* drop all but QUM_F_SAVE_MASK */ \
	(_kq)->qum_qflags &= QUM_F_SAVE_MASK; \
} while (0)
298
/*
 * _QUM_EXTERNALIZE externalizes a portion of the quantum that's user
 * visible without including fields that's private to the kernel; at
 * the moment this is everything from the begininng down to __q_flags,
 * but no more. It does NOT preserve the destination's QUM_F_SAVE_MASK
 * bits, but instead copies all bits except QUMF_KERNEL_FLAGS ones.
 *
 * NOTE: this needs to be adjusted if more user-mutable field is added
 * after __q_flags. This macro is used only during externalize.
 */
#define _QUM_EXTERNALIZE(_kq, _uq) do { \
	_CASSERT(offsetof(struct __quantum, __q_flags) == 24); \
	_CASSERT(sizeof(METADATA_IDX(_uq)) == sizeof(obj_idx_t)); \
	/* copy __quantum excluding qum_qflags */ \
	sk_copy64_24((uint64_t *)(void *)&(_kq)->qum_com, \
	    (uint64_t *)(void *)&(_uq)->qum_com); \
	/* copy qum_qflags excluding saved bits */ \
	(_uq)->qum_qflags = ((_kq)->qum_qflags & ~QUM_F_KERNEL_FLAGS); \
	/* re-initialize user metadata (index/type deconst writes) */ \
	*(obj_idx_t *)(uintptr_t)&METADATA_IDX(_uq) = METADATA_IDX(_kq); \
	METADATA_TYPE(_uq) = METADATA_TYPE(_kq); \
	METADATA_SUBTYPE(_uq) = METADATA_SUBTYPE(_kq); \
	(_uq)->qum_usecnt = 0; \
} while (0)
323
/*
 * Transmit completion.
 */
struct __packet_compl {
	/*
	 * Tx completion data
	 * _arg & _data: context data which are passed as arguments
	 * to the registered Tx completion callback.
	 * _tx_status: Tx status set by the driver.
	 *
	 * The union lets the three fields be zeroed/copied as three
	 * 64-bit words (see _KPKT_INIT_TX_COMPL_DATA).
	 */
	union {
		uint64_t compl_data64[3];
		struct {
			uintptr_t _cb_arg;
			uintptr_t _cb_data;
			uint32_t _tx_status;
			uint32_t _pad;
		} compl_data;
	};
	/* bitmap indicating the requested packet completion callbacks */
	uint32_t compl_callbacks;
	/* Context identifier for a given packet completion */
	uint32_t compl_context;
};
348
/*
 * Kernel variant of __user_packet.
 */
struct __kern_packet {
	/* quantum must come first (overlay with __user_packet) */
	struct __kern_quantum pkt_qum;
#define pkt_user pkt_qum.qum_user

	/*
	 * Common area between user and kernel variants.
	 */
	struct __packet pkt_com;

	/*
	 * Option common area (PKT_F_OPT_DATA),
	 * non-NULL if PKT_F_OPT_ALLOC is set.
	 */
	struct __packet_opt *pkt_com_opt;

	/* TX: enqueue time, RX: receive timestamp */
	uint64_t pkt_timestamp;

	/* next chain in queue; used while enqueuing to classq or reass */
	struct __kern_packet *pkt_nextpkt;

	/*
	 * Attached mbuf or pkt.
	 * Used by compat netif driver (PKT_F_MBUF_DATA) or interface
	 * filters (PKT_F_PKT_DATA).
	 */
	union {
		struct mbuf *pkt_mbuf;
		struct __kern_packet *pkt_pkt;
	};
	/*
	 * Flow classifier data (PKT_F_FLOW_DATA),
	 * non-NULL if PKT_F_FLOW_ALLOC is set.
	 *
	 * NOTE: the pkt_flow_* accessors below dereference pkt_flow;
	 * they must only be used when pkt_flow is non-NULL.
	 */
	struct __flow *pkt_flow; /* classifier info */
#define pkt_flow_ipv4_addrs pkt_flow->flow_ipv4_addrs
#define pkt_flow_ipv4_src pkt_flow->flow_ipv4_src
#define pkt_flow_ipv4_dst pkt_flow->flow_ipv4_dst
#define pkt_flow_ipv6_addrs pkt_flow->flow_ipv6_addrs
#define pkt_flow_ipv6_src pkt_flow->flow_ipv6_src
#define pkt_flow_ipv6_dst pkt_flow->flow_ipv6_dst
#define pkt_flow_ip_ver pkt_flow->flow_ip_ver
#define pkt_flow_ip_proto pkt_flow->flow_ip_proto
#define pkt_flow_ip_hdr pkt_flow->flow_ip_hdr
#define pkt_flow_tcp pkt_flow->flow_tcp
#define pkt_flow_tcp_src pkt_flow->flow_tcp_src
#define pkt_flow_tcp_dst pkt_flow->flow_tcp_dst
#define pkt_flow_tcp_seq pkt_flow->flow_tcp_seq
#define pkt_flow_tcp_ack pkt_flow->flow_tcp_ack
#define pkt_flow_tcp_off pkt_flow->flow_tcp_off
#define pkt_flow_tcp_flags pkt_flow->flow_tcp_flags
#define pkt_flow_tcp_win pkt_flow->flow_tcp_win
#define pkt_flow_tcp_hlen pkt_flow->flow_tcp_hlen
#define pkt_flow_tcp_hdr pkt_flow->flow_tcp_hdr
#define pkt_flow_tcp_agg_fast pkt_flow->flow_tcp_agg_fast
#define pkt_flow_udp pkt_flow->flow_udp
#define pkt_flow_udp_src pkt_flow->flow_udp_src
#define pkt_flow_udp_dst pkt_flow->flow_udp_dst
#define pkt_flow_udp_hlen pkt_flow->flow_udp_hlen
#define pkt_flow_udp_hdr pkt_flow->flow_udp_hdr
#define pkt_flow_esp_spi pkt_flow->flow_esp_spi
#define pkt_transport_protocol pkt_flow->flow_ulp_encap
#define pkt_flow_ip_hlen pkt_flow->flow_ip_hlen
#define pkt_flow_ulen pkt_flow->flow_ulen
#define pkt_flow_ip_frag_id pkt_flow->flow_ip_frag_id
#define pkt_flow_ip_is_frag pkt_flow->flow_ip_is_frag
#define pkt_flow_ip_is_first_frag pkt_flow->flow_ip_is_first_frag
#define pkt_flowsrc_token pkt_flow->flow_src_token
#define pkt_flowsrc_id pkt_flow->flow_src_id
#define pkt_flowsrc_fidx pkt_flow->flow_src_fidx
#define pkt_flowsrc_type pkt_flow->flow_src_type
#define pkt_classq_hash pkt_flow->flow_classq_hash
#define pkt_classq_flags pkt_flow->flow_classq_flags
#define pkt_policy_id pkt_flow->flow_policy_id
#define pkt_skip_policy_id pkt_flow->flow_skip_policy_id
#define pkt_policy_euuid pkt_flow->flow_policy_euuid

	/*
	 * Transmit completion data (PKT_TX_COMPL_DATA),
	 * non-NULL if PKT_F_TX_COMPL_ALLOC is set.
	 *
	 * NOTE: like pkt_flow_*, the accessors below dereference
	 * pkt_tx_compl and require it to be non-NULL.
	 */
	struct __packet_compl *pkt_tx_compl; /* TX completion info */
#define pkt_tx_compl_data pkt_tx_compl->compl_data
#define pkt_tx_compl_data64 pkt_tx_compl->compl_data64
#define pkt_tx_compl_cb_arg pkt_tx_compl->compl_data._cb_arg
#define pkt_tx_compl_cb_data pkt_tx_compl->compl_data._cb_data
#define pkt_tx_compl_status pkt_tx_compl->compl_data._tx_status
#define pkt_tx_compl_callbacks pkt_tx_compl->compl_callbacks
#define pkt_tx_compl_context pkt_tx_compl->compl_context

	void * pkt_priv; /* free to use for every layer */


	/*
	 * Kernel specific.
	 *
	 * pkt_{bufs,max} aren't part of the common area, on purpose,
	 * since we selectively update them on internalize/externalize.
	 */
	const uint16_t pkt_bufs_max; /* maximum size of buflet chain */
	const uint16_t pkt_bufs_cnt; /* buflet chain size */
	uint32_t pkt_chain_count; /* number of packets in chain */
	uint32_t pkt_chain_bytes; /* number of bytes in chain */

	nexus_port_t pkt_nx_port; /* user channel port */
	/*
	 * gencnt of pkt_nx_port's corresponding vpna. So that we can tell
	 * whether the port in pkt_nx_port has been defuncted or reused.
	 */
	uint16_t pkt_vpna_gencnt;

	/* Cellular Host Driver generated trace_tag */
	packet_trace_tag_t pkt_trace_tag;
	/* index of the qset that the pkt comes from */
	uint8_t pkt_qset_idx;
	uint8_t _pad[1];
} __attribute((aligned(sizeof(uint64_t))));
469
470
/*
 * the size of __user_packet structure for n total buflets.
 * The kernel packet is fixed-size regardless of buflet count,
 * hence the argument is unused.
 */
#define _KERN_PACKET_SIZE(n) sizeof(struct __kern_packet)
473
/*
 * _PKT_COM_INIT: wipe and re-initialize the common packet area,
 * preserving the caller-supplied packet flags across the wipe.
 */
#define _PKT_COM_INIT(_p, _pflags) do { \
	/* save packet flags since it might be wiped out */ \
	volatile uint64_t __pflags = (_pflags); \
	/* first wipe it clean */ \
	_CASSERT(sizeof(struct __packet_com) == 32); \
	_CASSERT(sizeof(struct __packet) == 32); \
	sk_zero_32(&(_p)->pkt_com.__pkt_data[0]); \
	/* then initialize */ \
	(_p)->pkt_pflags = (__pflags); \
	(_p)->pkt_svc_class = KPKT_SC_UNSPEC; \
} while (0)

/*
 * _PKT_CTOR: one-time constructor for the packet-specific fields;
 * records the (const) buflet chain capacity and count.
 */
#define _PKT_CTOR(_p, _pflags, _bufcnt, _maxfrags) do { \
	_PKT_COM_INIT(_p, _pflags); \
	_CASSERT(sizeof ((_p)->pkt_bufs_max) == sizeof (uint16_t)); \
	_CASSERT(sizeof ((_p)->pkt_bufs_cnt) == sizeof (uint16_t)); \
	/* deconst */ \
	*(uint16_t *)(uintptr_t)&(_p)->pkt_bufs_max = (_maxfrags); \
	*(uint16_t *)(uintptr_t)&(_p)->pkt_bufs_cnt = (_bufcnt); \
} while (0)
494
/* clear both the attached mbuf and packet, plus their flag bits */
#define KPKT_CLEAR_MBUF_PKT_DATA(_pk) do { \
	_CASSERT(offsetof(struct __kern_packet, pkt_mbuf) == \
	    offsetof(struct __kern_packet, pkt_pkt)); \
	(_pk)->pkt_pflags &= ~(PKT_F_MBUF_MASK|PKT_F_PKT_MASK); \
	/* the following also clears pkt_pkt */ \
	(_pk)->pkt_mbuf = NULL; \
} while (0)

/* clear only the attached mbuf and its flag bits */
#define KPKT_CLEAR_MBUF_DATA(_pk) do { \
	(_pk)->pkt_pflags &= ~PKT_F_MBUF_MASK; \
	(_pk)->pkt_mbuf = NULL; \
} while (0)

/* clear only the attached packet and its flag bits */
#define KPKT_CLEAR_PKT_DATA(_pk) do { \
	(_pk)->pkt_pflags &= ~PKT_F_PKT_MASK; \
	(_pk)->pkt_pkt = NULL; \
} while (0)

/* zero only the per-use portion of the flow classifier data */
#define KPKT_CLEAR_FLOW_INIT(_fl) do { \
	_CASSERT(sizeof ((_fl)->flow_init_data) == 128); \
	sk_zero_128(&(_fl)->flow_init_data[0]); \
} while (0)

/* zero the entire flow classifier structure */
#define KPKT_CLEAR_FLOW_ALL(_fl) do { \
	bzero(_fl, sizeof(struct __flow)); \
} while (0)

/* record the optional side allocations on the kernel packet */
#define _KPKT_CTOR_PRIV_VARS(_p, _opt, _flow, _txcomp) do { \
	(_p)->pkt_com_opt = (_opt); \
	(_p)->pkt_flow = (_flow); \
	(_p)->pkt_tx_compl = (_txcomp); \
} while (0)
527
/* placeholder; no fast-path descriptor variables in this build */
#define _KPKT_INIT_FPD_VARS(_p)

/*
 * _KPKT_INIT_PRIV_VARS: per-use (re)initialization of the
 * kernel-private packet fields (timestamps, chain linkage,
 * attached mbuf/pkt, flow data, port bookkeeping).
 */
#define _KPKT_INIT_PRIV_VARS(_p) do { \
	struct __flow *__fl = (_p)->pkt_flow; \
	(_p)->pkt_timestamp = 0; \
	(_p)->pkt_nextpkt = NULL; \
	(_p)->pkt_priv = NULL; \
	_KPKT_INIT_FPD_VARS(_p); \
	KPKT_CLEAR_MBUF_PKT_DATA(_p); \
	if (__probable(__fl != NULL)) { \
		KPKT_CLEAR_FLOW_INIT(__fl); \
	} \
	(_p)->pkt_chain_count = (_p)->pkt_chain_bytes = 0; \
	(_p)->pkt_nx_port = NEXUS_PORT_ANY; \
	(_p)->pkt_vpna_gencnt = 0; \
	(_p)->pkt_trace_tag = 0; \
	(_p)->pkt_qset_idx = 0; \
} while (0)
546
/*
 * KPKT_CTOR: one-time constructor for a kernel packet: constructs
 * the embedded quantum, then the packet-specific fields, then the
 * optional side allocations.  _pu is the mirrored user packet
 * (NULL for kernel-only pools).
 */
#define KPKT_CTOR(_pk, _pflags, _opt, _flow, _txcomp, _midx, _pu, _pp, \
    _bufcnt, _maxfrags, _qflags) do { \
	ASSERT((uintptr_t)(_pk) != (uintptr_t)(_pu)); \
	/* ASSERT((_pu) != NULL || PP_KERNEL_ONLY(_pp)); */ \
	/* kernel (and user) quantum */ \
	KQUM_CTOR(&(_pk)->pkt_qum, _midx, \
	    (((_pu) == NULL) ? NULL : &(_pu)->pkt_qum), _pp, _qflags); \
	/* kernel packet variant */ \
	_PKT_CTOR(_pk, _pflags, _bufcnt, _maxfrags); \
	_KPKT_CTOR_PRIV_VARS(_pk, _opt, _flow, _txcomp); \
	/* no need to construct user variant as it is done in externalize */ \
} while (0)

/* KPKT_INIT: per-use (re)initialization of a constructed kernel packet */
#define KPKT_INIT(_pk, _flags) do { \
	KQUM_INIT(&(_pk)->pkt_qum, _flags); \
	_PKT_COM_INIT(_pk, (_pk)->pkt_pflags); \
	_KPKT_INIT_PRIV_VARS(_pk); \
	/* no need to initialize user variant as it is done in externalize */ \
} while (0)
566
/*
 * _KPKT_INIT_TX_COMPL_DATA: lazily zero the TX completion data the
 * first time it is used on this packet (idempotent thereafter).
 * Requires that the completion structure has been allocated.
 */
#define _KPKT_INIT_TX_COMPL_DATA(_p) do { \
	if (((_p)->pkt_pflags & PKT_F_TX_COMPL_DATA) == 0) { \
		ASSERT((_p)->pkt_pflags & PKT_F_TX_COMPL_ALLOC); \
		(_p)->pkt_pflags |= PKT_F_TX_COMPL_DATA; \
		_CASSERT(sizeof((_p)->pkt_tx_compl_data64) == 24); \
		/* 32-bit compl_data should be in the union */ \
		_CASSERT(sizeof((_p)->pkt_tx_compl_data) <= 24); \
		(_p)->pkt_tx_compl_data64[0] = 0; \
		(_p)->pkt_tx_compl_data64[1] = 0; \
		(_p)->pkt_tx_compl_data64[2] = 0; \
	} \
} while (0)
579
/*
 * Copy optional meta data.
 * Both source and destination must be a kernel packet.
 * No-op unless the source carries option data; both packets must
 * then have a valid pkt_com_opt allocation.
 */
#define _PKT_COPY_OPT_DATA(_skp, _dkp) do { \
	if (__improbable(((_skp)->pkt_pflags & PKT_F_OPT_DATA) != 0)) { \
		_CASSERT(sizeof(struct __packet_opt) == 40); \
		ASSERT((_skp)->pkt_pflags & PKT_F_OPT_ALLOC); \
		sk_copy64_40((uint64_t *)(struct __packet_opt *__header_bidi_indexable)(_skp)->pkt_com_opt, \
		    (uint64_t *)(struct __packet_opt *__header_bidi_indexable)(_dkp)->pkt_com_opt); \
	} \
} while (0)
592
/*
 * _PKT_COPY only copies the user metadata portion of the packet;
 * at the moment this is everything from the beginning down to __p_flags,
 * but no more. It additionally copies only QUM_F_COPY_MASK bits from
 * the source __p_flags to the destination's.
 *
 * NOTE: this needs to be adjusted if more user-mutable field is added
 * after __p_flags.
 */
#define _PKT_COPY(_skp, _dkp) do { \
	_CASSERT(sizeof(struct __packet) == 32); \
	_CASSERT(sizeof(struct __packet_com) == 32); \
	_CASSERT(offsetof(struct __packet, __p_flags) == 24); \
	/* copy __packet excluding pkt_pflags */ \
	sk_copy64_24((uint64_t *)(struct __packet *__header_bidi_indexable)&(_skp)->pkt_com, \
	    (uint64_t *)(struct __packet *__header_bidi_indexable)&(_dkp)->pkt_com, \
	/* copy relevant pkt_pflags bits */ \
	(_dkp)->pkt_pflags = ((_skp)->pkt_pflags & PKT_F_COPY_MASK); \
	/* copy __packet_opt if applicable */ \
	_PKT_COPY_OPT_DATA((_skp), (_dkp)); \
} while (0)
614
615
/*
 * Copy Transmit completion data.
 * Carries the nexus port, its vpna generation count, and the
 * TX-port-related flag bits from source to destination.
 */
#define _PKT_COPY_TX_PORT_DATA(_skp, _dkp) do { \
	(_dkp)->pkt_nx_port = (_skp)->pkt_nx_port; \
	(_dkp)->pkt_vpna_gencnt = (_skp)->pkt_vpna_gencnt; \
	(_dkp)->pkt_pflags |= ((_skp)->pkt_pflags & PKT_F_TX_PORT_DATA);\
} while (0)
624
/*
 * _PKT_INTERNALIZE internalizes a portion of the packet that includes
 * user visible fields without overwriting the portion that's private to
 * the kernel.  Kernel-owned flag bits are saved before the copy and
 * restored afterwards, so user space cannot toggle them.
 *
 * NOTE: this needs to be adjusted if more user-mutable data is added
 * after __p_flags. This macro is used only during internalize.
 */
#define _PKT_INTERNALIZE(_up, _kp) do { \
	volatile uint64_t _kf = ((_kp)->pkt_pflags & ~PKT_F_USER_MASK); \
	_CASSERT(sizeof(struct __packet) == 32); \
	_CASSERT(sizeof(struct __packet_com) == 32); \
	_CASSERT(offsetof(struct __packet, __p_flags) == 24); \
	/* copy __packet excluding pkt_pflags */ \
	sk_copy64_24((uint64_t *)(void *)&(_up)->pkt_com, \
	    (uint64_t *)(void *)&(_kp)->pkt_com); \
	/* copy pkt_pflags and restore kernel bits */ \
	(_kp)->pkt_pflags = ((_up)->pkt_pflags & PKT_F_USER_MASK) | _kf;\
	/* copy (internalize) __packet_opt if applicable */ \
	if (__improbable(((_kp)->pkt_pflags & PKT_F_OPT_DATA) != 0)) { \
		_CASSERT(sizeof(struct __packet_opt) == 40); \
		ASSERT((_kp)->pkt_pflags & PKT_F_OPT_ALLOC); \
		sk_copy64_40((uint64_t *)(void *)&(_up)->pkt_com_opt, \
		    (uint64_t *)(struct __packet_opt *__header_bidi_indexable)(_kp)->pkt_com_opt); \
	} \
} while (0)
651
/*
 * _PKT_EXTERNALIZE externalizes a portion of the packet that's user
 * visible without including fields that's private to the kernel; at the
 * moment this is everything from the beginning down to __p_flags,
 * but no more.  Kernel-only flag bits are stripped from the user copy.
 *
 * NOTE: this needs to be adjusted if more user-mutable data is added
 * after __p_flags. This macro is used only during externalize.
 */
#define _PKT_EXTERNALIZE(_kp, _up) do { \
	_CASSERT(sizeof(struct __packet) == 32); \
	_CASSERT(sizeof(struct __packet_com) == 32); \
	_CASSERT(offsetof(struct __packet, __p_flags) == 24); \
	/* copy __packet excluding pkt_pflags */ \
	sk_copy64_24((uint64_t *)(void *)&(_kp)->pkt_com, \
	    (uint64_t *)(void *)&(_up)->pkt_com); \
	/* copy pkt_pflags excluding kernel bits */ \
	(_up)->pkt_pflags = ((_kp)->pkt_pflags & PKT_F_USER_MASK); \
	/* copy (externalize) __packet_opt if applicable */ \
	if (__improbable(((_kp)->pkt_pflags & PKT_F_OPT_DATA) != 0)) { \
		_CASSERT(sizeof(struct __packet_opt) == 40); \
		ASSERT((_kp)->pkt_pflags & PKT_F_OPT_ALLOC); \
		sk_copy64_40((uint64_t *)(struct __packet_opt *__header_bidi_indexable)(_kp)->pkt_com_opt, \
		    (uint64_t *)(void *)&(_up)->pkt_com_opt); \
	} \
} while (0)
678
/* decode a packet handle into a kernel quantum pointer */
#define SK_PTR_ADDR_KQUM(_ph) __unsafe_forge_single(struct __kern_quantum *, \
    (SK_PTR_ADDR(_ph)))
/* decode a packet handle into a kernel packet pointer */
#define SK_PTR_ADDR_KPKT(_ph) __unsafe_forge_single(struct __kern_packet *, \
    (SK_PTR_ADDR(_ph)))
/* cast a raw address to a kernel packet pointer */
#define SK_PTR_KPKT(_pa) ((struct __kern_packet *)(void *)(_pa))
/* encode a kernel packet (with its type/subtype) into a handle */
#define SK_PKT2PH(_pkt) \
	(SK_PTR_ENCODE((_pkt), METADATA_TYPE((_pkt)), METADATA_SUBTYPE((_pkt))))
686
/*
 * Set the length of the data to various places: __user_slot_desc,
 * __kern_quantum, and for a packet, the buflet.
 * !!! This should be used only for dropping the packet as the macro
 * is not functionally correct.
 *
 * TODO: [email protected] -- maybe finalize here as well?
 */
#define METADATA_SET_LEN(_md, _len, _doff) do { \
	struct __kern_quantum *_q = \
	    (struct __kern_quantum *)(void *)(_md); \
	_q->qum_len = (_len); \
	switch (METADATA_TYPE(_q)) { \
	case NEXUS_META_TYPE_PACKET: { \
		struct __kern_packet *_p = \
		    (struct __kern_packet *)(void *)(_md); \
		struct __kern_buflet *_kbft; \
		/* only the first buflet of the chain is updated */ \
		PKT_GET_FIRST_BUFLET(_p, _p->pkt_bufs_cnt, _kbft); \
		_kbft->buf_dlen = (_len); \
		_kbft->buf_doff = (_doff); \
		break; \
	} \
	default: \
		ASSERT(METADATA_TYPE(_q) == NEXUS_META_TYPE_QUANTUM); \
		_q->qum_buf[0].buf_dlen = (_len); \
		_q->qum_buf[0].buf_doff = (_doff); \
		break; \
	} \
} while (0)
716
/*
 * Adjust (add to) the data length of the first buflet and set the
 * data offset.  Unlike METADATA_SET_LEN, qum_len is left untouched.
 */
#define METADATA_ADJUST_LEN(_md, _len, _doff) do { \
	struct __kern_quantum *_q = \
	    (struct __kern_quantum *)(void *)(_md); \
	switch (METADATA_TYPE(_q)) { \
	case NEXUS_META_TYPE_PACKET: { \
		struct __kern_packet *_p = \
		    (struct __kern_packet *)(void *)(_md); \
		struct __kern_buflet *_kbft; \
		PKT_GET_FIRST_BUFLET(_p, _p->pkt_bufs_cnt, _kbft); \
		_kbft->buf_dlen += (_len); \
		_kbft->buf_doff = (_doff); \
		break; \
	} \
	default: \
		ASSERT(METADATA_TYPE(_q) == NEXUS_META_TYPE_QUANTUM); \
		_q->qum_buf[0].buf_dlen += (_len); \
		_q->qum_buf[0].buf_doff = (_doff); \
		break; \
	} \
} while (0)
737
738 __attribute__((always_inline))
739 static inline kern_packet_t
SD_GET_TAGGED_METADATA(const struct __kern_slot_desc * ksd)740 SD_GET_TAGGED_METADATA(const struct __kern_slot_desc *ksd)
741 {
742 return __improbable(ksd->sd_md == NULL) ? 0 :
743 SK_PTR_ENCODE(ksd->sd_md, METADATA_TYPE(ksd->sd_qum),
744 METADATA_SUBTYPE(ksd->sd_qum));
745 }
746
747 __attribute__((always_inline))
748 static inline errno_t
KR_SLOT_ATTACH_METADATA(const kern_channel_ring_t kring,struct __kern_slot_desc * ksd,struct __kern_quantum * kqum)749 KR_SLOT_ATTACH_METADATA(const kern_channel_ring_t kring,
750 struct __kern_slot_desc *ksd, struct __kern_quantum *kqum)
751 {
752 obj_idx_t idx = KR_SLOT_INDEX(kring,
753 (struct __slot_desc *)(void *)ksd);
754
755 /* Ensure this is only done by the thread doing a sync syscall */
756 ASSERT(sk_is_sync_protected());
757 ASSERT(kqum->qum_pp == kring->ckr_pp);
758 ASSERT(kqum->qum_ksd == NULL);
759 /*
760 * Packets being attached to a slot should always be internalized.
761 * Internalized packet should be in finalized or dropped state.
762 */
763 ASSERT(kqum->qum_qflags & QUM_F_INTERNALIZED);
764 ASSERT(((kqum->qum_qflags & QUM_F_FINALIZED) != 0) ^
765 ((kqum->qum_qflags & QUM_F_DROPPED) != 0));
766
767 kqum->qum_ksd = ksd;
768
769 KSD_ATTACH_METADATA(ksd, kqum);
770 if (!KR_KERNEL_ONLY(kring)) {
771 USD_ATTACH_METADATA(KR_USD(kring, idx), METADATA_IDX(kqum));
772 }
773
774 return 0;
775 }
776
777 __attribute__((always_inline))
778 static inline struct __kern_quantum *
KR_SLOT_DETACH_METADATA(const kern_channel_ring_t kring,struct __kern_slot_desc * ksd)779 KR_SLOT_DETACH_METADATA(const kern_channel_ring_t kring,
780 struct __kern_slot_desc *ksd)
781 {
782 struct __kern_quantum *kqum = ksd->sd_qum;
783 obj_idx_t idx = KR_SLOT_INDEX(kring,
784 (struct __slot_desc *)(void *)ksd);
785
786 /* Ensure this is only done by the thread doing a sync syscall */
787 ASSERT(sk_is_sync_protected());
788 ASSERT(KSD_VALID_METADATA(ksd));
789 ASSERT(kqum->qum_ksd == ksd);
790 ASSERT(kqum->qum_pp == kring->ckr_pp);
791 /*
792 * Packets being attached to a slot would always be internalized.
793 * We also detach externalized packets on an rx ring on behalf
794 * of the user space if the channel is not in user packet pool mode.
795 * Externalized packet should be in finalized or dropped state.
796 */
797 ASSERT((kqum->qum_qflags & (QUM_F_INTERNALIZED)) ||
798 ((((kqum->qum_qflags & QUM_F_FINALIZED) != 0) ^
799 ((kqum->qum_qflags & QUM_F_DROPPED) != 0))));
800
801 /* detaching requires the packet to be finalized later */
802 kqum->qum_qflags &= ~QUM_F_FINALIZED;
803 kqum->qum_ksd = NULL;
804
805 KSD_DETACH_METADATA(ksd);
806 if (!KR_KERNEL_ONLY(kring)) {
807 USD_DETACH_METADATA(KR_USD(kring, idx));
808 }
809
810 return kqum;
811 }
812
813 __attribute__((always_inline))
814 static inline errno_t
KR_SLOT_ATTACH_BUF_METADATA(const kern_channel_ring_t kring,struct __kern_slot_desc * ksd,struct __kern_buflet * kbuf)815 KR_SLOT_ATTACH_BUF_METADATA(const kern_channel_ring_t kring,
816 struct __kern_slot_desc *ksd, struct __kern_buflet *kbuf)
817 {
818 obj_idx_t idx = KR_SLOT_INDEX(kring,
819 (struct __slot_desc *)(void *)ksd);
820
821 /* Ensure this is only done by the thread doing a sync syscall */
822 ASSERT(sk_is_sync_protected());
823
824 KSD_ATTACH_METADATA(ksd, kbuf);
825 /*
826 * buflet is attached only to the user packet pool alloc ring.
827 */
828 ASSERT(!KR_KERNEL_ONLY(kring));
829 ASSERT(kring->ckr_tx == CR_KIND_ALLOC);
830 USD_ATTACH_METADATA(KR_USD(kring, idx), kbuf->buf_bft_idx_reg);
831 return 0;
832 }
833
834 #if (DEVELOPMENT || DEBUG)
835 SYSCTL_DECL(_kern_skywalk_packet);
836 extern int pkt_trailers;
#endif /* DEVELOPMENT || DEBUG */
838
839 typedef void (pkt_copy_from_pkt_t)(const enum txrx, kern_packet_t,
840 const uint16_t, kern_packet_t, const uint16_t, const uint32_t,
841 const boolean_t, const uint16_t, const uint16_t, const boolean_t);
842
843 typedef void (pkt_copy_from_mbuf_t)(const enum txrx, kern_packet_t,
844 const uint16_t, struct mbuf *, const uint16_t, const uint32_t,
845 const boolean_t, const uint16_t);
846
847 typedef void (pkt_copy_to_mbuf_t)(const enum txrx, kern_packet_t,
848 const uint16_t, struct mbuf *, const uint16_t, const uint32_t,
849 const boolean_t, const uint16_t);
850
851 __BEGIN_DECLS
852 extern void pkt_subtype_assert_fail(const kern_packet_t, uint64_t, uint64_t);
853 extern void pkt_type_assert_fail(const kern_packet_t, uint64_t);
854
855 extern pkt_copy_from_pkt_t pkt_copy_from_pkt;
856 extern pkt_copy_from_pkt_t pkt_copy_multi_buflet_from_pkt;
857 extern pkt_copy_from_mbuf_t pkt_copy_from_mbuf;
858 extern pkt_copy_from_mbuf_t pkt_copy_multi_buflet_from_mbuf;
859 extern pkt_copy_to_mbuf_t pkt_copy_to_mbuf;
860 extern pkt_copy_to_mbuf_t pkt_copy_multi_buflet_to_mbuf;
861
862 extern void pkt_copypkt_sum(kern_packet_t, uint16_t, kern_packet_t,
863 uint16_t, uint16_t, uint32_t *, boolean_t);
864 extern uint32_t
865 pkt_copyaddr_sum(kern_packet_t sph, uint16_t soff, uint8_t *__sized_by(len) dbaddr,
866 uint32_t len, boolean_t do_csum, uint32_t initial_sum, boolean_t *odd_start);
867 extern uint32_t pkt_sum(kern_packet_t, uint16_t, uint16_t);
868 extern uint32_t pkt_mcopypkt_sum(mbuf_t, int, kern_packet_t, uint16_t,
869 uint16_t, boolean_t);
870 extern uint32_t
871 m_copydata_sum(struct mbuf *m, int off, int len, void *__sized_by(len) vp, uint32_t initial_sum,
872 boolean_t *odd_start);
873 extern void pkt_copy(void *__sized_by(len) src, void *__sized_by(len) dst,
874 size_t len);
875
876 #if (DEVELOPMENT || DEBUG)
877 extern uint32_t pkt_add_trailers(kern_packet_t, const uint32_t, const uint16_t);
878 extern uint32_t pkt_add_trailers_mbuf(struct mbuf *, const uint16_t);
#endif /* DEVELOPMENT || DEBUG */
880 __END_DECLS
881 #endif /* BSD_KERNEL_PRIVATE */
882 #endif /* !_SKYWALK_PACKET_PACKETVAR_H_ */
883