1 /*
2 * Copyright (c) 2016-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #ifndef _SKYWALK_PACKET_PACKETVAR_H_
30 #define _SKYWALK_PACKET_PACKETVAR_H_
31
32 #ifdef BSD_KERNEL_PRIVATE
33 #include <skywalk/core/skywalk_var.h>
34 #include <skywalk/os_packet_private.h>
35
36 /*
37 * Kernel variant of __user_buflet.
38 *
39 * The main difference here is the support for shared buffers, where
40 * multiple buflets may point to the same buffer object at different
41 * data span within it, each holding a reference to the buffer object,
42 * i.e. the "use" count. The buf_addr therefore points to the beginning
43 * of the data span; the buf_len describes the length of the span; and
44 * the buf_doff describes the offset relative to the beginning of the
45 * span as noted by buf_addr. The buffer object is stored in buf_objaddr.
46 */
struct __kern_buflet {
	/*
	 * Common area between user and kernel variants.
	 */
	struct __buflet buf_com;
	/*
	 * Kernel specific.
	 */
	/* buffer control of the buffer object */
	const struct skmem_bufctl *buf_ctl;

/* convenience accessors resolving to the underlying buffer object */
#define buf_objaddr buf_ctl->bc_addr
#define buf_objlim buf_ctl->bc_lim
} __attribute((packed));
61
struct __kern_buflet_ext {
	/*
	 * This is an overlay structure on __kern_buflet; the overlay
	 * member must remain first so that a __kern_buflet_ext pointer
	 * may be used wherever a __kern_buflet pointer is expected.
	 */
	struct __kern_buflet kbe_overlay;
	/*
	 * extended variant specific.
	 */
	/* mirrored user buflet */
	struct __user_buflet const *kbe_buf_user;

	/* buflet user packet pool hash bucket linkage */
	SLIST_ENTRY(__kern_buflet_ext) kbe_buf_upp_link;

	/* pid of the process using the buflet */
	pid_t kbe_buf_pid;
} __attribute((packed));
79
/*
 * KBUF_CTOR: one-time construction of a kernel buflet.  Points the
 * buflet at buffer (_baddr, _bidxreg), sized from pool (_pp) (large or
 * default per (_large)), records the buffer control (_bc) and takes a
 * "use" reference on it.  (_bc) may be NULL for unused buflets.
 */
#define KBUF_CTOR(_kbuf, _baddr, _bidxreg, _bc, _pp, _large) do {	\
	static_assert(sizeof ((_kbuf)->buf_addr) == sizeof (mach_vm_address_t));\
	/* kernel variant (deconst) */					\
	BUF_CTOR(_kbuf, _baddr, _bidxreg, (_large) ? PP_BUF_SIZE_LARGE(_pp) :\
	    PP_BUF_SIZE_DEF(_pp), 0, 0, (_kbuf)->buf_nbft_addr,	\
	    (_kbuf)->buf_nbft_idx, (_kbuf)->buf_flag);		\
	*(struct skmem_bufctl **)(uintptr_t)&(_kbuf)->buf_ctl = (_bc);	\
	/* this may be called to initialize unused buflets */		\
	if (__probable((_bc) != NULL)) {				\
		skmem_bufctl_use(_bc);					\
	}								\
	/* no need to construct user variant as it is done in externalize */ \
} while (0)
93
/*
 * KBUF_EXT_CTOR: construction of an external buflet backed by buflet
 * registry index (_bft_idx_reg).  Marks the buflet BUFLET_FLAG_EXTERNAL
 * (plus BUFLET_FLAG_LARGE_BUF when (_large)), records the mirrored user
 * buflet (_ubuf), then performs the common KBUF_CTOR steps.
 */
#define KBUF_EXT_CTOR(_kbuf, _ubuf, _baddr, _bidxreg, _bc,		\
    _bft_idx_reg, _pp, _large) do {					\
	ASSERT(_bft_idx_reg != OBJ_IDX_NONE);				\
	static_assert(sizeof((_kbuf)->buf_flag) == sizeof(uint16_t));	\
	/* we don't set buf_nbft_addr here as during construction it */ \
	/* is used by skmem batch alloc logic */			\
	*__DECONST(uint16_t *, &(_kbuf)->buf_flag) = BUFLET_FLAG_EXTERNAL;\
	if (_large) {							\
		*__DECONST(uint16_t *, &(_kbuf)->buf_flag) |=		\
		    BUFLET_FLAG_LARGE_BUF;				\
	}								\
	BUF_NBFT_IDX(_kbuf, OBJ_IDX_NONE);				\
	BUF_BFT_IDX_REG(_kbuf, _bft_idx_reg);				\
	*__DECONST(struct __user_buflet **,				\
	    &((struct __kern_buflet_ext *)(_kbuf))->kbe_buf_user) = (_ubuf);\
	KBUF_CTOR(_kbuf, _baddr, _bidxreg, _bc, _pp, _large);		\
} while (0)
111
/*
 * KBUF_INIT: (re)initialize an already-constructed kernel buflet for
 * reuse; asserts that construction has happened (bufctl, address and
 * limit are set), then resets the data offset and length to zero.
 */
#define KBUF_INIT(_kbuf) do {						\
	ASSERT((_kbuf)->buf_ctl != NULL);				\
	ASSERT((_kbuf)->buf_addr != 0);					\
	ASSERT((_kbuf)->buf_dlim != 0);					\
	/* kernel variant (deconst) */					\
	BUF_INIT(_kbuf, 0, 0);						\
} while (0)
119
/*
 * KBUF_EXT_INIT: (re)initialize an external buflet for reuse; resets
 * the buffer address and data limit from the bufctl and pool (_pp),
 * clears next-buflet linkage and data span, and marks the buflet as
 * unowned (kbe_buf_pid == -1) and unlinked from the upp hash bucket.
 */
#define KBUF_EXT_INIT(_kbuf, _pp) do {					\
	ASSERT((_kbuf)->buf_ctl != NULL);				\
	ASSERT((_kbuf)->buf_flag & BUFLET_FLAG_EXTERNAL);		\
	ASSERT((_kbuf)->buf_bft_idx_reg != OBJ_IDX_NONE);		\
	BUF_BADDR(_kbuf, (_kbuf)->buf_ctl->bc_addr);			\
	BUF_NBFT_ADDR(_kbuf, 0);					\
	BUF_NBFT_IDX(_kbuf, OBJ_IDX_NONE);				\
	*__DECONST(uint32_t *, &(_kbuf)->buf_dlim) =			\
	    BUFLET_HAS_LARGE_BUF(_kbuf) ? PP_BUF_SIZE_LARGE((_pp)) :	\
	    PP_BUF_SIZE_DEF((_pp));					\
	(_kbuf)->buf_dlen = 0;						\
	(_kbuf)->buf_doff = 0;						\
	((struct __kern_buflet_ext *__unsafe_indexable)(_kbuf))->kbe_buf_pid = (pid_t)-1; \
	((struct __kern_buflet_ext *__unsafe_indexable)(_kbuf))->kbe_buf_upp_link.sle_next = NULL;\
} while (0)
135
/*
 * initialize struct __user_buflet from struct __kern_buflet;
 * the user view gets a zero buffer address (user space maps buffers
 * by index) but mirrors index, limits, span and flags.
 */
#define UBUF_INIT(_kbuf, _ubuf) do {					\
	BUF_CTOR(_ubuf, 0, (_kbuf)->buf_idx, (_kbuf)->buf_dlim,	\
	    (_kbuf)->buf_dlen, (_kbuf)->buf_doff, (_kbuf)->buf_nbft_addr,\
	    (_kbuf)->buf_nbft_idx, (_kbuf)->buf_flag);			\
	BUF_BFT_IDX_REG(_ubuf, (_kbuf)->buf_bft_idx_reg);		\
} while (0)
143
/*
 * KBUF_EXTERNALIZE: prepare the mirrored user buflet (_ubuf) of a
 * kernel buflet (_kbuf) for export to user space.
 */
#define KBUF_EXTERNALIZE(_kbuf, _ubuf, _pp) do {			\
	/* NOTE: the conditional must be parenthesized; "==" binds */	\
	/* tighter than "?:", so without the parentheses this */	\
	/* assertion degenerated to (size ? : size) != 0, i.e. it */	\
	/* always passed and never checked buf_dlim at all. */		\
	ASSERT((_kbuf)->buf_dlim == (BUFLET_HAS_LARGE_BUF(_kbuf) ?	\
	    PP_BUF_SIZE_LARGE((_pp)) : PP_BUF_SIZE_DEF((_pp))));	\
	ASSERT((_kbuf)->buf_addr != 0);					\
	/* For now, user-facing pool does not support shared */		\
	/* buffer, since otherwise the ubuf and kbuf buffer */		\
	/* indices would not match. Assert this is the case.*/		\
	ASSERT((_kbuf)->buf_addr == (mach_vm_address_t)(_kbuf)->buf_objaddr);\
	/* Initialize user buflet metadata from kernel buflet */	\
	UBUF_INIT(_kbuf, _ubuf);					\
} while (0)
155
/*
 * KBUF_LINK: append buflet (_kbuf) to the chain after (_pkbuf);
 * (_pkbuf) must currently be the chain tail (no next buflet attached).
 */
#define KBUF_LINK(_pkbuf, _kbuf) do {					\
	ASSERT(__DECONST(void *, (_pkbuf)->buf_nbft_addr) == NULL);	\
	ASSERT(__DECONST(obj_idx_t, (_pkbuf)->buf_nbft_idx) == OBJ_IDX_NONE); \
	ASSERT((_kbuf) != NULL);					\
	ASSERT((_kbuf)->buf_bft_idx_reg != OBJ_IDX_NONE);		\
	BUF_NBFT_ADDR(_pkbuf, _kbuf);					\
	BUF_NBFT_IDX(_pkbuf, (_kbuf)->buf_bft_idx_reg);			\
} while (0)
164
/*
 * KBUF_DTOR: drop the buflet's "use" reference on its bufctl, storing
 * the remaining use count in (_usecnt), then sever the buffer linkage.
 * NOTE: (_usecnt) is left untouched when buf_ctl is already NULL.
 */
#define KBUF_DTOR(_kbuf, _usecnt) do {					\
	if (__probable((_kbuf)->buf_ctl != NULL)) {			\
		(_usecnt) = skmem_bufctl_unuse(				\
			__DECONST(struct skmem_bufctl *, (_kbuf)->buf_ctl));\
		*(struct skmem_bufctl **)				\
		(uintptr_t)&(_kbuf)->buf_ctl = NULL;			\
	}								\
	BUF_BADDR(_kbuf, 0);						\
	BUF_BIDX(_kbuf, OBJ_IDX_NONE);					\
} while (0)
175
176 /*
177 * Copy kernel buflet (and add reference count to buffer).
178 */
179 #define _KBUF_COPY(_skb, _dkb) do { \
180 ASSERT((_skb)->buf_nbft_addr == 0); \
181 ASSERT((_skb)->buf_nbft_idx == OBJ_IDX_NONE); \
182 ASSERT(!((_dkb)->buf_flag & BUFLET_FLAG_EXTERNAL)); \
183 static_assert(sizeof(struct __kern_buflet) == 50); \
184 /* copy everything in the kernel buflet */ \
185 sk_copy64_40((uint64_t *)(void *)(_skb), (uint64_t *)(void *)(_dkb));\
186 ((uint64_t *)(void *)(_dkb))[5] = ((uint64_t *)(void *)(_skb))[5]; \
187 ((uint16_t *)(void *)(_dkb))[24] = ((uint16_t *)(void *)(_skb))[24]; \
188 ASSERT((_dkb)->buf_ctl == (_skb)->buf_ctl); \
189 static_assert(sizeof((_dkb)->buf_flag) == sizeof(uint16_t)); \
190 *__DECONST(uint16_t *, &(_dkb)->buf_flag) &= ~BUFLET_FLAG_EXTERNAL;\
191 if (__probable((_dkb)->buf_ctl != NULL)) { \
192 skmem_bufctl_use(__DECONST(struct skmem_bufctl *, \
193 (_dkb)->buf_ctl)); \
194 } \
195 } while (0)
196
197 /*
198 * Kernel variant of __user_quantum.
199 */
200 struct __kern_quantum {
201 /*
202 * Common area between user and kernel variants.
203 */
204 struct __quantum qum_com;
205
206 /*
207 * Kernel specific.
208 */
209 SLIST_ENTRY(__kern_quantum) qum_upp_link;
210 const struct kern_pbufpool *qum_pp;
211 const struct __user_quantum *qum_user;
212 const struct __kern_slot_desc *qum_ksd;
213 struct __kern_buflet qum_buf[1]; /* 1 buflet */
214 pid_t qum_pid;
215 } __attribute((aligned(sizeof(uint64_t))));
216
/*
 * KQUM_CTOR: one-time construction of a kernel quantum at metadata
 * index (_midx); wires up the owning pool (_pp), the mirrored user
 * quantum (_uqum, may be NULL for kernel-only pools) and the metadata
 * type/subtype taken from the pool.
 */
#define KQUM_CTOR(_kqum, _midx, _uqum, _pp, _qflags) do {		\
	ASSERT((uintptr_t)(_kqum) != (uintptr_t)(_uqum));		\
	static_assert(sizeof(METADATA_IDX(_kqum)) == sizeof(obj_idx_t)); \
	/* kernel variant (deconst) */					\
	_KQUM_CTOR(_kqum, (PP_KERNEL_ONLY(_pp) ?			\
	    QUM_F_KERNEL_ONLY : 0) | _qflags, 0, 0, OBJ_IDX_NONE,	\
	    PP_BUF_SIZE_DEF((_pp)), _midx);				\
	static_assert(NEXUS_META_TYPE_MAX <= UINT16_MAX);		\
	METADATA_TYPE(_kqum) = (uint16_t)(_pp)->pp_md_type;		\
	static_assert(NEXUS_META_SUBTYPE_MAX <= UINT16_MAX);		\
	METADATA_SUBTYPE(_kqum) = (uint16_t)(_pp)->pp_md_subtype;	\
	*(struct kern_pbufpool **)(uintptr_t)&(_kqum)->qum_pp = (_pp);	\
	*(struct __user_quantum **)(uintptr_t)&(_kqum)->qum_user = (_uqum); \
	*(obj_idx_t *)(uintptr_t)&METADATA_IDX(_kqum) = (_midx);	\
	(_kqum)->qum_pid = (pid_t)-1;					\
	*(struct __kern_slot_desc **)(uintptr_t)&(_kqum)->qum_ksd = NULL;\
	/* no need to construct user variant as it is done in externalize */ \
} while (0)
235
/*
 * KQUM_INIT: (re)initialize a constructed quantum for reuse; must not
 * currently be attached to a slot (qum_ksd == NULL) nor be owned by a
 * process (qum_pid == -1).
 */
#define KQUM_INIT(_kqum, _flags) do {					\
	ASSERT((_kqum)->qum_ksd == NULL);				\
	ASSERT((_kqum)->qum_pid == (pid_t)-1);				\
	/* kernel variant (deconst) */					\
	_KQUM_INIT(_kqum, (PP_KERNEL_ONLY((_kqum)->qum_pp) ?		\
	    QUM_F_KERNEL_ONLY : 0) | _flags, 0, METADATA_IDX(_kqum));	\
	/* no need to initialize user variant as it is done in externalize */ \
} while (0)
244
245 __attribute__((always_inline))
246 inline boolean_t
_UUID_MATCH(uuid_t u1,uuid_t u2)247 _UUID_MATCH(uuid_t u1, uuid_t u2)
248 {
249 uint64_t *a = (uint64_t *)(void *) u1;
250 uint64_t *b = (uint64_t *)(void *) u2;
251 bool first_same = (a[0] == b[0]);
252 bool second_same = (a[1] == b[1]);
253
254 return first_same && second_same;
255 }
256
/*
 * Copy a 16-byte UUID from (_src) to (_dst) as two 64-bit words.
 * Arguments are parenthesized in the expansion (macro hygiene): the
 * previous unparenthesized form mis-bound under the casts for any
 * non-trivial argument expression (cast binds tighter than e.g. "+").
 */
#define _UUID_COPY(_dst, _src) do {					\
	static_assert(sizeof(uuid_t) == 16);				\
	sk_copy64_16((uint64_t *)(void *)(_src),			\
	    (uint64_t *)(void *)(_dst));				\
} while (0)
261
/* zero a 16-byte UUID using two 64-bit stores */
#define _UUID_CLEAR(_u) do {						\
	uint64_t *__dst = (uint64_t *)(void *)(_u);			\
	static_assert(sizeof(uuid_t) == 16);				\
	*(__dst++) = 0;         /* qw[0] */				\
	*(__dst) = 0;           /* qw[1] */				\
} while (0)
268
269 /*
270 * _QUM_COPY only copies the user metadata portion of the quantum;
271 * at the moment this is everything from the beginning down to __q_flags,
272 * but no more. It preserves the destination's QUM_F_SAVE_MASK bits.
273 *
274 * NOTE: this needs to be adjusted if more user-mutable field is added
275 * after __q_flags.
276 */
277 #define _QUM_COPY(_skq, _dkq) do { \
278 volatile uint16_t _sf = ((_dkq)->qum_qflags & QUM_F_SAVE_MASK); \
279 static_assert(sizeof(_sf) == sizeof((_dkq)->qum_qflags)); \
280 static_assert(offsetof(struct __quantum, __q_flags) == 24); \
281 /* copy everything above (and excluding) __q_flags */ \
282 sk_copy64_24((uint64_t *)(void *)&(_skq)->qum_com, \
283 (uint64_t *)(void *)&(_dkq)->qum_com); \
284 /* copy __q_flags and restore saved bits */ \
285 (_dkq)->qum_qflags = ((_skq)->qum_qflags & ~QUM_F_SAVE_MASK) | _sf; \
286 } while (0)
287
288 /*
289 * _QUM_INTERNALIZE internalizes a portion of the quantum that includes
290 * user visible fields without overwriting the portion that's private to
291 * the kernel; see comments on _QUM_COPY().
292 */
293 #define _QUM_INTERNALIZE(_uq, _kq) do { \
294 _QUM_COPY(_uq, _kq); \
295 /* drop all but QUM_F_SAVE_MASK */ \
296 (_kq)->qum_qflags &= QUM_F_SAVE_MASK; \
297 } while (0)
298
299 /*
300 * _QUM_EXTERNALIZE externalizes a portion of the quantum that's user
301 * visible without including fields that's private to the kernel; at
302 * the moment this is everything from the begininng down to __q_flags,
303 * but no more. It does NOT preserve the destination's QUM_F_SAVE_MASK
304 * bits, but instead copies all bits except QUMF_KERNEL_FLAGS ones.
305 *
306 * NOTE: this needs to be adjusted if more user-mutable field is added
307 * after __q_flags. This macro is used only during externalize.
308 */
309 #define _QUM_EXTERNALIZE(_kq, _uq) do { \
310 static_assert(offsetof(struct __quantum, __q_flags) == 24); \
311 static_assert(sizeof(METADATA_IDX(_uq)) == sizeof(obj_idx_t)); \
312 /* copy __quantum excluding qum_qflags */ \
313 sk_copy64_24((uint64_t *)(void *)&(_kq)->qum_com, \
314 (uint64_t *)(void *)&(_uq)->qum_com); \
315 /* copy qum_qflags excluding saved bits */ \
316 (_uq)->qum_qflags = ((_kq)->qum_qflags & ~QUM_F_KERNEL_FLAGS); \
317 /* re-initialize user metadata */ \
318 *(obj_idx_t *)(uintptr_t)&METADATA_IDX(_uq) = METADATA_IDX(_kq); \
319 METADATA_TYPE(_uq) = METADATA_TYPE(_kq); \
320 METADATA_SUBTYPE(_uq) = METADATA_SUBTYPE(_kq); \
321 (_uq)->qum_usecnt = 0; \
322 } while (0)
323
324 /*
325 * Transmit completion.
326 */
327 struct __packet_compl {
328 /*
329 * Tx completion data
330 * _arg & _data: context data which are passed as arguments
331 * to the registered Tx completion callback.
332 * _tx_status: Tx status set by the driver.
333 */
334 union {
335 uint64_t compl_data64[3];
336 struct {
337 uintptr_t _cb_arg;
338 uintptr_t _cb_data;
339 uint32_t _tx_status;
340 uint32_t _pad;
341 } compl_data;
342 };
343 /* bitmap indicating the requested packet completion callbacks */
344 uint8_t compl_callbacks;
345 /* Context identifier for a given packet completion */
346 uint32_t compl_context;
347 };
348
349 /*
350 * Kernel variant of __user_packet.
351 */
352 struct __kern_packet {
353 struct __kern_quantum pkt_qum;
354 #define pkt_user pkt_qum.qum_user
355
356 /*
357 * Common area between user and kernel variants.
358 */
359 struct __packet pkt_com;
360
361 /*
362 * Option common area (PKT_F_OPT_DATA),
363 * non-NULL if PKT_F_OPT_ALLOC is set.
364 */
365 struct __packet_opt *pkt_com_opt;
366
367 /* TX: enqueue time, RX: receive timestamp */
368 uint64_t pkt_timestamp;
369
370 /* next chain in queue; used while enqueuing to classq or reass */
371 struct __kern_packet *pkt_nextpkt;
372
373 /*
374 * Attached mbuf or pkt.
375 * Used by compat netif driver (PKT_F_MBUF_DATA) or interface
376 * filters (PKT_F_PKT_DATA).
377 */
378 union {
379 struct mbuf *pkt_mbuf;
380 struct __kern_packet *pkt_pkt;
381 };
382 /*
383 * Flow classifier data (PKT_F_FLOW_DATA),
384 * non-NULL if PKT_F_FLOW_ALLOC is set.
385 */
386 struct __flow *pkt_flow; /* classifier info */
387 #define pkt_flow_ipv4_addrs pkt_flow->flow_ipv4_addrs
388 #define pkt_flow_ipv4_src pkt_flow->flow_ipv4_src
389 #define pkt_flow_ipv4_dst pkt_flow->flow_ipv4_dst
390 #define pkt_flow_ipv6_addrs pkt_flow->flow_ipv6_addrs
391 #define pkt_flow_ipv6_src pkt_flow->flow_ipv6_src
392 #define pkt_flow_ipv6_dst pkt_flow->flow_ipv6_dst
393 #define pkt_flow_ip_ver pkt_flow->flow_ip_ver
394 #define pkt_flow_ip_proto pkt_flow->flow_ip_proto
395 #define pkt_flow_ip_hdr pkt_flow->flow_ip_hdr
396 #define pkt_flow_tcp pkt_flow->flow_tcp
397 #define pkt_flow_tcp_src pkt_flow->flow_tcp_src
398 #define pkt_flow_tcp_dst pkt_flow->flow_tcp_dst
399 #define pkt_flow_tcp_seq pkt_flow->flow_tcp_seq
400 #define pkt_flow_tcp_ack pkt_flow->flow_tcp_ack
401 #define pkt_flow_tcp_off pkt_flow->flow_tcp_off
402 #define pkt_flow_tcp_flags pkt_flow->flow_tcp_flags
403 #define pkt_flow_tcp_win pkt_flow->flow_tcp_win
404 #define pkt_flow_tcp_hlen pkt_flow->flow_tcp_hlen
405 #define pkt_flow_tcp_hdr pkt_flow->flow_tcp_hdr
406 #define pkt_flow_tcp_agg_fast pkt_flow->flow_tcp_agg_fast
407 #define pkt_flow_udp pkt_flow->flow_udp
408 #define pkt_flow_udp_src pkt_flow->flow_udp_src
409 #define pkt_flow_udp_dst pkt_flow->flow_udp_dst
410 #define pkt_flow_udp_hlen pkt_flow->flow_udp_hlen
411 #define pkt_flow_udp_hdr pkt_flow->flow_udp_hdr
412 #define pkt_flow_esp_spi pkt_flow->flow_esp_spi
413 #define pkt_transport_protocol pkt_flow->flow_ulp_encap
414 #define pkt_flow_ip_hlen pkt_flow->flow_ip_hlen
415 #define pkt_flow_ulen pkt_flow->flow_ulen
416 #define pkt_flow_ip_frag_id pkt_flow->flow_ip_frag_id
417 #define pkt_flow_ip_is_frag pkt_flow->flow_ip_is_frag
418 #define pkt_flow_ip_is_first_frag pkt_flow->flow_ip_is_first_frag
419 #define pkt_flowsrc_token pkt_flow->flow_src_token
420 #define pkt_flowsrc_id pkt_flow->flow_src_id
421 #define pkt_flowsrc_fidx pkt_flow->flow_src_fidx
422 #define pkt_flowsrc_type pkt_flow->flow_src_type
423 #define pkt_classq_hash pkt_flow->flow_classq_hash
424 #define pkt_classq_flags pkt_flow->flow_classq_flags
425 #define pkt_policy_id pkt_flow->flow_policy_id
426 #define pkt_skip_policy_id pkt_flow->flow_skip_policy_id
427 #define pkt_policy_euuid pkt_flow->flow_policy_euuid
428
429 /*
430 * Transmit completion data (PKT_TX_COMPL_DATA),
431 * non-NULL if PKT_F_TX_COMPL_ALLOC is set.
432 */
433 struct __packet_compl *pkt_tx_compl; /* TX completion info */
434 #define pkt_tx_compl_data pkt_tx_compl->compl_data
435 #define pkt_tx_compl_data64 pkt_tx_compl->compl_data64
436 #define pkt_tx_compl_cb_arg pkt_tx_compl->compl_data._cb_arg
437 #define pkt_tx_compl_cb_data pkt_tx_compl->compl_data._cb_data
438 #define pkt_tx_compl_status pkt_tx_compl->compl_data._tx_status
439 #define pkt_tx_compl_callbacks pkt_tx_compl->compl_callbacks
440 #define pkt_tx_compl_context pkt_tx_compl->compl_context
441
442 void * pkt_priv; /* free to use for every layer */
443
444
445 /*
446 * Kernel specific.
447 *
448 * pkt_{bufs,max} aren't part of the common area, on purpose,
449 * since we selectively update them on internalize/externalize.
450 */
451 const uint16_t pkt_bufs_max; /* maximum size of buflet chain */
452 const uint16_t pkt_bufs_cnt; /* buflet chain size */
453 uint32_t pkt_chain_count; /* number of packets in chain */
454 uint32_t pkt_chain_bytes; /* number of bytes in chain */
455
456 nexus_port_t pkt_nx_port; /* user channel port */
457 /*
458 * gencnt of pkt_nx_port's corresponding vpna. So that we can tell
459 * whether the port in pkt_nx_port has been defuncted or reused.
460 */
461 uint16_t pkt_vpna_gencnt;
462
463 /* Cellular Host Driver generated trace_tag */
464 packet_trace_tag_t pkt_trace_tag;
465 /* index of the qset that the pkt comes from */
466 uint8_t pkt_qset_idx;
467 uint8_t _pad[1];
468 } __attribute((aligned(sizeof(uint64_t))));
469
470
/* the size of __user_packet structure for n total buflets */
/* NOTE: (n) is currently ignored; the size is fixed */
#define _KERN_PACKET_SIZE(n) sizeof(struct __kern_packet)
473
/*
 * _PKT_COM_INIT: wipe the 32-byte common packet area clean, then
 * re-apply the given packet flags and the default service class.
 */
#define _PKT_COM_INIT(_p, _pflags) do {					\
	/* save packet flags since it might be wiped out */		\
	volatile uint64_t __pflags = (_pflags);				\
	/* first wipe it clean */					\
	static_assert(sizeof(struct __packet_com) == 32);		\
	static_assert(sizeof(struct __packet) == 32);			\
	sk_zero_32(&(_p)->pkt_com.__pkt_data[0]);			\
	/* then initialize */						\
	(_p)->pkt_pflags = (__pflags);					\
	(_p)->pkt_svc_class = KPKT_SC_UNSPEC;				\
} while (0)
485
/*
 * _PKT_CTOR: one-time construction of the packet-specific area;
 * initializes the common area and sets the (const) buflet chain
 * geometry, deconsting pkt_bufs_max/pkt_bufs_cnt for the assignment.
 */
#define _PKT_CTOR(_p, _pflags, _bufcnt, _maxfrags) do {			\
	_PKT_COM_INIT(_p, _pflags);					\
	static_assert(sizeof((_p)->pkt_bufs_max) == sizeof(uint16_t));	\
	static_assert(sizeof((_p)->pkt_bufs_cnt) == sizeof(uint16_t));	\
	/* deconst */							\
	*(uint16_t *)(uintptr_t)&(_p)->pkt_bufs_max = (_maxfrags);	\
	*(uint16_t *)(uintptr_t)&(_p)->pkt_bufs_cnt = (_bufcnt);	\
} while (0)
494
/*
 * Clear both the attached-mbuf and attached-pkt state; since pkt_mbuf
 * and pkt_pkt share a union, a single NULL store clears both.
 */
#define KPKT_CLEAR_MBUF_PKT_DATA(_pk) do {				\
	static_assert(offsetof(struct __kern_packet, pkt_mbuf) == offsetof(struct __kern_packet, pkt_pkt)); \
	(_pk)->pkt_pflags &= ~(PKT_F_MBUF_MASK|PKT_F_PKT_MASK);		\
	/* the following also clears pkt_pkt */				\
	(_pk)->pkt_mbuf = NULL;						\
} while (0)

/* clear only the attached-mbuf state (PKT_F_MBUF_MASK flags) */
#define KPKT_CLEAR_MBUF_DATA(_pk) do {					\
	(_pk)->pkt_pflags &= ~PKT_F_MBUF_MASK;				\
	(_pk)->pkt_mbuf = NULL;						\
} while (0)

/* clear only the attached-pkt state (PKT_F_PKT_MASK flags) */
#define KPKT_CLEAR_PKT_DATA(_pk) do {					\
	(_pk)->pkt_pflags &= ~PKT_F_PKT_MASK;				\
	(_pk)->pkt_pkt = NULL;						\
} while (0)

/* zero only the flow's 128-byte init region (fast path for reuse) */
#define KPKT_CLEAR_FLOW_INIT(_fl) do {					\
	static_assert(sizeof((_fl)->flow_init_data) == 128);		\
	sk_zero_128(&(_fl)->flow_init_data[0]);				\
} while (0)

/* zero the entire flow classifier structure */
#define KPKT_CLEAR_FLOW_ALL(_fl) do {					\
	bzero(_fl, sizeof(struct __flow));				\
} while (0)
520
/* record the optional sub-structure pointers at construction time */
#define _KPKT_CTOR_PRIV_VARS(_p, _opt, _flow, _txcomp) do {		\
	(_p)->pkt_com_opt = (_opt);					\
	(_p)->pkt_flow = (_flow);					\
	(_p)->pkt_tx_compl = (_txcomp);					\
} while (0)

/* placeholder; intentionally empty in this configuration */
#define _KPKT_INIT_FPD_VARS(_p)

/*
 * _KPKT_INIT_PRIV_VARS: reset all kernel-private packet fields for
 * reuse; clears timestamps, chain linkage/stats, attached mbuf/pkt
 * state, the flow init region (if a flow is allocated) and the
 * port/trace bookkeeping.
 */
#define _KPKT_INIT_PRIV_VARS(_p) do {					\
	struct __flow *__fl = (_p)->pkt_flow;				\
	(_p)->pkt_timestamp = 0;					\
	(_p)->pkt_nextpkt = NULL;					\
	(_p)->pkt_priv = NULL;						\
	_KPKT_INIT_FPD_VARS(_p);					\
	KPKT_CLEAR_MBUF_PKT_DATA(_p);					\
	if (__probable(__fl != NULL)) {					\
		KPKT_CLEAR_FLOW_INIT(__fl);				\
	}								\
	(_p)->pkt_chain_count = (_p)->pkt_chain_bytes = 0;		\
	(_p)->pkt_nx_port = NEXUS_PORT_ANY;				\
	(_p)->pkt_vpna_gencnt = 0;					\
	(_p)->pkt_trace_tag = 0;					\
	(_p)->pkt_qset_idx = 0;						\
} while (0)
545
/*
 * KPKT_CTOR: one-time construction of a kernel packet; builds the
 * embedded quantum (mirroring the user packet (_pu) when present),
 * the packet-specific common area, and the private pointers.
 */
#define KPKT_CTOR(_pk, _pflags, _opt, _flow, _txcomp, _midx, _pu, _pp,	\
    _bufcnt, _maxfrags, _qflags) do {					\
	ASSERT((uintptr_t)(_pk) != (uintptr_t)(_pu));			\
	/* ASSERT((_pu) != NULL || PP_KERNEL_ONLY(_pp)); */		\
	/* kernel (and user) quantum */					\
	KQUM_CTOR(&(_pk)->pkt_qum, _midx,				\
	    (((_pu) == NULL) ? NULL : &(_pu)->pkt_qum), _pp, _qflags);	\
	/* kernel packet variant */					\
	_PKT_CTOR(_pk, _pflags, _bufcnt, _maxfrags);			\
	_KPKT_CTOR_PRIV_VARS(_pk, _opt, _flow, _txcomp);		\
	/* no need to construct user variant as it is done in externalize */ \
} while (0)

/*
 * KPKT_INIT: (re)initialize a constructed kernel packet for reuse,
 * preserving the existing pkt_pflags across the common-area wipe.
 */
#define KPKT_INIT(_pk, _flags) do {					\
	KQUM_INIT(&(_pk)->pkt_qum, _flags);				\
	_PKT_COM_INIT(_pk, (_pk)->pkt_pflags);				\
	_KPKT_INIT_PRIV_VARS(_pk);					\
	/* no need to initialize user variant as it is done in externalize */ \
} while (0)
565
/*
 * Lazily initialize the TX completion data area: the first time a
 * packet needs it (PKT_F_TX_COMPL_DATA not yet set), zero the 24-byte
 * completion union and mark it valid.  Requires that the completion
 * structure was allocated (PKT_F_TX_COMPL_ALLOC).
 */
#define _KPKT_INIT_TX_COMPL_DATA(_p) do {				\
	if (((_p)->pkt_pflags & PKT_F_TX_COMPL_DATA) == 0) {		\
		ASSERT((_p)->pkt_pflags & PKT_F_TX_COMPL_ALLOC);	\
		(_p)->pkt_pflags |= PKT_F_TX_COMPL_DATA;		\
		static_assert(sizeof((_p)->pkt_tx_compl_data64) == 24);	\
		/* 32-bit compl_data should be in the union */		\
		static_assert(sizeof((_p)->pkt_tx_compl_data) <= 24);	\
		(_p)->pkt_tx_compl_data64[0] = 0;			\
		(_p)->pkt_tx_compl_data64[1] = 0;			\
		(_p)->pkt_tx_compl_data64[2] = 0;			\
	}								\
} while (0)
578
579 /*
580 * Copy optional meta data.
581 * Both source and destination must be a kernel packet.
582 */
583 #define _PKT_COPY_OPT_DATA(_skp, _dkp) do { \
584 if (__improbable(((_skp)->pkt_pflags & PKT_F_OPT_DATA) != 0)) { \
585 static_assert(sizeof(struct __packet_opt) == 40); \
586 ASSERT((_skp)->pkt_pflags & PKT_F_OPT_ALLOC); \
587 sk_copy64_40((uint64_t *)(struct __packet_opt *__header_bidi_indexable)(_skp)->pkt_com_opt, \
588 (uint64_t *)(struct __packet_opt *__header_bidi_indexable)(_dkp)->pkt_com_opt); \
589 } \
590 } while (0)
591
592 /*
593 * _PKT_COPY only copies the user metadata portion of the packet;
594 * at the moment this is everything from the beginning down to __p_flags,
595 * but no more. It additionally copies only QUM_F_COPY_MASK bits from
596 * the source __p_flags to the destination's.
597 *
598 * NOTE: this needs to be adjusted if more user-mutable field is added
599 * after __p_flags.
600 */
601 #define _PKT_COPY(_skp, _dkp) do { \
602 static_assert(sizeof(struct __packet) == 32); \
603 static_assert(sizeof(struct __packet_com) == 32); \
604 static_assert(offsetof(struct __packet, __p_flags) == 24); \
605 /* copy __packet excluding pkt_pflags */ \
606 sk_copy64_24((uint64_t *)(struct __packet *__header_bidi_indexable)&(_skp)->pkt_com, \
607 (uint64_t *)(struct __packet *__header_bidi_indexable)&(_dkp)->pkt_com); \
608 /* copy relevant pkt_pflags bits */ \
609 (_dkp)->pkt_pflags = ((_skp)->pkt_pflags & PKT_F_COPY_MASK); \
610 /* copy __packet_opt if applicable */ \
611 _PKT_COPY_OPT_DATA((_skp), (_dkp)); \
612 } while (0)
613
614
615 /*
616 * Copy Transmit completion data.
617 */
618 #define _PKT_COPY_TX_PORT_DATA(_skp, _dkp) do { \
619 (_dkp)->pkt_nx_port = (_skp)->pkt_nx_port; \
620 (_dkp)->pkt_vpna_gencnt = (_skp)->pkt_vpna_gencnt; \
621 (_dkp)->pkt_pflags |= ((_skp)->pkt_pflags & PKT_F_TX_PORT_DATA);\
622 } while (0)
623
624 /*
625 * _PKT_INTERNALIZE internalizes a portion of the packet that includes
626 * user visible fields without overwriting the portion that's private to
627 * the kernel.
628 *
629 * NOTE: this needs to be adjusted if more user-mutable data is added
630 * after __p_flags. This macro is used only during internalize.
631 */
632 #define _PKT_INTERNALIZE(_up, _kp) do { \
633 volatile uint64_t _kf = ((_kp)->pkt_pflags & ~PKT_F_USER_MASK); \
634 static_assert(sizeof(struct __packet) == 32); \
635 static_assert(sizeof(struct __packet_com) == 32); \
636 static_assert(offsetof(struct __packet, __p_flags) == 24); \
637 /* copy __packet excluding pkt_pflags */ \
638 sk_copy64_24((uint64_t *)(void *)&(_up)->pkt_com, \
639 (uint64_t *)(void *)&(_kp)->pkt_com); \
640 /* copy pkt_pflags and restore kernel bits */ \
641 (_kp)->pkt_pflags = ((_up)->pkt_pflags & PKT_F_USER_MASK) | _kf;\
642 /* copy (internalize) __packet_opt if applicable */ \
643 if (__improbable(((_kp)->pkt_pflags & PKT_F_OPT_DATA) != 0)) { \
644 static_assert(sizeof(struct __packet_opt) == 40); \
645 ASSERT((_kp)->pkt_pflags & PKT_F_OPT_ALLOC); \
646 sk_copy64_40((uint64_t *)(void *)&(_up)->pkt_com_opt, \
647 (uint64_t *)(struct __packet_opt *__header_bidi_indexable)(_kp)->pkt_com_opt); \
648 } \
649 } while (0)
650
651 /*
652 * _PKT_EXTERNALIZE externalizes a portion of the packet that's user
653 * visible without including fields that's private to the kernel; at the
654 * moment this is everything from the beginning down to __p_flags,
655 * but no more.
656 *
657 * NOTE: this needs to be adjusted if more user-mutable data is added
658 * after __p_flags. This macro is used only during externalize.
659 */
660 #define _PKT_EXTERNALIZE(_kp, _up) do { \
661 static_assert(sizeof(struct __packet) == 32); \
662 static_assert(sizeof(struct __packet_com) == 32); \
663 static_assert(offsetof(struct __packet, __p_flags) == 24); \
664 /* copy __packet excluding pkt_pflags */ \
665 sk_copy64_24((uint64_t *)(void *)&(_kp)->pkt_com, \
666 (uint64_t *)(void *)&(_up)->pkt_com); \
667 /* copy pkt_pflags excluding kernel bits */ \
668 (_up)->pkt_pflags = ((_kp)->pkt_pflags & PKT_F_USER_MASK); \
669 /* copy (externalize) __packet_opt if applicable */ \
670 if (__improbable(((_kp)->pkt_pflags & PKT_F_OPT_DATA) != 0)) { \
671 static_assert(sizeof(struct __packet_opt) == 40); \
672 ASSERT((_kp)->pkt_pflags & PKT_F_OPT_ALLOC); \
673 sk_copy64_40((uint64_t *)(struct __packet_opt *__header_bidi_indexable)(_kp)->pkt_com_opt, \
674 (uint64_t *)(void *)&(_up)->pkt_com_opt); \
675 } \
676 } while (0)
677
/* conversions between 64-bit packet handles and kernel metadata pointers */
#define SK_PTR_ADDR_KQUM(_ph)   __unsafe_forge_single(struct __kern_quantum *, (_ph))
#define SK_PTR_ADDR_KPKT(_ph)   __unsafe_forge_single(struct __kern_packet *, (_ph))
#define SK_PTR_KPKT(_pa)        ((struct __kern_packet *)(void *)(_pa))
#define SK_PKT2PH(_pkt)         ((uint64_t)(_pkt))
682
683 /*
684 * Set the length of the data to various places: __user_slot_desc,
685 * __kern_quantum, and for a packet, the buflet.
686 * !!! This should be used only for dropping the packet as the macro
687 * is not functionally correct.
688 *
689 * TODO: [email protected] -- maybe finalize here as well?
690 */
691 #define METADATA_SET_LEN(_md, _len, _doff) do { \
692 struct __kern_quantum *_q = \
693 (struct __kern_quantum *)(void *)(_md); \
694 _q->qum_len = (_len); \
695 struct __kern_packet *_p = \
696 (struct __kern_packet *)(void *)(_md); \
697 struct __kern_buflet *_kbft; \
698 PKT_GET_FIRST_BUFLET(_p, _p->pkt_bufs_cnt, _kbft); \
699 _kbft->buf_dlen = (_len); \
700 _kbft->buf_doff = (_doff); \
701 } while (0)
702
703 #define METADATA_ADJUST_LEN(_md, _len, _doff) do { \
704 struct __kern_packet *_p = \
705 (struct __kern_packet *)(void *)(_md); \
706 struct __kern_buflet *_kbft; \
707 PKT_GET_FIRST_BUFLET(_p, _p->pkt_bufs_cnt, _kbft); \
708 _kbft->buf_dlen += (_len); \
709 _kbft->buf_doff = (_doff); \
710 } while (0)
711
712 __attribute__((always_inline))
713 static inline kern_packet_t
SD_GET_TAGGED_METADATA(const struct __kern_slot_desc * ksd)714 SD_GET_TAGGED_METADATA(const struct __kern_slot_desc *ksd)
715 {
716 return __improbable(ksd->sd_md == NULL) ? 0 :
717 SK_PTR_ENCODE(ksd->sd_md, METADATA_TYPE(ksd->sd_qum),
718 METADATA_SUBTYPE(ksd->sd_qum));
719 }
720
/*
 * Attach an internalized quantum/packet to a ring slot; for rings
 * visible to user space, the attachment is mirrored into the user
 * slot descriptor.  Always returns 0.
 */
__attribute__((always_inline))
static inline errno_t
KR_SLOT_ATTACH_METADATA(const kern_channel_ring_t kring,
    struct __kern_slot_desc *ksd, struct __kern_quantum *kqum)
{
	obj_idx_t idx = KR_SLOT_INDEX(kring,
	    (struct __slot_desc *)(void *)ksd);

	/* Ensure this is only done by the thread doing a sync syscall */
	ASSERT(sk_is_sync_protected());
	ASSERT(kqum->qum_pp == kring->ckr_pp);
	ASSERT(kqum->qum_ksd == NULL);
	/*
	 * Packets being attached to a slot should always be internalized.
	 * Internalized packet should be in finalized or dropped state.
	 */
	ASSERT(kqum->qum_qflags & QUM_F_INTERNALIZED);
	ASSERT(((kqum->qum_qflags & QUM_F_FINALIZED) != 0) ^
	    ((kqum->qum_qflags & QUM_F_DROPPED) != 0));

	kqum->qum_ksd = ksd;

	KSD_ATTACH_METADATA(ksd, kqum);
	if (!KR_KERNEL_ONLY(kring)) {
		USD_ATTACH_METADATA(KR_USD(kring, idx), METADATA_IDX(kqum));
	}

	return 0;
}
750
/*
 * Detach and return the quantum attached to a ring slot; clears the
 * slot linkage (and the user slot descriptor for user-visible rings)
 * and drops QUM_F_FINALIZED so the packet must be finalized again
 * before reuse.
 */
__attribute__((always_inline))
static inline struct __kern_quantum *
KR_SLOT_DETACH_METADATA(const kern_channel_ring_t kring,
    struct __kern_slot_desc *ksd)
{
	struct __kern_quantum *kqum = ksd->sd_qum;
	obj_idx_t idx = KR_SLOT_INDEX(kring,
	    (struct __slot_desc *)(void *)ksd);

	/* Ensure this is only done by the thread doing a sync syscall */
	ASSERT(sk_is_sync_protected());
	ASSERT(KSD_VALID_METADATA(ksd));
	ASSERT(kqum->qum_ksd == ksd);
	ASSERT(kqum->qum_pp == kring->ckr_pp);
	/*
	 * Packets being attached to a slot would always be internalized.
	 * We also detach externalized packets on an rx ring on behalf
	 * of the user space if the channel is not in user packet pool mode.
	 * Externalized packet should be in finalized or dropped state.
	 */
	ASSERT((kqum->qum_qflags & (QUM_F_INTERNALIZED)) ||
	    ((((kqum->qum_qflags & QUM_F_FINALIZED) != 0) ^
	    ((kqum->qum_qflags & QUM_F_DROPPED) != 0))));

	/* detaching requires the packet to be finalized later */
	kqum->qum_qflags &= ~QUM_F_FINALIZED;
	kqum->qum_ksd = NULL;

	KSD_DETACH_METADATA(ksd);
	if (!KR_KERNEL_ONLY(kring)) {
		USD_DETACH_METADATA(KR_USD(kring, idx));
	}

	return kqum;
}
786
/*
 * Attach a raw buflet to a slot; only valid on the user packet pool
 * alloc ring, where the user slot descriptor records the buflet's
 * registry index.  Always returns 0.
 */
__attribute__((always_inline))
static inline errno_t
KR_SLOT_ATTACH_BUF_METADATA(const kern_channel_ring_t kring,
    struct __kern_slot_desc *ksd, struct __kern_buflet *kbuf)
{
	obj_idx_t idx = KR_SLOT_INDEX(kring,
	    (struct __slot_desc *)(void *)ksd);

	/* Ensure this is only done by the thread doing a sync syscall */
	ASSERT(sk_is_sync_protected());

	KSD_ATTACH_METADATA(ksd, kbuf);
	/*
	 * buflet is attached only to the user packet pool alloc ring.
	 */
	ASSERT(!KR_KERNEL_ONLY(kring));
	ASSERT(kring->ckr_tx == CR_KIND_ALLOC);
	USD_ATTACH_METADATA(KR_USD(kring, idx), kbuf->buf_bft_idx_reg);
	return 0;
}
807
808 #if (DEVELOPMENT || DEBUG)
809 SYSCTL_DECL(_kern_skywalk_packet);
810 extern int pkt_trailers;
811 #endif /* !DEVELOPMENT && !DEBUG */
812
/*
 * Function types for the pkt/mbuf copy routine families declared
 * below.  NOTE(review): parameter names are omitted in these
 * typedefs; consult the pkt_copy_* definitions for the exact
 * meaning of each offset/length/flag argument.
 */
typedef void (pkt_copy_from_pkt_t)(const enum txrx, kern_packet_t,
    const uint16_t, kern_packet_t, const uint16_t, const uint32_t,
    const boolean_t, const uint16_t, const uint16_t, const boolean_t);

typedef void (pkt_copy_from_mbuf_t)(const enum txrx, kern_packet_t,
    const uint16_t, struct mbuf *, const uint16_t, const uint32_t,
    const boolean_t, const uint16_t);

typedef void (pkt_copy_to_mbuf_t)(const enum txrx, kern_packet_t,
    const uint16_t, struct mbuf *, const uint16_t, const uint32_t,
    const boolean_t, const uint16_t);
824
825 __BEGIN_DECLS
826
827 extern pkt_copy_from_pkt_t pkt_copy_from_pkt;
828 extern pkt_copy_from_pkt_t pkt_copy_multi_buflet_from_pkt;
829 extern pkt_copy_from_mbuf_t pkt_copy_from_mbuf;
830 extern pkt_copy_from_mbuf_t pkt_copy_multi_buflet_from_mbuf;
831 extern pkt_copy_to_mbuf_t pkt_copy_to_mbuf;
832 extern pkt_copy_to_mbuf_t pkt_copy_multi_buflet_to_mbuf;
833
834 extern void pkt_copypkt_sum(kern_packet_t, uint16_t, kern_packet_t,
835 uint16_t, uint16_t, uint32_t *, boolean_t);
836 extern uint32_t
837 pkt_copyaddr_sum(kern_packet_t sph, uint16_t soff, uint8_t *__sized_by(len) dbaddr,
838 uint32_t len, boolean_t do_csum, uint32_t initial_sum, boolean_t *odd_start);
839 extern uint32_t pkt_sum(kern_packet_t, uint16_t, uint16_t);
840 extern uint32_t pkt_mcopypkt_sum(mbuf_t, int, kern_packet_t, uint16_t,
841 uint16_t, boolean_t);
842 extern uint32_t
843 m_copydata_sum(struct mbuf *m, int off, int len, void *__sized_by(len) vp, uint32_t initial_sum,
844 boolean_t *odd_start);
845 extern void pkt_copy(void *__sized_by(len) src, void *__sized_by(len) dst,
846 size_t len);
847
848 #if (DEVELOPMENT || DEBUG)
849 extern uint32_t pkt_add_trailers(kern_packet_t, const uint32_t, const uint16_t);
850 extern uint32_t pkt_add_trailers_mbuf(struct mbuf *, const uint16_t);
851 #endif /* !DEVELOPMENT && !DEBUG */
852 __END_DECLS
853 #endif /* BSD_KERNEL_PRIVATE */
854 #endif /* !_SKYWALK_PACKET_PACKETVAR_H_ */
855