1 /*
2 * Copyright (c) 2016-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #ifndef _SKYWALK_PACKET_PACKETVAR_H_
30 #define _SKYWALK_PACKET_PACKETVAR_H_
31
32 #ifdef BSD_KERNEL_PRIVATE
33 #include <skywalk/core/skywalk_var.h>
34 #include <skywalk/os_packet_private.h>
35
36 /*
37 * Kernel variant of __user_buflet.
38 *
39 * The main difference here is the support for shared buffers, where
40 * multiple buflets may point to the same buffer object at different
41 * data span within it, each holding a reference to the buffer object,
42 * i.e. the "use" count. The buf_addr therefore points to the beginning
43 * of the data span; the buf_len describes the length of the span; and
44 * the buf_doff describes the offset relative to the beginning of the
45 * span as noted by buf_addr. The buffer object is stored in buf_objaddr.
46 */
47 struct __kern_buflet {
48 /*
49 * Common area between user and kernel variants.
50 */
51 struct __buflet buf_com;
52 /*
53 * Kernel specific.
54 */
55 /* buffer control of the buffer object */
56 const struct skmem_bufctl *buf_ctl;
57 #if !defined(__LP64__)
58 uint32_t __padding;
59 #endif /* !__LP64__ */
60
61 #define buf_objaddr buf_ctl->bc_addr
62 #define buf_objlim buf_ctl->bc_lim
63 } __attribute((packed));
64
/*
 * Extended kernel buflet: a __kern_buflet plus the state needed when
 * the buflet is shared with user space via the user packet pool.
 */
struct __kern_buflet_ext {
	/*
	 * This is an overlay structure on nexus adapter.
	 */
	struct __kern_buflet kbe_overlay;
	/*
	 * extended variant specific.
	 */
	/* mirrored user buflet (set up during externalize) */
	struct __user_buflet const *kbe_buf_user;

	/* buflet user packet pool hash bucket linkage */
	SLIST_ENTRY(__kern_buflet_ext) kbe_buf_upp_link;

	/* pid of the process using the buflet; -1 when unowned */
	pid_t kbe_buf_pid;
} __attribute((packed));
82
/*
 * KBUF_CTOR: construct a kernel buflet.
 *
 *   _kbuf    kernel buflet being constructed
 *   _baddr   buffer address (beginning of data span)
 *   _bidxreg buffer index within the region
 *   _bc      backing skmem_bufctl; may be NULL for unused buflets
 *   _pp      packet pool supplying pp_buflet_size as the data limit
 *
 * Takes a "use" reference on _bc when non-NULL.  buf_ctl is const in
 * the struct and is written here via a deconst cast.
 */
#define KBUF_CTOR(_kbuf, _baddr, _bidxreg, _bc, _pp) do {		\
	_CASSERT(sizeof ((_kbuf)->buf_addr) == sizeof (mach_vm_address_t));\
	/* kernel variant (deconst) */					\
	BUF_CTOR(_kbuf, _baddr, _bidxreg, (_pp)->pp_buflet_size, 0, 0,	\
	    (_kbuf)->buf_nbft_addr, (_kbuf)->buf_nbft_idx, (_kbuf)->buf_flag);\
	*(struct skmem_bufctl **)(uintptr_t)&(_kbuf)->buf_ctl = (_bc);	\
	/* this may be called to initialize unused buflets */		\
	if (__probable((_bc) != NULL)) {				\
		skmem_bufctl_use(_bc);					\
	}								\
	/* no need to construct user variant as it is done in externalize */ \
} while (0)
95
/*
 * KBUF_EXT_CTOR: construct an extended (user-pool-shared) kernel buflet.
 *
 * Marks the buflet BUFLET_FLAG_EXTERNAL, records its buflet index
 * register and the mirrored user buflet, then delegates the common
 * construction to KBUF_CTOR().
 */
#define KBUF_EXT_CTOR(_kbuf, _ubuf, _baddr, _bidxreg, _bc,		\
    _bft_idx_reg, _pp) do {						\
	ASSERT(_bft_idx_reg != OBJ_IDX_NONE);				\
	_CASSERT(sizeof((_kbuf)->buf_flag) == sizeof(uint16_t));	\
	/* we don't set buf_nbft_addr here as during construction it */	\
	/* is used by skmem batch alloc logic */			\
	*__DECONST(uint16_t *, &(_kbuf)->buf_flag) = BUFLET_FLAG_EXTERNAL;\
	BUF_NBFT_IDX(_kbuf, OBJ_IDX_NONE);				\
	BUF_BFT_IDX_REG(_kbuf, _bft_idx_reg);				\
	*__DECONST(struct __user_buflet **,				\
	    &((struct __kern_buflet_ext *)(_kbuf))->kbe_buf_user) = (_ubuf);\
	KBUF_CTOR(_kbuf, _baddr, _bidxreg, _bc, _pp);			\
} while (0)
109
/*
 * KBUF_INIT: (re)initialize an already-constructed kernel buflet for
 * reuse; resets data length/offset to 0 via BUF_INIT.  The buflet must
 * already be backed by a buffer (buf_ctl/buf_addr/buf_dlim set).
 */
#define KBUF_INIT(_kbuf) do {						\
	ASSERT((_kbuf)->buf_ctl != NULL);				\
	ASSERT((_kbuf)->buf_addr != 0);					\
	ASSERT((_kbuf)->buf_dlim != 0);					\
	/* kernel variant (deconst) */					\
	BUF_INIT(_kbuf, 0, 0);						\
} while (0)
117
/*
 * KBUF_EXT_INIT: (re)initialize an extended kernel buflet for reuse.
 * Resets the data span (addr/len/off), unchains any next-buflet links,
 * clears the owning pid and the user packet pool linkage.
 */
#define KBUF_EXT_INIT(_kbuf, _pp) do {					\
	ASSERT((_kbuf)->buf_ctl != NULL);				\
	ASSERT((_kbuf)->buf_flag & BUFLET_FLAG_EXTERNAL);		\
	ASSERT((_kbuf)->buf_bft_idx_reg != OBJ_IDX_NONE);		\
	BUF_BADDR(_kbuf, (_kbuf)->buf_ctl->bc_addr);			\
	BUF_NBFT_ADDR(_kbuf, 0);					\
	BUF_NBFT_IDX(_kbuf, OBJ_IDX_NONE);				\
	*__DECONST(uint16_t *, &(_kbuf)->buf_dlim) = (_pp)->pp_buflet_size;\
	(_kbuf)->buf_dlen = 0;						\
	(_kbuf)->buf_doff = 0;						\
	((struct __kern_buflet_ext *)(_kbuf))->kbe_buf_pid = (pid_t)-1;	\
	((struct __kern_buflet_ext *)(_kbuf))->kbe_buf_upp_link.sle_next = NULL;\
} while (0)
131
/*
 * UBUF_INIT: initialize a struct __user_buflet from a struct
 * __kern_buflet.  The user copy gets a zero buffer address (user space
 * never sees kernel addresses); the index, limits, lengths, offsets,
 * chain links and flags mirror the kernel buflet.
 */
#define UBUF_INIT(_kbuf, _ubuf) do {					\
	BUF_CTOR(_ubuf, 0, (_kbuf)->buf_idx, (_kbuf)->buf_dlim,	\
	    (_kbuf)->buf_dlen, (_kbuf)->buf_doff, (_kbuf)->buf_nbft_addr,\
	    (_kbuf)->buf_nbft_idx, (_kbuf)->buf_flag);			\
	BUF_BFT_IDX_REG(_ubuf, (_kbuf)->buf_bft_idx_reg);		\
} while (0)
139
/*
 * KBUF_EXTERNALIZE: populate the user-visible buflet (_ubuf) from the
 * kernel buflet (_kbuf) when handing the buffer to user space.
 */
#define KBUF_EXTERNALIZE(_kbuf, _ubuf, _pp) do {			\
	ASSERT((_kbuf)->buf_dlim == (_pp)->pp_buflet_size);		\
	ASSERT((_kbuf)->buf_addr != 0);					\
	/* For now, user-facing pool does not support shared */		\
	/* buffer, since otherwise the ubuf and kbuf buffer */		\
	/* indices would not match.  Assert this is the case. */	\
	ASSERT((_kbuf)->buf_addr == (mach_vm_address_t)(_kbuf)->buf_objaddr);\
	/* Initialize user buflet metadata from kernel buflet */	\
	UBUF_INIT(_kbuf, _ubuf);					\
} while (0)
150
/*
 * KBUF_LINK: append buflet _kbuf after _pkbuf in a buflet chain.
 * _pkbuf must not already have a next buflet, and _kbuf must carry a
 * valid buflet index register.
 */
#define KBUF_LINK(_pkbuf, _kbuf) do {					\
	ASSERT(__DECONST(void *, (_pkbuf)->buf_nbft_addr) == NULL);	\
	ASSERT(__DECONST(obj_idx_t, (_pkbuf)->buf_nbft_idx) == OBJ_IDX_NONE); \
	ASSERT((_kbuf) != NULL);					\
	ASSERT((_kbuf)->buf_bft_idx_reg != OBJ_IDX_NONE);		\
	BUF_NBFT_ADDR(_pkbuf, _kbuf);					\
	BUF_NBFT_IDX(_pkbuf, (_kbuf)->buf_bft_idx_reg);			\
} while (0)
159
/*
 * KBUF_DTOR: destruct a kernel buflet.  Drops the "use" reference on
 * the backing buffer control (if any), storing the resulting use count
 * in _usecnt, then clears the buffer address and index.
 */
#define KBUF_DTOR(_kbuf, _usecnt) do {					\
	if (__probable((_kbuf)->buf_ctl != NULL)) {			\
		(_usecnt) = skmem_bufctl_unuse(				\
			__DECONST(struct skmem_bufctl *, (_kbuf)->buf_ctl));\
		*(struct skmem_bufctl **)				\
		(uintptr_t)&(_kbuf)->buf_ctl = NULL;			\
	}								\
	BUF_BADDR(_kbuf, 0);						\
	BUF_BIDX(_kbuf, OBJ_IDX_NONE);					\
} while (0)
170
171 /*
172 * Copy kernel buflet (and add reference count to buffer).
173 */
174 #define _KBUF_COPY(_skb, _dkb) do { \
175 ASSERT((_skb)->buf_nbft_addr == 0); \
176 ASSERT((_skb)->buf_nbft_idx == OBJ_IDX_NONE); \
177 ASSERT(!((_dkb)->buf_flag & BUFLET_FLAG_EXTERNAL)); \
178 _CASSERT(sizeof(struct __kern_buflet) == 44); \
179 /* copy everything in the kernel buflet */ \
180 sk_copy64_40((uint64_t *)(void *)(_skb), (uint64_t *)(void *)(_dkb));\
181 ((uint32_t *)(void *)(_dkb))[10] = ((uint32_t *)(void *)(_skb))[10];\
182 ASSERT((_dkb)->buf_ctl == (_skb)->buf_ctl); \
183 _CASSERT(sizeof((_dkb)->buf_flag) == sizeof(uint16_t)); \
184 *__DECONST(uint16_t *, &(_dkb)->buf_flag) &= ~BUFLET_FLAG_EXTERNAL;\
185 if (__probable((_dkb)->buf_ctl != NULL)) { \
186 skmem_bufctl_use(__DECONST(struct skmem_bufctl *, \
187 (_dkb)->buf_ctl)); \
188 } \
189 } while (0)
190
191 /*
192 * Kernel variant of __user_quantum.
193 */
194 struct __kern_quantum {
195 /*
196 * Common area between user and kernel variants.
197 */
198 struct __quantum qum_com;
199
200 /*
201 * Kernel specific.
202 */
203 SLIST_ENTRY(__kern_quantum) qum_upp_link;
204 const struct kern_pbufpool *qum_pp;
205 const struct __user_quantum *qum_user;
206 const struct __kern_slot_desc *qum_ksd;
207 struct __kern_buflet qum_buf[1]; /* 1 buflet */
208 pid_t qum_pid;
209 } __attribute((aligned(sizeof(uint64_t))));
210
/*
 * KQUM_CTOR: construct a kernel quantum.
 *
 *   _kqum   kernel quantum being constructed
 *   _midx   metadata object index
 *   _uqum   mirrored user quantum (may be NULL)
 *   _pp     owning packet buffer pool (type/subtype/buflet size source)
 *   _qflags extra QUM_F_* flags to set alongside QUM_F_KERNEL_ONLY
 *
 * The const fields (qum_pp, qum_user, metadata index, qum_ksd) are
 * written through deconst casts.
 */
#define KQUM_CTOR(_kqum, _midx, _uqum, _pp, _qflags) do {		\
	ASSERT((uintptr_t)(_kqum) != (uintptr_t)(_uqum));		\
	_CASSERT(sizeof(METADATA_IDX(_kqum)) == sizeof(obj_idx_t));	\
	/* kernel variant (deconst) */					\
	_KQUM_CTOR(_kqum, (PP_KERNEL_ONLY(_pp) ?			\
	    QUM_F_KERNEL_ONLY : 0) | _qflags, 0, 0, OBJ_IDX_NONE,	\
	    (_pp)->pp_buflet_size, _midx);				\
	_CASSERT(NEXUS_META_TYPE_MAX <= UINT16_MAX);			\
	METADATA_TYPE(_kqum) = (uint16_t)(_pp)->pp_md_type;		\
	_CASSERT(NEXUS_META_SUBTYPE_MAX <= UINT16_MAX);			\
	METADATA_SUBTYPE(_kqum) = (uint16_t)(_pp)->pp_md_subtype;	\
	*(struct kern_pbufpool **)(uintptr_t)&(_kqum)->qum_pp = (_pp);	\
	*(struct __user_quantum **)(uintptr_t)&(_kqum)->qum_user = (_uqum); \
	*(obj_idx_t *)(uintptr_t)&METADATA_IDX(_kqum) = (_midx);	\
	(_kqum)->qum_pid = (pid_t)-1;					\
	*(struct __kern_slot_desc **)(uintptr_t)&(_kqum)->qum_ksd = NULL;\
	/* no need to construct user variant as it is done in externalize */ \
} while (0)
229
/*
 * KQUM_INIT: (re)initialize an already-constructed kernel quantum for
 * reuse.  The quantum must be detached (no slot descriptor, no owning
 * pid).
 */
#define KQUM_INIT(_kqum, _flags) do {					\
	ASSERT((_kqum)->qum_ksd == NULL);				\
	ASSERT((_kqum)->qum_pid == (pid_t)-1);				\
	/* kernel variant (deconst) */					\
	_KQUM_INIT(_kqum, (PP_KERNEL_ONLY((_kqum)->qum_pp) ?		\
	    QUM_F_KERNEL_ONLY : 0) | _flags, 0, METADATA_IDX(_kqum));	\
	/* no need to initialize user variant as it is done in externalize */ \
} while (0)
238
239 __attribute__((always_inline))
240 inline boolean_t
_UUID_MATCH(uuid_t u1,uuid_t u2)241 _UUID_MATCH(uuid_t u1, uuid_t u2)
242 {
243 uint64_t *a = (uint64_t *)(void *) u1;
244 uint64_t *b = (uint64_t *)(void *) u2;
245 bool first_same = (a[0] == b[0]);
246 bool second_same = (a[1] == b[1]);
247
248 return first_same && second_same;
249 }
250
/*
 * _UUID_COPY: copy a 16-byte UUID from _src to _dst using the 64-bit
 * copy helper; assumes both are 64-bit aligned -- TODO confirm.
 */
#define _UUID_COPY(_dst, _src) do {					\
	_CASSERT(sizeof (uuid_t) == 16);				\
	sk_copy64_16((uint64_t *)(void *)_src, (uint64_t *)(void *)_dst); \
} while (0)
255
/*
 * _UUID_CLEAR: zero a 16-byte UUID as two 64-bit stores; assumes the
 * UUID storage is 64-bit aligned -- TODO confirm.
 */
#define _UUID_CLEAR(_u) do {						\
	uint64_t *__dst = (uint64_t *)(void *)(_u);			\
	_CASSERT(sizeof (uuid_t) == 16);				\
	*(__dst++) = 0;         /* qw[0] */				\
	*(__dst) = 0;           /* qw[1] */				\
} while (0)
262
263 /*
264 * _QUM_COPY only copies the user metadata portion of the quantum;
265 * at the moment this is everything from the beginning down to __q_flags,
266 * but no more. It preserves the destination's QUM_F_SAVE_MASK bits.
267 *
268 * NOTE: this needs to be adjusted if more user-mutable field is added
269 * after __q_flags.
270 */
271 #define _QUM_COPY(_skq, _dkq) do { \
272 volatile uint16_t _sf = ((_dkq)->qum_qflags & QUM_F_SAVE_MASK); \
273 _CASSERT(sizeof (_sf) == sizeof ((_dkq)->qum_qflags)); \
274 _CASSERT(offsetof(struct __quantum, __q_flags) == 24); \
275 /* copy everything above (and excluding) __q_flags */ \
276 sk_copy64_24((uint64_t *)(void *)&(_skq)->qum_com, \
277 (uint64_t *)(void *)&(_dkq)->qum_com); \
278 /* copy __q_flags and restore saved bits */ \
279 (_dkq)->qum_qflags = ((_skq)->qum_qflags & ~QUM_F_SAVE_MASK) | _sf; \
280 } while (0)
281
282 /*
283 * _QUM_INTERNALIZE internalizes a portion of the quantum that includes
284 * user visible fields without overwriting the portion that's private to
285 * the kernel; see comments on _QUM_COPY().
286 */
287 #define _QUM_INTERNALIZE(_uq, _kq) do { \
288 _QUM_COPY(_uq, _kq); \
289 /* drop all but QUM_F_SAVE_MASK */ \
290 (_kq)->qum_qflags &= QUM_F_SAVE_MASK; \
291 } while (0)
292
293 /*
294 * _QUM_EXTERNALIZE externalizes a portion of the quantum that's user
295 * visible without including fields that's private to the kernel; at
296 * the moment this is everything from the begininng down to __q_flags,
297 * but no more. It does NOT preserve the destination's QUM_F_SAVE_MASK
298 * bits, but instead copies all bits except QUMF_KERNEL_FLAGS ones.
299 *
300 * NOTE: this needs to be adjusted if more user-mutable field is added
301 * after __q_flags. This macro is used only during externalize.
302 */
303 #define _QUM_EXTERNALIZE(_kq, _uq) do { \
304 _CASSERT(offsetof(struct __quantum, __q_flags) == 24); \
305 _CASSERT(sizeof(METADATA_IDX(_uq)) == sizeof(obj_idx_t)); \
306 /* copy __quantum excluding qum_qflags */ \
307 sk_copy64_24((uint64_t *)(void *)&(_kq)->qum_com, \
308 (uint64_t *)(void *)&(_uq)->qum_com); \
309 /* copy qum_qflags excluding saved bits */ \
310 (_uq)->qum_qflags = ((_kq)->qum_qflags & ~QUM_F_KERNEL_FLAGS); \
311 /* re-initialize user metadata */ \
312 *(obj_idx_t *)(uintptr_t)&METADATA_IDX(_uq) = METADATA_IDX(_kq); \
313 METADATA_TYPE(_uq) = METADATA_TYPE(_kq); \
314 METADATA_SUBTYPE(_uq) = METADATA_SUBTYPE(_kq); \
315 (_uq)->qum_usecnt = 0; \
316 } while (0)
317
318 /*
319 * Transmit completion.
320 */
321 struct __packet_compl {
322 /*
323 * Tx completion data
324 * _arg & _data: context data which are passed as arguments
325 * to the registered Tx completion callback.
326 * _tx_status: Tx status set by the driver.
327 */
328 union {
329 uint64_t compl_data64[3];
330 struct {
331 uintptr_t _cb_arg;
332 uintptr_t _cb_data;
333 uint32_t _tx_status;
334 uint32_t _pad;
335 } compl_data;
336 };
337 /* bitmap indicating the requested packet completion callbacks */
338 uint32_t compl_callbacks;
339 /* Context identifier for a given packet completion */
340 uint32_t compl_context;
341 };
342
343 /*
344 * Kernel variant of __user_packet.
345 */
346 struct __kern_packet {
347 struct __kern_quantum pkt_qum;
348 #define pkt_user pkt_qum.qum_user
349
350 /*
351 * Common area between user and kernel variants.
352 */
353 struct __packet pkt_com;
354
355 /*
356 * Option common area (PKT_F_OPT_DATA),
357 * non-NULL if PKT_F_OPT_ALLOC is set.
358 */
359 struct __packet_opt *pkt_com_opt;
360
361 /* TX: enqueue time, RX: receive timestamp */
362 uint64_t pkt_timestamp;
363
364 /* next chain in queue; used while enqueuing to classq or reass */
365 struct __kern_packet *pkt_nextpkt;
366
367 /*
368 * Attached mbuf or pkt.
369 * Used by compat netif driver (PKT_F_MBUF_DATA) or interface
370 * filters (PKT_F_PKT_DATA).
371 */
372 union {
373 struct mbuf *pkt_mbuf;
374 struct __kern_packet *pkt_pkt;
375 };
376 /*
377 * Flow classifier data (PKT_F_FLOW_DATA),
378 * non-NULL if PKT_F_FLOW_ALLOC is set.
379 */
380 struct __flow *pkt_flow; /* classifier info */
381 #define pkt_flow_ipv4_addrs pkt_flow->flow_ipv4_addrs
382 #define pkt_flow_ipv4_src pkt_flow->flow_ipv4_src
383 #define pkt_flow_ipv4_dst pkt_flow->flow_ipv4_dst
384 #define pkt_flow_ipv6_addrs pkt_flow->flow_ipv6_addrs
385 #define pkt_flow_ipv6_src pkt_flow->flow_ipv6_src
386 #define pkt_flow_ipv6_dst pkt_flow->flow_ipv6_dst
387 #define pkt_flow_ip_ver pkt_flow->flow_ip_ver
388 #define pkt_flow_ip_proto pkt_flow->flow_ip_proto
389 #define pkt_flow_ip_hdr pkt_flow->flow_ip_hdr
390 #define pkt_flow_tcp pkt_flow->flow_tcp
391 #define pkt_flow_tcp_src pkt_flow->flow_tcp_src
392 #define pkt_flow_tcp_dst pkt_flow->flow_tcp_dst
393 #define pkt_flow_tcp_seq pkt_flow->flow_tcp_seq
394 #define pkt_flow_tcp_ack pkt_flow->flow_tcp_ack
395 #define pkt_flow_tcp_off pkt_flow->flow_tcp_off
396 #define pkt_flow_tcp_flags pkt_flow->flow_tcp_flags
397 #define pkt_flow_tcp_win pkt_flow->flow_tcp_win
398 #define pkt_flow_tcp_hlen pkt_flow->flow_tcp_hlen
399 #define pkt_flow_tcp_hdr pkt_flow->flow_tcp_hdr
400 #define pkt_flow_tcp_agg_fast pkt_flow->flow_tcp_agg_fast
401 #define pkt_flow_udp pkt_flow->flow_udp
402 #define pkt_flow_udp_src pkt_flow->flow_udp_src
403 #define pkt_flow_udp_dst pkt_flow->flow_udp_dst
404 #define pkt_flow_udp_hlen pkt_flow->flow_udp_hlen
405 #define pkt_flow_udp_hdr pkt_flow->flow_udp_hdr
406 #define pkt_flow_esp_spi pkt_flow->flow_esp_spi
407 #define pkt_transport_protocol pkt_flow->flow_ulp_encap
408 #define pkt_flow_ip_hlen pkt_flow->flow_ip_hlen
409 #define pkt_flow_ulen pkt_flow->flow_ulen
410 #define pkt_flow_ip_frag_id pkt_flow->flow_ip_frag_id
411 #define pkt_flow_ip_is_frag pkt_flow->flow_ip_is_frag
412 #define pkt_flow_ip_is_first_frag pkt_flow->flow_ip_is_first_frag
413 #define pkt_flowsrc_token pkt_flow->flow_src_token
414 #define pkt_flowsrc_id pkt_flow->flow_src_id
415 #define pkt_flowsrc_fidx pkt_flow->flow_src_fidx
416 #define pkt_flowsrc_type pkt_flow->flow_src_type
417 #define pkt_classq_hash pkt_flow->flow_classq_hash
418 #define pkt_classq_flags pkt_flow->flow_classq_flags
419 #define pkt_policy_id pkt_flow->flow_policy_id
420 #define pkt_policy_euuid pkt_flow->flow_policy_euuid
421
422 /*
423 * Transmit completion data (PKT_TX_COMPL_DATA),
424 * non-NULL if PKT_F_TX_COMPL_ALLOC is set.
425 */
426 struct __packet_compl *pkt_tx_compl; /* TX completion info */
427 #define pkt_tx_compl_data pkt_tx_compl->compl_data
428 #define pkt_tx_compl_data64 pkt_tx_compl->compl_data64
429 #define pkt_tx_compl_cb_arg pkt_tx_compl->compl_data._cb_arg
430 #define pkt_tx_compl_cb_data pkt_tx_compl->compl_data._cb_data
431 #define pkt_tx_compl_status pkt_tx_compl->compl_data._tx_status
432 #define pkt_tx_compl_callbacks pkt_tx_compl->compl_callbacks
433 #define pkt_tx_compl_context pkt_tx_compl->compl_context
434
435 /*
436 * Kernel specific.
437 *
438 * pkt_{bufs,max} aren't part of the common area, on purpose,
439 * since we selectively update them on internalize/externalize.
440 */
441 const uint16_t pkt_bufs_max; /* maximum size of buflet chain */
442 const uint16_t pkt_bufs_cnt; /* buflet chain size */
443 uint32_t pkt_chain_count; /* number of packets in chain */
444 uint32_t pkt_chain_bytes; /* number of bytes in chain */
445 } __attribute((aligned(sizeof(uint64_t))));
446
/*
 * The size of the __kern_packet structure for n total buflets; the
 * kernel variant is fixed-size regardless of buflet count, hence the
 * unused argument.
 */
#define _KERN_PACKET_SIZE(n)    sizeof(struct __kern_packet)

/*
 * Valid values for pkt_flags.
 */
#define PKT_KFLAGS_TX_COMPL     0x8     /* has pkt_tx_compl */
454
/*
 * _PKT_COM_INIT: zero the common __packet area and reinstall the given
 * packet flags and default service class.  _pflags is latched first
 * because (_p)->pkt_pflags itself may be passed as the argument.
 */
#define _PKT_COM_INIT(_p, _pflags) do {					\
	/* save packet flags since it might be wiped out */		\
	volatile uint64_t __pflags = (_pflags);				\
	/* first wipe it clean */					\
	_CASSERT(sizeof(struct __packet_com) == 32);			\
	_CASSERT(sizeof(struct __packet) == 32);			\
	sk_zero_32(&(_p)->pkt_com.__pkt_data[0]);			\
	/* then initialize */						\
	(_p)->pkt_pflags = (__pflags);					\
	(_p)->pkt_svc_class = KPKT_SC_UNSPEC;				\
} while (0)
466
/*
 * _PKT_CTOR: construct the packet-specific portion of a kernel packet;
 * initializes the common area and writes the const buflet-chain limits
 * through deconst casts.
 */
#define _PKT_CTOR(_p, _pflags, _bufcnt, _maxfrags) do {			\
	_PKT_COM_INIT(_p, _pflags);					\
	_CASSERT(sizeof ((_p)->pkt_bufs_max) == sizeof (uint16_t));	\
	_CASSERT(sizeof ((_p)->pkt_bufs_cnt) == sizeof (uint16_t));	\
	/* deconst */							\
	*(uint16_t *)(uintptr_t)&(_p)->pkt_bufs_max = (_maxfrags);	\
	*(uint16_t *)(uintptr_t)&(_p)->pkt_bufs_cnt = (_bufcnt);	\
} while (0)
475
/*
 * KPKT_CLEAR_MBUF_PKT_DATA: detach both the attached mbuf and packet.
 * pkt_mbuf and pkt_pkt share a union (asserted below), so one store
 * clears both.
 */
#define KPKT_CLEAR_MBUF_PKT_DATA(_pk) do {				\
	_CASSERT(offsetof(struct __kern_packet, pkt_mbuf) ==		\
	    offsetof(struct __kern_packet, pkt_pkt));			\
	(_pk)->pkt_pflags &= ~(PKT_F_MBUF_MASK|PKT_F_PKT_MASK);		\
	/* the following also clears pkt_pkt */				\
	(_pk)->pkt_mbuf = NULL;						\
} while (0)
483
/* KPKT_CLEAR_MBUF_DATA: detach the attached mbuf and drop its flags */
#define KPKT_CLEAR_MBUF_DATA(_pk) do {					\
	(_pk)->pkt_pflags &= ~PKT_F_MBUF_MASK;				\
	(_pk)->pkt_mbuf = NULL;						\
} while (0)
488
/* KPKT_CLEAR_PKT_DATA: detach the attached packet and drop its flags */
#define KPKT_CLEAR_PKT_DATA(_pk) do {					\
	(_pk)->pkt_pflags &= ~PKT_F_PKT_MASK;				\
	(_pk)->pkt_pkt = NULL;						\
} while (0)
493
/*
 * KPKT_CLEAR_FLOW_INIT: zero only the 128-byte flow_init_data prefix of
 * the flow structure (cheaper than clearing the whole __flow).
 */
#define KPKT_CLEAR_FLOW_INIT(_fl) do {					\
	_CASSERT(sizeof ((_fl)->flow_init_data) == 128);		\
	sk_zero_128(&(_fl)->flow_init_data[0]);				\
} while (0)
498
/* KPKT_CLEAR_FLOW_ALL: zero the entire __flow structure */
#define KPKT_CLEAR_FLOW_ALL(_fl) do {					\
	bzero(_fl, sizeof(struct __flow));				\
} while (0)
502
/*
 * _KPKT_CTOR_PRIV_VARS: install the kernel-private side allocations
 * (packet options, flow classifier data, tx completion data); any of
 * them may be NULL.
 */
#define _KPKT_CTOR_PRIV_VARS(_p, _opt, _flow, _txcomp) do {		\
	(_p)->pkt_com_opt = (_opt);					\
	(_p)->pkt_flow = (_flow);					\
	(_p)->pkt_tx_compl = (_txcomp);					\
} while (0)
508
/*
 * _KPKT_INIT_PRIV_VARS: reset the kernel-private packet state for
 * reuse: timestamp, chain linkage/stats, attached mbuf/pkt, and the
 * flow init data (when a flow structure is attached).
 */
#define _KPKT_INIT_PRIV_VARS(_p) do {					\
	struct __flow *__fl = (_p)->pkt_flow;				\
	(_p)->pkt_timestamp = 0;					\
	(_p)->pkt_nextpkt = NULL;					\
	KPKT_CLEAR_MBUF_PKT_DATA(_p);					\
	if (__probable(__fl != NULL)) {					\
		KPKT_CLEAR_FLOW_INIT(__fl);				\
	}								\
	(_p)->pkt_chain_count = (_p)->pkt_chain_bytes = 0;		\
} while (0)
519
/*
 * KPKT_CTOR: construct a kernel packet: embedded quantum first (with
 * the mirrored user packet's quantum when one exists), then the packet
 * common area and the kernel-private pointers.
 */
#define KPKT_CTOR(_pk, _pflags, _opt, _flow, _txcomp, _midx, _pu, _pp,	\
    _bufcnt, _maxfrags, _qflags) do {					\
	ASSERT((uintptr_t)(_pk) != (uintptr_t)(_pu));			\
	/* ASSERT((_pu) != NULL || PP_KERNEL_ONLY(_pp)); */		\
	/* kernel (and user) quantum */					\
	KQUM_CTOR(&(_pk)->pkt_qum, _midx,				\
	    (((_pu) == NULL) ? NULL : &(_pu)->pkt_qum), _pp, _qflags);	\
	/* kernel packet variant */					\
	_PKT_CTOR(_pk, _pflags, _bufcnt, _maxfrags);			\
	_KPKT_CTOR_PRIV_VARS(_pk, _opt, _flow, _txcomp);		\
	/* no need to construct user variant as it is done in externalize */ \
} while (0)
532
/*
 * KPKT_INIT: (re)initialize an already-constructed kernel packet for
 * reuse; quantum, common area (flags preserved), then private vars.
 */
#define KPKT_INIT(_pk, _flags) do {					\
	KQUM_INIT(&(_pk)->pkt_qum, _flags);				\
	_PKT_COM_INIT(_pk, (_pk)->pkt_pflags);				\
	_KPKT_INIT_PRIV_VARS(_pk);					\
	/* no need to initialize user variant as it is done in externalize */ \
} while (0)
539
/*
 * _KPKT_INIT_TX_COMPL_DATA: lazily zero the tx completion context the
 * first time it is used on this packet; no-op if PKT_F_TX_COMPL_DATA is
 * already set.  Requires PKT_F_TX_COMPL_ALLOC (pkt_tx_compl non-NULL).
 */
#define _KPKT_INIT_TX_COMPL_DATA(_p) do {				\
	if (((_p)->pkt_pflags & PKT_F_TX_COMPL_DATA) == 0) {		\
		ASSERT((_p)->pkt_pflags & PKT_F_TX_COMPL_ALLOC);	\
		(_p)->pkt_pflags |= PKT_F_TX_COMPL_DATA;		\
		_CASSERT(sizeof((_p)->pkt_tx_compl_data64) == 24);	\
		/* 32-bit compl_data should be in the union */		\
		_CASSERT(sizeof((_p)->pkt_tx_compl_data) <= 24);	\
		(_p)->pkt_tx_compl_data64[0] = 0;			\
		(_p)->pkt_tx_compl_data64[1] = 0;			\
		(_p)->pkt_tx_compl_data64[2] = 0;			\
	}								\
} while (0)
552
553 /*
554 * Copy optional meta data.
555 * Both source and destination must be a kernel packet.
556 */
557 #define _PKT_COPY_OPT_DATA(_skp, _dkp) do { \
558 if (__improbable(((_skp)->pkt_pflags & PKT_F_OPT_DATA) != 0)) { \
559 _CASSERT(sizeof(struct __packet_opt) == 32); \
560 ASSERT((_skp)->pkt_pflags & PKT_F_OPT_ALLOC); \
561 sk_copy64_32((uint64_t *)(void *)(_skp)->pkt_com_opt, \
562 (uint64_t *)(void *)(_dkp)->pkt_com_opt); \
563 } \
564 } while (0)
565
566 /*
567 * _PKT_COPY only copies the user metadata portion of the packet;
568 * at the moment this is everything from the beginning down to __p_flags,
569 * but no more. It additionally copies only QUM_F_COPY_MASK bits from
570 * the source __p_flags to the destination's.
571 *
572 * NOTE: this needs to be adjusted if more user-mutable field is added
573 * after __p_flags.
574 */
575 #define _PKT_COPY(_skp, _dkp) do { \
576 _CASSERT(sizeof(struct __packet) == 32); \
577 _CASSERT(sizeof(struct __packet_com) == 32); \
578 _CASSERT(offsetof(struct __packet, __p_flags) == 24); \
579 /* copy __packet excluding pkt_pflags */ \
580 sk_copy64_24((uint64_t *)(void *)&(_skp)->pkt_com, \
581 (uint64_t *)(void *)&(_dkp)->pkt_com); \
582 /* copy relevant pkt_pflags bits */ \
583 (_dkp)->pkt_pflags = ((_skp)->pkt_pflags & PKT_F_COPY_MASK); \
584 /* copy __packet_opt if applicable */ \
585 _PKT_COPY_OPT_DATA((_skp), (_dkp)); \
586 } while (0)
587
588 /*
589 * _PKT_INTERNALIZE internalizes a portion of the packet that includes
590 * user visible fields without overwriting the portion that's private to
591 * the kernel.
592 *
593 * NOTE: this needs to be adjusted if more user-mutable data is added
594 * after __p_flags. This macro is used only during internalize.
595 */
596 #define _PKT_INTERNALIZE(_up, _kp) do { \
597 volatile uint64_t _kf = ((_kp)->pkt_pflags & ~PKT_F_USER_MASK); \
598 _CASSERT(sizeof(struct __packet) == 32); \
599 _CASSERT(sizeof(struct __packet_com) == 32); \
600 _CASSERT(offsetof(struct __packet, __p_flags) == 24); \
601 /* copy __packet excluding pkt_pflags */ \
602 sk_copy64_24((uint64_t *)(void *)&(_up)->pkt_com, \
603 (uint64_t *)(void *)&(_kp)->pkt_com); \
604 /* copy pkt_pflags and restore kernel bits */ \
605 (_kp)->pkt_pflags = ((_up)->pkt_pflags & PKT_F_USER_MASK) | _kf;\
606 /* copy (internalize) __packet_opt if applicable */ \
607 if (__improbable(((_kp)->pkt_pflags & PKT_F_OPT_DATA) != 0)) { \
608 _CASSERT(sizeof(struct __packet_opt) == 32); \
609 ASSERT((_kp)->pkt_pflags & PKT_F_OPT_ALLOC); \
610 sk_copy64_32((uint64_t *)(void *)&(_up)->pkt_com_opt, \
611 (uint64_t *)(void *)(_kp)->pkt_com_opt); \
612 } \
613 } while (0)
614
615 /*
616 * _PKT_EXTERNALIZE externalizes a portion of the packet that's user
617 * visible without including fields that's private to the kernel; at the
618 * moment this is everything from the beginning down to __p_flags,
619 * but no more.
620 *
621 * NOTE: this needs to be adjusted if more user-mutable data is added
622 * after __p_flags. This macro is used only during externalize.
623 */
624 #define _PKT_EXTERNALIZE(_kp, _up) do { \
625 _CASSERT(sizeof(struct __packet) == 32); \
626 _CASSERT(sizeof(struct __packet_com) == 32); \
627 _CASSERT(offsetof(struct __packet, __p_flags) == 24); \
628 /* copy __packet excluding pkt_pflags */ \
629 sk_copy64_24((uint64_t *)(void *)&(_kp)->pkt_com, \
630 (uint64_t *)(void *)&(_up)->pkt_com); \
631 /* copy pkt_pflags excluding kernel bits */ \
632 (_up)->pkt_pflags = ((_kp)->pkt_pflags & PKT_F_USER_MASK); \
633 /* copy (externalize) __packet_opt if applicable */ \
634 if (__improbable(((_kp)->pkt_pflags & PKT_F_OPT_DATA) != 0)) { \
635 _CASSERT(sizeof(struct __packet_opt) == 32); \
636 ASSERT((_kp)->pkt_pflags & PKT_F_OPT_ALLOC); \
637 sk_copy64_32((uint64_t *)(void *)(_kp)->pkt_com_opt, \
638 (uint64_t *)(void *)&(_up)->pkt_com_opt); \
639 } \
640 } while (0)
641
/* decode a packet handle into its kernel quantum/packet pointer */
#define SK_PTR_ADDR_KQUM(_ph)   ((struct __kern_quantum *)SK_PTR_ADDR(_ph))
#define SK_PTR_ADDR_KPKT(_ph)   ((struct __kern_packet *)SK_PTR_ADDR(_ph))
#define SK_PTR_KPKT(_pa)        ((struct __kern_packet *)(void *)(_pa))
/* encode a kernel packet pointer (plus type/subtype) into a handle */
#define SK_PKT2PH(_pkt) \
	(SK_PTR_ENCODE((_pkt), METADATA_TYPE((_pkt)), METADATA_SUBTYPE((_pkt))))
647
648 /*
649 * Set the length of the data to various places: __user_slot_desc,
650 * __kern_quantum, and for a packet, the buflet.
651 * !!! This should be used only for dropping the packet as the macro
652 * is not functionally correct.
653 *
654 * TODO: [email protected] -- maybe finalize here as well?
655 */
656 #define METADATA_SET_LEN(_md, _len, _doff) do { \
657 struct __kern_quantum *_q = \
658 (struct __kern_quantum *)(void *)(_md); \
659 _q->qum_len = (_len); \
660 switch (METADATA_TYPE(_q)) { \
661 case NEXUS_META_TYPE_PACKET: { \
662 struct __kern_packet *_p = \
663 (struct __kern_packet *)(void *)(_md); \
664 struct __kern_buflet *_kbft; \
665 PKT_GET_FIRST_BUFLET(_p, _p->pkt_bufs_cnt, _kbft); \
666 _kbft->buf_dlen = (_len); \
667 _kbft->buf_doff = (_doff); \
668 break; \
669 } \
670 default: \
671 ASSERT(METADATA_TYPE(_q) == NEXUS_META_TYPE_QUANTUM); \
672 _q->qum_buf[0].buf_dlen = (_len); \
673 _q->qum_buf[0].buf_doff = (_doff); \
674 break; \
675 } \
676 } while (0)
677
/*
 * METADATA_ADJUST_LEN: like METADATA_SET_LEN but adds _len to the
 * first buflet's data length instead of overwriting it; the data
 * offset is still overwritten with _doff.  qum_len is not touched.
 */
#define METADATA_ADJUST_LEN(_md, _len, _doff) do {			\
	struct __kern_quantum *_q =					\
	    (struct __kern_quantum *)(void *)(_md);			\
	switch (METADATA_TYPE(_q)) {					\
	case NEXUS_META_TYPE_PACKET: {					\
		struct __kern_packet *_p =				\
		    (struct __kern_packet *)(void *)(_md);		\
		struct __kern_buflet *_kbft;				\
		PKT_GET_FIRST_BUFLET(_p, _p->pkt_bufs_cnt, _kbft);	\
		_kbft->buf_dlen += (_len);				\
		_kbft->buf_doff = (_doff);				\
		break;							\
	}								\
	default:							\
		ASSERT(METADATA_TYPE(_q) == NEXUS_META_TYPE_QUANTUM);	\
		_q->qum_buf[0].buf_dlen += (_len);			\
		_q->qum_buf[0].buf_doff = (_doff);			\
		break;							\
	}								\
} while (0)
698
699 __attribute__((always_inline))
700 static inline kern_packet_t
SD_GET_TAGGED_METADATA(const struct __kern_slot_desc * ksd)701 SD_GET_TAGGED_METADATA(const struct __kern_slot_desc *ksd)
702 {
703 return __improbable(ksd->sd_md == NULL) ? 0 :
704 SK_PTR_ENCODE(ksd->sd_md, METADATA_TYPE(ksd->sd_qum),
705 METADATA_SUBTYPE(ksd->sd_qum));
706 }
707
/*
 * Attach a quantum/packet to a ring slot.  Records the slot descriptor
 * in the quantum, attaches it to the kernel slot descriptor, and --
 * when the ring is visible to user space -- mirrors the metadata index
 * into the corresponding user slot descriptor.  Always returns 0.
 */
__attribute__((always_inline))
static inline errno_t
KR_SLOT_ATTACH_METADATA(const kern_channel_ring_t kring,
    struct __kern_slot_desc *ksd, struct __kern_quantum *kqum)
{
	obj_idx_t idx = KR_SLOT_INDEX(kring,
	    (struct __slot_desc *)(void *)ksd);

	/* Ensure this is only done by the thread doing a sync syscall */
	ASSERT(sk_is_sync_protected());
	ASSERT(kqum->qum_pp == kring->ckr_pp);
	ASSERT(kqum->qum_ksd == NULL);
	/*
	 * Packets being attached to a slot should always be internalized.
	 * Internalized packet should be in finalized or dropped state
	 * (exactly one of the two, hence the XOR).
	 */
	ASSERT(kqum->qum_qflags & QUM_F_INTERNALIZED);
	ASSERT(((kqum->qum_qflags & QUM_F_FINALIZED) != 0) ^
	    ((kqum->qum_qflags & QUM_F_DROPPED) != 0));

	kqum->qum_ksd = ksd;

	KSD_ATTACH_METADATA(ksd, kqum);
	if (!KR_KERNEL_ONLY(kring)) {
		USD_ATTACH_METADATA(KR_USD(kring, idx), METADATA_IDX(kqum));
	}

	return 0;
}
737
/*
 * Detach and return the quantum/packet attached to a ring slot.
 * Clears QUM_F_FINALIZED (the caller must re-finalize the packet) and
 * the slot back-pointer, then detaches the kernel -- and, for
 * user-visible rings, the user -- slot descriptor.
 */
__attribute__((always_inline))
static inline struct __kern_quantum *
KR_SLOT_DETACH_METADATA(const kern_channel_ring_t kring,
    struct __kern_slot_desc *ksd)
{
	struct __kern_quantum *kqum = ksd->sd_qum;
	obj_idx_t idx = KR_SLOT_INDEX(kring,
	    (struct __slot_desc *)(void *)ksd);

	/* Ensure this is only done by the thread doing a sync syscall */
	ASSERT(sk_is_sync_protected());
	ASSERT(KSD_VALID_METADATA(ksd));
	ASSERT(kqum->qum_ksd == ksd);
	ASSERT(kqum->qum_pp == kring->ckr_pp);
	/*
	 * Packets being attached to a slot would always be internalized.
	 * We also detach externalized packets on an rx ring on behalf
	 * of the user space if the channel is not in user packet pool mode.
	 * Externalized packet should be in finalized or dropped state.
	 */
	ASSERT((kqum->qum_qflags & (QUM_F_INTERNALIZED)) ||
	    ((((kqum->qum_qflags & QUM_F_FINALIZED) != 0) ^
	    ((kqum->qum_qflags & QUM_F_DROPPED) != 0))));

	/* detaching requires the packet to be finalized later */
	kqum->qum_qflags &= ~QUM_F_FINALIZED;
	kqum->qum_ksd = NULL;

	KSD_DETACH_METADATA(ksd);
	if (!KR_KERNEL_ONLY(kring)) {
		USD_DETACH_METADATA(KR_USD(kring, idx));
	}

	return kqum;
}
773
/*
 * Attach a stand-alone buflet to a slot of the user packet pool alloc
 * ring, mirroring its buflet index register into the user slot
 * descriptor.  Always returns 0.
 */
__attribute__((always_inline))
static inline errno_t
KR_SLOT_ATTACH_BUF_METADATA(const kern_channel_ring_t kring,
    struct __kern_slot_desc *ksd, struct __kern_buflet *kbuf)
{
	obj_idx_t idx = KR_SLOT_INDEX(kring,
	    (struct __slot_desc *)(void *)ksd);

	/* Ensure this is only done by the thread doing a sync syscall */
	ASSERT(sk_is_sync_protected());

	KSD_ATTACH_METADATA(ksd, kbuf);
	/*
	 * buflet is attached only to the user packet pool alloc ring.
	 */
	ASSERT(!KR_KERNEL_ONLY(kring));
	ASSERT(kring->ckr_tx == CR_KIND_ALLOC);
	USD_ATTACH_METADATA(KR_USD(kring, idx), kbuf->buf_bft_idx_reg);
	return 0;
}
794
#if (DEVELOPMENT || DEBUG)
SYSCTL_DECL(_kern_skywalk_packet);
/* debug knob consumed by pkt_add_trailers*() below -- TODO confirm */
extern int pkt_trailers;
#endif /* DEVELOPMENT || DEBUG */
799
/* function type: copy data from one packet into another */
typedef void (pkt_copy_from_pkt_t)(const enum txrx, kern_packet_t,
    const uint16_t, kern_packet_t, const uint16_t, const uint32_t,
    const boolean_t, const uint16_t, const uint16_t, const boolean_t);

/* function type: copy data from an mbuf into a packet */
typedef void (pkt_copy_from_mbuf_t)(const enum txrx, kern_packet_t,
    const uint16_t, struct mbuf *, const uint16_t, const uint32_t,
    const boolean_t, const uint16_t);

/* function type: copy data from a packet into an mbuf */
typedef void (pkt_copy_to_mbuf_t)(const enum txrx, kern_packet_t,
    const uint16_t, struct mbuf *, const uint16_t, const uint32_t,
    const boolean_t, const uint16_t);
811
__BEGIN_DECLS
/* assertion-failure reporters for packet type/subtype mismatches */
extern void pkt_subtype_assert_fail(const kern_packet_t, uint64_t, uint64_t);
extern void pkt_type_assert_fail(const kern_packet_t, uint64_t);

/* single- and multi-buflet implementations of the copy routines above */
extern pkt_copy_from_pkt_t pkt_copy_from_pkt;
extern pkt_copy_from_pkt_t pkt_copy_multi_buflet_from_pkt;
extern pkt_copy_from_mbuf_t pkt_copy_from_mbuf;
extern pkt_copy_from_mbuf_t pkt_copy_multi_buflet_from_mbuf;
extern pkt_copy_to_mbuf_t pkt_copy_to_mbuf;
extern pkt_copy_to_mbuf_t pkt_copy_multi_buflet_to_mbuf;

/* copy helpers that also compute (partial) checksums */
extern void pkt_copypkt_sum(kern_packet_t, uint16_t, kern_packet_t,
    uint16_t, uint16_t, uint32_t *, boolean_t);
extern uint32_t
pkt_copyaddr_sum(kern_packet_t sph, uint16_t soff, uint8_t *dbaddr,
    uint16_t len, boolean_t do_csum, uint32_t initial_sum, boolean_t *odd_start);
extern uint32_t pkt_sum(kern_packet_t, uint16_t, uint16_t);
extern uint32_t pkt_mcopypkt_sum(mbuf_t, int, kern_packet_t, uint16_t,
    uint16_t, boolean_t);
extern uint32_t
m_copydata_sum(struct mbuf *m, int off, int len, void *vp, uint32_t initial_sum,
    boolean_t *odd_start);
extern void pkt_copy(void *src, void *dst, size_t len);

#if (DEVELOPMENT || DEBUG)
/* debug-only helpers that append trailer bytes to packets/mbufs */
extern uint32_t pkt_add_trailers(kern_packet_t, const uint32_t, const uint16_t);
extern uint32_t pkt_add_trailers_mbuf(struct mbuf *, const uint16_t);
#endif /* DEVELOPMENT || DEBUG */
__END_DECLS
841 #endif /* BSD_KERNEL_PRIVATE */
842 #endif /* !_SKYWALK_PACKET_PACKETVAR_H_ */
843