xref: /xnu-11417.140.69/libsyscall/wrappers/skywalk/os_channel.c (revision 43a90889846e00bfb5cf1d255cdc0a701a1e05a4)
1 /*
2  * Copyright (c) 2015-2023 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 
30 #include <stdlib.h>
31 #include <stddef.h>
32 #include <string.h>
33 #include <strings.h>
34 #include <unistd.h>
35 #include <errno.h>
36 #include <os/atomic_private.h>
37 #include <skywalk/os_skywalk_private.h>
38 #include <skywalk/os_packet_private.h>
39 
40 #ifndef LIBSYSCALL_INTERFACE
41 #error "LIBSYSCALL_INTERFACE not defined"
42 #endif /* !LIBSYSCALL_INTERFACE */
43 
44 /*
45  * Defined here as we don't have Libc
46  */
47 extern int __getpid(void);
48 extern int __kill(int pid, int signum, int posix);
49 extern int __exit(int) __attribute__((noreturn));
50 
51 static ring_id_t _ring_id(struct ch_info *cinfo, const ring_id_type_t type);
52 static void os_channel_info2attr(struct channel *chd, channel_attr_t cha);
53 static int _flowadv_id_equal(struct __flowadv_entry *, uuid_t);
54 
/*
 * This is pretty much what an inlined memcmp() would do for UUID
 * comparison; since we don't have access to memcmp() here, we
 * manually handle it ourselves.
 */
#define UUID_COMPARE(a, b)                                                  \
	(a[0] == b[0] && a[1] == b[1] && a[2] == b[2] && a[3] == b[3] &&    \
	a[4] == b[4] && a[5] == b[5] && a[6] == b[6] && a[7] == b[7] &&     \
	a[8] == b[8] && a[9] == b[9] && a[10] == b[10] && a[11] == b[11] && \
	a[12] == b[12] && a[13] == b[13] && a[14] == b[14] && a[15] == b[15])

/* index of slot descriptor _slot within the ring's descriptor array */
#define _SLOT_INDEX(_chrd, _slot)                                       \
	((slot_idx_t)((_slot - (_chrd)->chrd_slot_desc)))

/* user slot descriptor at ring index _idx */
#define _SLOT_DESC(_chrd, _idx)                                         \
	(SLOT_DESC_USD(&(_chrd)->chrd_slot_desc[_idx]))

/* address of metadata object _midx (just past its preamble) */
#define _METADATA(_chrd, _ring, _midx)                                  \
	((void *)((_chrd)->chrd_md_base_addr +                          \
	((_midx) * (_ring)->ring_md_size) + METADATA_PREAMBLE_SZ))

/* metadata object attached to the slot at index _idx */
#define _SLOT_METADATA(_chrd, _ring, _idx)                              \
	_METADATA(_chrd, _ring, _SLOT_DESC(_chrd, _idx)->sd_md_idx)

/* abort unless _md is the metadata at index _midx (tolerated when defunct) */
#define _SLOT_METADATA_IDX_VERIFY(_chrd, _md, _midx)    do {            \
	if (__improbable((_md) != _METADATA((_chrd), (_chrd)->chrd_ring, \
	    (_midx))) && !_CHANNEL_RING_IS_DEFUNCT(_chrd)) {            \
	        SK_ABORT_WITH_CAUSE("bad packet handle", (_midx));      \
	/* NOTREACHED */                                                \
	        __builtin_unreachable();                                \
	}                                                               \
} while (0)

/* registered buflet index of _bft */
#define _BFT_INDEX(_chrd, _bft) (_bft)->buf_bft_idx_reg

/* buflet metadata attached to the slot at index _idx */
#define _SLOT_BFT_METADATA(_chrd, _ring, _idx)                          \
	_CHANNEL_RING_BFT(_chrd, _ring, _SLOT_DESC(_chrd, _idx)->sd_md_idx)

/* abort unless _md is the buflet at index _midx (tolerated when defunct) */
#define _SLOT_BFT_METADATA_IDX_VERIFY(_chrd, _md, _midx)    do {        \
	if (__improbable((mach_vm_address_t)(_md) !=                    \
	    _CHANNEL_RING_BFT((_chrd), (_chrd)->chrd_ring, (_midx))) && \
	    !_CHANNEL_RING_IS_DEFUNCT(_chrd)) {                         \
	        SK_ABORT_WITH_CAUSE("bad buflet handle", (_midx));      \
	/* NOTREACHED */                                                \
	        __builtin_unreachable();                                \
	}                                                               \
} while (0)

/* abort if the slot descriptor has no valid metadata (tolerated when defunct) */
#define _SLOT_DESC_VERIFY(_chrd, _sdp) do {                             \
	if (__improbable(!SD_VALID_METADATA(_sdp)) &&                   \
	    !_CHANNEL_RING_IS_DEFUNCT(_chrd)) {                         \
	        SK_ABORT("Slot descriptor has no metadata");            \
	/* NOTREACHED */                                        \
	        __builtin_unreachable();                                \
	}                                                               \
} while (0)

/*
 * Abort if the metadata redzone does not match the per-process cookie
 * (see __os_ch_md_redzone_cookie below); tolerated when defunct.
 */
#define _METADATA_VERIFY(_chrd, _md) do {                               \
	if (__improbable(METADATA_PREAMBLE(_md)->mdp_redzone !=         \
	    (((mach_vm_address_t)(_md) - (_chrd)->chrd_md_base_addr) ^  \
	    __os_ch_md_redzone_cookie)) &&                              \
	    !_CHANNEL_RING_IS_DEFUNCT(_chrd)) {                         \
	        SK_ABORT_WITH_CAUSE("Metadata redzone corrupted",       \
	            METADATA_PREAMBLE(_md)->mdp_redzone);               \
	/* NOTREACHED */                                        \
	        __builtin_unreachable();                                \
	}                                                               \
} while (0)

/* sanity-check a packet's buflet count against the channel's maximum */
#define _PKT_BUFCNT_VERIFY(_chrd, _bcnt, _bmax) do {                    \
	if (__improbable((_chrd)->chrd_max_bufs < (_bmax))) {           \
	        SK_ABORT_WITH_CAUSE("Invalid max bufcnt", (_bmax));     \
	/* NOTREACHED */                                                \
	        __builtin_unreachable();                                \
	}                                                               \
	if (__improbable((_bcnt) > (_bmax))) {                          \
	        SK_ABORT_WITH_CAUSE("Invalid bufcnt", (_bcnt));         \
	/* NOTREACHED */                                                \
	        __builtin_unreachable();                                \
	}                                                               \
} while (0)

/* scratch-buffer size for the schema-mismatch abort message below */
#define _ABORT_MSGSZ    1024

/*
 * Abort with a descriptive message if the schema region advertised by
 * the kernel does not match the version this library was built against.
 */
#define _SCHEMA_VER_VERIFY(_chd) do {                                   \
	/* ensure all stores are globally visible */                    \
	os_atomic_thread_fence(seq_cst);                                \
	if (CHD_SCHEMA(_chd)->csm_ver != CSM_CURRENT_VERSION)	{       \
	        char *_msg = malloc(_ABORT_MSGSZ);                      \
	        uint32_t _ver = (uint32_t)CHD_SCHEMA(_chd)->csm_ver;    \
	/* we're stuck with %x and %s formatters */             \
	        (void) _mach_snprintf(_msg, _ABORT_MSGSZ,               \
	            "Schema region version mismatch: 0x%x != 0x%x\n"    \
	            "Kernel version: %s - did you forget to install "   \
	            "a matching libsystem_kernel.dylib?\n"              \
	            "Kernel UUID: %x%x%x%x-%x%x-%x%x-%x%x-%x%x%x%x%x%x", \
	            _ver, (uint32_t)CSM_CURRENT_VERSION,                \
	            CHD_SCHEMA(_chd)->csm_kern_name,                    \
	            CHD_SCHEMA(_chd)->csm_kern_uuid[0],                 \
	            CHD_SCHEMA(_chd)->csm_kern_uuid[1],                 \
	            CHD_SCHEMA(_chd)->csm_kern_uuid[2],                 \
	            CHD_SCHEMA(_chd)->csm_kern_uuid[3],                 \
	            CHD_SCHEMA(_chd)->csm_kern_uuid[4],                 \
	            CHD_SCHEMA(_chd)->csm_kern_uuid[5],                 \
	            CHD_SCHEMA(_chd)->csm_kern_uuid[6],                 \
	            CHD_SCHEMA(_chd)->csm_kern_uuid[7],                 \
	            CHD_SCHEMA(_chd)->csm_kern_uuid[8],                 \
	            CHD_SCHEMA(_chd)->csm_kern_uuid[9],                 \
	            CHD_SCHEMA(_chd)->csm_kern_uuid[10],                \
	            CHD_SCHEMA(_chd)->csm_kern_uuid[11],                \
	            CHD_SCHEMA(_chd)->csm_kern_uuid[12],                \
	            CHD_SCHEMA(_chd)->csm_kern_uuid[13],                \
	            CHD_SCHEMA(_chd)->csm_kern_uuid[14],                \
	            CHD_SCHEMA(_chd)->csm_kern_uuid[15]);               \
	        SK_ABORT_DYNAMIC(_msg);                                 \
	/* NOTREACHED */                                        \
	        __builtin_unreachable();                                \
	}                                                               \
} while (0)

/* attach metadata object _md_idx to user slot descriptor _usd */
#define _SLOT_ATTACH_METADATA(_usd, _md_idx) do {                       \
	(_usd)->sd_md_idx = (_md_idx);                                  \
	(_usd)->sd_flags |= SD_IDX_VALID;                               \
} while (0)

/* detach any metadata from user slot descriptor _usd */
#define _SLOT_DETACH_METADATA(_usd) do	{                               \
	(_usd)->sd_md_idx = OBJ_IDX_NONE;                               \
	(_usd)->sd_flags &= ~SD_IDX_VALID;                              \
} while (0)

/* typed pointer at byte offset _offset from _ptr */
#define _CHANNEL_OFFSET(_type, _ptr, _offset)                           \
	((_type)(void *)((uintptr_t)(_ptr) + (_offset)))

/* user channel schema located _off bytes into the mapped region */
#define _CHANNEL_SCHEMA(_base, _off)                                    \
	_CHANNEL_OFFSET(struct __user_channel_schema *, _base, _off)

/* address of default-sized buffer _idx */
#define _CHANNEL_RING_DEF_BUF(_chrd, _ring, _idx)                       \
	((_chrd)->chrd_def_buf_base_addr +                              \
	((_idx) * (_ring)->ring_def_buf_size))

/* address of large buffer _idx */
#define _CHANNEL_RING_LARGE_BUF(_chrd, _ring, _idx)                     \
	((_chrd)->chrd_large_buf_base_addr +                            \
	((_idx) * (_ring)->ring_large_buf_size))

/* address of the buffer backing buflet _bft (large or default pool) */
#define _CHANNEL_RING_BUF(_chrd, _ring, _bft)                           \
	BUFLET_HAS_LARGE_BUF(_bft) ?                                    \
	_CHANNEL_RING_LARGE_BUF(_chrd, _ring, (_bft)->buf_idx) :        \
	_CHANNEL_RING_DEF_BUF(_chrd, _ring, (_bft)->buf_idx)

/* address of buflet metadata object _idx */
#define _CHANNEL_RING_BFT(_chrd, _ring, _idx)                           \
	((_chrd)->chrd_bft_base_addr + ((_idx) * (_ring)->ring_bft_size))

/* next slot index, wrapping at the end of the ring */
#define _CHANNEL_RING_NEXT(_ring, _cur)                                 \
	(__improbable((_cur) + 1 == (_ring)->ring_num_slots) ? 0 : (_cur) + 1)

/* a ring is defunct once CSM_ACTIVE is cleared in the shared schema flags */
#define _CHANNEL_RING_IS_DEFUNCT(_chrd)                                 \
	(!(*(_chrd)->chrd_csm_flags & CSM_ACTIVE))

/* a channel is defunct once CSM_ACTIVE is cleared in the shared schema flags */
#define _CHANNEL_IS_DEFUNCT(_chd)                                       \
	(!(CHD_SCHEMA(_chd)->csm_flags & CSM_ACTIVE))

/*
 * Locate a packet's first buflet: the embedded one when it has a buffer
 * index, otherwise the first chained external buflet, otherwise NULL.
 */
#define _CH_PKT_GET_FIRST_BUFLET(_pkt, _bft, _chrd, _ring) do {         \
	if (__probable((_pkt)->pkt_qum_buf.buf_idx != OBJ_IDX_NONE)) {  \
	        (_bft) = &(_pkt)->pkt_qum_buf;                          \
	} else if ((_pkt)->pkt_qum_buf.buf_nbft_idx != OBJ_IDX_NONE) {  \
	        (_bft) = _CHANNEL_RING_BFT(_chrd, _ring,                \
	            (_pkt)->pkt_qum_buf.buf_nbft_idx);                  \
	} else {                                                        \
	        (_bft) = NULL;                                          \
	}                                                               \
} while (0)

/*
 * A per process copy of the channel metadata redzone cookie.
 * Captured from the first channel's schema (see os_channel_create_extended)
 * and consumed by _METADATA_VERIFY() above.
 */
__attribute__((visibility("hidden")))
static uint64_t __os_ch_md_redzone_cookie = 0;
232 
233 __attribute__((always_inline, visibility("hidden")))
234 static inline uint32_t
_num_tx_rings(struct ch_info * ci)235 _num_tx_rings(struct ch_info *ci)
236 {
237 	ring_id_t first, last;
238 
239 	first = _ring_id(ci, CHANNEL_FIRST_TX_RING);
240 	last = _ring_id(ci, CHANNEL_LAST_TX_RING);
241 
242 	return (last - first) + 1;
243 }
244 
245 __attribute__((always_inline, visibility("hidden")))
246 static inline uint32_t
_num_rx_rings(struct ch_info * ci)247 _num_rx_rings(struct ch_info *ci)
248 {
249 	ring_id_t first, last;
250 
251 	first = _ring_id(ci, CHANNEL_FIRST_RX_RING);
252 	last = _ring_id(ci, CHANNEL_LAST_RX_RING);
253 
254 	return (last - first) + 1;
255 }
256 
257 __attribute__((always_inline, visibility("hidden")))
258 static inline uint32_t
_num_allocator_rings(const struct __user_channel_schema * csm)259 _num_allocator_rings(const struct __user_channel_schema *csm)
260 {
261 	return csm->csm_allocator_ring_pairs << 1;
262 }
263 
/*
 * Populate one user-space ring descriptor (chrd) for channel chd:
 * locate the shared ring structure and slot-descriptor array via the
 * schema's per-ring offsets, validate the metadata type/subtype, and
 * record the base addresses of the ring's buffer/metadata/slot/buflet
 * arenas for later pointer arithmetic by the _CHANNEL_RING_* macros.
 * Aborts the process on an invalid schema or unknown metadata type.
 */
__attribute__((visibility("hidden")))
static void
os_channel_init_ring(struct channel_ring_desc *chrd,
    struct channel *chd, uint32_t ring_index)
{
	struct __user_channel_schema *csm = CHD_SCHEMA(chd);
	struct __user_channel_ring *ring = NULL;
	struct __slot_desc *sd = NULL;
	nexus_meta_type_t md_type;
	nexus_meta_subtype_t md_subtype;

	/* ring and slot-descriptor arrays live at schema-relative offsets */
	ring = _CHANNEL_OFFSET(struct __user_channel_ring *, csm,
	    csm->csm_ring_ofs[ring_index].ring_off);
	sd = _CHANNEL_OFFSET(struct __slot_desc *, csm,
	    csm->csm_ring_ofs[ring_index].sd_off);
	md_type = csm->csm_md_type;
	md_subtype = csm->csm_md_subtype;

	if (ring == NULL || sd == NULL) {
		SK_ABORT("Channel schema not valid");
		/* NOTREACHED */
		__builtin_unreachable();
	} else if (!(md_type == NEXUS_META_TYPE_QUANTUM ||
	    md_type == NEXUS_META_TYPE_PACKET)) {
		SK_ABORT_WITH_CAUSE("Metadata type unknown", md_type);
		/* NOTREACHED */
		__builtin_unreachable();
	} else if (!(md_subtype == NEXUS_META_SUBTYPE_PAYLOAD ||
	    md_subtype == NEXUS_META_SUBTYPE_RAW)) {
		SK_ABORT_WITH_CAUSE("Metadata subtype unknown", md_subtype);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	chrd->chrd_slot_desc = sd;
	chrd->chrd_csm_flags = &chd->chd_schema->csm_flags;
	/* const overrides: fields declared const are written exactly once here */
	*(struct channel **)(uintptr_t)&chrd->chrd_channel = chd;
	*(struct __user_channel_ring **)(uintptr_t)&chrd->chrd_ring = ring;
	*(nexus_meta_type_t *)(uintptr_t)&chrd->chrd_md_type = md_type;
	*(nexus_meta_subtype_t *)(uintptr_t)&chrd->chrd_md_subtype = md_subtype;
	*(mach_vm_address_t *)(uintptr_t)&chrd->chrd_shmem_base_addr =
	    CHD_INFO(chd)->cinfo_mem_base;
	/* arena base addresses are all relative to the shared ring structure */
	*(mach_vm_address_t *)(uintptr_t)&chrd->chrd_def_buf_base_addr =
	    (mach_vm_address_t)((uintptr_t)ring + ring->ring_def_buf_base);
	*(mach_vm_address_t *)(uintptr_t)&chrd->chrd_md_base_addr =
	    (mach_vm_address_t)((uintptr_t)ring + ring->ring_md_base);
	*(mach_vm_address_t *)(uintptr_t)&chrd->chrd_sd_base_addr =
	    (mach_vm_address_t)((uintptr_t)ring + ring->ring_sd_base);
	*(mach_vm_address_t *)(uintptr_t)&chrd->chrd_bft_base_addr =
	    (mach_vm_address_t)((uintptr_t)ring + ring->ring_bft_base);
	*(mach_vm_address_t *)(uintptr_t)&chrd->chrd_large_buf_base_addr =
	    (mach_vm_address_t)((uintptr_t)ring + ring->ring_large_buf_base);
	*(uint32_t *)(uintptr_t)&chrd->chrd_max_bufs =
	    CHD_PARAMS(chd)->nxp_max_frags;
}
320 
321 __attribute__((always_inline, visibility("hidden")))
322 static inline mach_vm_address_t
_initialize_metadata_address(const channel_ring_t chrd,struct __user_quantum * q,uint16_t * bdoff)323 _initialize_metadata_address(const channel_ring_t chrd,
324     struct __user_quantum *q, uint16_t *bdoff)
325 {
326 	int i;
327 	struct __user_buflet *ubft0;
328 	const struct __user_channel_ring *ring = chrd->chrd_ring;
329 
330 	switch (chrd->chrd_md_type) {
331 	case NEXUS_META_TYPE_PACKET: {
332 		struct __user_buflet *ubft, *pbft;
333 		struct __user_packet *p = (struct __user_packet *)q;
334 		uint16_t bcnt = p->pkt_bufs_cnt;
335 		uint16_t bmax = p->pkt_bufs_max;
336 
337 		_CASSERT(sizeof(p->pkt_qum_buf.buf_addr) ==
338 		    sizeof(mach_vm_address_t));
339 		/*
340 		 * In the event of a defunct, we'd be accessing zero-filled
341 		 * memory and end up with 0 for bcnt or bmax.
342 		 */
343 		if (__improbable((bcnt == 0) || (bmax == 0))) {
344 			if (!_CHANNEL_RING_IS_DEFUNCT(chrd)) {
345 				SK_ABORT("bad bufcnt");
346 				/* NOTREACHED */
347 				__builtin_unreachable();
348 			}
349 			return 0;
350 		}
351 		_PKT_BUFCNT_VERIFY(chrd, bcnt, bmax);
352 		_CH_PKT_GET_FIRST_BUFLET(p, ubft, chrd, ring);
353 		if (__improbable(ubft == NULL)) {
354 			SK_ABORT("bad packet: no buflet");
355 			/* NOTREACHED */
356 			__builtin_unreachable();
357 		}
358 		/*
359 		 * special handling for empty packet buflet.
360 		 */
361 		if (__improbable(p->pkt_qum_buf.buf_idx == OBJ_IDX_NONE)) {
362 			*__DECONST(mach_vm_address_t *,
363 			    &p->pkt_qum_buf.buf_addr) = 0;
364 			*__DECONST(mach_vm_address_t *,
365 			    &p->pkt_qum_buf.buf_nbft_addr) =
366 			    (mach_vm_address_t)ubft;
367 		}
368 		ubft0 = ubft;
369 		for (i = 0; (i < bcnt) && (ubft != NULL); i++) {
370 			pbft = ubft;
371 			if (__probable(pbft->buf_idx != OBJ_IDX_NONE)) {
372 				*(mach_vm_address_t *)(uintptr_t)
373 				&(pbft->buf_addr) = _CHANNEL_RING_BUF(chrd,
374 				    ring, pbft);
375 			} else {
376 				*(mach_vm_address_t *)(uintptr_t)
377 				&(pbft->buf_addr) = NULL;
378 			}
379 			if (pbft->buf_nbft_idx != OBJ_IDX_NONE) {
380 				ubft = _CHANNEL_RING_BFT(chrd, ring,
381 				    pbft->buf_nbft_idx);
382 			} else {
383 				ubft = NULL;
384 			}
385 			*__DECONST(mach_vm_address_t *, &pbft->buf_nbft_addr) =
386 			    (mach_vm_address_t)ubft;
387 		}
388 		if (__improbable(pbft->buf_nbft_idx != OBJ_IDX_NONE)) {
389 			if (!_CHANNEL_RING_IS_DEFUNCT(chrd)) {
390 				SK_ABORT("non terminating buflet chain");
391 				/* NOTREACHED */
392 				__builtin_unreachable();
393 			}
394 			return 0;
395 		}
396 		if (__improbable(i != bcnt)) {
397 			SK_ABORT_WITH_CAUSE("invalid buflet count", bcnt);
398 			/* NOTREACHED */
399 			__builtin_unreachable();
400 		}
401 		break;
402 	}
403 	default:
404 		ubft0 = &q->qum_buf[0];
405 		_CASSERT(sizeof(q->qum_buf[0].buf_addr) ==
406 		    sizeof(mach_vm_address_t));
407 		/* immutable: compute pointers from the index */
408 		*(mach_vm_address_t *)(uintptr_t)&ubft0->buf_addr =
409 		    _CHANNEL_RING_BUF(chrd, ring, ubft0);
410 		break;
411 	}
412 
413 	/* return address and offset of the first buffer */
414 	*bdoff = ubft0->buf_doff;
415 	return ubft0->buf_addr;
416 }
417 
418 /*
419  * _slot_index_is_valid
420  * - verify that the slot index is within valid bounds
421  * - if the head is less than (or equal to) the tail (case A below)
422  *	head <= valid < tail
423  * - if the head is greater than the tail (case B below)
424  *      valid < tail
425  *    or
426  *	head <= valid < num_slots
427  *
428  * case A: x x x x x x x H o o o o o T x x x x x x
429  * case B: o o o o o T x x x x H o o o o o o o o o
430  *
431  * 'H' - head
432  * 'T' - tail
433  * 'x' - invalid
434  * 'o' - valid
435  */
436 __attribute__((always_inline, visibility("hidden")))
437 static inline int
_slot_index_is_valid(const struct __user_channel_ring * ring,slot_idx_t idx)438 _slot_index_is_valid(const struct __user_channel_ring *ring, slot_idx_t idx)
439 {
440 	int     is_valid = 0;
441 
442 	if (ring->ring_head <= ring->ring_tail) {
443 		if (__probable(idx >= ring->ring_head && idx < ring->ring_tail)) {
444 			is_valid = 1;
445 		}
446 	} else {
447 		if (__probable(idx < ring->ring_tail ||
448 		    (idx >= ring->ring_head && idx < ring->ring_num_slots))) {
449 			is_valid = 1;
450 		}
451 	}
452 
453 	return is_valid;
454 }
455 
/*
 * Open a channel to the nexus instance identified by uuid at nexus port
 * `port', for direction `dir' and ring selection `ring', with optional
 * attributes `cha'.  On success returns a newly allocated channel whose
 * shared rings have been initialized; on failure returns NULL with errno
 * set.  The returned channel owns its ch_info and its guarded fd; both
 * are released by os_channel_destroy().
 */
channel_t
os_channel_create_extended(const uuid_t uuid, const nexus_port_t port,
    const ring_dir_t dir, const ring_id_t ring, const channel_attr_t cha)
{
	uint32_t num_tx_rings, num_rx_rings, num_allocator_rings;
	uint32_t ring_offset, ring_index, num_event_rings, num_large_buf_alloc_rings;
	struct __user_channel_schema *ucs;
	struct channel *chd = NULL;
	struct ch_info *ci = NULL;
	struct ch_init init;
	int i, fd = -1;
	int err = 0;
	size_t chd_sz;

	SK_ALIGN64_CASSERT(struct ch_info, cinfo_mem_map_size);

	/* validate requested direction up front */
	switch (dir) {
	case CHANNEL_DIR_TX_RX:
	case CHANNEL_DIR_TX:
	case CHANNEL_DIR_RX:
		break;
	default:
		err = EINVAL;
		goto done;
	}

	ci = malloc(CHD_INFO_SIZE);
	if (ci == NULL) {
		err = errno = ENOMEM;
		goto done;
	}
	bzero(ci, CHD_INFO_SIZE);

	/* translate the caller's attributes into channel-open mode flags */
	bzero(&init, sizeof(init));
	init.ci_version = CHANNEL_INIT_CURRENT_VERSION;
	if (cha != NULL) {
		if (cha->cha_exclusive != 0) {
			init.ci_ch_mode |= CHMODE_EXCLUSIVE;
		}
		if (cha->cha_user_packet_pool != 0) {
			init.ci_ch_mode |= CHMODE_USER_PACKET_POOL;
		}
		if (cha->cha_nexus_defunct_ok != 0) {
			init.ci_ch_mode |= CHMODE_DEFUNCT_OK;
		}
		if (cha->cha_enable_event_ring != 0) {
			/* User packet pool is required for event rings */
			if (cha->cha_user_packet_pool == 0) {
				err = EINVAL;
				goto done;
			}
			init.ci_ch_mode |= CHMODE_EVENT_RING;
		}
		if (cha->cha_monitor != 0) {
			if (dir == CHANNEL_DIR_TX_RX) {
				init.ci_ch_mode |= CHMODE_MONITOR;
			} else if (dir == CHANNEL_DIR_TX) {
				init.ci_ch_mode |= CHMODE_MONITOR_TX;
			} else if (dir == CHANNEL_DIR_RX) {
				init.ci_ch_mode |= CHMODE_MONITOR_RX;
			}
			if (cha->cha_monitor == CHANNEL_MONITOR_NO_COPY) {
				init.ci_ch_mode |= CHMODE_MONITOR_NO_COPY;
			}
		}
		if (cha->cha_filter != 0) {
			init.ci_ch_mode |= CHMODE_FILTER;
		}
		if (cha->cha_low_latency != 0) {
			init.ci_ch_mode |= CHMODE_LOW_LATENCY;
		}
		init.ci_key_len = cha->cha_key_len;
		init.ci_key = cha->cha_key;
		init.ci_tx_lowat = cha->cha_tx_lowat;
		init.ci_rx_lowat = cha->cha_rx_lowat;
	}
	init.ci_ch_ring_id = ring;
	init.ci_nx_port = port;
	bcopy(uuid, init.ci_nx_uuid, sizeof(uuid_t));

	/* open the channel; the kernel maps the shared region for us */
	fd = __channel_open(&init, sizeof(init));
	if (fd == -1) {
		err = errno;
		goto done;
	}

	err = __channel_get_info(fd, ci, CHD_INFO_SIZE);
	if (err != 0) {
		err = errno;
		goto done;
	}

	/* locate the schema inside the mapped region and count the rings */
	ucs = _CHANNEL_SCHEMA(ci->cinfo_mem_base, ci->cinfo_schema_offset);
	num_tx_rings = _num_tx_rings(ci);       /* # of channel tx rings */
	num_rx_rings = _num_rx_rings(ci);       /* # of channel rx rings */
	num_allocator_rings = _num_allocator_rings(ucs);
	num_event_rings = ucs->csm_num_event_rings;
	num_large_buf_alloc_rings = ucs->csm_large_buf_alloc_rings;

	/*
	 * if the user requested packet allocation mode for channel, then
	 * check that channel was opened in packet allocation mode and
	 * allocator rings were created.
	 */
	if ((init.ci_ch_mode & CHMODE_USER_PACKET_POOL) &&
	    ((num_allocator_rings < 2) ||
	    !(ci->cinfo_ch_mode & CHMODE_USER_PACKET_POOL))) {
		err = errno = ENXIO;
		goto done;
	}

	/* same check for event rings: the kernel must have honored the mode */
	if ((init.ci_ch_mode & CHMODE_EVENT_RING) && ((num_event_rings == 0) ||
	    !(ci->cinfo_ch_mode & CHMODE_EVENT_RING))) {
		err = errno = ENXIO;
		goto done;
	}

	/* channel descriptor is sized for the total number of rings */
	chd_sz = CHD_SIZE(num_tx_rings + num_rx_rings + num_allocator_rings +
	    num_event_rings + num_large_buf_alloc_rings);
	chd = malloc(chd_sz);
	if (chd == NULL) {
		err = errno = ENOMEM;
		goto done;
	}

	bzero(chd, chd_sz);
	chd->chd_fd = fd;
	chd->chd_guard = init.ci_guard;

	/* claim ch_info (will be freed along with the channel itself) */
	CHD_INFO(chd) = ci;
	ci = NULL;

	/* const override */
	*(struct __user_channel_schema **)(uintptr_t)&chd->chd_schema = ucs;

	/* make sure we're running on the right kernel */
	_SCHEMA_VER_VERIFY(chd);

	*(nexus_meta_type_t *)&chd->chd_md_type = CHD_SCHEMA(chd)->csm_md_type;
	*(nexus_meta_subtype_t *)&chd->chd_md_subtype =
	    CHD_SCHEMA(chd)->csm_md_subtype;

	/* optional shared regions, present only when their offsets are nonzero */
	if (CHD_SCHEMA(chd)->csm_stats_ofs != 0) {
		*(void **)(uintptr_t)&chd->chd_nx_stats =
		    _CHANNEL_OFFSET(void *, CHD_INFO(chd)->cinfo_mem_base,
		    CHD_SCHEMA(chd)->csm_stats_ofs);
	}

	if (CHD_SCHEMA(chd)->csm_flowadv_ofs != 0) {
		*(struct __flowadv_entry **)(uintptr_t)&chd->chd_nx_flowadv =
		    _CHANNEL_OFFSET(struct __flowadv_entry *,
		    CHD_INFO(chd)->cinfo_mem_base,
		    CHD_SCHEMA(chd)->csm_flowadv_ofs);
	}

	if (CHD_SCHEMA(chd)->csm_nexusadv_ofs != 0) {
		struct __kern_nexus_adv_metadata *adv_md;

		*(struct __kern_nexus_adv_metadata **)
		(uintptr_t)&chd->chd_nx_adv =
		    _CHANNEL_OFFSET(struct __kern_nexus_adv_metadata *,
		    CHD_INFO(chd)->cinfo_mem_base,
		    CHD_SCHEMA(chd)->csm_nexusadv_ofs);
		adv_md = CHD_NX_ADV_MD(chd);
		/* advisory version checks are skipped once the channel is defunct */
		if (adv_md->knam_version != NX_ADVISORY_MD_CURRENT_VERSION &&
		    !_CHANNEL_IS_DEFUNCT(chd)) {
			SK_ABORT_WITH_CAUSE("nexus advisory metadata version"
			    " mismatch", NX_ADVISORY_MD_CURRENT_VERSION);
			/* NOTREACHED */
			__builtin_unreachable();
		}
		if (chd->chd_nx_adv->knam_type == NEXUS_ADVISORY_TYPE_NETIF) {
			struct netif_nexus_advisory *netif_adv;
			netif_adv = CHD_NX_ADV_NETIF(adv_md);
			if (netif_adv->nna_version !=
			    NX_NETIF_ADVISORY_CURRENT_VERSION &&
			    !_CHANNEL_IS_DEFUNCT(chd)) {
				SK_ABORT_WITH_CAUSE("nexus advisory "
				    "version mismatch for netif",
				    NX_NETIF_ADVISORY_CURRENT_VERSION);
				/* NOTREACHED */
				__builtin_unreachable();
			}
		} else if (chd->chd_nx_adv->knam_type ==
		    NEXUS_ADVISORY_TYPE_FLOWSWITCH) {
			struct sk_nexusadv *fsw_adv;
			fsw_adv = CHD_NX_ADV_FSW(adv_md);
			if (fsw_adv->nxadv_ver !=
			    NX_FLOWSWITCH_ADVISORY_CURRENT_VERSION &&
			    !_CHANNEL_IS_DEFUNCT(chd)) {
				SK_ABORT_WITH_CAUSE("nexus advisory "
				    "version mismatch for flowswitch",
				    NX_FLOWSWITCH_ADVISORY_CURRENT_VERSION);
				/* NOTREACHED */
				__builtin_unreachable();
			}
		} else if (!_CHANNEL_IS_DEFUNCT(chd)) {
			SK_ABORT_WITH_CAUSE("nexus advisory metadata type"
			    " unknown", NX_ADVISORY_MD_CURRENT_VERSION);
			/* NOTREACHED */
			__builtin_unreachable();
		}
	}

	/* reflect the kernel's view of the attributes back to the caller */
	if (cha != NULL) {
		os_channel_info2attr(chd, cha);
	}

	/*
	 * Initialize ring descriptors in schema order:
	 * tx, rx, allocator, event, then large-buffer-allocator rings.
	 */
	ring_offset = 0;
	for (i = 0; i < num_tx_rings; i++) {
		ring_index = ring_offset + i;
		os_channel_init_ring(&chd->chd_rings[ring_index], chd,
		    ring_index);
	}

	ring_offset += num_tx_rings;
	for (i = 0; i < num_rx_rings; i++) {
		ring_index = ring_offset + i;
		os_channel_init_ring(&chd->chd_rings[ring_index], chd,
		    ring_index);
	}

	ring_offset += num_rx_rings;
	for (i = 0; i < num_allocator_rings; i++) {
		ring_index = ring_offset + i;
		os_channel_init_ring(&chd->chd_rings[ring_index], chd,
		    ring_index);
	}

	ring_offset += num_allocator_rings;
	for (i = 0; i < num_event_rings; i++) {
		ring_index = ring_offset + i;
		os_channel_init_ring(&chd->chd_rings[ring_index], chd,
		    ring_index);
	}

	ring_offset += num_event_rings;
	for (i = 0; i < num_large_buf_alloc_rings; i++) {
		ring_index = ring_offset + i;
		os_channel_init_ring(&chd->chd_rings[ring_index], chd,
		    ring_index);
	}

	/* record which ring indices serve the user packet pool, if any */
	if (init.ci_ch_mode & CHMODE_USER_PACKET_POOL) {
		chd->chd_sync_flags = CHANNEL_SYNCF_ALLOC | CHANNEL_SYNCF_FREE;
		*__DECONST(uint8_t *, &chd->chd_alloc_ring_idx) =
		    num_tx_rings + num_rx_rings;
		if (num_allocator_rings > 2) {
			chd->chd_sync_flags |= CHANNEL_SYNCF_ALLOC_BUF;
			*__DECONST(uint8_t *, &chd->chd_buf_alloc_ring_idx) =
			    chd->chd_alloc_ring_idx + 1;
			*__DECONST(uint8_t *, &chd->chd_free_ring_idx) =
			    chd->chd_buf_alloc_ring_idx + 1;
			*__DECONST(uint8_t *, &chd->chd_buf_free_ring_idx) =
			    chd->chd_free_ring_idx + 1;
		} else {
			*__DECONST(uint8_t *, &chd->chd_buf_alloc_ring_idx) =
			    CHD_RING_IDX_NONE;
			*__DECONST(uint8_t *, &chd->chd_buf_free_ring_idx) =
			    CHD_RING_IDX_NONE;
			*__DECONST(uint8_t *, &chd->chd_free_ring_idx) =
			    chd->chd_alloc_ring_idx + 1;
		}
		if (num_large_buf_alloc_rings > 0) {
			*__DECONST(uint8_t *, &chd->chd_large_buf_alloc_ring_idx) =
			    num_tx_rings + num_rx_rings + num_allocator_rings +
			    num_event_rings;
		} else {
			*__DECONST(uint8_t *, &chd->chd_large_buf_alloc_ring_idx) =
			    CHD_RING_IDX_NONE;
		}
	} else {
		*__DECONST(uint8_t *, &chd->chd_alloc_ring_idx) =
		    CHD_RING_IDX_NONE;
		*__DECONST(uint8_t *, &chd->chd_free_ring_idx) =
		    CHD_RING_IDX_NONE;
		*__DECONST(uint8_t *, &chd->chd_buf_alloc_ring_idx) =
		    CHD_RING_IDX_NONE;
		*__DECONST(uint8_t *, &chd->chd_buf_free_ring_idx) =
		    CHD_RING_IDX_NONE;
		*__DECONST(uint8_t *, &chd->chd_large_buf_alloc_ring_idx) =
		    CHD_RING_IDX_NONE;
	}

	/* capture the per-process redzone cookie from the first channel opened */
	if (__os_ch_md_redzone_cookie == 0) {
		__os_ch_md_redzone_cookie =
		    CHD_SCHEMA(chd)->csm_md_redzone_cookie;
	}

	/* ensure all stores are globally visible */
	os_atomic_thread_fence(seq_cst);

done:
	/* on failure, unwind whatever was acquired: fd, chd (and its ci), ci */
	if (err != 0) {
		if (fd != -1) {
			(void) guarded_close_np(fd, &init.ci_guard);
		}
		if (chd != NULL) {
			if (CHD_INFO(chd) != NULL) {
				free(CHD_INFO(chd));
				CHD_INFO(chd) = NULL;
			}
			free(chd);
			chd = NULL;
		}
		if (ci != NULL) {
			free(ci);
			ci = NULL;
		}
		errno = err;
	}
	return chd;
}
770 
771 channel_t
os_channel_create(const uuid_t uuid,const nexus_port_t port)772 os_channel_create(const uuid_t uuid, const nexus_port_t port)
773 {
774 	return os_channel_create_extended(uuid, port, CHANNEL_DIR_TX_RX,
775 	           CHANNEL_RING_ID_ANY, NULL);
776 }
777 
778 int
os_channel_get_fd(const channel_t chd)779 os_channel_get_fd(const channel_t chd)
780 {
781 	return chd->chd_fd;
782 }
783 
784 int
os_channel_read_attr(const channel_t chd,channel_attr_t cha)785 os_channel_read_attr(const channel_t chd, channel_attr_t cha)
786 {
787 	int err;
788 
789 	if ((err = __channel_get_info(chd->chd_fd, CHD_INFO(chd),
790 	    CHD_INFO_SIZE)) == 0) {
791 		os_channel_info2attr(chd, cha);
792 	}
793 
794 	return err;
795 }
796 
797 int
os_channel_write_attr(const channel_t chd,channel_attr_t cha)798 os_channel_write_attr(const channel_t chd, channel_attr_t cha)
799 {
800 	int err = 0;
801 
802 	if (CHD_INFO(chd)->cinfo_tx_lowat.cet_unit !=
803 	    cha->cha_tx_lowat.cet_unit ||
804 	    CHD_INFO(chd)->cinfo_tx_lowat.cet_value !=
805 	    cha->cha_tx_lowat.cet_value) {
806 		if ((err = __channel_set_opt(chd->chd_fd, CHOPT_TX_LOWAT_THRESH,
807 		    &cha->cha_tx_lowat, sizeof(cha->cha_tx_lowat))) != 0) {
808 			goto done;
809 		}
810 
811 		/* update local copy */
812 		CHD_INFO(chd)->cinfo_tx_lowat = cha->cha_tx_lowat;
813 	}
814 
815 	if (CHD_INFO(chd)->cinfo_rx_lowat.cet_unit !=
816 	    cha->cha_rx_lowat.cet_unit ||
817 	    CHD_INFO(chd)->cinfo_rx_lowat.cet_value !=
818 	    cha->cha_rx_lowat.cet_value) {
819 		if ((err = __channel_set_opt(chd->chd_fd, CHOPT_RX_LOWAT_THRESH,
820 		    &cha->cha_rx_lowat, sizeof(cha->cha_rx_lowat))) != 0) {
821 			goto done;
822 		}
823 
824 		/* update local copy */
825 		CHD_INFO(chd)->cinfo_rx_lowat = cha->cha_rx_lowat;
826 	}
827 done:
828 	return err;
829 }
830 
831 int
os_channel_read_nexus_extension_info(const channel_t chd,nexus_type_t * nt,uint64_t * ext)832 os_channel_read_nexus_extension_info(const channel_t chd, nexus_type_t *nt,
833     uint64_t *ext)
834 {
835 	struct nxprov_params *nxp;
836 
837 	nxp = &CHD_INFO(chd)->cinfo_nxprov_params;
838 	if (nt != NULL) {
839 		*nt = nxp->nxp_type;
840 	}
841 	if (ext != NULL) {
842 		*ext = (uint64_t)nxp->nxp_extensions;
843 	}
844 
845 	return 0;
846 }
847 
848 int
os_channel_sync(const channel_t chd,const sync_mode_t mode)849 os_channel_sync(const channel_t chd, const sync_mode_t mode)
850 {
851 	if (__improbable(mode != CHANNEL_SYNC_TX && mode != CHANNEL_SYNC_RX)) {
852 		return EINVAL;
853 	}
854 
855 	return __channel_sync(chd->chd_fd, mode,
856 	           (mode == CHANNEL_SYNC_TX) ? chd->chd_sync_flags :
857 	           (chd->chd_sync_flags &
858 	           ~(CHANNEL_SYNCF_ALLOC | CHANNEL_SYNCF_ALLOC_BUF)));
859 }
860 
861 void
os_channel_destroy(channel_t chd)862 os_channel_destroy(channel_t chd)
863 {
864 	if (chd->chd_fd != -1) {
865 		(void) guarded_close_np(chd->chd_fd, &chd->chd_guard);
866 	}
867 
868 	if (CHD_INFO(chd) != NULL) {
869 		free(CHD_INFO(chd));
870 		CHD_INFO(chd) = NULL;
871 	}
872 
873 	free(chd);
874 }
875 
/*
 * Report whether this channel has been defuncted by the kernel;
 * returns the raw (nonzero-true) result of the defunct check.
 */
int
os_channel_is_defunct(channel_t chd)
{
	return _CHANNEL_IS_DEFUNCT(chd);
}
881 
882 __attribute__((always_inline, visibility("hidden")))
883 static inline ring_id_t
_ring_id(struct ch_info * cinfo,const ring_id_type_t type)884 _ring_id(struct ch_info *cinfo, const ring_id_type_t type)
885 {
886 	ring_id_t rid = CHANNEL_RING_ID_ANY;    /* make it crash */
887 
888 	switch (type) {
889 	case CHANNEL_FIRST_TX_RING:
890 		rid = cinfo->cinfo_first_tx_ring;
891 		break;
892 
893 	case CHANNEL_LAST_TX_RING:
894 		rid = cinfo->cinfo_last_tx_ring;
895 		break;
896 
897 	case CHANNEL_FIRST_RX_RING:
898 		rid = cinfo->cinfo_first_rx_ring;
899 		break;
900 
901 	case CHANNEL_LAST_RX_RING:
902 		rid = cinfo->cinfo_last_rx_ring;
903 		break;
904 	}
905 
906 	return rid;
907 }
908 
/*
 * Public accessor: resolve a symbolic ring-id type (first/last TX/RX)
 * for this channel.  See _ring_id() for the mapping.
 */
ring_id_t
os_channel_ring_id(const channel_t chd, const ring_id_type_t type)
{
	return _ring_id(CHD_INFO(chd), type);
}
914 
915 channel_ring_t
os_channel_tx_ring(const channel_t chd,const ring_id_t rid)916 os_channel_tx_ring(const channel_t chd, const ring_id_t rid)
917 {
918 	struct ch_info *ci = CHD_INFO(chd);
919 
920 	if (__improbable((ci->cinfo_ch_ring_id != CHANNEL_RING_ID_ANY &&
921 	    ci->cinfo_ch_ring_id != rid) ||
922 	    rid < _ring_id(ci, CHANNEL_FIRST_TX_RING) ||
923 	    rid > _ring_id(ci, CHANNEL_LAST_TX_RING))) {
924 		return NULL;
925 	}
926 
927 	return &chd->chd_rings[rid - _ring_id(ci, CHANNEL_FIRST_TX_RING)];
928 }
929 
930 channel_ring_t
os_channel_rx_ring(const channel_t chd,const ring_id_t rid)931 os_channel_rx_ring(const channel_t chd, const ring_id_t rid)
932 {
933 	struct ch_info *ci = CHD_INFO(chd);
934 
935 	if (__improbable((ci->cinfo_ch_ring_id != CHANNEL_RING_ID_ANY &&
936 	    ci->cinfo_ch_ring_id != rid) ||
937 	    rid < _ring_id(ci, CHANNEL_FIRST_RX_RING) ||
938 	    rid > _ring_id(ci, CHANNEL_LAST_RX_RING))) {
939 		return NULL;
940 	}
941 
942 	return &chd->chd_rings[_num_tx_rings(ci) +      /* add tx rings */
943 	       (rid - _ring_id(ci, CHANNEL_FIRST_RX_RING))];
944 }
945 
946 /*
947  * Return 1 if we have pending transmissions in the tx ring. When everything
948  * is complete ring->ring_head == ring->ring_khead.
949  */
950 int
os_channel_pending(const channel_ring_t chrd)951 os_channel_pending(const channel_ring_t chrd)
952 {
953 	struct __user_channel_ring *ring =
954 	    __DECONST(struct __user_channel_ring *, chrd->chrd_ring);
955 	return ring->ring_head != ring->ring_khead;
956 }
957 
/* Timestamp of the last sync performed on this ring. */
uint64_t
os_channel_ring_sync_time(const channel_ring_t chrd)
{
	return chrd->chrd_ring->ring_sync_time;
}
963 
/* Timestamp of the last notification delivered for this ring. */
uint64_t
os_channel_ring_notify_time(const channel_ring_t chrd)
{
	return chrd->chrd_ring->ring_notify_time;
}
969 
/*
 * Number of slots the caller can currently use on this ring: free TX
 * slots for a TX ring, or received-but-unread slots for an RX ring.
 * Returns 0 if the ring has been defuncted.
 */
uint32_t
os_channel_available_slot_count(const channel_ring_t chrd)
{
	const struct __user_channel_ring *ring = chrd->chrd_ring;
	uint32_t count;
	int n;

	if (ring->ring_kind == CR_KIND_TX) {
		/*
		 * Slots in flight are head - khead; the signed subtraction
		 * may go negative when the indices have wrapped, in which
		 * case adding the ring size restores the true distance.
		 */
		n = ring->ring_head - ring->ring_khead;
		if (n < 0) {
			n += ring->ring_num_slots;
		}
		/* one slot is always kept unused to distinguish full/empty */
		count = (ring->ring_num_slots - n - 1);
	} else {
		/* RX: slots between head (next to read) and tail */
		n = ring->ring_tail - ring->ring_head;
		if (n < 0) {
			n += ring->ring_num_slots;
		}
		count = n;
	}
	return __improbable(_CHANNEL_RING_IS_DEFUNCT(chrd)) ? 0 : count;
}
992 
993 int
os_channel_advance_slot(channel_ring_t chrd,const channel_slot_t slot)994 os_channel_advance_slot(channel_ring_t chrd, const channel_slot_t slot)
995 {
996 	struct __user_channel_ring *ring =
997 	    __DECONST(struct __user_channel_ring *, chrd->chrd_ring);
998 	slot_idx_t idx;
999 	int err;
1000 
1001 	idx = _SLOT_INDEX(chrd, slot);
1002 	if (__probable(_slot_index_is_valid(ring, idx))) {
1003 		ring->ring_head = _CHANNEL_RING_NEXT(ring, idx);
1004 		err = 0;
1005 	} else {
1006 		err = (_CHANNEL_RING_IS_DEFUNCT(chrd) ? ENXIO : EINVAL);
1007 	}
1008 	return err;
1009 }
1010 
/*
 * Return the slot following `slot0' (or the first available slot when
 * slot0 is NULL), optionally filling `prop' with its buffer address,
 * metadata pointer and length.  Returns NULL when the ring is
 * exhausted or defunct.  An out-of-bounds slot0 on a healthy ring is a
 * caller bug and aborts the process.
 */
channel_slot_t
os_channel_get_next_slot(const channel_ring_t chrd, const channel_slot_t slot0,
    slot_prop_t *prop)
{
	const struct __user_channel_ring *ring = chrd->chrd_ring;
	const struct __slot_desc *slot;
	slot_idx_t idx;

	if (__probable(slot0 != NULL)) {
		idx = _SLOT_INDEX(chrd, slot0);
		if (__probable(_slot_index_is_valid(ring, idx))) {
			idx = _CHANNEL_RING_NEXT(ring, idx);
		} else if (!_CHANNEL_RING_IS_DEFUNCT(chrd)) {
			/* slot is out of bounds */
			SK_ABORT_WITH_CAUSE("Index out of bounds in gns", idx);
			/* NOTREACHED */
			__builtin_unreachable();
		} else {
			/*
			 * In case of a defunct, pretend as if we've
			 * advanced to the last slot; this will result
			 * in a NULL slot below.
			 */
			idx = ring->ring_tail;
		}
	} else {
		/* no cursor given: start from the ring head */
		idx = ring->ring_head;
	}

	if (__probable(idx != ring->ring_tail)) {
		slot = &chrd->chrd_slot_desc[idx];
	} else {
		/* we just advanced to the last slot */
		slot = NULL;
	}

	if (__probable(slot != NULL)) {
		uint16_t ring_kind = ring->ring_kind;
		struct __user_quantum *q;
		mach_vm_address_t baddr;
		uint16_t bdoff;

		/*
		 * With a user packet pool, TX slots carry no metadata
		 * until the caller attaches a packet; hand back the bare
		 * slot with zeroed properties.
		 */
		if (__improbable((ring_kind == CR_KIND_TX) &&
		    (CHD_INFO(chrd->chrd_channel)->cinfo_ch_mode &
		    CHMODE_USER_PACKET_POOL))) {
			if (SD_VALID_METADATA(SLOT_DESC_USD(slot))) {
				SK_ABORT_WITH_CAUSE("Tx slot has attached "
				    "metadata", idx);
				/* NOTREACHED */
				__builtin_unreachable();
			}
			if (prop != NULL) {
				prop->sp_len = 0;
				prop->sp_flags = 0;
				prop->sp_buf_ptr = 0;
				prop->sp_mdata_ptr = 0;
			}
			return __improbable(_CHANNEL_RING_IS_DEFUNCT(chrd)) ?
			       NULL : (channel_slot_t)slot;
		}

		_SLOT_DESC_VERIFY(chrd, SLOT_DESC_USD(slot));
		q = _SLOT_METADATA(chrd, ring, idx);
		_METADATA_VERIFY(chrd, q);

		baddr = _initialize_metadata_address(chrd, q, &bdoff);
		if (__improbable(baddr == 0)) {
			return NULL;
		}
		/* No multi-buflet support for slot based interface */
		if (__probable(prop != NULL)) {
			/* immutable: slot index */
			prop->sp_idx = idx;
			prop->sp_flags = 0;
			prop->sp_buf_ptr = baddr + bdoff;
			prop->sp_mdata_ptr = q;
			/* reset slot length if this is to be used for tx */
			prop->sp_len = (ring_kind == CR_KIND_TX) ?
			    ring->ring_def_buf_size : q->qum_len;
		}
	}

	return __improbable(_CHANNEL_RING_IS_DEFUNCT(chrd)) ?
	       NULL : (channel_slot_t)slot;
}
1096 
/*
 * Apply caller-supplied properties (currently just the data length) to
 * a slot's metadata before it is handed to the kernel.  An out-of-
 * bounds slot on a healthy ring aborts the process; on a defunct ring
 * the writes land in zero-filled memory and are ignored.
 */
void
os_channel_set_slot_properties(const channel_ring_t chrd,
    const channel_slot_t slot, const slot_prop_t *prop)
{
	const struct __user_channel_ring *ring = chrd->chrd_ring;
	slot_idx_t idx = _SLOT_INDEX(chrd, slot);

	if (__probable(_slot_index_is_valid(ring, idx))) {
		struct __user_quantum *q;

		_METADATA_VERIFY(chrd, prop->sp_mdata_ptr);
		_SLOT_DESC_VERIFY(chrd, _SLOT_DESC(chrd, idx));

		/*
		 * In the event of a defunct, we'd be accessing zero-filled
		 * memory; this is fine we ignore all changes made to the
		 * region at that time.
		 */
		q = _SLOT_METADATA(chrd, ring, idx);
		q->qum_len = prop->sp_len;
		switch (chrd->chrd_md_type) {
		case NEXUS_META_TYPE_PACKET: {
			struct __user_packet *p = (struct __user_packet *)q;
			/* No multi-buflet support for slot based interface */
			p->pkt_qum_buf.buf_dlen = prop->sp_len;
			p->pkt_qum_buf.buf_doff = 0;
			break;
		}
		default:
			/* quantum metadata: first (only) buflet */
			q->qum_buf[0].buf_dlen = prop->sp_len;
			q->qum_buf[0].buf_doff = 0;
			break;
		}
	} else if (!_CHANNEL_RING_IS_DEFUNCT(chrd)) {
		/* slot is out of bounds */
		SK_ABORT_WITH_CAUSE("Index out of bounds in ssp", idx);
		/* NOTREACHED */
		__builtin_unreachable();
	}
}
1137 
/*
 * Return an encoded packet handle for the metadata attached to `slot',
 * or 0 if the slot is NULL or carries no valid metadata.
 *
 * NOTE(review): when the ring is defunct, an out-of-bounds idx falls
 * through to the _SLOT_DESC() lookup below; presumably the defunct
 * region is zero-filled so the descriptor reads as invalid — confirm
 * that idx cannot index past the mapped region in that state.
 */
packet_t
os_channel_slot_get_packet(const channel_ring_t chrd, const channel_slot_t slot)
{
	const struct __user_channel_ring *ring = chrd->chrd_ring;
	struct __user_quantum *q = NULL;

	if (__probable(slot != NULL)) {
		slot_idx_t idx = _SLOT_INDEX(chrd, slot);
		if (__improbable(!_slot_index_is_valid(ring, idx)) &&
		    !_CHANNEL_RING_IS_DEFUNCT(chrd)) {
			/* slot is out of bounds */
			SK_ABORT_WITH_CAUSE("Index out of bounds in sgp", idx);
			/* NOTREACHED */
			__builtin_unreachable();
		}

		if (__probable(SD_VALID_METADATA(_SLOT_DESC(chrd, idx)))) {
			obj_idx_t midx;
			q = _SLOT_METADATA(chrd, ring, idx);
			_METADATA_VERIFY(chrd, q);
			/*
			 * In the event of a defunct, we'd be accessing
			 * zero-filed memory and end up with 0 for midx;
			 * this is fine since we ignore all changes made
			 * to the region at that time.
			 */
			midx = METADATA_IDX(q);
			_SLOT_METADATA_IDX_VERIFY(chrd, q, midx);
		}
	}

	/* encode metadata type/subtype into the opaque handle */
	return (q == NULL) ? 0 :
	       SK_PTR_ENCODE(q, chrd->chrd_md_type, chrd->chrd_md_subtype);
}
1172 
1173 void *
os_channel_get_stats_region(const channel_t chd,const channel_stats_id_t id)1174 os_channel_get_stats_region(const channel_t chd, const channel_stats_id_t id)
1175 {
1176 	void *sp = CHD_NX_STATS(chd);
1177 	struct __nx_stats_fsw *nxs_fsw;
1178 	void *ptr = NULL;
1179 
1180 	/* we currently deal only with flowswitch */
1181 	if (sp == NULL ||
1182 	    CHD_SCHEMA(chd)->csm_stats_type != NEXUS_STATS_TYPE_FSW) {
1183 		return NULL;
1184 	}
1185 
1186 	nxs_fsw = sp;
1187 
1188 	switch (id) {
1189 	case CHANNEL_STATS_ID_IP:
1190 		ptr = &nxs_fsw->nxs_ipstat;
1191 		break;
1192 
1193 	case CHANNEL_STATS_ID_IP6:
1194 		ptr = &nxs_fsw->nxs_ip6stat;
1195 		break;
1196 
1197 	case CHANNEL_STATS_ID_TCP:
1198 		ptr = &nxs_fsw->nxs_tcpstat;
1199 		break;
1200 
1201 	case CHANNEL_STATS_ID_UDP:
1202 		ptr = &nxs_fsw->nxs_udpstat;
1203 		break;
1204 
1205 	case CHANNEL_STATS_ID_QUIC:
1206 		ptr = &nxs_fsw->nxs_quicstat;
1207 		break;
1208 
1209 	default:
1210 		ptr = NULL;
1211 		break;
1212 	}
1213 
1214 	return ptr;
1215 }
1216 
1217 void *
os_channel_get_advisory_region(const channel_t chd)1218 os_channel_get_advisory_region(const channel_t chd)
1219 {
1220 	struct __kern_nexus_adv_metadata *adv_md;
1221 	/*
1222 	 * To be backward compatible this API will only return
1223 	 * the advisory region for flowswitch.
1224 	 */
1225 	adv_md = CHD_NX_ADV_MD(chd);
1226 	if (adv_md == NULL ||
1227 	    adv_md->knam_type != NEXUS_ADVISORY_TYPE_FLOWSWITCH) {
1228 		return NULL;
1229 	}
1230 	return CHD_NX_ADV_FSW(adv_md);
1231 }
1232 
/*
 * Compare a flow-advisory entry's flow UUID against `id', using wide
 * (64- or 32-bit) loads when the caller's UUID is suitably aligned.
 * Returns nonzero when the IDs match on the aligned fast paths.
 *
 * NOTE(review): the unaligned fallback returns UUID_COMPARE() directly;
 * the aligned paths return 1 on equality, so UUID_COMPARE must be an
 * equality predicate (nonzero-on-equal), not a memcmp-style ordering —
 * verify its definition in the skywalk private headers.
 */
__attribute__((always_inline, visibility("hidden")))
static inline int
_flowadv_id_equal(struct __flowadv_entry *fe, uuid_t id)
{
	/*
	 * Anticipate a nicely (8-bytes) aligned UUID from
	 * caller; the one in fae_id is always 8-byte aligned.
	 */
	if (__probable(IS_P2ALIGNED(id, sizeof(uint64_t)))) {
		uint64_t *id_64 = (uint64_t *)(uintptr_t)id;
		return fe->fae_id_64[0] == id_64[0] &&
		       fe->fae_id_64[1] == id_64[1];
	} else if (__probable(IS_P2ALIGNED(id, sizeof(uint32_t)))) {
		uint32_t *id_32 = (uint32_t *)(uintptr_t)id;
		return fe->fae_id_32[0] == id_32[0] &&
		       fe->fae_id_32[1] == id_32[1] &&
		       fe->fae_id_32[2] == id_32[2] &&
		       fe->fae_id_32[3] == id_32[3];
	}

	return UUID_COMPARE(fe->fae_id, id);
}
1255 
1256 int
os_channel_flow_admissible(const channel_ring_t chrd,uuid_t flow_id,const flowadv_idx_t flow_index)1257 os_channel_flow_admissible(const channel_ring_t chrd, uuid_t flow_id,
1258     const flowadv_idx_t flow_index)
1259 {
1260 	const struct __user_channel_ring *ring = chrd->chrd_ring;
1261 	const struct channel *chd = chrd->chrd_channel;
1262 	struct __flowadv_entry *fe = CHD_NX_FLOWADV(chd);
1263 
1264 	/*
1265 	 * Currently, flow advisory is on a per-nexus port basis.
1266 	 * To anticipate for future requirements, we use the ring
1267 	 * as parameter instead, even though we use it only to
1268 	 * check if this is a TX ring for now.
1269 	 */
1270 	if (__improbable(CHD_NX_FLOWADV(chd) == NULL)) {
1271 		return ENXIO;
1272 	} else if (__improbable(ring->ring_kind != CR_KIND_TX ||
1273 	    flow_index >= CHD_PARAMS(chd)->nxp_flowadv_max)) {
1274 		return EINVAL;
1275 	}
1276 
1277 	/*
1278 	 * Rather than checking if the UUID is all zeroes, check
1279 	 * against fae_flags since the presence of FLOWADV_VALID
1280 	 * means fae_id is non-zero.  This avoids another round of
1281 	 * comparison against zeroes.
1282 	 */
1283 	fe = &CHD_NX_FLOWADV(chd)[flow_index];
1284 	if (__improbable(fe->fae_flags == 0 || !_flowadv_id_equal(fe, flow_id))) {
1285 		return ENOENT;
1286 	}
1287 
1288 	return __improbable((fe->fae_flags & FLOWADVF_SUSPENDED) != 0) ?
1289 	       ENOBUFS: 0;
1290 }
1291 
1292 int
os_channel_flow_adv_get_ce_count(const channel_ring_t chrd,uuid_t flow_id,const flowadv_idx_t flow_index,uint32_t * ce_cnt,uint32_t * pkt_cnt)1293 os_channel_flow_adv_get_ce_count(const channel_ring_t chrd, uuid_t flow_id,
1294     const flowadv_idx_t flow_index, uint32_t *ce_cnt, uint32_t *pkt_cnt)
1295 {
1296 	const struct __user_channel_ring *ring = chrd->chrd_ring;
1297 	const struct channel *chd = chrd->chrd_channel;
1298 	struct __flowadv_entry *fe = CHD_NX_FLOWADV(chd);
1299 
1300 	/*
1301 	 * Currently, flow advisory is on a per-nexus port basis.
1302 	 * To anticipate for future requirements, we use the ring
1303 	 * as parameter instead, even though we use it only to
1304 	 * check if this is a TX ring for now.
1305 	 */
1306 	if (__improbable(CHD_NX_FLOWADV(chd) == NULL)) {
1307 		return ENXIO;
1308 	} else if (__improbable(ring->ring_kind != CR_KIND_TX ||
1309 	    flow_index >= CHD_PARAMS(chd)->nxp_flowadv_max)) {
1310 		return EINVAL;
1311 	}
1312 
1313 	/*
1314 	 * Rather than checking if the UUID is all zeroes, check
1315 	 * against fae_flags since the presence of FLOWADV_VALID
1316 	 * means fae_id is non-zero.  This avoids another round of
1317 	 * comparison against zeroes.
1318 	 */
1319 	fe = &CHD_NX_FLOWADV(chd)[flow_index];
1320 	if (__improbable(fe->fae_flags == 0 || !_flowadv_id_equal(fe, flow_id))) {
1321 		return ENOENT;
1322 	}
1323 
1324 	*ce_cnt = fe->fae_ce_cnt;
1325 	*pkt_cnt = fe->fae_pkt_cnt;
1326 	return 0;
1327 }
1328 
1329 channel_attr_t
os_channel_attr_create(void)1330 os_channel_attr_create(void)
1331 {
1332 	struct channel_attr *cha;
1333 
1334 	cha = malloc(sizeof(*cha));
1335 	if (cha != NULL) {
1336 		bzero(cha, sizeof(*cha));
1337 	}
1338 	return cha;
1339 }
1340 
1341 channel_attr_t
os_channel_attr_clone(const channel_attr_t cha)1342 os_channel_attr_clone(const channel_attr_t cha)
1343 {
1344 	struct channel_attr *ncha;
1345 
1346 	ncha = os_channel_attr_create();
1347 	if (ncha != NULL && cha != NULL) {
1348 		bcopy(cha, ncha, sizeof(*ncha));
1349 		ncha->cha_key = NULL;
1350 		ncha->cha_key_len = 0;
1351 		if (cha->cha_key != NULL && cha->cha_key_len != 0 &&
1352 		    os_channel_attr_set_key(ncha, cha->cha_key,
1353 		    cha->cha_key_len) != 0) {
1354 			os_channel_attr_destroy(ncha);
1355 			ncha = NULL;
1356 		}
1357 	}
1358 
1359 	return ncha;
1360 }
1361 
/*
 * Set a single attribute on an attribute object.
 *
 * Returns 0 on success; ENOTSUP for attributes that are read-only
 * (they reflect nexus provider parameters) or for unsupported values;
 * EINVAL for unknown attributes or out-of-range values.
 */
int
os_channel_attr_set(const channel_attr_t cha, const channel_attr_type_t type,
    const uint64_t value)
{
	int err = 0;

	switch (type) {
	/* read-only: derived from the nexus provider, not settable here */
	case CHANNEL_ATTR_TX_RINGS:
	case CHANNEL_ATTR_RX_RINGS:
	case CHANNEL_ATTR_TX_SLOTS:
	case CHANNEL_ATTR_RX_SLOTS:
	case CHANNEL_ATTR_SLOT_BUF_SIZE:
	case CHANNEL_ATTR_SLOT_META_SIZE:
	case CHANNEL_ATTR_NEXUS_EXTENSIONS:
	case CHANNEL_ATTR_NEXUS_MHINTS:
	case CHANNEL_ATTR_NEXUS_IFINDEX:
	case CHANNEL_ATTR_NEXUS_STATS_SIZE:
	case CHANNEL_ATTR_NEXUS_FLOWADV_MAX:
	case CHANNEL_ATTR_NEXUS_META_TYPE:
	case CHANNEL_ATTR_NEXUS_META_SUBTYPE:
	case CHANNEL_ATTR_NEXUS_CHECKSUM_OFFLOAD:
	case CHANNEL_ATTR_NEXUS_ADV_SIZE:
	case CHANNEL_ATTR_MAX_FRAGS:
	case CHANNEL_ATTR_NUM_BUFFERS:
	case CHANNEL_ATTR_LARGE_BUF_SIZE:
		err = ENOTSUP;
		break;

	case CHANNEL_ATTR_EXCLUSIVE:
		cha->cha_exclusive = (uint32_t)value;
		break;

	case CHANNEL_ATTR_NO_AUTO_SYNC:
		/* auto-sync cannot be re-enabled */
		if (value == 0) {
			err = ENOTSUP;
		}
		break;

	case CHANNEL_ATTR_MONITOR:
		/* only the known monitor modes are accepted */
		switch (value) {
		case CHANNEL_MONITOR_OFF:
		case CHANNEL_MONITOR_NO_COPY:
		case CHANNEL_MONITOR_COPY:
			cha->cha_monitor = (uint32_t)value;
			goto done;
		}
		err = EINVAL;
		break;

	case CHANNEL_ATTR_TX_LOWAT_UNIT:
	case CHANNEL_ATTR_RX_LOWAT_UNIT:
		/* thresholds may be expressed in bytes or slots */
		switch (value) {
		case CHANNEL_THRESHOLD_UNIT_BYTES:
		case CHANNEL_THRESHOLD_UNIT_SLOTS:
			if (type == CHANNEL_ATTR_TX_LOWAT_UNIT) {
				cha->cha_tx_lowat.cet_unit =
				    (channel_threshold_unit_t)value;
			} else {
				cha->cha_rx_lowat.cet_unit =
				    (channel_threshold_unit_t)value;
			}
			goto done;
		}
		err = EINVAL;
		break;

	case CHANNEL_ATTR_TX_LOWAT_VALUE:
		cha->cha_tx_lowat.cet_value = (uint32_t)value;
		break;

	case CHANNEL_ATTR_RX_LOWAT_VALUE:
		cha->cha_rx_lowat.cet_value = (uint32_t)value;
		break;

	case CHANNEL_ATTR_USER_PACKET_POOL:
		cha->cha_user_packet_pool = (value != 0);
		break;

	case CHANNEL_ATTR_NEXUS_DEFUNCT_OK:
		cha->cha_nexus_defunct_ok = (value != 0);
		break;

	case CHANNEL_ATTR_FILTER:
		cha->cha_filter = (uint32_t)value;
		break;

	case CHANNEL_ATTR_EVENT_RING:
		cha->cha_enable_event_ring = (value != 0);
		break;

	case CHANNEL_ATTR_LOW_LATENCY:
		cha->cha_low_latency = (value != 0);
		break;

	default:
		err = EINVAL;
		break;
	}
done:
	return err;
}
1463 
1464 int
os_channel_attr_set_key(const channel_attr_t cha,const void * key,const uint32_t key_len)1465 os_channel_attr_set_key(const channel_attr_t cha, const void *key,
1466     const uint32_t key_len)
1467 {
1468 	int err = 0;
1469 
1470 	if ((key == NULL && key_len != 0) || (key != NULL && key_len == 0) ||
1471 	    (key_len != 0 && key_len > NEXUS_MAX_KEY_LEN)) {
1472 		err = EINVAL;
1473 		goto done;
1474 	}
1475 	cha->cha_key_len = 0;
1476 	if (key_len == 0 && cha->cha_key != NULL) {
1477 		free(cha->cha_key);
1478 		cha->cha_key = NULL;
1479 	} else if (key != NULL && key_len != 0) {
1480 		if (cha->cha_key != NULL) {
1481 			free(cha->cha_key);
1482 		}
1483 		if ((cha->cha_key = malloc(key_len)) == NULL) {
1484 			err = ENOMEM;
1485 			goto done;
1486 		}
1487 		cha->cha_key_len = key_len;
1488 		bcopy(key, cha->cha_key, key_len);
1489 	}
1490 done:
1491 	return err;
1492 }
1493 
/*
 * Read a single attribute value out of an attribute object into
 * *value.  Returns 0 on success or EINVAL for an unknown attribute.
 */
int
os_channel_attr_get(const channel_attr_t cha, const channel_attr_type_t type,
    uint64_t *value)
{
	int err = 0;

	switch (type) {
	case CHANNEL_ATTR_TX_RINGS:
		*value = cha->cha_tx_rings;
		break;

	case CHANNEL_ATTR_RX_RINGS:
		*value = cha->cha_rx_rings;
		break;

	case CHANNEL_ATTR_TX_SLOTS:
		*value = cha->cha_tx_slots;
		break;

	case CHANNEL_ATTR_RX_SLOTS:
		*value = cha->cha_rx_slots;
		break;

	case CHANNEL_ATTR_SLOT_BUF_SIZE:
		*value = cha->cha_buf_size;
		break;

	case CHANNEL_ATTR_SLOT_META_SIZE:
		*value = cha->cha_meta_size;
		break;

	case CHANNEL_ATTR_NEXUS_STATS_SIZE:
		*value = cha->cha_stats_size;
		break;

	case CHANNEL_ATTR_NEXUS_FLOWADV_MAX:
		*value = cha->cha_flowadv_max;
		break;

	case CHANNEL_ATTR_EXCLUSIVE:
		*value = cha->cha_exclusive;
		break;

	case CHANNEL_ATTR_NO_AUTO_SYNC:
		/* auto-sync is never enabled in this implementation */
		*value = 1;
		break;

	case CHANNEL_ATTR_MONITOR:
		*value = cha->cha_monitor;
		break;

	case CHANNEL_ATTR_TX_LOWAT_UNIT:
		*value = cha->cha_tx_lowat.cet_unit;
		break;

	case CHANNEL_ATTR_TX_LOWAT_VALUE:
		*value = cha->cha_tx_lowat.cet_value;
		break;

	case CHANNEL_ATTR_RX_LOWAT_UNIT:
		*value = cha->cha_rx_lowat.cet_unit;
		break;

	case CHANNEL_ATTR_RX_LOWAT_VALUE:
		*value = cha->cha_rx_lowat.cet_value;
		break;

	case CHANNEL_ATTR_NEXUS_TYPE:
		*value = cha->cha_nexus_type;
		break;

	case CHANNEL_ATTR_NEXUS_EXTENSIONS:
		*value = cha->cha_nexus_extensions;
		break;

	case CHANNEL_ATTR_NEXUS_MHINTS:
		*value = cha->cha_nexus_mhints;
		break;

	case CHANNEL_ATTR_NEXUS_IFINDEX:
		*value = cha->cha_nexus_ifindex;
		break;

	case CHANNEL_ATTR_NEXUS_META_TYPE:
		*value = cha->cha_nexus_meta_type;
		break;

	case CHANNEL_ATTR_NEXUS_META_SUBTYPE:
		*value = cha->cha_nexus_meta_subtype;
		break;

	case CHANNEL_ATTR_NEXUS_CHECKSUM_OFFLOAD:
		*value = cha->cha_nexus_checksum_offload;
		break;

	case CHANNEL_ATTR_USER_PACKET_POOL:
		*value = (cha->cha_user_packet_pool != 0);
		break;

	case CHANNEL_ATTR_NEXUS_ADV_SIZE:
		*value = cha->cha_nexusadv_size;
		break;

	case CHANNEL_ATTR_NEXUS_DEFUNCT_OK:
		*value = cha->cha_nexus_defunct_ok;
		break;

	case CHANNEL_ATTR_EVENT_RING:
		*value = (cha->cha_enable_event_ring != 0);
		break;

	case CHANNEL_ATTR_MAX_FRAGS:
		*value = cha->cha_max_frags;
		break;

	case CHANNEL_ATTR_NUM_BUFFERS:
		*value = cha->cha_num_buffers;
		break;

	case CHANNEL_ATTR_LOW_LATENCY:
		*value = (cha->cha_low_latency != 0);
		break;

	case CHANNEL_ATTR_LARGE_BUF_SIZE:
		*value = cha->cha_large_buf_size;
		break;

	default:
		err = EINVAL;
		break;
	}

	return err;
}
1628 
1629 int
os_channel_attr_get_key(const channel_attr_t cha,void * key,uint32_t * key_len)1630 os_channel_attr_get_key(const channel_attr_t cha, void *key,
1631     uint32_t *key_len)
1632 {
1633 	int err = 0;
1634 
1635 	if (key_len == NULL) {
1636 		err = EINVAL;
1637 		goto done;
1638 	} else if (key == NULL || cha->cha_key == NULL) {
1639 		*key_len = (cha->cha_key != NULL) ? cha->cha_key_len : 0;
1640 		goto done;
1641 	}
1642 
1643 	if (*key_len >= cha->cha_key_len) {
1644 		bcopy(cha->cha_key, key, cha->cha_key_len);
1645 		*key_len = cha->cha_key_len;
1646 	} else {
1647 		err = ENOMEM;
1648 	}
1649 done:
1650 	return err;
1651 }
1652 
/*
 * Translate the kernel-provided channel info (nexus provider params
 * plus channel mode bits) into the caller's attribute object.  The
 * caller-owned key pointer/length survive the wipe-and-refill.
 */
__attribute__((visibility("hidden")))
static void
os_channel_info2attr(struct channel *chd, channel_attr_t cha)
{
	struct ch_info *cinfo = CHD_INFO(chd);
	/* Save these first before we wipe out the attribute */
	uint32_t cha_key_len = cha->cha_key_len;
	void *cha_key = cha->cha_key;
	uint32_t caps;

	/* the public and private meta type/subtype enums must stay in sync */
	_CASSERT((uint32_t)NEXUS_META_TYPE_INVALID == (uint32_t)CHANNEL_NEXUS_META_TYPE_INVALID);
	_CASSERT((uint32_t)NEXUS_META_TYPE_QUANTUM == (uint32_t)CHANNEL_NEXUS_META_TYPE_QUANTUM);
	_CASSERT((uint32_t)NEXUS_META_TYPE_PACKET == (uint32_t)CHANNEL_NEXUS_META_TYPE_PACKET);
	_CASSERT((uint32_t)NEXUS_META_SUBTYPE_INVALID ==
	    (uint32_t)CHANNEL_NEXUS_META_SUBTYPE_INVALID);
	_CASSERT((uint32_t)NEXUS_META_SUBTYPE_PAYLOAD ==
	    (uint32_t)CHANNEL_NEXUS_META_SUBTYPE_PAYLOAD);
	_CASSERT((uint32_t)NEXUS_META_SUBTYPE_RAW == (uint32_t)CHANNEL_NEXUS_META_SUBTYPE_RAW);

	bzero(cha, sizeof(*cha));
	cha->cha_tx_rings = CHD_PARAMS(chd)->nxp_tx_rings;
	cha->cha_rx_rings = CHD_PARAMS(chd)->nxp_rx_rings;
	cha->cha_tx_slots = CHD_PARAMS(chd)->nxp_tx_slots;
	cha->cha_rx_slots = CHD_PARAMS(chd)->nxp_rx_slots;
	cha->cha_buf_size = CHD_PARAMS(chd)->nxp_buf_size;
	cha->cha_meta_size = CHD_PARAMS(chd)->nxp_meta_size;
	cha->cha_stats_size = CHD_PARAMS(chd)->nxp_stats_size;
	cha->cha_flowadv_max = CHD_PARAMS(chd)->nxp_flowadv_max;
	/* boolean attributes come from the channel mode bits */
	cha->cha_exclusive = !!(cinfo->cinfo_ch_mode & CHMODE_EXCLUSIVE);
	cha->cha_user_packet_pool = !!(cinfo->cinfo_ch_mode &
	    CHMODE_USER_PACKET_POOL);
	cha->cha_nexus_defunct_ok = !!(cinfo->cinfo_ch_mode &
	    CHMODE_DEFUNCT_OK);
	cha->cha_nexusadv_size = CHD_PARAMS(chd)->nxp_nexusadv_size;
	if (cinfo->cinfo_ch_mode & CHMODE_MONITOR) {
		cha->cha_monitor =
		    (cinfo->cinfo_ch_mode & CHMODE_MONITOR_NO_COPY) ?
		    CHANNEL_MONITOR_NO_COPY : CHANNEL_MONITOR_COPY;
	} else {
		cha->cha_monitor = CHANNEL_MONITOR_OFF;
	}
	/* restore the caller-owned key saved above */
	cha->cha_key_len = cha_key_len;
	cha->cha_key = cha_key;
	cha->cha_tx_lowat = cinfo->cinfo_tx_lowat;
	cha->cha_rx_lowat = cinfo->cinfo_rx_lowat;
	cha->cha_nexus_type = CHD_PARAMS(chd)->nxp_type;
	cha->cha_nexus_extensions = CHD_PARAMS(chd)->nxp_extensions;
	cha->cha_nexus_mhints = CHD_PARAMS(chd)->nxp_mhints;
	cha->cha_nexus_ifindex = CHD_PARAMS(chd)->nxp_ifindex;
	cha->cha_nexus_meta_type = chd->chd_md_type;
	cha->cha_nexus_meta_subtype = chd->chd_md_subtype;
	cha->cha_enable_event_ring =
	    (cinfo->cinfo_ch_mode & CHMODE_EVENT_RING) != 0;
	cha->cha_low_latency =
	    (cinfo->cinfo_ch_mode & CHMODE_LOW_LATENCY) != 0;

	caps = CHD_PARAMS(chd)->nxp_capabilities;
	if (caps & NXPCAP_CHECKSUM_PARTIAL) {
		cha->cha_nexus_checksum_offload =
		    CHANNEL_NEXUS_CHECKSUM_PARTIAL;
	} else {
		cha->cha_nexus_checksum_offload = 0;
	}
	cha->cha_max_frags = CHD_PARAMS(chd)->nxp_max_frags;
	cha->cha_num_buffers = cinfo->cinfo_num_bufs;
	cha->cha_large_buf_size = CHD_PARAMS(chd)->nxp_large_buf_size;
}
1720 
1721 void
os_channel_attr_destroy(channel_attr_t cha)1722 os_channel_attr_destroy(channel_attr_t cha)
1723 {
1724 	if (cha->cha_key != NULL) {
1725 		free(cha->cha_key);
1726 		cha->cha_key = NULL;
1727 	}
1728 	free(cha);
1729 }
1730 
/*
 * Pop one packet from the channel's (regular or large-buffer) alloc
 * ring into *ph.  If the ring is empty, a CHANNEL_SYNC_UPP syscall is
 * issued to replenish it (piggy-backing a free-ring sync since the
 * syscall cost is already paid).  Requires CHMODE_USER_PACKET_POOL.
 * Returns 0, ENOTSUP, ENOMEM (pool exhausted), or ENXIO (defunct).
 */
static int
os_channel_packet_alloc_common(const channel_t chd, packet_t *ph, bool large)
{
	struct __user_channel_ring *ring;
	struct channel_ring_desc *chrd;
	struct __user_quantum *q;
	slot_idx_t idx;
	mach_vm_address_t baddr;
	uint16_t bdoff;
	struct ch_info *ci = CHD_INFO(chd);

	if (__improbable((ci->cinfo_ch_mode & CHMODE_USER_PACKET_POOL) == 0)) {
		return ENOTSUP;
	}
	/* large-buffer allocation needs a dedicated alloc ring */
	if (__improbable(large &&
	    chd->chd_large_buf_alloc_ring_idx == CHD_RING_IDX_NONE)) {
		return ENOTSUP;
	}
	chrd = &chd->chd_rings[large ?
	    chd->chd_large_buf_alloc_ring_idx : chd->chd_alloc_ring_idx];
	ring = __DECONST(struct __user_channel_ring *, chrd->chrd_ring);
	idx = ring->ring_head;

	if (__improbable(idx == ring->ring_tail)) {
		/*
		 * do a sync to get more packets;
		 * since we are paying the cost of a syscall do a sync for
		 * free ring as well.
		 */
		int err;
		sync_flags_t flags;

		if (large) {
			flags = (chd->chd_sync_flags &
			    ~(CHANNEL_SYNCF_ALLOC_BUF | CHANNEL_SYNCF_ALLOC)) |
			    CHANNEL_SYNCF_LARGE_ALLOC;
		} else {
			flags = chd->chd_sync_flags & ~CHANNEL_SYNCF_ALLOC_BUF;
		}

		err = __channel_sync(chd->chd_fd, CHANNEL_SYNC_UPP, flags);
		if (__improbable(err != 0)) {
			/* a sync failure on a live channel is fatal */
			if (!_CHANNEL_IS_DEFUNCT(chd)) {
				SK_ABORT_WITH_CAUSE("packet pool alloc "
				    "sync failed", err);
				/* NOTREACHED */
				__builtin_unreachable();
			}
			return err;
		}
	}

	/* the kernel advances ring_tail; still empty means pool exhausted */
	if (__improbable(idx == ring->ring_tail)) {
		return __improbable(_CHANNEL_IS_DEFUNCT(chd)) ?
		       ENXIO : ENOMEM;
	}

	_SLOT_DESC_VERIFY(chrd, _SLOT_DESC(chrd, idx));
	q = _SLOT_METADATA(chrd, ring, idx);
	_METADATA_VERIFY(chrd, q);

	/* hand ownership of the metadata to the caller via the handle */
	*ph = SK_PTR_ENCODE(q, chrd->chrd_md_type, chrd->chrd_md_subtype);
	_SLOT_DETACH_METADATA(_SLOT_DESC(chrd, idx));

	/*
	 * Initialize the metadata buffer address. In the event of a
	 * defunct, we'd be accessing zero-filled memory; this is fine
	 * since we ignore all changes made to region at that time.
	 */
	baddr = _initialize_metadata_address(chrd, q, &bdoff);
	if (__improbable(baddr == 0)) {
		return ENXIO;
	}
	ring->ring_head = _CHANNEL_RING_NEXT(ring, idx);
	return __improbable(_CHANNEL_IS_DEFUNCT(chd)) ? ENXIO : 0;
}
1807 
/* Allocate a packet with a default-sized buffer from the user pool. */
int
os_channel_packet_alloc(const channel_t chd, packet_t *ph)
{
	return os_channel_packet_alloc_common(chd, ph, false);
}
1813 
/* Allocate a packet with a large buffer from the user pool. */
int
os_channel_large_packet_alloc(const channel_t chd, packet_t *ph)
{
	return os_channel_packet_alloc_common(chd, ph, true);
}
1819 
/*
 * os_channel_packet_free: return a packet to the channel's user packet
 * pool by attaching its metadata index to the head slot of the free
 * ring.  Returns ENOTSUP if the channel is not in user packet pool
 * mode, ENXIO if the free ring has gone defunct, 0 on success.
 */
int
os_channel_packet_free(const channel_t chd, packet_t ph)
{
	struct __user_channel_ring *ring;
	struct channel_ring_desc *chrd;
	slot_idx_t idx;
	obj_idx_t midx;
	struct ch_info *ci = CHD_INFO(chd);

	/* freeing to the pool only makes sense in user packet pool mode */
	if (__improbable((ci->cinfo_ch_mode & CHMODE_USER_PACKET_POOL) == 0)) {
		return ENOTSUP;
	}

	chrd = &chd->chd_rings[chd->chd_free_ring_idx];
	ring = __DECONST(struct __user_channel_ring *, chrd->chrd_ring);

	idx = ring->ring_head;
	if (__improbable(idx == ring->ring_tail)) {
		/*
		 * do a sync to reclaim space in free ring;
		 */
		int err;
		err = __channel_sync(chd->chd_fd, CHANNEL_SYNC_UPP,
		    CHANNEL_SYNCF_FREE);
		if (__improbable(err != 0) && !_CHANNEL_IS_DEFUNCT(chd)) {
			SK_ABORT_WITH_CAUSE("packet pool free "
			    "sync failed", err);
			/* NOTREACHED */
			__builtin_unreachable();
		}
	}

	/*
	 * Re-check after the sync (the sync only moves ring_tail; the head
	 * we cached in idx is advanced solely by this function below).  On
	 * a live channel there must now be space; only a defunct channel
	 * may legitimately still be full.
	 */
	if (__improbable(idx == ring->ring_tail) && !_CHANNEL_IS_DEFUNCT(chd)) {
		SK_ABORT("no free ring space");
		/* NOTREACHED */
		__builtin_unreachable();
	}

	/*
	 * In the event of a defunct, midx will be 0 and we'll end up
	 * attaching it to the slot; this is fine since we ignore all
	 * changes made to the slot descriptors at that time.
	 */
	midx = METADATA_IDX(QUM_ADDR(ph));
	_SLOT_METADATA_IDX_VERIFY(chrd, QUM_ADDR(ph), midx);
	_SLOT_ATTACH_METADATA(_SLOT_DESC(chrd, idx), midx);
	ring->ring_head = _CHANNEL_RING_NEXT(ring, idx);

	return __improbable(_CHANNEL_RING_IS_DEFUNCT(chrd)) ? ENXIO : 0;
}
1870 
/*
 * os_channel_slot_attach_packet: attach a finalized packet's metadata
 * to the given ring slot.  Requires user packet pool mode.  Every
 * validation failure aborts the process unless the ring has gone
 * defunct, in which case the operation is skipped and ENXIO returned.
 */
int
os_channel_slot_attach_packet(const channel_ring_t chrd,
    const channel_slot_t slot, packet_t ph)
{
	slot_idx_t idx;
	obj_idx_t midx;

	/* slot attach is only valid in user packet pool mode */
	if (__improbable((chrd->chrd_channel->chd_info->cinfo_ch_mode &
	    CHMODE_USER_PACKET_POOL) == 0)) {
		return ENOTSUP;
	}

	/* only finalized packets may be attached to a slot */
	if (__improbable(!__packet_is_finalized(ph))) {
		if (!_CHANNEL_RING_IS_DEFUNCT(chrd)) {
			SK_ABORT("packet not finalized");
			/* NOTREACHED */
			__builtin_unreachable();
		}
		goto done;
	}

	/* the slot must lie within the ring's valid index range */
	idx = _SLOT_INDEX(chrd, slot);
	if (__improbable(!_slot_index_is_valid(chrd->chrd_ring, idx))) {
		if (!_CHANNEL_RING_IS_DEFUNCT(chrd)) {
			SK_ABORT_WITH_CAUSE("Invalid slot", slot);
			/* NOTREACHED */
			__builtin_unreachable();
		}
		goto done;
	}

	/* the slot must not already carry an attached packet */
	if (__improbable(SD_VALID_METADATA(SLOT_DESC_USD(slot)))) {
		if (!_CHANNEL_RING_IS_DEFUNCT(chrd)) {
			SK_ABORT_WITH_CAUSE("Slot has attached packet", slot);
			/* NOTREACHED */
			__builtin_unreachable();
		}
		goto done;
	}

	/*
	 * In the event of a defunct, midx will be 0 and we'll end up
	 * attaching it to the slot; this is fine since we ignore all
	 * changes made to the slot descriptors at that time.
	 */
	midx = METADATA_IDX(QUM_ADDR(ph));
	_SLOT_METADATA_IDX_VERIFY(chrd, QUM_ADDR(ph), midx);
	_SLOT_ATTACH_METADATA(SLOT_DESC_USD(slot), midx);

done:
	return __improbable(_CHANNEL_RING_IS_DEFUNCT(chrd)) ? ENXIO : 0;
}
1923 
/*
 * os_channel_slot_detach_packet: detach the packet currently attached
 * to the given ring slot.  The caller passes the packet handle it
 * believes is attached; a handle mismatch, invalid slot, empty slot,
 * or unfinalized packet aborts the process unless the ring has gone
 * defunct, in which case the operation is skipped and ENXIO returned.
 */
int
os_channel_slot_detach_packet(const channel_ring_t chrd,
    const channel_slot_t slot, packet_t ph)
{
	slot_idx_t idx;

	/* slot detach is only valid in user packet pool mode */
	if (__improbable((chrd->chrd_channel->chd_info->cinfo_ch_mode &
	    CHMODE_USER_PACKET_POOL) == 0)) {
		return ENOTSUP;
	}

	/* the slot must lie within the ring's valid index range */
	idx = _SLOT_INDEX(chrd, slot);
	if (__improbable(!_slot_index_is_valid(chrd->chrd_ring, idx))) {
		if (!_CHANNEL_RING_IS_DEFUNCT(chrd)) {
			SK_ABORT_WITH_CAUSE("Invalid slot", slot);
			/* NOTREACHED */
			__builtin_unreachable();
		}
		goto done;
	}

	/* the slot must actually have a packet attached */
	if (__improbable(!SD_VALID_METADATA(SLOT_DESC_USD(slot)))) {
		if (!_CHANNEL_RING_IS_DEFUNCT(chrd)) {
			SK_ABORT_WITH_CAUSE("Slot has no attached packet",
			    slot);
			/* NOTREACHED */
			__builtin_unreachable();
		}
		goto done;
	}

	/* the caller's handle must match what is attached to the slot */
	if (__improbable(ph != SK_PTR_ENCODE(_SLOT_METADATA(chrd,
	    chrd->chrd_ring, idx), chrd->chrd_md_type,
	    chrd->chrd_md_subtype))) {
		if (!_CHANNEL_RING_IS_DEFUNCT(chrd)) {
			SK_ABORT("packet handle mismatch");
			/* NOTREACHED */
			__builtin_unreachable();
		}
		goto done;
	}

	/* only finalized packets may be detached */
	if (__improbable(!__packet_is_finalized(ph))) {
		if (!_CHANNEL_RING_IS_DEFUNCT(chrd)) {
			SK_ABORT("packet not finalized");
			/* NOTREACHED */
			__builtin_unreachable();
		}
		goto done;
	}

	/*
	 * In the event of a defunct, we ignore any changes made to
	 * the slot descriptors, and so doing this is harmless.
	 */
	_SLOT_DETACH_METADATA(SLOT_DESC_USD(slot));

done:
	return __improbable(_CHANNEL_RING_IS_DEFUNCT(chrd)) ? ENXIO : 0;
}
1984 
/*
 * os_channel_purge_packet_alloc_ring_common: walk the (large or
 * regular) packet alloc ring and hand every cached packet beyond the
 * ring's working set (ring_alloc_ws) back to the free ring via
 * os_channel_packet_free().  Returns 0 on success, ENXIO or the free
 * error once the channel/ring goes defunct.
 */
__attribute__((visibility("hidden")))
static inline int
os_channel_purge_packet_alloc_ring_common(const channel_t chd, bool large)
{
	struct __user_channel_ring *ring;
	struct channel_ring_desc *chrd;
	uint32_t curr_ws;
	slot_idx_t idx;
	packet_t ph;
	int npkts, err;

	/* select the large-buffer or regular packet alloc ring */
	chrd = &chd->chd_rings[large ?
	    chd->chd_large_buf_alloc_ring_idx : chd->chd_alloc_ring_idx];
	ring = __DECONST(struct __user_channel_ring *, chrd->chrd_ring);
	idx = ring->ring_head;

	/* calculate the number of packets in alloc pool */
	npkts = ring->ring_tail - idx;
	if (npkts < 0) {
		/* tail has wrapped around the ring */
		npkts += ring->ring_num_slots;
	}

	/* keep up to the ring's working set cached; free the excess */
	curr_ws = ring->ring_alloc_ws;
	while ((uint32_t)npkts-- > curr_ws) {
		struct __user_quantum *q;

		_SLOT_DESC_VERIFY(chrd, _SLOT_DESC(chrd, idx));
		q = _SLOT_METADATA(chrd, ring, idx);
		_METADATA_VERIFY(chrd, q);

		ph = SK_PTR_ENCODE(q, chrd->chrd_md_type,
		    chrd->chrd_md_subtype);
		_SLOT_DETACH_METADATA(_SLOT_DESC(chrd, idx));

		/*
		 * Initialize the metadata buffer address. In the event of a
		 * defunct, we'd be accessing zero-filled memory; this is fine
		 * since we ignore all changes made to region at that time.
		 */
		if (chrd->chrd_md_type == NEXUS_META_TYPE_PACKET) {
			struct __user_packet *p = (struct __user_packet *)q;
			uint16_t bcnt = p->pkt_bufs_cnt;
			uint16_t bmax = p->pkt_bufs_max;

			/* zero buflet count/max is tolerated only on defunct */
			if (__improbable((bcnt == 0) || (bmax == 0))) {
				if (!_CHANNEL_RING_IS_DEFUNCT(chrd)) {
					SK_ABORT("pkt pool purge, bad bufcnt");
					/* NOTREACHED */
					__builtin_unreachable();
				} else {
					return ENXIO;
				}
			}
			/*
			 * alloc ring will not have multi-buflet packets.
			 */
			_PKT_BUFCNT_VERIFY(chrd, bcnt, 1);
		}
		/* reset the quantum's buffer address before freeing it */
		*(mach_vm_address_t *) (uintptr_t)&q->qum_buf[0].buf_addr =
		    _CHANNEL_RING_BUF(chrd, ring, &q->qum_buf[0]);
		idx = _CHANNEL_RING_NEXT(ring, idx);
		/* advance head before the free, which uses the free ring */
		ring->ring_head = idx;
		err = os_channel_packet_free(chd, ph);
		if (__improbable(err != 0)) {
			if (!_CHANNEL_IS_DEFUNCT(chd)) {
				SK_ABORT_WITH_CAUSE("packet pool purge "
				    "free failed", err);
				/* NOTREACHED */
				__builtin_unreachable();
			}
			return err;
		}
	}

	return 0;
}
2061 
2062 __attribute__((visibility("hidden")))
2063 static inline int
os_channel_purge_packet_alloc_ring(const channel_t chd)2064 os_channel_purge_packet_alloc_ring(const channel_t chd)
2065 {
2066 	return os_channel_purge_packet_alloc_ring_common(chd, false);
2067 }
2068 
2069 __attribute__((visibility("hidden")))
2070 static inline int
os_channel_purge_large_packet_alloc_ring(const channel_t chd)2071 os_channel_purge_large_packet_alloc_ring(const channel_t chd)
2072 {
2073 	return os_channel_purge_packet_alloc_ring_common(chd, true);
2074 }
2075 
/*
 * os_channel_purge_buflet_alloc_ring: walk the buflet alloc ring and
 * hand every cached buflet beyond the ring's working set back to the
 * buflet free ring via os_channel_buflet_free().  Returns 0 on
 * success, ENXIO or the free error once the channel goes defunct.
 */
__attribute__((visibility("hidden")))
static inline int
os_channel_purge_buflet_alloc_ring(const channel_t chd)
{
	struct __user_channel_ring *ring;
	struct channel_ring_desc *chrd;
	uint32_t curr_ws;
	slot_idx_t idx;
	int nbfts, err;

	chrd = &chd->chd_rings[chd->chd_buf_alloc_ring_idx];
	ring = __DECONST(struct __user_channel_ring *, chrd->chrd_ring);
	idx = ring->ring_head;

	/* calculate the number of packets in alloc pool */
	nbfts = ring->ring_tail - idx;
	if (nbfts < 0) {
		/* tail has wrapped around the ring */
		nbfts += ring->ring_num_slots;
	}

	/* keep up to the ring's working set cached; free the excess */
	curr_ws = ring->ring_alloc_ws;
	while ((uint32_t)nbfts-- > curr_ws) {
		struct __user_buflet *ubft;
		obj_idx_t nbft_idx;

		_SLOT_DESC_VERIFY(chrd, _SLOT_DESC(chrd, idx));
		ubft = _SLOT_BFT_METADATA(chrd, ring, idx);
		_SLOT_DETACH_METADATA(_SLOT_DESC(chrd, idx));

		/*
		 * Initialize the buflet metadata buffer address.
		 */
		*(mach_vm_address_t *)(uintptr_t)&(ubft->buf_addr) =
		    _CHANNEL_RING_BUF(chrd, ring, ubft);
		if (__improbable(ubft->buf_addr == 0)) {
			SK_ABORT_WITH_CAUSE("buflet with NULL buffer",
			    ubft->buf_idx);
			/* NOTREACHED */
			__builtin_unreachable();
		}

		/*
		 * A buflet in the alloc ring must not be chained to a next
		 * buflet; a stray next-index is tolerated only on defunct.
		 */
		nbft_idx = ubft->buf_nbft_idx;
		if (__improbable(nbft_idx != OBJ_IDX_NONE)) {
			if (_CHANNEL_IS_DEFUNCT(chd)) {
				return ENXIO;
			} else {
				SK_ABORT_WITH_CAUSE("buflet with invalid nidx",
				    nbft_idx);
				/* NOTREACHED */
				__builtin_unreachable();
			}
		}

		idx = _CHANNEL_RING_NEXT(ring, idx);
		/* advance head before the free, which uses the free ring */
		ring->ring_head = idx;
		err = os_channel_buflet_free(chd, ubft);
		if (__improbable(err != 0)) {
			if (!_CHANNEL_IS_DEFUNCT(chd)) {
				SK_ABORT_WITH_CAUSE("buflet pool purge "
				    "free failed", err);
				/* NOTREACHED */
				__builtin_unreachable();
			}
			return err;
		}
	}

	return 0;
}
2145 
/*
 * os_channel_packet_pool_purge: shrink the channel's cached packet and
 * buflet pools.  Performs a PURGE sync with the kernel, trims each
 * alloc ring (regular, large, buflet where present) down to its
 * working set, then performs a FREE sync to push the freed entries
 * back to the kernel.  Requires user packet pool mode.
 */
int
os_channel_packet_pool_purge(const channel_t chd)
{
	struct ch_info *ci = CHD_INFO(chd);
	int err;

	if (__improbable((ci->cinfo_ch_mode & CHMODE_USER_PACKET_POOL) == 0)) {
		return ENOTSUP;
	}

	/* purge sync: channel's default sync flags minus FREE, plus PURGE */
	err = __channel_sync(chd->chd_fd, CHANNEL_SYNC_UPP,
	    ((chd->chd_sync_flags & ~CHANNEL_SYNCF_FREE) | CHANNEL_SYNCF_PURGE));
	if (__improbable(err != 0)) {
		if (!_CHANNEL_IS_DEFUNCT(chd)) {
			SK_ABORT_WITH_CAUSE("packet pool purge sync failed",
			    err);
			/* NOTREACHED */
			__builtin_unreachable();
		}
		return err;
	}

	/* trim the regular packet alloc ring down to its working set */
	err = os_channel_purge_packet_alloc_ring(chd);
	if (__improbable(err != 0)) {
		return err;
	}
	/* the large-buffer alloc ring is optional; trim it if present */
	if (chd->chd_large_buf_alloc_ring_idx != CHD_RING_IDX_NONE) {
		err = os_channel_purge_large_packet_alloc_ring(chd);
		if (__improbable(err != 0)) {
			return err;
		}
	}
	/* buflet alloc ring exists only when there are >2 allocator rings */
	if (_num_allocator_rings(CHD_SCHEMA(chd)) > 2) {
		err = os_channel_purge_buflet_alloc_ring(chd);
		if (__improbable(err != 0)) {
			return err;
		}
	}

	/* push everything freed above back to the kernel */
	err = __channel_sync(chd->chd_fd, CHANNEL_SYNC_UPP, CHANNEL_SYNCF_FREE);
	if (__improbable(err != 0)) {
		if (!_CHANNEL_IS_DEFUNCT(chd)) {
			SK_ABORT_WITH_CAUSE("packet pool free sync failed",
			    err);
			/* NOTREACHED */
			__builtin_unreachable();
		}
		return err;
	}

	return __improbable(_CHANNEL_IS_DEFUNCT(chd)) ? ENXIO : 0;
}
2198 
/*
 * os_channel_get_next_event_handle: dequeue the next entry from the
 * channel's event ring, returning its handle, event type and event
 * count.  Returns EINVAL on NULL output parameters, ENOTSUP if the
 * channel was not opened with an event ring, ENODATA when the ring is
 * empty, ENXIO on defunct, 0 on success.
 */
int
os_channel_get_next_event_handle(const channel_t chd,
    os_channel_event_handle_t *ehandle, os_channel_event_type_t *etype,
    uint32_t *nevents)
{
	struct __kern_channel_event_metadata *emd;
	struct __user_channel_ring *ring;
	struct channel_ring_desc *chrd;
	struct __user_quantum *qum;
	mach_vm_address_t baddr;
	uint16_t bdoff;
	slot_idx_t idx;
	struct __user_channel_schema *csm = CHD_SCHEMA(chd);
	struct ch_info *ci = CHD_INFO(chd);

	if (__improbable((ehandle == NULL) || (etype == NULL) ||
	    (nevents == NULL))) {
		return EINVAL;
	}
	if (__improbable((ci->cinfo_ch_mode & CHMODE_EVENT_RING) == 0)) {
		return ENOTSUP;
	}
	*ehandle = NULL;
	/* the event ring follows the TX, RX and allocator rings */
	chrd = &chd->chd_rings[_num_tx_rings(ci) + _num_rx_rings(ci) +
	    _num_allocator_rings(csm)];
	ring = __DECONST(struct __user_channel_ring *, chrd->chrd_ring);
	idx = ring->ring_head;

	/* empty ring: nothing pending (or the channel went defunct) */
	if (__improbable(idx == ring->ring_tail)) {
		return __improbable(_CHANNEL_IS_DEFUNCT(chd)) ?
		       ENXIO : ENODATA;
	}
	_SLOT_DESC_VERIFY(chrd, _SLOT_DESC(chrd, idx));
	qum = _SLOT_METADATA(chrd, ring, idx);
	_METADATA_VERIFY(chrd, qum);
	_SLOT_DETACH_METADATA(_SLOT_DESC(chrd, idx));

	/* resolve the buffer address backing the event metadata */
	baddr = _initialize_metadata_address(chrd, qum, &bdoff);
	if (__improbable(baddr == 0)) {
		return ENXIO;
	}
	*ehandle = SK_PTR_ENCODE(qum, chrd->chrd_md_type,
	    chrd->chrd_md_subtype);
	/* event metadata lives at the buffer's data offset */
	emd = (void *)(baddr + bdoff);
	*etype = emd->emd_etype;
	*nevents = emd->emd_nevents;
	ring->ring_head = _CHANNEL_RING_NEXT(ring, idx);
	return __improbable(_CHANNEL_IS_DEFUNCT(chd)) ? ENXIO : 0;
}
2248 
2249 int
os_channel_event_free(const channel_t chd,os_channel_event_handle_t ehandle)2250 os_channel_event_free(const channel_t chd, os_channel_event_handle_t ehandle)
2251 {
2252 	return os_channel_packet_free(chd, (packet_t)ehandle);
2253 }
2254 
/*
 * os_channel_get_interface_advisory: copy the kernel-maintained
 * interface advisory for this channel into *advisory.  Only supported
 * when the channel has a nexus advisory region of netif or flowswitch
 * type; returns ENOENT otherwise, ENXIO on defunct, EAGAIN when an
 * inconsistent snapshot was read, 0 on success.
 */
int
os_channel_get_interface_advisory(const channel_t chd,
    struct ifnet_interface_advisory *advisory)
{
	struct __kern_netif_intf_advisory *intf_adv;
	struct __kern_nexus_adv_metadata *adv_md;
	nexus_advisory_type_t adv_type;

	/*
	 * Interface advisory is only supported for netif and flowswitch.
	 */
	adv_md = CHD_NX_ADV_MD(chd);
	if (adv_md == NULL) {
		return ENOENT;
	}
	adv_type = adv_md->knam_type;
	if (__improbable(adv_type != NEXUS_ADVISORY_TYPE_NETIF &&
	    adv_type != NEXUS_ADVISORY_TYPE_FLOWSWITCH)) {
		return _CHANNEL_IS_DEFUNCT(chd) ? ENXIO : ENOENT;
	}
	/* locate the advisory struct inside the type-specific region */
	if (adv_type == NEXUS_ADVISORY_TYPE_NETIF) {
		intf_adv = &(CHD_NX_ADV_NETIF(adv_md))->__kern_intf_adv;
	} else {
		intf_adv = &(CHD_NX_ADV_FSW(adv_md))->_nxadv_intf_adv;
	}
	/*
	 * Copy the advisory out while folding in a checksum; a mismatch
	 * with the stored cksum means the snapshot was inconsistent
	 * (presumably a concurrent kernel update -- EAGAIN lets the
	 * caller retry) unless the channel has gone defunct.
	 */
	if (intf_adv->cksum != os_cpu_copy_in_cksum(&intf_adv->adv, advisory,
	    sizeof(*advisory), 0)) {
		return _CHANNEL_IS_DEFUNCT(chd) ? ENXIO : EAGAIN;
	}
	return 0;
}
2286 
2287 int
os_channel_configure_interface_advisory(const channel_t chd,boolean_t enable)2288 os_channel_configure_interface_advisory(const channel_t chd, boolean_t enable)
2289 {
2290 	uint32_t value = enable;
2291 
2292 	return __channel_set_opt(chd->chd_fd, CHOPT_IF_ADV_CONF,
2293 	           &value, sizeof(value));
2294 }
2295 
/*
 * os_channel_buflet_alloc: allocate a buflet from the channel's buflet
 * alloc ring, syncing with the kernel when the ring is empty.
 * Requires user packet pool mode and a channel schema that provides
 * buflet allocator rings (>= 4 allocator rings).  Returns ENOTSUP,
 * ENOMEM when the sync yields nothing, ENXIO on defunct, 0 on success
 * with *bft set.
 */
int
os_channel_buflet_alloc(const channel_t chd, buflet_t *bft)
{
	struct __user_channel_ring *ring;
	struct channel_ring_desc *chrd;
	struct __user_buflet *ubft;
	obj_idx_t nbft_idx;
	slot_idx_t idx;
	struct ch_info *ci = CHD_INFO(chd);

	if (__improbable((ci->cinfo_ch_mode & CHMODE_USER_PACKET_POOL) == 0)) {
		return ENOTSUP;
	}

	/* buflet rings exist only when the schema has >= 4 allocator rings */
	if (__improbable(_num_allocator_rings(CHD_SCHEMA(chd)) < 4)) {
		return ENOTSUP;
	}

	chrd = &chd->chd_rings[chd->chd_buf_alloc_ring_idx];
	ring = __DECONST(struct __user_channel_ring *, chrd->chrd_ring);
	idx = ring->ring_head;

	if (__improbable(idx == ring->ring_tail)) {
		/*
		 * do a sync to get more buflets;
		 */
		int err;
		err = __channel_sync(chd->chd_fd, CHANNEL_SYNC_UPP,
		    CHANNEL_SYNCF_ALLOC_BUF | CHANNEL_SYNCF_FREE);
		if (__improbable(err != 0)) {
			if (!_CHANNEL_IS_DEFUNCT(chd)) {
				SK_ABORT_WITH_CAUSE("buflet pool alloc "
				    "sync failed", err);
				/* NOTREACHED */
				__builtin_unreachable();
			}
			return err;
		}
	}

	/* still empty after sync: out of buflets (or channel went defunct) */
	if (__improbable(idx == ring->ring_tail)) {
		return __improbable(_CHANNEL_IS_DEFUNCT(chd)) ?
		       ENXIO : ENOMEM;
	}

	_SLOT_DESC_VERIFY(chrd, _SLOT_DESC(chrd, idx));
	ubft = _SLOT_BFT_METADATA(chrd, ring, idx);
	_SLOT_DETACH_METADATA(_SLOT_DESC(chrd, idx));

	/*
	 * Initialize the buflet metadata buffer address.
	 */
	*(mach_vm_address_t *)(uintptr_t)&(ubft->buf_addr) =
	    _CHANNEL_RING_BUF(chrd, ring, ubft);
	if (__improbable(ubft->buf_addr == 0)) {
		SK_ABORT_WITH_CAUSE("buflet alloc with NULL buffer",
		    ubft->buf_idx);
		/* NOTREACHED */
		__builtin_unreachable();
	}
	/*
	 * A freshly allocated buflet must not be chained to a next buflet;
	 * a stray next-index is tolerated only on defunct.
	 */
	nbft_idx = ubft->buf_nbft_idx;
	if (__improbable(nbft_idx != OBJ_IDX_NONE)) {
		if (_CHANNEL_IS_DEFUNCT(chd)) {
			return ENXIO;
		} else {
			SK_ABORT_WITH_CAUSE("buflet alloc with invalid nidx",
			    nbft_idx);
			/* NOTREACHED */
			__builtin_unreachable();
		}
	}
	ring->ring_head = _CHANNEL_RING_NEXT(ring, idx);
	*bft = ubft;
	return __improbable(_CHANNEL_IS_DEFUNCT(chd)) ? ENXIO : 0;
}
2371 
/*
 * os_channel_buflet_free: return a buflet to the channel's buflet pool
 * by attaching its index to the head slot of the buflet free ring.
 * Requires user packet pool mode and a schema with buflet allocator
 * rings (>= 4 allocator rings).  Returns ENOTSUP, ENXIO on defunct,
 * 0 on success.
 */
int
os_channel_buflet_free(const channel_t chd, buflet_t ubft)
{
	struct __user_channel_ring *ring;
	struct channel_ring_desc *chrd;
	slot_idx_t idx;
	obj_idx_t midx;
	struct ch_info *ci = CHD_INFO(chd);

	if (__improbable((ci->cinfo_ch_mode & CHMODE_USER_PACKET_POOL) == 0)) {
		return ENOTSUP;
	}

	/* buflet rings exist only when the schema has >= 4 allocator rings */
	if (__improbable(_num_allocator_rings(CHD_SCHEMA(chd)) < 4)) {
		return ENOTSUP;
	}

	chrd = &chd->chd_rings[chd->chd_buf_free_ring_idx];
	ring = __DECONST(struct __user_channel_ring *, chrd->chrd_ring);

	idx = ring->ring_head;
	if (__improbable(idx == ring->ring_tail)) {
		/*
		 * do a sync to reclaim space in free ring;
		 */
		int err;
		err = __channel_sync(chd->chd_fd, CHANNEL_SYNC_UPP,
		    CHANNEL_SYNCF_FREE);
		if (__improbable(err != 0) && !_CHANNEL_IS_DEFUNCT(chd)) {
			SK_ABORT_WITH_CAUSE("buflet pool free "
			    "sync failed", err);
			/* NOTREACHED */
			__builtin_unreachable();
		}
	}

	/*
	 * Re-check after the sync: on a live channel there must now be
	 * space in the free ring; only a defunct channel may still be full.
	 */
	if (__improbable(idx == ring->ring_tail) && !_CHANNEL_IS_DEFUNCT(chd)) {
		SK_ABORT("no ring space in buflet free ring");
		/* NOTREACHED */
		__builtin_unreachable();
	}

	/* attach the buflet's index to the slot and publish the new head */
	midx = _BFT_INDEX(chrd, ubft);
	_SLOT_BFT_METADATA_IDX_VERIFY(chrd, ubft, midx);
	_SLOT_ATTACH_METADATA(_SLOT_DESC(chrd, idx), midx);
	ring->ring_head = _CHANNEL_RING_NEXT(ring, idx);

	return __improbable(_CHANNEL_RING_IS_DEFUNCT(chrd)) ? ENXIO : 0;
}
2421