xref: /xnu-12377.81.4/libsyscall/wrappers/skywalk/os_channel.c (revision 043036a2b3718f7f0be807e2870f8f47d3fa0796)
1 /*
2  * Copyright (c) 2015-2023 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 
30 #include <stdlib.h>
31 #include <stddef.h>
32 #include <string.h>
33 #include <strings.h>
34 #include <unistd.h>
35 #include <errno.h>
36 #include <os/atomic_private.h>
37 #include <skywalk/os_skywalk_private.h>
38 #include <skywalk/os_packet_private.h>
39 
40 #ifndef LIBSYSCALL_INTERFACE
41 #error "LIBSYSCALL_INTERFACE not defined"
42 #endif /* !LIBSYSCALL_INTERFACE */
43 
/*
 * Defined here as we don't have Libc
 */
extern int __getpid(void);
extern int __kill(int pid, int signum, int posix);
extern int __exit(int) __attribute__((noreturn));

/* Forward declarations for helpers defined later in this file. */
static ring_id_t _ring_id(struct ch_info *cinfo, const ring_id_type_t type);
static void os_channel_info2attr(struct channel *chd, channel_attr_t cha);
static int _flowadv_id_equal(struct __flowadv_entry *, uuid_t);
54 
/*
 * This is pretty much what an inlined memcmp() would do for UUID
 * comparison; since we don't have access to memcmp() here, we
 * manually handle it ourselves.
 */
#define UUID_COMPARE(a, b)                                                  \
	(a[0] == b[0] && a[1] == b[1] && a[2] == b[2] && a[3] == b[3] &&    \
	a[4] == b[4] && a[5] == b[5] && a[6] == b[6] && a[7] == b[7] &&     \
	a[8] == b[8] && a[9] == b[9] && a[10] == b[10] && a[11] == b[11] && \
	a[12] == b[12] && a[13] == b[13] && a[14] == b[14] && a[15] == b[15])

/* Index of a slot, derived from its position within the ring's slot array. */
#define _SLOT_INDEX(_chrd, _slot)                                       \
	((slot_idx_t)((_slot - (_chrd)->chrd_slot_desc)))

/* Userspace slot descriptor for slot index _idx. */
#define _SLOT_DESC(_chrd, _idx)                                         \
	(SLOT_DESC_USD(&(_chrd)->chrd_slot_desc[_idx]))

/*
 * Address of metadata object _midx: objects are laid out contiguously
 * from chrd_md_base_addr, each preceded by a preamble of
 * METADATA_PREAMBLE_SZ bytes which is skipped here.
 */
#define _METADATA(_chrd, _ring, _midx)                                  \
	((void *)((_chrd)->chrd_md_base_addr +                          \
	((_midx) * (_ring)->ring_md_size) + METADATA_PREAMBLE_SZ))

/* Metadata attached to slot _idx (via the slot descriptor's md index). */
#define _SLOT_METADATA(_chrd, _ring, _idx)                              \
	_METADATA(_chrd, _ring, _SLOT_DESC(_chrd, _idx)->sd_md_idx)

/*
 * Abort unless _md is the metadata object at index _midx.  The check is
 * skipped when the ring has gone defunct, since the shared region may
 * then read back as zero-filled memory.
 */
#define _SLOT_METADATA_IDX_VERIFY(_chrd, _md, _midx)    do {            \
	if (__improbable((_md) != _METADATA((_chrd), (_chrd)->chrd_ring, \
	    (_midx))) && !_CHANNEL_RING_IS_DEFUNCT(_chrd)) {            \
	        SK_ABORT_WITH_CAUSE("bad packet handle", (_midx));      \
	/* NOTREACHED */                                                \
	        __builtin_unreachable();                                \
	}                                                               \
} while (0)

/* Registered (kernel-assigned) index of a buflet. */
#define _BFT_INDEX(_chrd, _bft) (_bft)->buf_bft_idx_reg

/* Buflet metadata attached to slot _idx. */
#define _SLOT_BFT_METADATA(_chrd, _ring, _idx)                          \
	_CHANNEL_RING_BFT(_chrd, _ring, _SLOT_DESC(_chrd, _idx)->sd_md_idx)

/* Abort unless _md is the buflet at index _midx (skipped when defunct). */
#define _SLOT_BFT_METADATA_IDX_VERIFY(_chrd, _md, _midx)    do {        \
	if (__improbable((mach_vm_address_t)(_md) !=                    \
	    _CHANNEL_RING_BFT((_chrd), (_chrd)->chrd_ring, (_midx))) && \
	    !_CHANNEL_RING_IS_DEFUNCT(_chrd)) {                         \
	        SK_ABORT_WITH_CAUSE("bad buflet handle", (_midx));      \
	/* NOTREACHED */                                                \
	        __builtin_unreachable();                                \
	}                                                               \
} while (0)
102 
/* Abort if the slot descriptor has no valid metadata (skipped when defunct). */
#define _SLOT_DESC_VERIFY(_chrd, _sdp) do {                             \
	if (__improbable(!SD_VALID_METADATA(_sdp)) &&                   \
	    !_CHANNEL_RING_IS_DEFUNCT(_chrd)) {                         \
	        SK_ABORT("Slot descriptor has no metadata");            \
	/* NOTREACHED */                                        \
	        __builtin_unreachable();                                \
	}                                                               \
} while (0)

/*
 * Abort if the metadata preamble's redzone does not equal the object's
 * offset from the metadata base XOR'ed with the per-process cookie.
 * This catches stray writes into the shared metadata region.  Skipped
 * when the channel is defunct (memory may be zero-filled).
 */
#define _METADATA_VERIFY(_chrd, _md) do {                               \
	if (__improbable(METADATA_PREAMBLE(_md)->mdp_redzone !=         \
	    (((mach_vm_address_t)(_md) - (_chrd)->chrd_md_base_addr) ^  \
	    __os_ch_md_redzone_cookie)) &&                              \
	    !_CHANNEL_RING_IS_DEFUNCT(_chrd)) {                         \
	        SK_ABORT_WITH_CAUSE("Metadata redzone corrupted",       \
	            METADATA_PREAMBLE(_md)->mdp_redzone);               \
	/* NOTREACHED */                                        \
	        __builtin_unreachable();                                \
	}                                                               \
} while (0)

/*
 * Abort on impossible buflet counts: a packet claiming more buflets
 * than the channel allows (_bmax), or more in use (_bcnt) than its max.
 */
#define _PKT_BUFCNT_VERIFY(_chrd, _bcnt, _bmax) do {                    \
	if (__improbable((_chrd)->chrd_max_bufs < (_bmax))) {           \
	        SK_ABORT_WITH_CAUSE("Invalid max bufcnt", (_bmax));     \
	/* NOTREACHED */                                                \
	        __builtin_unreachable();                                \
	}                                                               \
	if (__improbable((_bcnt) > (_bmax))) {                          \
	        SK_ABORT_WITH_CAUSE("Invalid bufcnt", (_bcnt));         \
	/* NOTREACHED */                                                \
	        __builtin_unreachable();                                \
	}                                                               \
} while (0)
136 
/* Size of the heap scratch buffer used to format abort messages. */
#define _ABORT_MSGSZ    1024

/*
 * Abort with a diagnostic message when the shared schema region's
 * version does not match the version this library was built against
 * (e.g. a mismatched libsystem_kernel.dylib).  The message includes
 * the kernel version string and UUID from the schema to aid triage.
 * Only %x and %s are available from _mach_snprintf here.
 */
#define _SCHEMA_VER_VERIFY(_chd) do {                                   \
	/* ensure all stores are globally visible */                    \
	os_atomic_thread_fence(seq_cst);                                                  \
	if (CHD_SCHEMA(_chd)->csm_ver != CSM_CURRENT_VERSION)	{       \
	        char *_msg = malloc(_ABORT_MSGSZ);                      \
	        uint32_t _ver = (uint32_t)CHD_SCHEMA(_chd)->csm_ver;    \
	/* we're stuck with %x and %s formatters */             \
	        (void) _mach_snprintf(_msg, _ABORT_MSGSZ,               \
	            "Schema region version mismatch: 0x%x != 0x%x\n"    \
	            "Kernel version: %s - did you forget to install "   \
	            "a matching libsystem_kernel.dylib?\n"              \
	            "Kernel UUID: %x%x%x%x-%x%x-%x%x-%x%x-%x%x%x%x%x%x", \
	            _ver, (uint32_t)CSM_CURRENT_VERSION,                \
	            CHD_SCHEMA(_chd)->csm_kern_name,                    \
	            CHD_SCHEMA(_chd)->csm_kern_uuid[0],                 \
	            CHD_SCHEMA(_chd)->csm_kern_uuid[1],                 \
	            CHD_SCHEMA(_chd)->csm_kern_uuid[2],                 \
	            CHD_SCHEMA(_chd)->csm_kern_uuid[3],                 \
	            CHD_SCHEMA(_chd)->csm_kern_uuid[4],                 \
	            CHD_SCHEMA(_chd)->csm_kern_uuid[5],                 \
	            CHD_SCHEMA(_chd)->csm_kern_uuid[6],                 \
	            CHD_SCHEMA(_chd)->csm_kern_uuid[7],                 \
	            CHD_SCHEMA(_chd)->csm_kern_uuid[8],                 \
	            CHD_SCHEMA(_chd)->csm_kern_uuid[9],                 \
	            CHD_SCHEMA(_chd)->csm_kern_uuid[10],                \
	            CHD_SCHEMA(_chd)->csm_kern_uuid[11],                \
	            CHD_SCHEMA(_chd)->csm_kern_uuid[12],                \
	            CHD_SCHEMA(_chd)->csm_kern_uuid[13],                \
	            CHD_SCHEMA(_chd)->csm_kern_uuid[14],                \
	            CHD_SCHEMA(_chd)->csm_kern_uuid[15]);               \
	        SK_ABORT_DYNAMIC(_msg);                                 \
	/* NOTREACHED */                                        \
	        __builtin_unreachable();                                \
	}                                                               \
} while (0)
174 
/* Bind metadata object _md_idx to a slot and mark the index valid. */
#define _SLOT_ATTACH_METADATA(_usd, _md_idx) do {                       \
	(_usd)->sd_md_idx = (_md_idx);                                  \
	(_usd)->sd_flags |= SD_IDX_VALID;                               \
} while (0)

/* Detach any metadata from a slot and clear the valid flag. */
#define _SLOT_DETACH_METADATA(_usd) do	{                               \
	(_usd)->sd_md_idx = OBJ_IDX_NONE;                               \
	(_usd)->sd_flags &= ~SD_IDX_VALID;                              \
} while (0)

/* Pointer arithmetic helper: _ptr advanced by _offset bytes, cast to _type. */
#define _CHANNEL_OFFSET(_type, _ptr, _offset)                           \
	((_type)(void *)((uintptr_t)(_ptr) + (_offset)))

/* Schema region pointer, located _off bytes past the mapped base. */
#define _CHANNEL_SCHEMA(_base, _off)                                    \
	_CHANNEL_OFFSET(struct __user_channel_schema *, _base, _off)

/* Address of default-sized buffer _idx in the ring's buffer region. */
#define _CHANNEL_RING_DEF_BUF(_chrd, _ring, _idx)                       \
	((_chrd)->chrd_def_buf_base_addr +                              \
	((_idx) * (_ring)->ring_def_buf_size))

/* Address of large buffer _idx in the ring's large-buffer region. */
#define _CHANNEL_RING_LARGE_BUF(_chrd, _ring, _idx)                     \
	((_chrd)->chrd_large_buf_base_addr +                            \
	((_idx) * (_ring)->ring_large_buf_size))

/* Buffer address for a buflet, choosing the large or default region. */
#define _CHANNEL_RING_BUF(_chrd, _ring, _bft)                           \
	BUFLET_HAS_LARGE_BUF(_bft) ?                                    \
	_CHANNEL_RING_LARGE_BUF(_chrd, _ring, (_bft)->buf_idx) :        \
	_CHANNEL_RING_DEF_BUF(_chrd, _ring, (_bft)->buf_idx)

/* Address of buflet metadata object _idx. */
#define _CHANNEL_RING_BFT(_chrd, _ring, _idx)                           \
	((_chrd)->chrd_bft_base_addr + ((_idx) * (_ring)->ring_bft_size))

/* Next slot index, wrapping to 0 at the end of the ring. */
#define _CHANNEL_RING_NEXT(_ring, _cur)                                 \
	(__improbable((_cur) + 1 == (_ring)->ring_num_slots) ? 0 : (_cur) + 1)

/* A ring is defunct once the kernel clears CSM_ACTIVE in the schema. */
#define _CHANNEL_RING_IS_DEFUNCT(_chrd)                                 \
	(!(*(_chrd)->chrd_csm_flags & CSM_ACTIVE))

/* Same check, at channel granularity. */
#define _CHANNEL_IS_DEFUNCT(_chd)                                       \
	(!(CHD_SCHEMA(_chd)->csm_flags & CSM_ACTIVE))

/*
 * Resolve a packet's first buflet: the inline quantum buflet when it
 * holds a buffer, else the first chained (external) buflet, else NULL.
 */
#define _CH_PKT_GET_FIRST_BUFLET(_pkt, _bft, _chrd, _ring) do {         \
	if (__probable((_pkt)->pkt_qum_buf.buf_idx != OBJ_IDX_NONE)) {  \
	        (_bft) = &(_pkt)->pkt_qum_buf;                          \
	} else if ((_pkt)->pkt_qum_buf.buf_nbft_idx != OBJ_IDX_NONE) {  \
	        (_bft) = _CHANNEL_RING_BFT(_chrd, _ring,                \
	            (_pkt)->pkt_qum_buf.buf_nbft_idx);                  \
	} else {                                                        \
	        (_bft) = NULL;                                          \
	}                                                               \
} while (0)
226 
/*
 * A per process copy of the channel metadata redzone cookie.
 * Captured from the first channel's schema (see os_channel_create_extended)
 * and used by _METADATA_VERIFY to detect metadata corruption.
 */
__attribute__((visibility("hidden")))
static uint64_t __os_ch_md_redzone_cookie = 0;
232 
233 __attribute__((always_inline, visibility("hidden")))
234 static inline uint32_t
_num_tx_rings(struct ch_info * ci)235 _num_tx_rings(struct ch_info *ci)
236 {
237 	ring_id_t first, last;
238 
239 	first = _ring_id(ci, CHANNEL_FIRST_TX_RING);
240 	last = _ring_id(ci, CHANNEL_LAST_TX_RING);
241 
242 	return (last - first) + 1;
243 }
244 
245 __attribute__((always_inline, visibility("hidden")))
246 static inline uint32_t
_num_rx_rings(struct ch_info * ci)247 _num_rx_rings(struct ch_info *ci)
248 {
249 	ring_id_t first, last;
250 
251 	first = _ring_id(ci, CHANNEL_FIRST_RX_RING);
252 	last = _ring_id(ci, CHANNEL_LAST_RX_RING);
253 
254 	return (last - first) + 1;
255 }
256 
257 __attribute__((always_inline, visibility("hidden")))
258 static inline uint32_t
_num_allocator_rings(const struct __user_channel_schema * csm)259 _num_allocator_rings(const struct __user_channel_schema *csm)
260 {
261 	return csm->csm_allocator_ring_pairs << 1;
262 }
263 
/*
 * Initialize the user-space descriptor for ring `ring_index' of channel
 * `chd', resolving offsets published in the shared schema into absolute
 * addresses.  Aborts the process if the schema is malformed or if the
 * metadata type/subtype is not the raw-packet layout this library
 * understands.
 */
__attribute__((visibility("hidden")))
static void
os_channel_init_ring(struct channel_ring_desc *chrd,
    struct channel *chd, uint32_t ring_index)
{
	struct __user_channel_schema *csm = CHD_SCHEMA(chd);
	struct __user_channel_ring *ring = NULL;
	struct __slot_desc *sd = NULL;
	nexus_meta_type_t md_type;
	nexus_meta_subtype_t md_subtype;

	/* schema publishes per-ring offsets relative to itself */
	ring = _CHANNEL_OFFSET(struct __user_channel_ring *, csm,
	    csm->csm_ring_ofs[ring_index].ring_off);
	sd = _CHANNEL_OFFSET(struct __slot_desc *, csm,
	    csm->csm_ring_ofs[ring_index].sd_off);
	md_type = csm->csm_md_type;
	md_subtype = csm->csm_md_subtype;

	if (ring == NULL || sd == NULL) {
		SK_ABORT("Channel schema not valid");
		/* NOTREACHED */
		__builtin_unreachable();
	} else if (md_type != NEXUS_META_TYPE_PACKET) {
		SK_ABORT_WITH_CAUSE("Metadata type unknown", md_type);
		/* NOTREACHED */
		__builtin_unreachable();
	} else if (md_subtype != NEXUS_META_SUBTYPE_RAW) {
		SK_ABORT_WITH_CAUSE("Metadata subtype unknown", md_subtype);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	chrd->chrd_slot_desc = sd;
	chrd->chrd_csm_flags = &chd->chd_schema->csm_flags;
	/* const overrides: the fields are declared const so that channel
	 * consumers cannot modify them; cast away constness only here,
	 * at initialization time */
	*(struct channel **)(uintptr_t)&chrd->chrd_channel = chd;
	*(struct __user_channel_ring **)(uintptr_t)&chrd->chrd_ring = ring;
	*(nexus_meta_type_t *)(uintptr_t)&chrd->chrd_md_type = md_type;
	*(nexus_meta_subtype_t *)(uintptr_t)&chrd->chrd_md_subtype = md_subtype;
	*(mach_vm_address_t *)(uintptr_t)&chrd->chrd_shmem_base_addr =
	    CHD_INFO(chd)->cinfo_mem_base;
	/* region bases below are published as offsets relative to the ring */
	*(mach_vm_address_t *)(uintptr_t)&chrd->chrd_def_buf_base_addr =
	    (mach_vm_address_t)((uintptr_t)ring + ring->ring_def_buf_base);
	*(mach_vm_address_t *)(uintptr_t)&chrd->chrd_md_base_addr =
	    (mach_vm_address_t)((uintptr_t)ring + ring->ring_md_base);
	*(mach_vm_address_t *)(uintptr_t)&chrd->chrd_sd_base_addr =
	    (mach_vm_address_t)((uintptr_t)ring + ring->ring_sd_base);
	*(mach_vm_address_t *)(uintptr_t)&chrd->chrd_bft_base_addr =
	    (mach_vm_address_t)((uintptr_t)ring + ring->ring_bft_base);
	*(mach_vm_address_t *)(uintptr_t)&chrd->chrd_large_buf_base_addr =
	    (mach_vm_address_t)((uintptr_t)ring + ring->ring_large_buf_base);
	*(uint32_t *)(uintptr_t)&chrd->chrd_max_bufs =
	    CHD_PARAMS(chd)->nxp_max_frags;
}
318 
319 __attribute__((always_inline, visibility("hidden")))
320 static inline mach_vm_address_t
_initialize_metadata_address(const channel_ring_t chrd,struct __user_quantum * q,uint16_t * bdoff)321 _initialize_metadata_address(const channel_ring_t chrd,
322     struct __user_quantum *q, uint16_t *bdoff)
323 {
324 	int i;
325 	struct __user_buflet *ubft0;
326 	const struct __user_channel_ring *ring = chrd->chrd_ring;
327 	struct __user_buflet *ubft, *pbft;
328 	struct __user_packet *p = (struct __user_packet *)q;
329 	uint16_t bcnt = p->pkt_bufs_cnt;
330 	uint16_t bmax = p->pkt_bufs_max;
331 
332 	_Static_assert(sizeof(p->pkt_qum_buf.buf_addr) ==
333 	    sizeof(mach_vm_address_t), "invalid buffer size");
334 	/*
335 	 * In the event of a defunct, we'd be accessing zero-filled
336 	 * memory and end up with 0 for bcnt or bmax.
337 	 */
338 	if (__improbable((bcnt == 0) || (bmax == 0))) {
339 		if (!_CHANNEL_RING_IS_DEFUNCT(chrd)) {
340 			SK_ABORT("bad bufcnt");
341 			/* NOTREACHED */
342 			__builtin_unreachable();
343 		}
344 		return 0;
345 	}
346 	_PKT_BUFCNT_VERIFY(chrd, bcnt, bmax);
347 	_CH_PKT_GET_FIRST_BUFLET(p, ubft, chrd, ring);
348 	if (__improbable(ubft == NULL)) {
349 		SK_ABORT("bad packet: no buflet");
350 		/* NOTREACHED */
351 		__builtin_unreachable();
352 	}
353 	/*
354 	 * special handling for empty packet buflet.
355 	 */
356 	if (__improbable(p->pkt_qum_buf.buf_idx == OBJ_IDX_NONE)) {
357 		*__DECONST(mach_vm_address_t *,
358 		    &p->pkt_qum_buf.buf_addr) = 0;
359 		*__DECONST(mach_vm_address_t *,
360 		    &p->pkt_qum_buf.buf_nbft_addr) =
361 		    (mach_vm_address_t)ubft;
362 	}
363 	ubft0 = ubft;
364 	for (i = 0; (i < bcnt) && (ubft != NULL); i++) {
365 		pbft = ubft;
366 		if (__probable(pbft->buf_idx != OBJ_IDX_NONE)) {
367 			*(mach_vm_address_t *)(uintptr_t)
368 			&(pbft->buf_addr) = _CHANNEL_RING_BUF(chrd,
369 			    ring, pbft);
370 		} else {
371 			*(mach_vm_address_t *)(uintptr_t)
372 			&(pbft->buf_addr) = NULL;
373 		}
374 		if (pbft->buf_nbft_idx != OBJ_IDX_NONE) {
375 			ubft = _CHANNEL_RING_BFT(chrd, ring,
376 			    pbft->buf_nbft_idx);
377 		} else {
378 			ubft = NULL;
379 		}
380 		*__DECONST(mach_vm_address_t *, &pbft->buf_nbft_addr) =
381 		    (mach_vm_address_t)ubft;
382 	}
383 	if (__improbable(pbft->buf_nbft_idx != OBJ_IDX_NONE)) {
384 		if (!_CHANNEL_RING_IS_DEFUNCT(chrd)) {
385 			SK_ABORT("non terminating buflet chain");
386 			/* NOTREACHED */
387 			__builtin_unreachable();
388 		}
389 		return 0;
390 	}
391 	if (__improbable(i != bcnt)) {
392 		SK_ABORT_WITH_CAUSE("invalid buflet count", bcnt);
393 		/* NOTREACHED */
394 		__builtin_unreachable();
395 	}
396 
397 	/* return address and offset of the first buffer */
398 	*bdoff = ubft0->buf_doff;
399 	return ubft0->buf_addr;
400 }
401 
402 /*
403  * _slot_index_is_valid
404  * - verify that the slot index is within valid bounds
405  * - if the head is less than (or equal to) the tail (case A below)
406  *	head <= valid < tail
407  * - if the head is greater than the tail (case B below)
408  *      valid < tail
409  *    or
410  *	head <= valid < num_slots
411  *
412  * case A: x x x x x x x H o o o o o T x x x x x x
413  * case B: o o o o o T x x x x H o o o o o o o o o
414  *
415  * 'H' - head
416  * 'T' - tail
417  * 'x' - invalid
418  * 'o' - valid
419  */
420 __attribute__((always_inline, visibility("hidden")))
421 static inline int
_slot_index_is_valid(const struct __user_channel_ring * ring,slot_idx_t idx)422 _slot_index_is_valid(const struct __user_channel_ring *ring, slot_idx_t idx)
423 {
424 	int     is_valid = 0;
425 
426 	if (ring->ring_head <= ring->ring_tail) {
427 		if (__probable(idx >= ring->ring_head && idx < ring->ring_tail)) {
428 			is_valid = 1;
429 		}
430 	} else {
431 		if (__probable(idx < ring->ring_tail ||
432 		    (idx >= ring->ring_head && idx < ring->ring_num_slots))) {
433 			is_valid = 1;
434 		}
435 	}
436 
437 	return is_valid;
438 }
439 
/*
 * Open a channel to the nexus identified by `uuid' at `port', with the
 * requested direction, ring selection and optional attributes.  On
 * success returns a heap-allocated channel descriptor whose ring
 * descriptors have been fully initialized from the kernel-published
 * schema; on failure returns NULL with errno set.  The returned channel
 * owns its ch_info and the guarded fd; both are released by
 * os_channel_destroy().
 */
channel_t
os_channel_create_extended(const uuid_t uuid, const nexus_port_t port,
    const ring_dir_t dir, const ring_id_t ring, const channel_attr_t cha)
{
	uint32_t num_tx_rings, num_rx_rings, num_allocator_rings;
	uint32_t ring_offset, ring_index, num_event_rings, num_large_buf_alloc_rings;
	struct __user_channel_schema *ucs;
	struct channel *chd = NULL;
	struct ch_info *ci = NULL;
	struct ch_init init;
	int i, fd = -1;
	int err = 0;
	size_t chd_sz;

	SK_ALIGN64_CASSERT(struct ch_info, cinfo_mem_map_size);

	/* only the three known directions are accepted */
	switch (dir) {
	case CHANNEL_DIR_TX_RX:
	case CHANNEL_DIR_TX:
	case CHANNEL_DIR_RX:
		break;
	default:
		err = EINVAL;
		goto done;
	}

	ci = malloc(CHD_INFO_SIZE);
	if (ci == NULL) {
		err = errno = ENOMEM;
		goto done;
	}
	bzero(ci, CHD_INFO_SIZE);

	/* translate the caller's attributes into channel-open mode bits */
	bzero(&init, sizeof(init));
	init.ci_version = CHANNEL_INIT_CURRENT_VERSION;
	if (cha != NULL) {
		if (cha->cha_exclusive != 0) {
			init.ci_ch_mode |= CHMODE_EXCLUSIVE;
		}
		if (cha->cha_user_packet_pool != 0) {
			init.ci_ch_mode |= CHMODE_USER_PACKET_POOL;
		}
		if (cha->cha_nexus_defunct_ok != 0) {
			init.ci_ch_mode |= CHMODE_DEFUNCT_OK;
		}
		if (cha->cha_enable_event_ring != 0) {
			/* User packet pool is required for event rings */
			if (cha->cha_user_packet_pool == 0) {
				err = EINVAL;
				goto done;
			}
			init.ci_ch_mode |= CHMODE_EVENT_RING;
		}
		if (cha->cha_filter != 0) {
			init.ci_ch_mode |= CHMODE_FILTER;
		}
		if (cha->cha_low_latency != 0) {
			init.ci_ch_mode |= CHMODE_LOW_LATENCY;
		}
		init.ci_key_len = cha->cha_key_len;
		init.ci_key = cha->cha_key;
		init.ci_tx_lowat = cha->cha_tx_lowat;
		init.ci_rx_lowat = cha->cha_rx_lowat;
	}
	init.ci_ch_ring_id = ring;
	init.ci_nx_port = port;
	bcopy(uuid, init.ci_nx_uuid, sizeof(uuid_t));

	/* open the channel; the kernel maps the shared region for us */
	fd = __channel_open(&init, sizeof(init));
	if (fd == -1) {
		err = errno;
		goto done;
	}

	err = __channel_get_info(fd, ci, CHD_INFO_SIZE);
	if (err != 0) {
		err = errno;
		goto done;
	}

	/* locate the schema inside the mapped region and count rings */
	ucs = _CHANNEL_SCHEMA(ci->cinfo_mem_base, ci->cinfo_schema_offset);
	num_tx_rings = _num_tx_rings(ci);       /* # of channel tx rings */
	num_rx_rings = _num_rx_rings(ci);       /* # of channel rx rings */
	num_allocator_rings = _num_allocator_rings(ucs);
	num_event_rings = ucs->csm_num_event_rings;
	num_large_buf_alloc_rings = ucs->csm_large_buf_alloc_rings;

	/*
	 * if the user requested packet allocation mode for channel, then
	 * check that channel was opened in packet allocation mode and
	 * allocator rings were created.
	 */
	if ((init.ci_ch_mode & CHMODE_USER_PACKET_POOL) &&
	    ((num_allocator_rings < 2) ||
	    !(ci->cinfo_ch_mode & CHMODE_USER_PACKET_POOL))) {
		err = errno = ENXIO;
		goto done;
	}

	/* likewise, event-ring mode must have been honored by the kernel */
	if ((init.ci_ch_mode & CHMODE_EVENT_RING) && ((num_event_rings == 0) ||
	    !(ci->cinfo_ch_mode & CHMODE_EVENT_RING))) {
		err = errno = ENXIO;
		goto done;
	}

	/* descriptor is sized for all ring descriptors, in schema order */
	chd_sz = CHD_SIZE(num_tx_rings + num_rx_rings + num_allocator_rings +
	    num_event_rings + num_large_buf_alloc_rings);
	chd = malloc(chd_sz);
	if (chd == NULL) {
		err = errno = ENOMEM;
		goto done;
	}

	bzero(chd, chd_sz);
	chd->chd_fd = fd;
	chd->chd_guard = init.ci_guard;

	/* claim ch_info (will be freed along with the channel itself) */
	CHD_INFO(chd) = ci;
	ci = NULL;

	/* const override */
	*(struct __user_channel_schema **)(uintptr_t)&chd->chd_schema = ucs;

	/* make sure we're running on the right kernel */
	_SCHEMA_VER_VERIFY(chd);

	*(nexus_meta_type_t *)&chd->chd_md_type = CHD_SCHEMA(chd)->csm_md_type;
	*(nexus_meta_subtype_t *)&chd->chd_md_subtype =
	    CHD_SCHEMA(chd)->csm_md_subtype;

	/* optional regions: statistics, flow advisory, nexus advisory */
	if (CHD_SCHEMA(chd)->csm_stats_ofs != 0) {
		*(void **)(uintptr_t)&chd->chd_nx_stats =
		    _CHANNEL_OFFSET(void *, CHD_INFO(chd)->cinfo_mem_base,
		    CHD_SCHEMA(chd)->csm_stats_ofs);
	}

	if (CHD_SCHEMA(chd)->csm_flowadv_ofs != 0) {
		*(struct __flowadv_entry **)(uintptr_t)&chd->chd_nx_flowadv =
		    _CHANNEL_OFFSET(struct __flowadv_entry *,
		    CHD_INFO(chd)->cinfo_mem_base,
		    CHD_SCHEMA(chd)->csm_flowadv_ofs);
	}

	if (CHD_SCHEMA(chd)->csm_nexusadv_ofs != 0) {
		struct __kern_nexus_adv_metadata *adv_md;

		*(struct __kern_nexus_adv_metadata **)
		(uintptr_t)&chd->chd_nx_adv =
		    _CHANNEL_OFFSET(struct __kern_nexus_adv_metadata *,
		    CHD_INFO(chd)->cinfo_mem_base,
		    CHD_SCHEMA(chd)->csm_nexusadv_ofs);
		adv_md = CHD_NX_ADV_MD(chd);
		/* all version checks are skipped once the channel is defunct,
		 * since the shared memory may read back zero-filled */
		if (adv_md->knam_version != NX_ADVISORY_MD_CURRENT_VERSION &&
		    !_CHANNEL_IS_DEFUNCT(chd)) {
			SK_ABORT_WITH_CAUSE("nexus advisory metadata version"
			    " mismatch", NX_ADVISORY_MD_CURRENT_VERSION);
			/* NOTREACHED */
			__builtin_unreachable();
		}
		if (chd->chd_nx_adv->knam_type == NEXUS_ADVISORY_TYPE_NETIF) {
			struct netif_nexus_advisory *netif_adv;
			netif_adv = CHD_NX_ADV_NETIF(adv_md);
			if (netif_adv->nna_version !=
			    NX_NETIF_ADVISORY_CURRENT_VERSION &&
			    !_CHANNEL_IS_DEFUNCT(chd)) {
				SK_ABORT_WITH_CAUSE("nexus advisory "
				    "version mismatch for netif",
				    NX_NETIF_ADVISORY_CURRENT_VERSION);
				/* NOTREACHED */
				__builtin_unreachable();
			}
		} else if (chd->chd_nx_adv->knam_type ==
		    NEXUS_ADVISORY_TYPE_FLOWSWITCH) {
			struct sk_nexusadv *fsw_adv;
			fsw_adv = CHD_NX_ADV_FSW(adv_md);
			if (fsw_adv->nxadv_ver !=
			    NX_FLOWSWITCH_ADVISORY_CURRENT_VERSION &&
			    !_CHANNEL_IS_DEFUNCT(chd)) {
				SK_ABORT_WITH_CAUSE("nexus advisory "
				    "version mismatch for flowswitch",
				    NX_FLOWSWITCH_ADVISORY_CURRENT_VERSION);
				/* NOTREACHED */
				__builtin_unreachable();
			}
		} else if (!_CHANNEL_IS_DEFUNCT(chd)) {
			SK_ABORT_WITH_CAUSE("nexus advisory metadata type"
			    " unknown", NX_ADVISORY_MD_CURRENT_VERSION);
			/* NOTREACHED */
			__builtin_unreachable();
		}
	}

	/* reflect the kernel's actual settings back into the attributes */
	if (cha != NULL) {
		os_channel_info2attr(chd, cha);
	}

	/* initialize ring descriptors in schema order:
	 * tx, rx, allocator, event, large-buf allocator */
	ring_offset = 0;
	for (i = 0; i < num_tx_rings; i++) {
		ring_index = ring_offset + i;
		os_channel_init_ring(&chd->chd_rings[ring_index], chd,
		    ring_index);
	}

	ring_offset += num_tx_rings;
	for (i = 0; i < num_rx_rings; i++) {
		ring_index = ring_offset + i;
		os_channel_init_ring(&chd->chd_rings[ring_index], chd,
		    ring_index);
	}

	ring_offset += num_rx_rings;
	for (i = 0; i < num_allocator_rings; i++) {
		ring_index = ring_offset + i;
		os_channel_init_ring(&chd->chd_rings[ring_index], chd,
		    ring_index);
	}

	ring_offset += num_allocator_rings;
	for (i = 0; i < num_event_rings; i++) {
		ring_index = ring_offset + i;
		os_channel_init_ring(&chd->chd_rings[ring_index], chd,
		    ring_index);
	}

	ring_offset += num_event_rings;
	for (i = 0; i < num_large_buf_alloc_rings; i++) {
		ring_index = ring_offset + i;
		os_channel_init_ring(&chd->chd_rings[ring_index], chd,
		    ring_index);
	}

	/* record the fixed indices of the special-purpose rings;
	 * CHD_RING_IDX_NONE marks a ring kind that was not created */
	if (init.ci_ch_mode & CHMODE_USER_PACKET_POOL) {
		chd->chd_sync_flags = CHANNEL_SYNCF_ALLOC | CHANNEL_SYNCF_FREE;
		*__DECONST(uint8_t *, &chd->chd_alloc_ring_idx) =
		    num_tx_rings + num_rx_rings;
		if (num_allocator_rings > 2) {
			chd->chd_sync_flags |= CHANNEL_SYNCF_ALLOC_BUF;
			*__DECONST(uint8_t *, &chd->chd_buf_alloc_ring_idx) =
			    chd->chd_alloc_ring_idx + 1;
			*__DECONST(uint8_t *, &chd->chd_free_ring_idx) =
			    chd->chd_buf_alloc_ring_idx + 1;
			*__DECONST(uint8_t *, &chd->chd_buf_free_ring_idx) =
			    chd->chd_free_ring_idx + 1;
		} else {
			*__DECONST(uint8_t *, &chd->chd_buf_alloc_ring_idx) =
			    CHD_RING_IDX_NONE;
			*__DECONST(uint8_t *, &chd->chd_buf_free_ring_idx) =
			    CHD_RING_IDX_NONE;
			*__DECONST(uint8_t *, &chd->chd_free_ring_idx) =
			    chd->chd_alloc_ring_idx + 1;
		}
		if (num_large_buf_alloc_rings > 0) {
			*__DECONST(uint8_t *, &chd->chd_large_buf_alloc_ring_idx) =
			    num_tx_rings + num_rx_rings + num_allocator_rings +
			    num_event_rings;
		} else {
			*__DECONST(uint8_t *, &chd->chd_large_buf_alloc_ring_idx) =
			    CHD_RING_IDX_NONE;
		}
	} else {
		*__DECONST(uint8_t *, &chd->chd_alloc_ring_idx) =
		    CHD_RING_IDX_NONE;
		*__DECONST(uint8_t *, &chd->chd_free_ring_idx) =
		    CHD_RING_IDX_NONE;
		*__DECONST(uint8_t *, &chd->chd_buf_alloc_ring_idx) =
		    CHD_RING_IDX_NONE;
		*__DECONST(uint8_t *, &chd->chd_buf_free_ring_idx) =
		    CHD_RING_IDX_NONE;
		*__DECONST(uint8_t *, &chd->chd_large_buf_alloc_ring_idx) =
		    CHD_RING_IDX_NONE;
	}

	/* capture the per-process redzone cookie from the first channel */
	if (__os_ch_md_redzone_cookie == 0) {
		__os_ch_md_redzone_cookie =
		    CHD_SCHEMA(chd)->csm_md_redzone_cookie;
	}

	/* ensure all stores are globally visible */
	os_atomic_thread_fence(seq_cst);

done:
	/* on error, release whatever was acquired, in reverse order */
	if (err != 0) {
		if (fd != -1) {
			(void) guarded_close_np(fd, &init.ci_guard);
		}
		if (chd != NULL) {
			if (CHD_INFO(chd) != NULL) {
				free(CHD_INFO(chd));
				CHD_INFO(chd) = NULL;
			}
			free(chd);
			chd = NULL;
		}
		if (ci != NULL) {
			free(ci);
			ci = NULL;
		}
		errno = err;
	}
	return chd;
}
742 
743 channel_t
os_channel_create(const uuid_t uuid,const nexus_port_t port)744 os_channel_create(const uuid_t uuid, const nexus_port_t port)
745 {
746 	return os_channel_create_extended(uuid, port, CHANNEL_DIR_TX_RX,
747 	           CHANNEL_RING_ID_ANY, NULL);
748 }
749 
750 int
os_channel_get_fd(const channel_t chd)751 os_channel_get_fd(const channel_t chd)
752 {
753 	return chd->chd_fd;
754 }
755 
756 int
os_channel_read_attr(const channel_t chd,channel_attr_t cha)757 os_channel_read_attr(const channel_t chd, channel_attr_t cha)
758 {
759 	int err;
760 
761 	if ((err = __channel_get_info(chd->chd_fd, CHD_INFO(chd),
762 	    CHD_INFO_SIZE)) == 0) {
763 		os_channel_info2attr(chd, cha);
764 	}
765 
766 	return err;
767 }
768 
769 int
os_channel_write_attr(const channel_t chd,channel_attr_t cha)770 os_channel_write_attr(const channel_t chd, channel_attr_t cha)
771 {
772 	int err = 0;
773 
774 	if (CHD_INFO(chd)->cinfo_tx_lowat.cet_unit !=
775 	    cha->cha_tx_lowat.cet_unit ||
776 	    CHD_INFO(chd)->cinfo_tx_lowat.cet_value !=
777 	    cha->cha_tx_lowat.cet_value) {
778 		if ((err = __channel_set_opt(chd->chd_fd, CHOPT_TX_LOWAT_THRESH,
779 		    &cha->cha_tx_lowat, sizeof(cha->cha_tx_lowat))) != 0) {
780 			goto done;
781 		}
782 
783 		/* update local copy */
784 		CHD_INFO(chd)->cinfo_tx_lowat = cha->cha_tx_lowat;
785 	}
786 
787 	if (CHD_INFO(chd)->cinfo_rx_lowat.cet_unit !=
788 	    cha->cha_rx_lowat.cet_unit ||
789 	    CHD_INFO(chd)->cinfo_rx_lowat.cet_value !=
790 	    cha->cha_rx_lowat.cet_value) {
791 		if ((err = __channel_set_opt(chd->chd_fd, CHOPT_RX_LOWAT_THRESH,
792 		    &cha->cha_rx_lowat, sizeof(cha->cha_rx_lowat))) != 0) {
793 			goto done;
794 		}
795 
796 		/* update local copy */
797 		CHD_INFO(chd)->cinfo_rx_lowat = cha->cha_rx_lowat;
798 	}
799 done:
800 	return err;
801 }
802 
803 int
os_channel_read_nexus_extension_info(const channel_t chd,nexus_type_t * nt,uint64_t * ext)804 os_channel_read_nexus_extension_info(const channel_t chd, nexus_type_t *nt,
805     uint64_t *ext)
806 {
807 	struct nxprov_params *nxp;
808 
809 	nxp = &CHD_INFO(chd)->cinfo_nxprov_params;
810 	if (nt != NULL) {
811 		*nt = nxp->nxp_type;
812 	}
813 	if (ext != NULL) {
814 		*ext = (uint64_t)nxp->nxp_extensions;
815 	}
816 
817 	return 0;
818 }
819 
820 int
os_channel_sync(const channel_t chd,const sync_mode_t mode)821 os_channel_sync(const channel_t chd, const sync_mode_t mode)
822 {
823 	if (__improbable(mode != CHANNEL_SYNC_TX && mode != CHANNEL_SYNC_RX)) {
824 		return EINVAL;
825 	}
826 
827 	return __channel_sync(chd->chd_fd, mode,
828 	           (mode == CHANNEL_SYNC_TX) ? chd->chd_sync_flags :
829 	           (chd->chd_sync_flags &
830 	           ~(CHANNEL_SYNCF_ALLOC | CHANNEL_SYNCF_ALLOC_BUF)));
831 }
832 
833 void
os_channel_destroy(channel_t chd)834 os_channel_destroy(channel_t chd)
835 {
836 	if (chd->chd_fd != -1) {
837 		(void) guarded_close_np(chd->chd_fd, &chd->chd_guard);
838 	}
839 
840 	if (CHD_INFO(chd) != NULL) {
841 		free(CHD_INFO(chd));
842 		CHD_INFO(chd) = NULL;
843 	}
844 
845 	free(chd);
846 }
847 
848 int
os_channel_is_defunct(channel_t chd)849 os_channel_is_defunct(channel_t chd)
850 {
851 	return _CHANNEL_IS_DEFUNCT(chd);
852 }
853 
854 __attribute__((always_inline, visibility("hidden")))
855 static inline ring_id_t
_ring_id(struct ch_info * cinfo,const ring_id_type_t type)856 _ring_id(struct ch_info *cinfo, const ring_id_type_t type)
857 {
858 	ring_id_t rid = CHANNEL_RING_ID_ANY;    /* make it crash */
859 
860 	switch (type) {
861 	case CHANNEL_FIRST_TX_RING:
862 		rid = cinfo->cinfo_first_tx_ring;
863 		break;
864 
865 	case CHANNEL_LAST_TX_RING:
866 		rid = cinfo->cinfo_last_tx_ring;
867 		break;
868 
869 	case CHANNEL_FIRST_RX_RING:
870 		rid = cinfo->cinfo_first_rx_ring;
871 		break;
872 
873 	case CHANNEL_LAST_RX_RING:
874 		rid = cinfo->cinfo_last_rx_ring;
875 		break;
876 	}
877 
878 	return rid;
879 }
880 
881 ring_id_t
os_channel_ring_id(const channel_t chd,const ring_id_type_t type)882 os_channel_ring_id(const channel_t chd, const ring_id_type_t type)
883 {
884 	return _ring_id(CHD_INFO(chd), type);
885 }
886 
887 channel_ring_t
os_channel_tx_ring(const channel_t chd,const ring_id_t rid)888 os_channel_tx_ring(const channel_t chd, const ring_id_t rid)
889 {
890 	struct ch_info *ci = CHD_INFO(chd);
891 
892 	if (__improbable((ci->cinfo_ch_ring_id != CHANNEL_RING_ID_ANY &&
893 	    ci->cinfo_ch_ring_id != rid) ||
894 	    rid < _ring_id(ci, CHANNEL_FIRST_TX_RING) ||
895 	    rid > _ring_id(ci, CHANNEL_LAST_TX_RING))) {
896 		return NULL;
897 	}
898 
899 	return &chd->chd_rings[rid - _ring_id(ci, CHANNEL_FIRST_TX_RING)];
900 }
901 
902 channel_ring_t
os_channel_rx_ring(const channel_t chd,const ring_id_t rid)903 os_channel_rx_ring(const channel_t chd, const ring_id_t rid)
904 {
905 	struct ch_info *ci = CHD_INFO(chd);
906 
907 	if (__improbable((ci->cinfo_ch_ring_id != CHANNEL_RING_ID_ANY &&
908 	    ci->cinfo_ch_ring_id != rid) ||
909 	    rid < _ring_id(ci, CHANNEL_FIRST_RX_RING) ||
910 	    rid > _ring_id(ci, CHANNEL_LAST_RX_RING))) {
911 		return NULL;
912 	}
913 
914 	return &chd->chd_rings[_num_tx_rings(ci) +      /* add tx rings */
915 	       (rid - _ring_id(ci, CHANNEL_FIRST_RX_RING))];
916 }
917 
918 /*
919  * Return 1 if we have pending transmissions in the tx ring. When everything
920  * is complete ring->ring_head == ring->ring_khead.
921  */
922 int
os_channel_pending(const channel_ring_t chrd)923 os_channel_pending(const channel_ring_t chrd)
924 {
925 	struct __user_channel_ring *ring =
926 	    __DECONST(struct __user_channel_ring *, chrd->chrd_ring);
927 	return ring->ring_head != ring->ring_khead;
928 }
929 
930 uint64_t
os_channel_ring_sync_time(const channel_ring_t chrd)931 os_channel_ring_sync_time(const channel_ring_t chrd)
932 {
933 	return chrd->chrd_ring->ring_sync_time;
934 }
935 
936 uint64_t
os_channel_ring_notify_time(const channel_ring_t chrd)937 os_channel_ring_notify_time(const channel_ring_t chrd)
938 {
939 	return chrd->chrd_ring->ring_notify_time;
940 }
941 
/*
 * Return how many slots the caller can still use on this ring: for a
 * TX ring the free slots remaining for queueing, for an RX ring the
 * filled slots available for reading.  A defunct ring reports 0.
 */
uint32_t
os_channel_available_slot_count(const channel_ring_t chrd)
{
	const struct __user_channel_ring *ring = chrd->chrd_ring;
	uint32_t count;
	int n;

	if (ring->ring_kind == CR_KIND_TX) {
		/*
		 * TX: slots between the kernel's head (khead) and ours
		 * are in flight; the remainder, minus one separator
		 * slot, is available.  The index difference can wrap
		 * around the ring, hence the negative adjustment.
		 */
		n = ring->ring_head - ring->ring_khead;
		if (n < 0) {
			n += ring->ring_num_slots;
		}
		count = (ring->ring_num_slots - n - 1);
	} else {
		/* RX: slots between our head and the tail hold data */
		n = ring->ring_tail - ring->ring_head;
		if (n < 0) {
			n += ring->ring_num_slots;
		}
		count = n;
	}
	return __improbable(_CHANNEL_RING_IS_DEFUNCT(chrd)) ? 0 : count;
}
964 
965 int
os_channel_advance_slot(channel_ring_t chrd,const channel_slot_t slot)966 os_channel_advance_slot(channel_ring_t chrd, const channel_slot_t slot)
967 {
968 	struct __user_channel_ring *ring =
969 	    __DECONST(struct __user_channel_ring *, chrd->chrd_ring);
970 	slot_idx_t idx;
971 	int err;
972 
973 	idx = _SLOT_INDEX(chrd, slot);
974 	if (__probable(_slot_index_is_valid(ring, idx))) {
975 		ring->ring_head = _CHANNEL_RING_NEXT(ring, idx);
976 		err = 0;
977 	} else {
978 		err = (_CHANNEL_RING_IS_DEFUNCT(chrd) ? ENXIO : EINVAL);
979 	}
980 	return err;
981 }
982 
/*
 * Return the slot following slot0 (or the slot at ring_head when slot0
 * is NULL), optionally filling *prop with its properties.  Returns
 * NULL at the end of the ring, on a defunct ring, or when the slot's
 * buffer address cannot be initialized.  Aborts the process on an
 * out-of-bounds slot0 unless the ring is defunct.
 */
channel_slot_t
os_channel_get_next_slot(const channel_ring_t chrd, const channel_slot_t slot0,
    slot_prop_t *prop)
{
	const struct __user_channel_ring *ring = chrd->chrd_ring;
	const struct __slot_desc *slot;
	slot_idx_t idx;

	if (__probable(slot0 != NULL)) {
		idx = _SLOT_INDEX(chrd, slot0);
		if (__probable(_slot_index_is_valid(ring, idx))) {
			idx = _CHANNEL_RING_NEXT(ring, idx);
		} else if (!_CHANNEL_RING_IS_DEFUNCT(chrd)) {
			/* slot is out of bounds */
			SK_ABORT_WITH_CAUSE("Index out of bounds in gns", idx);
			/* NOTREACHED */
			__builtin_unreachable();
		} else {
			/*
			 * In case of a defunct, pretend as if we've
			 * advanced to the last slot; this will result
			 * in a NULL slot below.
			 */
			idx = ring->ring_tail;
		}
	} else {
		idx = ring->ring_head;
	}

	if (__probable(idx != ring->ring_tail)) {
		slot = &chrd->chrd_slot_desc[idx];
	} else {
		/* we just advanced to the last slot */
		slot = NULL;
	}

	if (__probable(slot != NULL)) {
		uint16_t ring_kind = ring->ring_kind;
		struct __user_quantum *q;
		mach_vm_address_t baddr;
		uint16_t bdoff;

		/*
		 * In user packet pool mode a TX slot must not already
		 * carry metadata: report empty properties and let the
		 * caller attach a packet explicitly.
		 */
		if (__improbable((ring_kind == CR_KIND_TX) &&
		    (CHD_INFO(chrd->chrd_channel)->cinfo_ch_mode &
		    CHMODE_USER_PACKET_POOL))) {
			if (SD_VALID_METADATA(SLOT_DESC_USD(slot))) {
				SK_ABORT_WITH_CAUSE("Tx slot has attached "
				    "metadata", idx);
				/* NOTREACHED */
				__builtin_unreachable();
			}
			if (prop != NULL) {
				prop->sp_len = 0;
				prop->sp_flags = 0;
				prop->sp_buf_ptr = 0;
				prop->sp_mdata_ptr = 0;
			}
			return __improbable(_CHANNEL_RING_IS_DEFUNCT(chrd)) ?
			       NULL : (channel_slot_t)slot;
		}

		_SLOT_DESC_VERIFY(chrd, SLOT_DESC_USD(slot));
		q = _SLOT_METADATA(chrd, ring, idx);
		_METADATA_VERIFY(chrd, q);

		baddr = _initialize_metadata_address(chrd, q, &bdoff);
		if (__improbable(baddr == 0)) {
			return NULL;
		}
		/* No multi-buflet support for slot based interface */
		if (__probable(prop != NULL)) {
			/* immutable: slot index */
			prop->sp_idx = idx;
			prop->sp_flags = 0;
			prop->sp_buf_ptr = baddr + bdoff;
			prop->sp_mdata_ptr = q;
			/* reset slot length if this is to be used for tx */
			prop->sp_len = (ring_kind == CR_KIND_TX) ?
			    ring->ring_def_buf_size : q->qum_len;
		}
	}

	return __improbable(_CHANNEL_RING_IS_DEFUNCT(chrd)) ?
	       NULL : (channel_slot_t)slot;
}
1068 
/*
 * Write caller-supplied properties (currently only the data length)
 * into the slot's metadata.  Aborts the process on an out-of-bounds
 * slot unless the ring is defunct, in which case the write lands in
 * zero-filled memory and is effectively ignored.
 */
void
os_channel_set_slot_properties(const channel_ring_t chrd,
    const channel_slot_t slot, const slot_prop_t *prop)
{
	const struct __user_channel_ring *ring = chrd->chrd_ring;
	slot_idx_t idx = _SLOT_INDEX(chrd, slot);

	if (__probable(_slot_index_is_valid(ring, idx))) {
		struct __user_quantum *q;

		_METADATA_VERIFY(chrd, prop->sp_mdata_ptr);
		_SLOT_DESC_VERIFY(chrd, _SLOT_DESC(chrd, idx));

		/*
		 * In the event of a defunct, we'd be accessing zero-filled
		 * memory; this is fine we ignore all changes made to the
		 * region at that time.
		 */
		q = _SLOT_METADATA(chrd, ring, idx);
		q->qum_len = prop->sp_len;
		struct __user_packet *p = (struct __user_packet *)q;
		/* No multi-buflet support for slot based interface */
		p->pkt_qum_buf.buf_dlen = prop->sp_len;
		p->pkt_qum_buf.buf_doff = 0;
	} else if (!_CHANNEL_RING_IS_DEFUNCT(chrd)) {
		/* slot is out of bounds */
		SK_ABORT_WITH_CAUSE("Index out of bounds in ssp", idx);
		/* NOTREACHED */
		__builtin_unreachable();
	}
}
1100 
/*
 * Return the packet attached to a slot, encoded with the channel's
 * metadata type/subtype, or 0 when slot is NULL or holds no valid
 * metadata.  Aborts on an out-of-bounds slot unless the ring is
 * defunct.
 */
packet_t
os_channel_slot_get_packet(const channel_ring_t chrd, const channel_slot_t slot)
{
	const struct __user_channel_ring *ring = chrd->chrd_ring;
	struct __user_quantum *q = NULL;

	if (__probable(slot != NULL)) {
		slot_idx_t idx = _SLOT_INDEX(chrd, slot);
		/*
		 * NOTE(review): when the index is invalid AND the ring is
		 * defunct, idx is still used below; presumably safe because
		 * the defunct region is zero-filled — confirm against the
		 * kernel-side defunct handling.
		 */
		if (__improbable(!_slot_index_is_valid(ring, idx)) &&
		    !_CHANNEL_RING_IS_DEFUNCT(chrd)) {
			/* slot is out of bounds */
			SK_ABORT_WITH_CAUSE("Index out of bounds in sgp", idx);
			/* NOTREACHED */
			__builtin_unreachable();
		}

		if (__probable(SD_VALID_METADATA(_SLOT_DESC(chrd, idx)))) {
			obj_idx_t midx;
			q = _SLOT_METADATA(chrd, ring, idx);
			_METADATA_VERIFY(chrd, q);
			/*
			 * In the event of a defunct, we'd be accessing
			 * zero-filled memory and end up with 0 for midx;
			 * this is fine since we ignore all changes made
			 * to the region at that time.
			 */
			midx = METADATA_IDX(q);
			_SLOT_METADATA_IDX_VERIFY(chrd, q, midx);
		}
	}

	return (q == NULL) ? 0 :
	       SK_PTR_ENCODE(q, chrd->chrd_md_type, chrd->chrd_md_subtype);
}
1135 
1136 void *
os_channel_get_stats_region(const channel_t chd,const channel_stats_id_t id)1137 os_channel_get_stats_region(const channel_t chd, const channel_stats_id_t id)
1138 {
1139 	void *sp = CHD_NX_STATS(chd);
1140 	struct __nx_stats_fsw *nxs_fsw;
1141 	void *ptr = NULL;
1142 
1143 	/* we currently deal only with flowswitch */
1144 	if (sp == NULL ||
1145 	    CHD_SCHEMA(chd)->csm_stats_type != NEXUS_STATS_TYPE_FSW) {
1146 		return NULL;
1147 	}
1148 
1149 	nxs_fsw = sp;
1150 
1151 	switch (id) {
1152 	case CHANNEL_STATS_ID_IP:
1153 		ptr = &nxs_fsw->nxs_ipstat;
1154 		break;
1155 
1156 	case CHANNEL_STATS_ID_IP6:
1157 		ptr = &nxs_fsw->nxs_ip6stat;
1158 		break;
1159 
1160 	case CHANNEL_STATS_ID_TCP:
1161 		ptr = &nxs_fsw->nxs_tcpstat;
1162 		break;
1163 
1164 	case CHANNEL_STATS_ID_UDP:
1165 		ptr = &nxs_fsw->nxs_udpstat;
1166 		break;
1167 
1168 	case CHANNEL_STATS_ID_QUIC:
1169 		ptr = &nxs_fsw->nxs_quicstat;
1170 		break;
1171 
1172 	default:
1173 		ptr = NULL;
1174 		break;
1175 	}
1176 
1177 	return ptr;
1178 }
1179 
1180 void *
os_channel_get_advisory_region(const channel_t chd)1181 os_channel_get_advisory_region(const channel_t chd)
1182 {
1183 	struct __kern_nexus_adv_metadata *adv_md;
1184 	/*
1185 	 * To be backward compatible this API will only return
1186 	 * the advisory region for flowswitch.
1187 	 */
1188 	adv_md = CHD_NX_ADV_MD(chd);
1189 	if (adv_md == NULL ||
1190 	    adv_md->knam_type != NEXUS_ADVISORY_TYPE_FLOWSWITCH) {
1191 		return NULL;
1192 	}
1193 	return CHD_NX_ADV_FSW(adv_md);
1194 }
1195 
/*
 * Compare a flow advisory entry's UUID with the caller's, using the
 * widest loads the caller's pointer alignment permits; nonzero means
 * the IDs are equal (on the aligned fast paths).
 */
__attribute__((always_inline, visibility("hidden")))
static inline int
_flowadv_id_equal(struct __flowadv_entry *fe, uuid_t id)
{
	/*
	 * Anticipate a nicely (8-bytes) aligned UUID from
	 * caller; the one in fae_id is always 8-byte aligned.
	 */
	if (__probable(IS_P2ALIGNED(id, sizeof(uint64_t)))) {
		uint64_t *id_64 = (uint64_t *)(uintptr_t)id;
		return fe->fae_id_64[0] == id_64[0] &&
		       fe->fae_id_64[1] == id_64[1];
	} else if (__probable(IS_P2ALIGNED(id, sizeof(uint32_t)))) {
		uint32_t *id_32 = (uint32_t *)(uintptr_t)id;
		return fe->fae_id_32[0] == id_32[0] &&
		       fe->fae_id_32[1] == id_32[1] &&
		       fe->fae_id_32[2] == id_32[2] &&
		       fe->fae_id_32[3] == id_32[3];
	}

	/*
	 * NOTE(review): the aligned paths above return nonzero on
	 * equality, so UUID_COMPARE must also yield nonzero for equal
	 * IDs (i.e. an equality macro, not a uuid_compare()-style
	 * three-way comparison) — confirm its definition.
	 */
	return UUID_COMPARE(fe->fae_id, id);
}
1218 
1219 int
os_channel_flow_admissible(const channel_ring_t chrd,uuid_t flow_id,const flowadv_idx_t flow_index)1220 os_channel_flow_admissible(const channel_ring_t chrd, uuid_t flow_id,
1221     const flowadv_idx_t flow_index)
1222 {
1223 	const struct __user_channel_ring *ring = chrd->chrd_ring;
1224 	const struct channel *chd = chrd->chrd_channel;
1225 	struct __flowadv_entry *fe = CHD_NX_FLOWADV(chd);
1226 
1227 	/*
1228 	 * Currently, flow advisory is on a per-nexus port basis.
1229 	 * To anticipate for future requirements, we use the ring
1230 	 * as parameter instead, even though we use it only to
1231 	 * check if this is a TX ring for now.
1232 	 */
1233 	if (__improbable(CHD_NX_FLOWADV(chd) == NULL)) {
1234 		return ENXIO;
1235 	} else if (__improbable(ring->ring_kind != CR_KIND_TX ||
1236 	    flow_index >= CHD_PARAMS(chd)->nxp_flowadv_max)) {
1237 		return EINVAL;
1238 	}
1239 
1240 	/*
1241 	 * Rather than checking if the UUID is all zeroes, check
1242 	 * against fae_flags since the presence of FLOWADV_VALID
1243 	 * means fae_id is non-zero.  This avoids another round of
1244 	 * comparison against zeroes.
1245 	 */
1246 	fe = &CHD_NX_FLOWADV(chd)[flow_index];
1247 	if (__improbable(fe->fae_flags == 0 || !_flowadv_id_equal(fe, flow_id))) {
1248 		return ENOENT;
1249 	}
1250 
1251 	return __improbable((fe->fae_flags & FLOWADVF_SUSPENDED) != 0) ?
1252 	       ENOBUFS: 0;
1253 }
1254 
/*
 * Stub: CE (congestion experienced) counters are not reported through
 * this interface; all parameters are ignored and 0 is returned.
 */
int
os_channel_flow_adv_get_ce_count(__unused const channel_ring_t chrd,
    __unused uuid_t flow_id, __unused const flowadv_idx_t flow_index,
    __unused uint32_t *ce_cnt, __unused uint32_t *pkt_cnt)
{
	return 0;
}
1262 
1263 int
os_channel_flow_adv_get_feedback(const channel_ring_t chrd,uuid_t flow_id,const flowadv_idx_t flow_index,uint32_t * congestion_cnt,__unused uint32_t * ce_cnt,uint32_t * pkt_cnt)1264 os_channel_flow_adv_get_feedback(const channel_ring_t chrd, uuid_t flow_id,
1265     const flowadv_idx_t flow_index, uint32_t *congestion_cnt,
1266     __unused uint32_t *ce_cnt, uint32_t *pkt_cnt)
1267 {
1268 	const struct __user_channel_ring *ring = chrd->chrd_ring;
1269 	const struct channel *chd = chrd->chrd_channel;
1270 	struct __flowadv_entry *fe = CHD_NX_FLOWADV(chd);
1271 
1272 	/*
1273 	 * Currently, flow advisory is on a per-nexus port basis.
1274 	 * To anticipate for future requirements, we use the ring
1275 	 * as parameter instead, even though we use it only to
1276 	 * check if this is a TX ring for now.
1277 	 */
1278 	if (__improbable(CHD_NX_FLOWADV(chd) == NULL)) {
1279 		return ENXIO;
1280 	} else if (__improbable(ring->ring_kind != CR_KIND_TX ||
1281 	    flow_index >= CHD_PARAMS(chd)->nxp_flowadv_max)) {
1282 		return EINVAL;
1283 	}
1284 
1285 	/*
1286 	 * Rather than checking if the UUID is all zeroes, check
1287 	 * against fae_flags since the presence of FLOWADV_VALID
1288 	 * means fae_id is non-zero.  This avoids another round of
1289 	 * comparison against zeroes.
1290 	 */
1291 	fe = &CHD_NX_FLOWADV(chd)[flow_index];
1292 	if (__improbable(fe->fae_flags == 0 || !_flowadv_id_equal(fe, flow_id))) {
1293 		return ENOENT;
1294 	}
1295 
1296 	*congestion_cnt = fe->fae_congestion_cnt;
1297 	*pkt_cnt = fe->fae_pkt_cnt;
1298 	return 0;
1299 }
1300 
1301 channel_attr_t
os_channel_attr_create(void)1302 os_channel_attr_create(void)
1303 {
1304 	struct channel_attr *cha;
1305 
1306 	cha = malloc(sizeof(*cha));
1307 	if (cha != NULL) {
1308 		bzero(cha, sizeof(*cha));
1309 	}
1310 	return cha;
1311 }
1312 
1313 channel_attr_t
os_channel_attr_clone(const channel_attr_t cha)1314 os_channel_attr_clone(const channel_attr_t cha)
1315 {
1316 	struct channel_attr *ncha;
1317 
1318 	ncha = os_channel_attr_create();
1319 	if (ncha != NULL && cha != NULL) {
1320 		bcopy(cha, ncha, sizeof(*ncha));
1321 		ncha->cha_key = NULL;
1322 		ncha->cha_key_len = 0;
1323 		if (cha->cha_key != NULL && cha->cha_key_len != 0 &&
1324 		    os_channel_attr_set_key(ncha, cha->cha_key,
1325 		    cha->cha_key_len) != 0) {
1326 			os_channel_attr_destroy(ncha);
1327 			ncha = NULL;
1328 		}
1329 	}
1330 
1331 	return ncha;
1332 }
1333 
/*
 * Set a single attribute value on an attribute object.  Attributes
 * that are reported by the nexus but not settable from here return
 * ENOTSUP; unknown attribute types and invalid values return EINVAL.
 */
int
os_channel_attr_set(const channel_attr_t cha, const channel_attr_type_t type,
    const uint64_t value)
{
	int err = 0;

	switch (type) {
	/* read-only attributes: owned by the nexus provider */
	case CHANNEL_ATTR_TX_RINGS:
	case CHANNEL_ATTR_RX_RINGS:
	case CHANNEL_ATTR_TX_SLOTS:
	case CHANNEL_ATTR_RX_SLOTS:
	case CHANNEL_ATTR_SLOT_BUF_SIZE:
	case CHANNEL_ATTR_SLOT_META_SIZE:
	case CHANNEL_ATTR_NEXUS_EXTENSIONS:
	case CHANNEL_ATTR_NEXUS_MHINTS:
	case CHANNEL_ATTR_NEXUS_IFINDEX:
	case CHANNEL_ATTR_NEXUS_STATS_SIZE:
	case CHANNEL_ATTR_NEXUS_FLOWADV_MAX:
	case CHANNEL_ATTR_NEXUS_META_TYPE:
	case CHANNEL_ATTR_NEXUS_META_SUBTYPE:
	case CHANNEL_ATTR_NEXUS_CHECKSUM_OFFLOAD:
	case CHANNEL_ATTR_NEXUS_ADV_SIZE:
	case CHANNEL_ATTR_MAX_FRAGS:
	case CHANNEL_ATTR_NUM_BUFFERS:
	case CHANNEL_ATTR_LARGE_BUF_SIZE:
		err = ENOTSUP;
		break;

	case CHANNEL_ATTR_EXCLUSIVE:
		cha->cha_exclusive = (uint32_t)value;
		break;

	/* auto-sync cannot be re-enabled; only a nonzero value is accepted */
	case CHANNEL_ATTR_NO_AUTO_SYNC:
		if (value == 0) {
			err = ENOTSUP;
		}
		break;

	case CHANNEL_ATTR_TX_LOWAT_UNIT:
	case CHANNEL_ATTR_RX_LOWAT_UNIT:
		switch (value) {
		case CHANNEL_THRESHOLD_UNIT_BYTES:
		case CHANNEL_THRESHOLD_UNIT_SLOTS:
			if (type == CHANNEL_ATTR_TX_LOWAT_UNIT) {
				cha->cha_tx_lowat.cet_unit =
				    (channel_threshold_unit_t)value;
			} else {
				cha->cha_rx_lowat.cet_unit =
				    (channel_threshold_unit_t)value;
			}
			/* valid unit stored; skip the EINVAL fallthrough */
			goto done;
		}
		err = EINVAL;
		break;

	case CHANNEL_ATTR_TX_LOWAT_VALUE:
		cha->cha_tx_lowat.cet_value = (uint32_t)value;
		break;

	case CHANNEL_ATTR_RX_LOWAT_VALUE:
		cha->cha_rx_lowat.cet_value = (uint32_t)value;
		break;

	case CHANNEL_ATTR_USER_PACKET_POOL:
		cha->cha_user_packet_pool = (value != 0);
		break;

	case CHANNEL_ATTR_NEXUS_DEFUNCT_OK:
		cha->cha_nexus_defunct_ok = (value != 0);
		break;

	case CHANNEL_ATTR_FILTER:
		cha->cha_filter = (uint32_t)value;
		break;

	case CHANNEL_ATTR_EVENT_RING:
		cha->cha_enable_event_ring = (value != 0);
		break;

	case CHANNEL_ATTR_LOW_LATENCY:
		cha->cha_low_latency = (value != 0);
		break;

	default:
		err = EINVAL;
		break;
	}
done:
	return err;
}
1424 
1425 int
os_channel_attr_set_key(const channel_attr_t cha,const void * key,const uint32_t key_len)1426 os_channel_attr_set_key(const channel_attr_t cha, const void *key,
1427     const uint32_t key_len)
1428 {
1429 	int err = 0;
1430 
1431 	if ((key == NULL && key_len != 0) || (key != NULL && key_len == 0) ||
1432 	    (key_len != 0 && key_len > NEXUS_MAX_KEY_LEN)) {
1433 		err = EINVAL;
1434 		goto done;
1435 	}
1436 	cha->cha_key_len = 0;
1437 	if (key_len == 0 && cha->cha_key != NULL) {
1438 		free(cha->cha_key);
1439 		cha->cha_key = NULL;
1440 	} else if (key != NULL && key_len != 0) {
1441 		if (cha->cha_key != NULL) {
1442 			free(cha->cha_key);
1443 		}
1444 		if ((cha->cha_key = malloc(key_len)) == NULL) {
1445 			err = ENOMEM;
1446 			goto done;
1447 		}
1448 		cha->cha_key_len = key_len;
1449 		bcopy(key, cha->cha_key, key_len);
1450 	}
1451 done:
1452 	return err;
1453 }
1454 
/*
 * Read a single attribute value into *value.  Returns 0 on success or
 * EINVAL for an unknown attribute type.
 */
int
os_channel_attr_get(const channel_attr_t cha, const channel_attr_type_t type,
    uint64_t *value)
{
	int err = 0;

	switch (type) {
	case CHANNEL_ATTR_TX_RINGS:
		*value = cha->cha_tx_rings;
		break;

	case CHANNEL_ATTR_RX_RINGS:
		*value = cha->cha_rx_rings;
		break;

	case CHANNEL_ATTR_TX_SLOTS:
		*value = cha->cha_tx_slots;
		break;

	case CHANNEL_ATTR_RX_SLOTS:
		*value = cha->cha_rx_slots;
		break;

	case CHANNEL_ATTR_SLOT_BUF_SIZE:
		*value = cha->cha_buf_size;
		break;

	case CHANNEL_ATTR_SLOT_META_SIZE:
		*value = cha->cha_meta_size;
		break;

	case CHANNEL_ATTR_NEXUS_STATS_SIZE:
		*value = cha->cha_stats_size;
		break;

	case CHANNEL_ATTR_NEXUS_FLOWADV_MAX:
		*value = cha->cha_flowadv_max;
		break;

	case CHANNEL_ATTR_EXCLUSIVE:
		*value = cha->cha_exclusive;
		break;

	/* always reports 1: auto-sync is never enabled (see os_channel_attr_set) */
	case CHANNEL_ATTR_NO_AUTO_SYNC:
		*value = 1;
		break;

	case CHANNEL_ATTR_TX_LOWAT_UNIT:
		*value = cha->cha_tx_lowat.cet_unit;
		break;

	case CHANNEL_ATTR_TX_LOWAT_VALUE:
		*value = cha->cha_tx_lowat.cet_value;
		break;

	case CHANNEL_ATTR_RX_LOWAT_UNIT:
		*value = cha->cha_rx_lowat.cet_unit;
		break;

	case CHANNEL_ATTR_RX_LOWAT_VALUE:
		*value = cha->cha_rx_lowat.cet_value;
		break;

	case CHANNEL_ATTR_NEXUS_TYPE:
		*value = cha->cha_nexus_type;
		break;

	case CHANNEL_ATTR_NEXUS_EXTENSIONS:
		*value = cha->cha_nexus_extensions;
		break;

	case CHANNEL_ATTR_NEXUS_MHINTS:
		*value = cha->cha_nexus_mhints;
		break;

	case CHANNEL_ATTR_NEXUS_IFINDEX:
		*value = cha->cha_nexus_ifindex;
		break;

	case CHANNEL_ATTR_NEXUS_META_TYPE:
		*value = cha->cha_nexus_meta_type;
		break;

	case CHANNEL_ATTR_NEXUS_META_SUBTYPE:
		*value = cha->cha_nexus_meta_subtype;
		break;

	case CHANNEL_ATTR_NEXUS_CHECKSUM_OFFLOAD:
		*value = cha->cha_nexus_checksum_offload;
		break;

	case CHANNEL_ATTR_USER_PACKET_POOL:
		*value = (cha->cha_user_packet_pool != 0);
		break;

	case CHANNEL_ATTR_NEXUS_ADV_SIZE:
		*value = cha->cha_nexusadv_size;
		break;

	case CHANNEL_ATTR_NEXUS_DEFUNCT_OK:
		*value = cha->cha_nexus_defunct_ok;
		break;

	case CHANNEL_ATTR_EVENT_RING:
		*value = (cha->cha_enable_event_ring != 0);
		break;

	case CHANNEL_ATTR_MAX_FRAGS:
		*value = cha->cha_max_frags;
		break;

	case CHANNEL_ATTR_NUM_BUFFERS:
		*value = cha->cha_num_buffers;
		break;

	case CHANNEL_ATTR_LOW_LATENCY:
		*value = (cha->cha_low_latency != 0);
		break;

	case CHANNEL_ATTR_LARGE_BUF_SIZE:
		*value = cha->cha_large_buf_size;
		break;

	default:
		err = EINVAL;
		break;
	}

	return err;
}
1585 
1586 int
os_channel_attr_get_key(const channel_attr_t cha,void * key,uint32_t * key_len)1587 os_channel_attr_get_key(const channel_attr_t cha, void *key,
1588     uint32_t *key_len)
1589 {
1590 	int err = 0;
1591 
1592 	if (key_len == NULL) {
1593 		err = EINVAL;
1594 		goto done;
1595 	} else if (key == NULL || cha->cha_key == NULL) {
1596 		*key_len = (cha->cha_key != NULL) ? cha->cha_key_len : 0;
1597 		goto done;
1598 	}
1599 
1600 	if (*key_len >= cha->cha_key_len) {
1601 		bcopy(cha->cha_key, key, cha->cha_key_len);
1602 		*key_len = cha->cha_key_len;
1603 	} else {
1604 		err = ENOMEM;
1605 	}
1606 done:
1607 	return err;
1608 }
1609 
/*
 * Translate the channel's cached ch_info / nexus provider parameters
 * into the caller's attribute structure.  The attribute's key pointer
 * and length are caller-owned, so they are saved before the wipe and
 * restored afterwards.
 */
__attribute__((visibility("hidden")))
static void
os_channel_info2attr(struct channel *chd, channel_attr_t cha)
{
	struct ch_info *cinfo = CHD_INFO(chd);
	/* Save these first before we wipe out the attribute */
	uint32_t cha_key_len = cha->cha_key_len;
	void *cha_key = cha->cha_key;
	uint32_t caps;

	bzero(cha, sizeof(*cha));
	cha->cha_tx_rings = CHD_PARAMS(chd)->nxp_tx_rings;
	cha->cha_rx_rings = CHD_PARAMS(chd)->nxp_rx_rings;
	cha->cha_tx_slots = CHD_PARAMS(chd)->nxp_tx_slots;
	cha->cha_rx_slots = CHD_PARAMS(chd)->nxp_rx_slots;
	cha->cha_buf_size = CHD_PARAMS(chd)->nxp_buf_size;
	cha->cha_meta_size = CHD_PARAMS(chd)->nxp_meta_size;
	cha->cha_stats_size = CHD_PARAMS(chd)->nxp_stats_size;
	cha->cha_flowadv_max = CHD_PARAMS(chd)->nxp_flowadv_max;
	/* boolean attributes are derived from the channel-mode bits */
	cha->cha_exclusive = !!(cinfo->cinfo_ch_mode & CHMODE_EXCLUSIVE);
	cha->cha_user_packet_pool = !!(cinfo->cinfo_ch_mode &
	    CHMODE_USER_PACKET_POOL);
	cha->cha_nexus_defunct_ok = !!(cinfo->cinfo_ch_mode &
	    CHMODE_DEFUNCT_OK);
	cha->cha_nexusadv_size = CHD_PARAMS(chd)->nxp_nexusadv_size;
	/* restore the caller-owned key saved above */
	cha->cha_key_len = cha_key_len;
	cha->cha_key = cha_key;
	cha->cha_tx_lowat = cinfo->cinfo_tx_lowat;
	cha->cha_rx_lowat = cinfo->cinfo_rx_lowat;
	cha->cha_nexus_type = CHD_PARAMS(chd)->nxp_type;
	cha->cha_nexus_extensions = CHD_PARAMS(chd)->nxp_extensions;
	cha->cha_nexus_mhints = CHD_PARAMS(chd)->nxp_mhints;
	cha->cha_nexus_ifindex = CHD_PARAMS(chd)->nxp_ifindex;
	cha->cha_nexus_meta_type = chd->chd_md_type;
	cha->cha_nexus_meta_subtype = chd->chd_md_subtype;
	cha->cha_enable_event_ring =
	    (cinfo->cinfo_ch_mode & CHMODE_EVENT_RING) != 0;
	cha->cha_low_latency =
	    (cinfo->cinfo_ch_mode & CHMODE_LOW_LATENCY) != 0;

	/* checksum offload is reported via the provider capability bits */
	caps = CHD_PARAMS(chd)->nxp_capabilities;
	if (caps & NXPCAP_CHECKSUM_PARTIAL) {
		cha->cha_nexus_checksum_offload =
		    CHANNEL_NEXUS_CHECKSUM_PARTIAL;
	} else {
		cha->cha_nexus_checksum_offload = 0;
	}
	cha->cha_max_frags = CHD_PARAMS(chd)->nxp_max_frags;
	cha->cha_num_buffers = cinfo->cinfo_num_bufs;
	cha->cha_large_buf_size = CHD_PARAMS(chd)->nxp_large_buf_size;
}
1661 
1662 void
os_channel_attr_destroy(channel_attr_t cha)1663 os_channel_attr_destroy(channel_attr_t cha)
1664 {
1665 	if (cha->cha_key != NULL) {
1666 		free(cha->cha_key);
1667 		cha->cha_key = NULL;
1668 	}
1669 	free(cha);
1670 }
1671 
/*
 * Common path for allocating one packet (regular or large-buffer) from
 * the channel's user packet pool alloc ring.  Returns ENOTSUP if the
 * channel has no user packet pool (or, for large, no large-buffer
 * ring), ENOMEM when the pool is still empty after a sync, and ENXIO
 * when the channel is defunct.  On success *ph holds the encoded
 * packet handle.
 */
static int
os_channel_packet_alloc_common(const channel_t chd, packet_t *ph, bool large)
{
	struct __user_channel_ring *ring;
	struct channel_ring_desc *chrd;
	struct __user_quantum *q;
	slot_idx_t idx;
	mach_vm_address_t baddr;
	uint16_t bdoff;
	struct ch_info *ci = CHD_INFO(chd);

	if (__improbable((ci->cinfo_ch_mode & CHMODE_USER_PACKET_POOL) == 0)) {
		return ENOTSUP;
	}
	if (__improbable(large &&
	    chd->chd_large_buf_alloc_ring_idx == CHD_RING_IDX_NONE)) {
		return ENOTSUP;
	}
	chrd = &chd->chd_rings[large ?
	    chd->chd_large_buf_alloc_ring_idx : chd->chd_alloc_ring_idx];
	ring = __DECONST(struct __user_channel_ring *, chrd->chrd_ring);
	idx = ring->ring_head;

	if (__improbable(idx == ring->ring_tail)) {
		/*
		 * do a sync to get more packets;
		 * since we are paying the cost of a syscall do a sync for
		 * free ring as well.
		 */
		int err;
		sync_flags_t flags;

		if (large) {
			flags = (chd->chd_sync_flags &
			    ~(CHANNEL_SYNCF_ALLOC_BUF | CHANNEL_SYNCF_ALLOC)) |
			    CHANNEL_SYNCF_LARGE_ALLOC;
		} else {
			flags = chd->chd_sync_flags & ~CHANNEL_SYNCF_ALLOC_BUF;
		}

		err = __channel_sync(chd->chd_fd, CHANNEL_SYNC_UPP, flags);
		if (__improbable(err != 0)) {
			/* a sync failure is fatal unless the channel is defunct */
			if (!_CHANNEL_IS_DEFUNCT(chd)) {
				SK_ABORT_WITH_CAUSE("packet pool alloc "
				    "sync failed", err);
				/* NOTREACHED */
				__builtin_unreachable();
			}
			return err;
		}
	}

	/* re-check against the (possibly kernel-updated) tail after the sync */
	if (__improbable(idx == ring->ring_tail)) {
		return __improbable(_CHANNEL_IS_DEFUNCT(chd)) ?
		       ENXIO : ENOMEM;
	}

	_SLOT_DESC_VERIFY(chrd, _SLOT_DESC(chrd, idx));
	q = _SLOT_METADATA(chrd, ring, idx);
	_METADATA_VERIFY(chrd, q);

	/* hand ownership of the metadata to the caller */
	*ph = SK_PTR_ENCODE(q, chrd->chrd_md_type, chrd->chrd_md_subtype);
	_SLOT_DETACH_METADATA(_SLOT_DESC(chrd, idx));

	/*
	 * Initialize the metadata buffer address. In the event of a
	 * defunct, we'd be accessing zero-filled memory; this is fine
	 * since we ignore all changes made to region at that time.
	 */
	baddr = _initialize_metadata_address(chrd, q, &bdoff);
	if (__improbable(baddr == 0)) {
		return ENXIO;
	}
	ring->ring_head = _CHANNEL_RING_NEXT(ring, idx);
	return __improbable(_CHANNEL_IS_DEFUNCT(chd)) ? ENXIO : 0;
}
1748 
1749 int
os_channel_packet_alloc(const channel_t chd,packet_t * ph)1750 os_channel_packet_alloc(const channel_t chd, packet_t *ph)
1751 {
1752 	return os_channel_packet_alloc_common(chd, ph, false);
1753 }
1754 
1755 int
os_channel_large_packet_alloc(const channel_t chd,packet_t * ph)1756 os_channel_large_packet_alloc(const channel_t chd, packet_t *ph)
1757 {
1758 	return os_channel_packet_alloc_common(chd, ph, true);
1759 }
1760 
/*
 * Return a packet to the channel's user packet pool via the free ring.
 * Returns ENOTSUP if the channel has no user packet pool and ENXIO
 * when the free ring is defunct; aborts the process if the free ring
 * stays full even after a reclaim sync (unless defunct).
 */
int
os_channel_packet_free(const channel_t chd, packet_t ph)
{
	struct __user_channel_ring *ring;
	struct channel_ring_desc *chrd;
	slot_idx_t idx;
	obj_idx_t midx;
	struct ch_info *ci = CHD_INFO(chd);

	if (__improbable((ci->cinfo_ch_mode & CHMODE_USER_PACKET_POOL) == 0)) {
		return ENOTSUP;
	}

	chrd = &chd->chd_rings[chd->chd_free_ring_idx];
	ring = __DECONST(struct __user_channel_ring *, chrd->chrd_ring);

	idx = ring->ring_head;
	if (__improbable(idx == ring->ring_tail)) {
		/*
		 * do a sync to reclaim space in free ring;
		 */
		int err;
		err = __channel_sync(chd->chd_fd, CHANNEL_SYNC_UPP,
		    CHANNEL_SYNCF_FREE);
		if (__improbable(err != 0) && !_CHANNEL_IS_DEFUNCT(chd)) {
			SK_ABORT_WITH_CAUSE("packet pool free "
			    "sync failed", err);
			/* NOTREACHED */
			__builtin_unreachable();
		}
	}

	/* if the sync could not reclaim space, this is unrecoverable */
	if (__improbable(idx == ring->ring_tail) && !_CHANNEL_IS_DEFUNCT(chd)) {
		SK_ABORT("no free ring space");
		/* NOTREACHED */
		__builtin_unreachable();
	}

	/*
	 * In the event of a defunct, midx will be 0 and we'll end up
	 * attaching it to the slot; this is fine since we ignore all
	 * changes made to the slot descriptors at that time.
	 */
	midx = METADATA_IDX(QUM_ADDR(ph));
	_SLOT_METADATA_IDX_VERIFY(chrd, QUM_ADDR(ph), midx);
	_SLOT_ATTACH_METADATA(_SLOT_DESC(chrd, idx), midx);
	ring->ring_head = _CHANNEL_RING_NEXT(ring, idx);

	return __improbable(_CHANNEL_RING_IS_DEFUNCT(chrd)) ? ENXIO : 0;
}
1811 
1812 int
os_channel_slot_attach_packet(const channel_ring_t chrd,const channel_slot_t slot,packet_t ph)1813 os_channel_slot_attach_packet(const channel_ring_t chrd,
1814     const channel_slot_t slot, packet_t ph)
1815 {
1816 	slot_idx_t idx;
1817 	obj_idx_t midx;
1818 
1819 	if (__improbable((chrd->chrd_channel->chd_info->cinfo_ch_mode &
1820 	    CHMODE_USER_PACKET_POOL) == 0)) {
1821 		return ENOTSUP;
1822 	}
1823 
1824 	if (__improbable(!__packet_is_finalized(ph))) {
1825 		if (!_CHANNEL_RING_IS_DEFUNCT(chrd)) {
1826 			SK_ABORT("packet not finalized");
1827 			/* NOTREACHED */
1828 			__builtin_unreachable();
1829 		}
1830 		goto done;
1831 	}
1832 
1833 	idx = _SLOT_INDEX(chrd, slot);
1834 	if (__improbable(!_slot_index_is_valid(chrd->chrd_ring, idx))) {
1835 		if (!_CHANNEL_RING_IS_DEFUNCT(chrd)) {
1836 			SK_ABORT_WITH_CAUSE("Invalid slot", slot);
1837 			/* NOTREACHED */
1838 			__builtin_unreachable();
1839 		}
1840 		goto done;
1841 	}
1842 
1843 	if (__improbable(SD_VALID_METADATA(SLOT_DESC_USD(slot)))) {
1844 		if (!_CHANNEL_RING_IS_DEFUNCT(chrd)) {
1845 			SK_ABORT_WITH_CAUSE("Slot has attached packet", slot);
1846 			/* NOTREACHED */
1847 			__builtin_unreachable();
1848 		}
1849 		goto done;
1850 	}
1851 
1852 	/*
1853 	 * In the event of a defunct, midx will be 0 and we'll end up
1854 	 * attaching it to the slot; this is fine since we ignore all
1855 	 * changes made to the slot descriptors at that time.
1856 	 */
1857 	midx = METADATA_IDX(QUM_ADDR(ph));
1858 	_SLOT_METADATA_IDX_VERIFY(chrd, QUM_ADDR(ph), midx);
1859 	_SLOT_ATTACH_METADATA(SLOT_DESC_USD(slot), midx);
1860 
1861 done:
1862 	return __improbable(_CHANNEL_RING_IS_DEFUNCT(chrd)) ? ENXIO : 0;
1863 }
1864 
1865 int
os_channel_slot_detach_packet(const channel_ring_t chrd,const channel_slot_t slot,packet_t ph)1866 os_channel_slot_detach_packet(const channel_ring_t chrd,
1867     const channel_slot_t slot, packet_t ph)
1868 {
1869 	slot_idx_t idx;
1870 
1871 	if (__improbable((chrd->chrd_channel->chd_info->cinfo_ch_mode &
1872 	    CHMODE_USER_PACKET_POOL) == 0)) {
1873 		return ENOTSUP;
1874 	}
1875 
1876 	idx = _SLOT_INDEX(chrd, slot);
1877 	if (__improbable(!_slot_index_is_valid(chrd->chrd_ring, idx))) {
1878 		if (!_CHANNEL_RING_IS_DEFUNCT(chrd)) {
1879 			SK_ABORT_WITH_CAUSE("Invalid slot", slot);
1880 			/* NOTREACHED */
1881 			__builtin_unreachable();
1882 		}
1883 		goto done;
1884 	}
1885 
1886 	if (__improbable(!SD_VALID_METADATA(SLOT_DESC_USD(slot)))) {
1887 		if (!_CHANNEL_RING_IS_DEFUNCT(chrd)) {
1888 			SK_ABORT_WITH_CAUSE("Slot has no attached packet",
1889 			    slot);
1890 			/* NOTREACHED */
1891 			__builtin_unreachable();
1892 		}
1893 		goto done;
1894 	}
1895 
1896 	if (__improbable(ph != SK_PTR_ENCODE(_SLOT_METADATA(chrd,
1897 	    chrd->chrd_ring, idx), chrd->chrd_md_type,
1898 	    chrd->chrd_md_subtype))) {
1899 		if (!_CHANNEL_RING_IS_DEFUNCT(chrd)) {
1900 			SK_ABORT("packet handle mismatch");
1901 			/* NOTREACHED */
1902 			__builtin_unreachable();
1903 		}
1904 		goto done;
1905 	}
1906 
1907 	if (__improbable(!__packet_is_finalized(ph))) {
1908 		if (!_CHANNEL_RING_IS_DEFUNCT(chrd)) {
1909 			SK_ABORT("packet not finalized");
1910 			/* NOTREACHED */
1911 			__builtin_unreachable();
1912 		}
1913 		goto done;
1914 	}
1915 
1916 	/*
1917 	 * In the event of a defunct, we ignore any changes made to
1918 	 * the slot descriptors, and so doing this is harmless.
1919 	 */
1920 	_SLOT_DETACH_METADATA(SLOT_DESC_USD(slot));
1921 
1922 done:
1923 	return __improbable(_CHANNEL_RING_IS_DEFUNCT(chrd)) ? ENXIO : 0;
1924 }
1925 
/*
 * Shrink one packet alloc ring (regular or large-buffer, selected by
 * 'large') down to its current working-set size: every packet beyond
 * ring_alloc_ws is detached from the alloc ring and returned to the
 * pool via os_channel_packet_free().
 *
 * Returns 0 on success, or ENXIO / the free error when the channel is
 * defunct.  On a live channel, a malformed packet or a failed free
 * aborts the process.  Caller has already verified
 * CHMODE_USER_PACKET_POOL.
 */
__attribute__((visibility("hidden")))
static inline int
os_channel_purge_packet_alloc_ring_common(const channel_t chd, bool large)
{
	struct __user_channel_ring *ring;
	struct channel_ring_desc *chrd;
	uint32_t curr_ws;
	slot_idx_t idx;
	packet_t ph;
	int npkts, err;

	chrd = &chd->chd_rings[large ?
	    chd->chd_large_buf_alloc_ring_idx : chd->chd_alloc_ring_idx];
	ring = __DECONST(struct __user_channel_ring *, chrd->chrd_ring);
	idx = ring->ring_head;

	/* calculate the number of packets in alloc pool */
	npkts = ring->ring_tail - idx;
	if (npkts < 0) {
		/* tail wrapped around the ring */
		npkts += ring->ring_num_slots;
	}

	curr_ws = ring->ring_alloc_ws;
	/* free packets until only the working set remains */
	while ((uint32_t)npkts-- > curr_ws) {
		struct __user_quantum *q;

		_SLOT_DESC_VERIFY(chrd, _SLOT_DESC(chrd, idx));
		q = _SLOT_METADATA(chrd, ring, idx);
		_METADATA_VERIFY(chrd, q);

		ph = SK_PTR_ENCODE(q, chrd->chrd_md_type,
		    chrd->chrd_md_subtype);
		_SLOT_DETACH_METADATA(_SLOT_DESC(chrd, idx));

		/*
		 * Initialize the metadata buffer address. In the event of a
		 * defunct, we'd be accessing zero-filled memory; this is fine
		 * since we ignore all changes made to region at that time.
		 */
		struct __user_packet *p = (struct __user_packet *)q;
		uint16_t bcnt = p->pkt_bufs_cnt;
		uint16_t bmax = p->pkt_bufs_max;

		if (__improbable((bcnt == 0) || (bmax == 0))) {
			if (!_CHANNEL_RING_IS_DEFUNCT(chrd)) {
				SK_ABORT("pkt pool purge, bad bufcnt");
				/* NOTREACHED */
				__builtin_unreachable();
			} else {
				return ENXIO;
			}
		}
		/*
		 * alloc ring will not have multi-buflet packets.
		 */
		_PKT_BUFCNT_VERIFY(chrd, bcnt, 1);
		/* cast away const to (re)set the buffer address in metadata */
		*(mach_vm_address_t *) (uintptr_t)&q->qum_buf[0].buf_addr =
		    _CHANNEL_RING_BUF(chrd, ring, &q->qum_buf[0]);
		idx = _CHANNEL_RING_NEXT(ring, idx);
		ring->ring_head = idx;
		err = os_channel_packet_free(chd, ph);
		if (__improbable(err != 0)) {
			if (!_CHANNEL_IS_DEFUNCT(chd)) {
				SK_ABORT_WITH_CAUSE("packet pool purge "
				    "free failed", err);
				/* NOTREACHED */
				__builtin_unreachable();
			}
			return err;
		}
	}

	return 0;
}
2000 
2001 __attribute__((visibility("hidden")))
2002 static inline int
os_channel_purge_packet_alloc_ring(const channel_t chd)2003 os_channel_purge_packet_alloc_ring(const channel_t chd)
2004 {
2005 	return os_channel_purge_packet_alloc_ring_common(chd, false);
2006 }
2007 
2008 __attribute__((visibility("hidden")))
2009 static inline int
os_channel_purge_large_packet_alloc_ring(const channel_t chd)2010 os_channel_purge_large_packet_alloc_ring(const channel_t chd)
2011 {
2012 	return os_channel_purge_packet_alloc_ring_common(chd, true);
2013 }
2014 
/*
 * Shrink the buflet alloc ring down to its current working-set size:
 * every buflet beyond ring_alloc_ws is detached from the alloc ring
 * and returned to the pool via os_channel_buflet_free().
 *
 * Returns 0 on success, or ENXIO / the free error when the channel is
 * defunct.  On a live channel, a buflet with a NULL buffer, a chained
 * buflet, or a failed free aborts the process.  Caller has already
 * verified CHMODE_USER_PACKET_POOL.
 */
__attribute__((visibility("hidden")))
static inline int
os_channel_purge_buflet_alloc_ring(const channel_t chd)
{
	struct __user_channel_ring *ring;
	struct channel_ring_desc *chrd;
	uint32_t curr_ws;
	slot_idx_t idx;
	int nbfts, err;

	chrd = &chd->chd_rings[chd->chd_buf_alloc_ring_idx];
	ring = __DECONST(struct __user_channel_ring *, chrd->chrd_ring);
	idx = ring->ring_head;

	/* calculate the number of packets in alloc pool */
	nbfts = ring->ring_tail - idx;
	if (nbfts < 0) {
		/* tail wrapped around the ring */
		nbfts += ring->ring_num_slots;
	}

	curr_ws = ring->ring_alloc_ws;
	/* free buflets until only the working set remains */
	while ((uint32_t)nbfts-- > curr_ws) {
		struct __user_buflet *ubft;
		obj_idx_t nbft_idx;

		_SLOT_DESC_VERIFY(chrd, _SLOT_DESC(chrd, idx));
		ubft = _SLOT_BFT_METADATA(chrd, ring, idx);
		_SLOT_DETACH_METADATA(_SLOT_DESC(chrd, idx));

		/*
		 * Initialize the buflet metadata buffer address.
		 */
		*(mach_vm_address_t *)(uintptr_t)&(ubft->buf_addr) =
		    _CHANNEL_RING_BUF(chrd, ring, ubft);
		if (__improbable(ubft->buf_addr == 0)) {
			SK_ABORT_WITH_CAUSE("buflet with NULL buffer",
			    ubft->buf_idx);
			/* NOTREACHED */
			__builtin_unreachable();
		}

		/* a buflet on the alloc ring must not be chained to another */
		nbft_idx = ubft->buf_nbft_idx;
		if (__improbable(nbft_idx != OBJ_IDX_NONE)) {
			if (_CHANNEL_IS_DEFUNCT(chd)) {
				return ENXIO;
			} else {
				SK_ABORT_WITH_CAUSE("buflet with invalid nidx",
				    nbft_idx);
				/* NOTREACHED */
				__builtin_unreachable();
			}
		}

		idx = _CHANNEL_RING_NEXT(ring, idx);
		ring->ring_head = idx;
		err = os_channel_buflet_free(chd, ubft);
		if (__improbable(err != 0)) {
			if (!_CHANNEL_IS_DEFUNCT(chd)) {
				SK_ABORT_WITH_CAUSE("buflet pool purge "
				    "free failed", err);
				/* NOTREACHED */
				__builtin_unreachable();
			}
			return err;
		}
	}

	return 0;
}
2084 
2085 int
os_channel_packet_pool_purge(const channel_t chd)2086 os_channel_packet_pool_purge(const channel_t chd)
2087 {
2088 	struct ch_info *ci = CHD_INFO(chd);
2089 	int err;
2090 
2091 	if (__improbable((ci->cinfo_ch_mode & CHMODE_USER_PACKET_POOL) == 0)) {
2092 		return ENOTSUP;
2093 	}
2094 
2095 	err = __channel_sync(chd->chd_fd, CHANNEL_SYNC_UPP,
2096 	    ((chd->chd_sync_flags & ~CHANNEL_SYNCF_FREE) | CHANNEL_SYNCF_PURGE));
2097 	if (__improbable(err != 0)) {
2098 		if (!_CHANNEL_IS_DEFUNCT(chd)) {
2099 			SK_ABORT_WITH_CAUSE("packet pool purge sync failed",
2100 			    err);
2101 			/* NOTREACHED */
2102 			__builtin_unreachable();
2103 		}
2104 		return err;
2105 	}
2106 
2107 	err = os_channel_purge_packet_alloc_ring(chd);
2108 	if (__improbable(err != 0)) {
2109 		return err;
2110 	}
2111 	if (chd->chd_large_buf_alloc_ring_idx != CHD_RING_IDX_NONE) {
2112 		err = os_channel_purge_large_packet_alloc_ring(chd);
2113 		if (__improbable(err != 0)) {
2114 			return err;
2115 		}
2116 	}
2117 	if (_num_allocator_rings(CHD_SCHEMA(chd)) > 2) {
2118 		err = os_channel_purge_buflet_alloc_ring(chd);
2119 		if (__improbable(err != 0)) {
2120 			return err;
2121 		}
2122 	}
2123 
2124 	err = __channel_sync(chd->chd_fd, CHANNEL_SYNC_UPP, CHANNEL_SYNCF_FREE);
2125 	if (__improbable(err != 0)) {
2126 		if (!_CHANNEL_IS_DEFUNCT(chd)) {
2127 			SK_ABORT_WITH_CAUSE("packet pool free sync failed",
2128 			    err);
2129 			/* NOTREACHED */
2130 			__builtin_unreachable();
2131 		}
2132 		return err;
2133 	}
2134 
2135 	return __improbable(_CHANNEL_IS_DEFUNCT(chd)) ? ENXIO : 0;
2136 }
2137 
2138 int
os_channel_get_next_event_handle(const channel_t chd,os_channel_event_handle_t * ehandle,os_channel_event_type_t * etype,uint32_t * nevents)2139 os_channel_get_next_event_handle(const channel_t chd,
2140     os_channel_event_handle_t *ehandle, os_channel_event_type_t *etype,
2141     uint32_t *nevents)
2142 {
2143 	struct __kern_channel_event_metadata *emd;
2144 	struct __user_channel_ring *ring;
2145 	struct channel_ring_desc *chrd;
2146 	struct __user_quantum *qum;
2147 	mach_vm_address_t baddr;
2148 	uint16_t bdoff;
2149 	slot_idx_t idx;
2150 	struct __user_channel_schema *csm = CHD_SCHEMA(chd);
2151 	struct ch_info *ci = CHD_INFO(chd);
2152 
2153 	if (__improbable((ehandle == NULL) || (etype == NULL) ||
2154 	    (nevents == NULL))) {
2155 		return EINVAL;
2156 	}
2157 	if (__improbable((ci->cinfo_ch_mode & CHMODE_EVENT_RING) == 0)) {
2158 		return ENOTSUP;
2159 	}
2160 	*ehandle = NULL;
2161 	chrd = &chd->chd_rings[_num_tx_rings(ci) + _num_rx_rings(ci) +
2162 	    _num_allocator_rings(csm)];
2163 	ring = __DECONST(struct __user_channel_ring *, chrd->chrd_ring);
2164 	idx = ring->ring_head;
2165 
2166 	if (__improbable(idx == ring->ring_tail)) {
2167 		return __improbable(_CHANNEL_IS_DEFUNCT(chd)) ?
2168 		       ENXIO : ENODATA;
2169 	}
2170 	_SLOT_DESC_VERIFY(chrd, _SLOT_DESC(chrd, idx));
2171 	qum = _SLOT_METADATA(chrd, ring, idx);
2172 	_METADATA_VERIFY(chrd, qum);
2173 	_SLOT_DETACH_METADATA(_SLOT_DESC(chrd, idx));
2174 
2175 	baddr = _initialize_metadata_address(chrd, qum, &bdoff);
2176 	if (__improbable(baddr == 0)) {
2177 		return ENXIO;
2178 	}
2179 	*ehandle = SK_PTR_ENCODE(qum, chrd->chrd_md_type,
2180 	    chrd->chrd_md_subtype);
2181 	emd = (void *)(baddr + bdoff);
2182 	*etype = emd->emd_etype;
2183 	*nevents = emd->emd_nevents;
2184 	ring->ring_head = _CHANNEL_RING_NEXT(ring, idx);
2185 	return __improbable(_CHANNEL_IS_DEFUNCT(chd)) ? ENXIO : 0;
2186 }
2187 
2188 int
os_channel_event_free(const channel_t chd,os_channel_event_handle_t ehandle)2189 os_channel_event_free(const channel_t chd, os_channel_event_handle_t ehandle)
2190 {
2191 	return os_channel_packet_free(chd, (packet_t)ehandle);
2192 }
2193 
2194 int
os_channel_get_interface_advisory(const channel_t chd,struct ifnet_interface_advisory * advisory)2195 os_channel_get_interface_advisory(const channel_t chd,
2196     struct ifnet_interface_advisory *advisory)
2197 {
2198 	struct __kern_netif_intf_advisory *intf_adv;
2199 	struct __kern_nexus_adv_metadata *adv_md;
2200 	nexus_advisory_type_t adv_type;
2201 
2202 	/*
2203 	 * Interface advisory is only supported for netif and flowswitch.
2204 	 */
2205 	adv_md = CHD_NX_ADV_MD(chd);
2206 	if (adv_md == NULL) {
2207 		return ENOENT;
2208 	}
2209 	adv_type = adv_md->knam_type;
2210 	if (__improbable(adv_type != NEXUS_ADVISORY_TYPE_NETIF &&
2211 	    adv_type != NEXUS_ADVISORY_TYPE_FLOWSWITCH)) {
2212 		return _CHANNEL_IS_DEFUNCT(chd) ? ENXIO : ENOENT;
2213 	}
2214 	if (adv_type == NEXUS_ADVISORY_TYPE_NETIF) {
2215 		intf_adv = &(CHD_NX_ADV_NETIF(adv_md))->__kern_intf_adv;
2216 	} else {
2217 		intf_adv = &(CHD_NX_ADV_FSW(adv_md))->_nxadv_intf_adv;
2218 	}
2219 	if (intf_adv->cksum != os_cpu_copy_in_cksum(&intf_adv->adv, advisory,
2220 	    sizeof(*advisory), 0)) {
2221 		return _CHANNEL_IS_DEFUNCT(chd) ? ENXIO : EAGAIN;
2222 	}
2223 	return 0;
2224 }
2225 
2226 int
os_channel_configure_interface_advisory(const channel_t chd,boolean_t enable)2227 os_channel_configure_interface_advisory(const channel_t chd, boolean_t enable)
2228 {
2229 	uint32_t value = enable;
2230 
2231 	return __channel_set_opt(chd->chd_fd, CHOPT_IF_ADV_CONF,
2232 	           &value, sizeof(value));
2233 }
2234 
2235 int
os_channel_buflet_alloc(const channel_t chd,buflet_t * bft)2236 os_channel_buflet_alloc(const channel_t chd, buflet_t *bft)
2237 {
2238 	struct __user_channel_ring *ring;
2239 	struct channel_ring_desc *chrd;
2240 	struct __user_buflet *ubft;
2241 	obj_idx_t nbft_idx;
2242 	slot_idx_t idx;
2243 	struct ch_info *ci = CHD_INFO(chd);
2244 
2245 	if (__improbable((ci->cinfo_ch_mode & CHMODE_USER_PACKET_POOL) == 0)) {
2246 		return ENOTSUP;
2247 	}
2248 
2249 	if (__improbable(_num_allocator_rings(CHD_SCHEMA(chd)) < 4)) {
2250 		return ENOTSUP;
2251 	}
2252 
2253 	chrd = &chd->chd_rings[chd->chd_buf_alloc_ring_idx];
2254 	ring = __DECONST(struct __user_channel_ring *, chrd->chrd_ring);
2255 	idx = ring->ring_head;
2256 
2257 	if (__improbable(idx == ring->ring_tail)) {
2258 		/*
2259 		 * do a sync to get more buflets;
2260 		 */
2261 		int err;
2262 		err = __channel_sync(chd->chd_fd, CHANNEL_SYNC_UPP,
2263 		    CHANNEL_SYNCF_ALLOC_BUF | CHANNEL_SYNCF_FREE);
2264 		if (__improbable(err != 0)) {
2265 			if (!_CHANNEL_IS_DEFUNCT(chd)) {
2266 				SK_ABORT_WITH_CAUSE("buflet pool alloc "
2267 				    "sync failed", err);
2268 				/* NOTREACHED */
2269 				__builtin_unreachable();
2270 			}
2271 			return err;
2272 		}
2273 	}
2274 
2275 	if (__improbable(idx == ring->ring_tail)) {
2276 		return __improbable(_CHANNEL_IS_DEFUNCT(chd)) ?
2277 		       ENXIO : ENOMEM;
2278 	}
2279 
2280 	_SLOT_DESC_VERIFY(chrd, _SLOT_DESC(chrd, idx));
2281 	ubft = _SLOT_BFT_METADATA(chrd, ring, idx);
2282 	_SLOT_DETACH_METADATA(_SLOT_DESC(chrd, idx));
2283 
2284 	/*
2285 	 * Initialize the buflet metadata buffer address.
2286 	 */
2287 	*(mach_vm_address_t *)(uintptr_t)&(ubft->buf_addr) =
2288 	    _CHANNEL_RING_BUF(chrd, ring, ubft);
2289 	if (__improbable(ubft->buf_addr == 0)) {
2290 		SK_ABORT_WITH_CAUSE("buflet alloc with NULL buffer",
2291 		    ubft->buf_idx);
2292 		/* NOTREACHED */
2293 		__builtin_unreachable();
2294 	}
2295 	nbft_idx = ubft->buf_nbft_idx;
2296 	if (__improbable(nbft_idx != OBJ_IDX_NONE)) {
2297 		if (_CHANNEL_IS_DEFUNCT(chd)) {
2298 			return ENXIO;
2299 		} else {
2300 			SK_ABORT_WITH_CAUSE("buflet alloc with invalid nidx",
2301 			    nbft_idx);
2302 			/* NOTREACHED */
2303 			__builtin_unreachable();
2304 		}
2305 	}
2306 	ring->ring_head = _CHANNEL_RING_NEXT(ring, idx);
2307 	*bft = ubft;
2308 	return __improbable(_CHANNEL_IS_DEFUNCT(chd)) ? ENXIO : 0;
2309 }
2310 
2311 int
os_channel_buflet_free(const channel_t chd,buflet_t ubft)2312 os_channel_buflet_free(const channel_t chd, buflet_t ubft)
2313 {
2314 	struct __user_channel_ring *ring;
2315 	struct channel_ring_desc *chrd;
2316 	slot_idx_t idx;
2317 	obj_idx_t midx;
2318 	struct ch_info *ci = CHD_INFO(chd);
2319 
2320 	if (__improbable((ci->cinfo_ch_mode & CHMODE_USER_PACKET_POOL) == 0)) {
2321 		return ENOTSUP;
2322 	}
2323 
2324 	if (__improbable(_num_allocator_rings(CHD_SCHEMA(chd)) < 4)) {
2325 		return ENOTSUP;
2326 	}
2327 
2328 	chrd = &chd->chd_rings[chd->chd_buf_free_ring_idx];
2329 	ring = __DECONST(struct __user_channel_ring *, chrd->chrd_ring);
2330 
2331 	idx = ring->ring_head;
2332 	if (__improbable(idx == ring->ring_tail)) {
2333 		/*
2334 		 * do a sync to reclaim space in free ring;
2335 		 */
2336 		int err;
2337 		err = __channel_sync(chd->chd_fd, CHANNEL_SYNC_UPP,
2338 		    CHANNEL_SYNCF_FREE);
2339 		if (__improbable(err != 0) && !_CHANNEL_IS_DEFUNCT(chd)) {
2340 			SK_ABORT_WITH_CAUSE("buflet pool free "
2341 			    "sync failed", err);
2342 			/* NOTREACHED */
2343 			__builtin_unreachable();
2344 		}
2345 	}
2346 
2347 	if (__improbable(idx == ring->ring_tail) && !_CHANNEL_IS_DEFUNCT(chd)) {
2348 		SK_ABORT("no ring space in buflet free ring");
2349 		/* NOTREACHED */
2350 		__builtin_unreachable();
2351 	}
2352 
2353 	midx = _BFT_INDEX(chrd, ubft);
2354 	_SLOT_BFT_METADATA_IDX_VERIFY(chrd, ubft, midx);
2355 	_SLOT_ATTACH_METADATA(_SLOT_DESC(chrd, idx), midx);
2356 	ring->ring_head = _CHANNEL_RING_NEXT(ring, idx);
2357 
2358 	return __improbable(_CHANNEL_RING_IS_DEFUNCT(chrd)) ? ENXIO : 0;
2359 }
2360 
2361 int
os_channel_get_upp_buffer_stats(const channel_t chd,uint64_t * buffer_total,uint64_t * buffer_inuse)2362 os_channel_get_upp_buffer_stats(const channel_t chd, uint64_t *buffer_total,
2363     uint64_t *buffer_inuse)
2364 {
2365 	struct __user_channel_schema *csm = CHD_SCHEMA(chd);
2366 	*buffer_total = csm->csm_upp_buf_total;
2367 	*buffer_inuse = csm->csm_upp_buf_inuse;
2368 	return 0;
2369 }
2370