xref: /xnu-8020.121.3/libsyscall/wrappers/skywalk/os_channel.c (revision fdd8201d7b966f0c3ea610489d29bd841d358941)
1 /*
2  * Copyright (c) 2015-2021 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 
30 #include <stdlib.h>
31 #include <stddef.h>
32 #include <string.h>
33 #include <strings.h>
34 #include <unistd.h>
35 #include <errno.h>
36 #include <skywalk/os_skywalk_private.h>
37 #include <skywalk/os_packet_private.h>
38 
39 #ifndef LIBSYSCALL_INTERFACE
40 #error "LIBSYSCALL_INTERFACE not defined"
41 #endif /* !LIBSYSCALL_INTERFACE */
42 
43 /*
44  * Defined here as we don't have Libc
45  */
46 extern int __getpid(void);
47 extern int __kill(int pid, int signum, int posix);
48 extern int __exit(int) __attribute__((noreturn));
49 
50 static ring_id_t _ring_id(struct ch_info *cinfo, const ring_id_type_t type);
51 static void os_channel_info2attr(struct channel *chd, channel_attr_t cha);
52 static int _flowadv_id_equal(struct __flowadv_entry *, uuid_t);
53 
#if defined(__arm__) || defined(__arm64__)
/*
 * Full memory barrier on ARM: "dmb ish" orders all prior loads and
 * stores before all subsequent ones within the inner-shareable domain.
 * The "memory" clobber additionally acts as a compiler barrier.
 */
__attribute__((always_inline, visibility("hidden")))
static inline void
membar_sync(void)
{
	__asm__ volatile ("dmb ish" ::: "memory");
}
#elif defined(__i386__) || defined(__x86_64__)
/*
 * Full memory barrier on x86: "mfence" serializes all prior loads and
 * stores.  The "memory" clobber additionally acts as a compiler barrier.
 */
__attribute__((always_inline, visibility("hidden")))
static inline void
membar_sync(void)
{
	__asm__ volatile ("mfence" ::: "memory");
}
#else /* !__arm__ && !__arm64__ && !__i386__ && !__x86_64__ */
#error "Unknown platform; membar_sync() not available"
#endif /* !__arm__ && !__arm64__ && !__i386__ && !__x86_64__ */
71 
/*
 * This is pretty much what an inlined memcmp() would do for UUID
 * comparison; since we don't have access to memcmp() here, we
 * manually handle it ourselves.
 *
 * NOTE: evaluates to non-zero when equal.  Both arguments are
 * evaluated 16 times each, so only pass side-effect-free expressions
 * (i.e. plain uuid_t arrays).
 */
#define UUID_COMPARE(a, b)                                                  \
	(a[0] == b[0] && a[1] == b[1] && a[2] == b[2] && a[3] == b[3] &&    \
	a[4] == b[4] && a[5] == b[5] && a[6] == b[6] && a[7] == b[7] &&     \
	a[8] == b[8] && a[9] == b[9] && a[10] == b[10] && a[11] == b[11] && \
	a[12] == b[12] && a[13] == b[13] && a[14] == b[14] && a[15] == b[15])
82 
/* Index of slot descriptor _slot within the ring's descriptor array */
#define _SLOT_INDEX(_chrd, _slot)                                       \
	((slot_idx_t)((_slot - (_chrd)->chrd_slot_desc)))

/* User-visible slot descriptor at index _idx */
#define _SLOT_DESC(_chrd, _idx)                                         \
	(SLOT_DESC_USD(&(_chrd)->chrd_slot_desc[_idx]))

/*
 * Address of the metadata object at index _midx: objects are laid out
 * ring_md_size apart from the ring's metadata base, each preceded by
 * a preamble of METADATA_PREAMBLE_SZ bytes which this macro skips.
 */
#define _METADATA(_chrd, _ring, _midx)                                  \
	((void *)((_chrd)->chrd_md_base_addr +                          \
	((_midx) * (_ring)->ring_md_size) + METADATA_PREAMBLE_SZ))

/* Metadata object currently attached to the slot at index _idx */
#define _SLOT_METADATA(_chrd, _ring, _idx)                              \
	_METADATA(_chrd, _ring, _SLOT_DESC(_chrd, _idx)->sd_md_idx)

/*
 * Abort (unless the ring has gone defunct) if _md does not map back to
 * the metadata object at index _midx, i.e. a stale or corrupt packet
 * handle was passed in by the caller.
 */
#define _SLOT_METADATA_IDX_VERIFY(_chrd, _md, _midx)    do {            \
	if (__improbable((_md) != _METADATA((_chrd), (_chrd)->chrd_ring, \
	    (_midx))) && !_CHANNEL_RING_IS_DEFUNCT(_chrd)) {            \
	        SK_ABORT_WITH_CAUSE("bad packet handle", (_midx));      \
	/* NOTREACHED */                                                \
	        __builtin_unreachable();                                \
	}                                                               \
} while (0)

/* Registered buflet index of buflet _bft (the _chrd arg is unused) */
#define _BFT_INDEX(_chrd, _bft) (_bft)->buf_bft_idx_reg

/* Buflet metadata attached to the slot at index _idx */
#define _SLOT_BFT_METADATA(_chrd, _ring, _idx)                          \
	_CHANNEL_RING_BFT(_chrd, _ring, _SLOT_DESC(_chrd, _idx)->sd_md_idx)

/*
 * Abort (unless the ring has gone defunct) if _md does not map back to
 * the buflet at index _midx, i.e. a stale or corrupt buflet handle.
 */
#define _SLOT_BFT_METADATA_IDX_VERIFY(_chrd, _md, _midx)    do {        \
	if (__improbable((mach_vm_address_t)(_md) !=                    \
	    _CHANNEL_RING_BFT((_chrd), (_chrd)->chrd_ring, (_midx))) && \
	    !_CHANNEL_RING_IS_DEFUNCT(_chrd)) {                         \
	        SK_ABORT_WITH_CAUSE("bad buflet handle", (_midx));      \
	/* NOTREACHED */                                                \
	        __builtin_unreachable();                                \
	}                                                               \
} while (0)

/*
 * Abort (unless the ring has gone defunct) if the slot descriptor _sdp
 * has no valid metadata index attached.
 */
#define _SLOT_DESC_VERIFY(_chrd, _sdp) do {                             \
	if (__improbable(!SD_VALID_METADATA(_sdp)) &&                   \
	    !_CHANNEL_RING_IS_DEFUNCT(_chrd)) {                         \
	        SK_ABORT("Slot descriptor has no metadata");            \
	/* NOTREACHED */                                        \
	        __builtin_unreachable();                                \
	}                                                               \
} while (0)

/*
 * Redzone check: the preamble stores (offset-from-md-base XOR
 * per-process cookie); abort (unless defunct) when it doesn't match,
 * which indicates metadata corruption or a forged handle.
 */
#define _METADATA_VERIFY(_chrd, _md) do {                               \
	if (__improbable(METADATA_PREAMBLE(_md)->mdp_redzone !=         \
	    (((mach_vm_address_t)(_md) - (_chrd)->chrd_md_base_addr) ^  \
	    __os_ch_md_redzone_cookie)) &&                              \
	    !_CHANNEL_RING_IS_DEFUNCT(_chrd)) {                         \
	        SK_ABORT_WITH_CAUSE("Metadata redzone corrupted",       \
	            METADATA_PREAMBLE(_md)->mdp_redzone);               \
	/* NOTREACHED */                                        \
	        __builtin_unreachable();                                \
	}                                                               \
} while (0)

/*
 * Sanity-check a packet's buflet counts: _bmax must not exceed the
 * channel's per-packet buflet limit, and _bcnt must not exceed _bmax.
 */
#define _PKT_BUFCNT_VERIFY(_chrd, _bcnt, _bmax) do {                    \
	if (__improbable((_chrd)->chrd_max_bufs < (_bmax))) {           \
	        SK_ABORT_WITH_CAUSE("Invalid max bufcnt", (_bmax));     \
	/* NOTREACHED */                                                \
	        __builtin_unreachable();                                \
	}                                                               \
	if (__improbable((_bcnt) > (_bmax))) {                          \
	        SK_ABORT_WITH_CAUSE("Invalid bufcnt", (_bcnt));         \
	/* NOTREACHED */                                                \
	        __builtin_unreachable();                                \
	}                                                               \
} while (0)
153 
/* Size of the dynamically allocated abort-reason message buffer */
#define _ABORT_MSGSZ    1024

/*
 * Verify that the schema region shared with the kernel carries the
 * version this library was built against; on mismatch, abort with a
 * diagnostic containing both versions plus the kernel name and UUID
 * (hand-formatted, since only %x/%s formatters are available here).
 */
#define _SCHEMA_VER_VERIFY(_chd) do {                                   \
	/* ensure all stores are globally visible */                    \
	membar_sync();                                                  \
	if (CHD_SCHEMA(_chd)->csm_ver != CSM_CURRENT_VERSION)	{       \
	        char *_msg = malloc(_ABORT_MSGSZ);                      \
	        uint32_t _ver = (uint32_t)CHD_SCHEMA(_chd)->csm_ver;    \
	/* we're stuck with %x and %s formatters */             \
	        (void) _mach_snprintf(_msg, _ABORT_MSGSZ,               \
	            "Schema region version mismatch: 0x%x != 0x%x\n"    \
	            "Kernel version: %s - did you forget to install "   \
	            "a matching libsystem_kernel.dylib?\n"              \
	            "Kernel UUID: %x%x%x%x-%x%x-%x%x-%x%x-%x%x%x%x%x%x", \
	            _ver, (uint32_t)CSM_CURRENT_VERSION,                \
	            CHD_SCHEMA(_chd)->csm_kern_name,                    \
	            CHD_SCHEMA(_chd)->csm_kern_uuid[0],                 \
	            CHD_SCHEMA(_chd)->csm_kern_uuid[1],                 \
	            CHD_SCHEMA(_chd)->csm_kern_uuid[2],                 \
	            CHD_SCHEMA(_chd)->csm_kern_uuid[3],                 \
	            CHD_SCHEMA(_chd)->csm_kern_uuid[4],                 \
	            CHD_SCHEMA(_chd)->csm_kern_uuid[5],                 \
	            CHD_SCHEMA(_chd)->csm_kern_uuid[6],                 \
	            CHD_SCHEMA(_chd)->csm_kern_uuid[7],                 \
	            CHD_SCHEMA(_chd)->csm_kern_uuid[8],                 \
	            CHD_SCHEMA(_chd)->csm_kern_uuid[9],                 \
	            CHD_SCHEMA(_chd)->csm_kern_uuid[10],                \
	            CHD_SCHEMA(_chd)->csm_kern_uuid[11],                \
	            CHD_SCHEMA(_chd)->csm_kern_uuid[12],                \
	            CHD_SCHEMA(_chd)->csm_kern_uuid[13],                \
	            CHD_SCHEMA(_chd)->csm_kern_uuid[14],                \
	            CHD_SCHEMA(_chd)->csm_kern_uuid[15]);               \
	        SK_ABORT_DYNAMIC(_msg);                                 \
	/* NOTREACHED */                                        \
	        __builtin_unreachable();                                \
	}                                                               \
} while (0)
191 
/* Bind metadata object _md_idx to the slot descriptor and mark it valid */
#define _SLOT_ATTACH_METADATA(_usd, _md_idx) do {                       \
	(_usd)->sd_md_idx = (_md_idx);                                  \
	(_usd)->sd_flags |= SD_IDX_VALID;                               \
} while (0)

/* Unbind the slot descriptor's metadata and clear its valid flag */
#define _SLOT_DETACH_METADATA(_usd) do	{                               \
	(_usd)->sd_md_idx = OBJ_IDX_NONE;                               \
	(_usd)->sd_flags &= ~SD_IDX_VALID;                              \
} while (0)

/* Pointer arithmetic helper: (_ptr + _offset) cast to _type */
#define _CHANNEL_OFFSET(_type, _ptr, _offset)                           \
	((_type)(void *)((uintptr_t)(_ptr) + (_offset)))

/* Channel schema located _off bytes into the shared memory map at _base */
#define _CHANNEL_SCHEMA(_base, _off)                                    \
	_CHANNEL_OFFSET(struct __user_channel_schema *, _base, _off)

/* Address of buffer object _idx within the ring's buffer region */
#define _CHANNEL_RING_BUF(_chrd, _ring, _idx)                           \
	((_chrd)->chrd_buf_base_addr + ((_idx) * (_ring)->ring_buf_size))

/* Address of buflet object _idx within the ring's buflet region */
#define _CHANNEL_RING_BFT(_chrd, _ring, _idx)                           \
	((_chrd)->chrd_bft_base_addr + ((_idx) * (_ring)->ring_bft_size))

/* Next slot index after _cur, wrapping around at ring_num_slots */
#define _CHANNEL_RING_NEXT(_ring, _cur)                                 \
	(__improbable((_cur) + 1 == (_ring)->ring_num_slots) ? 0 : (_cur) + 1)

/* A ring is defunct once the kernel clears CSM_ACTIVE in the schema flags */
#define _CHANNEL_RING_IS_DEFUNCT(_chrd)                                 \
	(!(*(_chrd)->chrd_csm_flags & CSM_ACTIVE))

/* A channel is defunct once the kernel clears CSM_ACTIVE in its schema */
#define _CHANNEL_IS_DEFUNCT(_chd)                                       \
	(!(CHD_SCHEMA(_chd)->csm_flags & CSM_ACTIVE))

/*
 * Locate a packet's first buflet: the embedded one (pkt_qum_buf) when
 * it holds a buffer, otherwise the external buflet it links to via
 * buf_nbft_idx, otherwise NULL (packet with no buflets at all).
 */
#define _CH_PKT_GET_FIRST_BUFLET(_pkt, _bft, _chrd, _ring) do {         \
	if (__probable((_pkt)->pkt_qum_buf.buf_idx != OBJ_IDX_NONE)) {  \
	        (_bft) = &(_pkt)->pkt_qum_buf;                          \
	} else if ((_pkt)->pkt_qum_buf.buf_nbft_idx != OBJ_IDX_NONE) {  \
	        (_bft) = _CHANNEL_RING_BFT(_chrd, _ring,                \
	            (_pkt)->pkt_qum_buf.buf_nbft_idx);                  \
	} else {                                                        \
	        (_bft) = NULL;                                          \
	}                                                               \
} while (0)
233 
/*
 * A per process copy of the channel metadata redzone cookie.
 * Captured from the kernel-provided schema the first time a channel
 * is created (see os_channel_create_extended); used by
 * _METADATA_VERIFY to detect corrupted or forged metadata handles.
 */
__attribute__((visibility("hidden")))
static uint64_t __os_ch_md_redzone_cookie = 0;
239 
240 __attribute__((always_inline, visibility("hidden")))
241 static inline uint32_t
_num_tx_rings(struct ch_info * ci)242 _num_tx_rings(struct ch_info *ci)
243 {
244 	ring_id_t first, last;
245 
246 	first = _ring_id(ci, CHANNEL_FIRST_TX_RING);
247 	last = _ring_id(ci, CHANNEL_LAST_TX_RING);
248 
249 	return (last - first) + 1;
250 }
251 
252 __attribute__((always_inline, visibility("hidden")))
253 static inline uint32_t
_num_rx_rings(struct ch_info * ci)254 _num_rx_rings(struct ch_info *ci)
255 {
256 	ring_id_t first, last;
257 
258 	first = _ring_id(ci, CHANNEL_FIRST_RX_RING);
259 	last = _ring_id(ci, CHANNEL_LAST_RX_RING);
260 
261 	return (last - first) + 1;
262 }
263 
264 __attribute__((always_inline, visibility("hidden")))
265 static inline uint32_t
_num_allocator_rings(const struct __user_channel_schema * csm)266 _num_allocator_rings(const struct __user_channel_schema *csm)
267 {
268 	return csm->csm_allocator_ring_pairs << 1;
269 }
270 
/*
 * Initialize one channel ring descriptor from the kernel-provided
 * schema: resolve the ring, slot-descriptor array and memory-region
 * base addresses for ring_index, validating the schema as we go.
 * Aborts the process on an invalid schema or unknown metadata
 * (sub)type.  Most chrd fields are declared const and are populated
 * here via const-override casts, since this is the one-time setup path.
 */
__attribute__((visibility("hidden")))
static void
os_channel_init_ring(struct channel_ring_desc *chrd,
    struct channel *chd, uint32_t ring_index)
{
	struct __user_channel_schema *csm = CHD_SCHEMA(chd);
	struct __user_channel_ring *ring = NULL;
	struct __slot_desc *sd = NULL;
	nexus_meta_type_t md_type;
	nexus_meta_subtype_t md_subtype;

	/* ring and slot-descriptor offsets are relative to the schema */
	ring = _CHANNEL_OFFSET(struct __user_channel_ring *, csm,
	    csm->csm_ring_ofs[ring_index].ring_off);
	sd = _CHANNEL_OFFSET(struct __slot_desc *, csm,
	    csm->csm_ring_ofs[ring_index].sd_off);
	md_type = csm->csm_md_type;
	md_subtype = csm->csm_md_subtype;

	if (ring == NULL || sd == NULL) {
		SK_ABORT("Channel schema not valid");
		/* NOTREACHED */
		__builtin_unreachable();
	} else if (!(md_type == NEXUS_META_TYPE_QUANTUM ||
	    md_type == NEXUS_META_TYPE_PACKET)) {
		SK_ABORT_WITH_CAUSE("Metadata type unknown", md_type);
		/* NOTREACHED */
		__builtin_unreachable();
	} else if (!(md_subtype == NEXUS_META_SUBTYPE_PAYLOAD ||
	    md_subtype == NEXUS_META_SUBTYPE_RAW)) {
		SK_ABORT_WITH_CAUSE("Metadata subtype unknown", md_subtype);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	chrd->chrd_slot_desc = sd;
	chrd->chrd_csm_flags = &chd->chd_schema->csm_flags;
	/* const overrides */
	*(struct channel **)(uintptr_t)&chrd->chrd_channel = chd;
	*(struct __user_channel_ring **)(uintptr_t)&chrd->chrd_ring = ring;
	*(nexus_meta_type_t *)(uintptr_t)&chrd->chrd_md_type = md_type;
	*(nexus_meta_subtype_t *)(uintptr_t)&chrd->chrd_md_subtype = md_subtype;
	*(mach_vm_address_t *)(uintptr_t)&chrd->chrd_shmem_base_addr =
	    CHD_INFO(chd)->cinfo_mem_base;
	/* buffer/metadata/slot-desc/buflet regions are relative to the ring */
	*(mach_vm_address_t *)(uintptr_t)&chrd->chrd_buf_base_addr =
	    (mach_vm_address_t)((uintptr_t)ring + ring->ring_buf_base);
	*(mach_vm_address_t *)(uintptr_t)&chrd->chrd_md_base_addr =
	    (mach_vm_address_t)((uintptr_t)ring + ring->ring_md_base);
	*(mach_vm_address_t *)(uintptr_t)&chrd->chrd_sd_base_addr =
	    (mach_vm_address_t)((uintptr_t)ring + ring->ring_sd_base);
	*(mach_vm_address_t *)(uintptr_t)&chrd->chrd_bft_base_addr =
	    (mach_vm_address_t)((uintptr_t)ring + ring->ring_bft_base);
	*(uint32_t *)(uintptr_t)&chrd->chrd_max_bufs =
	    CHD_PARAMS(chd)->nxp_max_frags;
}
325 
326 __attribute__((always_inline, visibility("hidden")))
327 static inline mach_vm_address_t
_initialize_metadata_address(const channel_ring_t chrd,struct __user_quantum * q,uint16_t * bdoff)328 _initialize_metadata_address(const channel_ring_t chrd,
329     struct __user_quantum *q, uint16_t *bdoff)
330 {
331 	int i;
332 	struct __user_buflet *ubft0;
333 	const struct __user_channel_ring *ring = chrd->chrd_ring;
334 
335 	switch (chrd->chrd_md_type) {
336 	case NEXUS_META_TYPE_PACKET: {
337 		struct __user_buflet *ubft, *pbft;
338 		struct __user_packet *p = (struct __user_packet *)q;
339 		uint16_t bcnt = p->pkt_bufs_cnt;
340 		uint16_t bmax = p->pkt_bufs_max;
341 
342 		_CASSERT(sizeof(p->pkt_qum_buf.buf_addr) ==
343 		    sizeof(mach_vm_address_t));
344 		/*
345 		 * In the event of a defunct, we'd be accessing zero-filled
346 		 * memory and end up with 0 for bcnt or bmax.
347 		 */
348 		if (__improbable((bcnt == 0) || (bmax == 0))) {
349 			if (!_CHANNEL_RING_IS_DEFUNCT(chrd)) {
350 				SK_ABORT("bad bufcnt");
351 				/* NOTREACHED */
352 				__builtin_unreachable();
353 			}
354 			return 0;
355 		}
356 		_PKT_BUFCNT_VERIFY(chrd, bcnt, bmax);
357 		_CH_PKT_GET_FIRST_BUFLET(p, ubft, chrd, ring);
358 		if (__improbable(ubft == NULL)) {
359 			SK_ABORT("bad packet: no buflet");
360 			/* NOTREACHED */
361 			__builtin_unreachable();
362 		}
363 		/*
364 		 * special handling for empty packet buflet.
365 		 */
366 		if (__improbable(p->pkt_qum_buf.buf_idx == OBJ_IDX_NONE)) {
367 			*__DECONST(mach_vm_address_t *,
368 			    &p->pkt_qum_buf.buf_addr) = 0;
369 			*__DECONST(mach_vm_address_t *,
370 			    &p->pkt_qum_buf.buf_nbft_addr) =
371 			    (mach_vm_address_t)ubft;
372 		}
373 		ubft0 = ubft;
374 		for (i = 0; (i < bcnt) && (ubft != NULL); i++) {
375 			pbft = ubft;
376 			if (__probable(pbft->buf_idx != OBJ_IDX_NONE)) {
377 				*(mach_vm_address_t *)(uintptr_t)
378 				&(pbft->buf_addr) = _CHANNEL_RING_BUF(chrd,
379 				    ring, pbft->buf_idx);
380 			} else {
381 				*(mach_vm_address_t *)(uintptr_t)
382 				&(pbft->buf_addr) = NULL;
383 			}
384 			if (pbft->buf_nbft_idx != OBJ_IDX_NONE) {
385 				ubft = _CHANNEL_RING_BFT(chrd, ring,
386 				    pbft->buf_nbft_idx);
387 			} else {
388 				ubft = NULL;
389 			}
390 			*__DECONST(mach_vm_address_t *, &pbft->buf_nbft_addr) =
391 			    (mach_vm_address_t)ubft;
392 		}
393 		if (__improbable(pbft->buf_nbft_idx != OBJ_IDX_NONE)) {
394 			if (!_CHANNEL_RING_IS_DEFUNCT(chrd)) {
395 				SK_ABORT("non terminating buflet chain");
396 				/* NOTREACHED */
397 				__builtin_unreachable();
398 			}
399 			return 0;
400 		}
401 		if (__improbable(i != bcnt)) {
402 			SK_ABORT_WITH_CAUSE("invalid buflet count", bcnt);
403 			/* NOTREACHED */
404 			__builtin_unreachable();
405 		}
406 		break;
407 	}
408 	default:
409 		ubft0 = &q->qum_buf[0];
410 		_CASSERT(sizeof(q->qum_buf[0].buf_addr) ==
411 		    sizeof(mach_vm_address_t));
412 		/* immutable: compute pointers from the index */
413 		*(mach_vm_address_t *)(uintptr_t)&ubft0->buf_addr =
414 		    _CHANNEL_RING_BUF(chrd, ring, ubft0->buf_idx);
415 		break;
416 	}
417 
418 	/* return address and offset of the first buffer */
419 	*bdoff = ubft0->buf_doff;
420 	return ubft0->buf_addr;
421 }
422 
423 /*
424  * _slot_index_is_valid
425  * - verify that the slot index is within valid bounds
426  * - if the head is less than (or equal to) the tail (case A below)
427  *	head <= valid < tail
428  * - if the head is greater than the tail (case B below)
429  *      valid < tail
430  *    or
431  *	head <= valid < num_slots
432  *
433  * case A: x x x x x x x H o o o o o T x x x x x x
434  * case B: o o o o o T x x x x H o o o o o o o o o
435  *
436  * 'H' - head
437  * 'T' - tail
438  * 'x' - invalid
439  * 'o' - valid
440  */
441 __attribute__((always_inline, visibility("hidden")))
442 static inline int
_slot_index_is_valid(const struct __user_channel_ring * ring,slot_idx_t idx)443 _slot_index_is_valid(const struct __user_channel_ring *ring, slot_idx_t idx)
444 {
445 	int     is_valid = 0;
446 
447 	if (ring->ring_head <= ring->ring_tail) {
448 		if (__probable(idx >= ring->ring_head && idx < ring->ring_tail)) {
449 			is_valid = 1;
450 		}
451 	} else {
452 		if (__probable(idx < ring->ring_tail ||
453 		    (idx >= ring->ring_head && idx < ring->ring_num_slots))) {
454 			is_valid = 1;
455 		}
456 	}
457 
458 	return is_valid;
459 }
460 
/*
 * Open a channel to the nexus instance identified by `uuid' on nexus
 * port `port', in direction `dir' (TX, RX, or both), selecting ring
 * `ring'.  Optional attributes `cha' control the channel mode
 * (exclusive, user packet pool, event ring, monitor, filter,
 * low-latency), key, and TX/RX low watermarks.  Returns a newly
 * allocated channel descriptor, or NULL with errno set on failure.
 * The returned channel owns its file descriptor and ch_info; both are
 * released by os_channel_destroy().
 */
channel_t
os_channel_create_extended(const uuid_t uuid, const nexus_port_t port,
    const ring_dir_t dir, const ring_id_t ring, const channel_attr_t cha)
{
	uint32_t num_tx_rings, num_rx_rings, num_allocator_rings;
	uint32_t ring_offset, ring_index, num_event_rings;
	struct __user_channel_schema *ucs;
	struct channel *chd = NULL;
	struct ch_info *ci = NULL;
	struct ch_init init;
	int i, fd = -1;
	int err = 0;
	size_t chd_sz;

	SK_ALIGN64_CASSERT(struct ch_info, cinfo_mem_map_size);

	/* only the three defined directions are acceptable */
	switch (dir) {
	case CHANNEL_DIR_TX_RX:
	case CHANNEL_DIR_TX:
	case CHANNEL_DIR_RX:
		break;
	default:
		err = EINVAL;
		goto done;
	}

	ci = malloc(CHD_INFO_SIZE);
	if (ci == NULL) {
		err = errno = ENOMEM;
		goto done;
	}
	bzero(ci, CHD_INFO_SIZE);

	/* translate the requested attributes into channel-open mode flags */
	bzero(&init, sizeof(init));
	init.ci_version = CHANNEL_INIT_CURRENT_VERSION;
	if (cha != NULL) {
		if (cha->cha_exclusive != 0) {
			init.ci_ch_mode |= CHMODE_EXCLUSIVE;
		}
		if (cha->cha_user_packet_pool != 0) {
			init.ci_ch_mode |= CHMODE_USER_PACKET_POOL;
		}
		if (cha->cha_nexus_defunct_ok != 0) {
			init.ci_ch_mode |= CHMODE_DEFUNCT_OK;
		}
		if (cha->cha_enable_event_ring != 0) {
			/* User packet pool is required for event rings */
			if (cha->cha_user_packet_pool == 0) {
				err = EINVAL;
				goto done;
			}
			init.ci_ch_mode |= CHMODE_EVENT_RING;
		}
		if (cha->cha_monitor != 0) {
			if (dir == CHANNEL_DIR_TX_RX) {
				init.ci_ch_mode |= CHMODE_MONITOR;
			} else if (dir == CHANNEL_DIR_TX) {
				init.ci_ch_mode |= CHMODE_MONITOR_TX;
			} else if (dir == CHANNEL_DIR_RX) {
				init.ci_ch_mode |= CHMODE_MONITOR_RX;
			}
			if (cha->cha_monitor == CHANNEL_MONITOR_NO_COPY) {
				init.ci_ch_mode |= CHMODE_MONITOR_NO_COPY;
			}
		}
		if (cha->cha_filter != 0) {
			init.ci_ch_mode |= CHMODE_FILTER;
		}
		if (cha->cha_low_latency != 0) {
			init.ci_ch_mode |= CHMODE_LOW_LATENCY;
		}
		init.ci_key_len = cha->cha_key_len;
		init.ci_key = cha->cha_key;
		init.ci_tx_lowat = cha->cha_tx_lowat;
		init.ci_rx_lowat = cha->cha_rx_lowat;
	}
	init.ci_ch_ring_id = ring;
	init.ci_nx_port = port;
	bcopy(uuid, init.ci_nx_uuid, sizeof(uuid_t));

	/* open the channel; the kernel maps the shared region for us */
	fd = __channel_open(&init, sizeof(init));
	if (fd == -1) {
		err = errno;
		goto done;
	}

	err = __channel_get_info(fd, ci, CHD_INFO_SIZE);
	if (err != 0) {
		err = errno;
		goto done;
	}

	ucs = _CHANNEL_SCHEMA(ci->cinfo_mem_base, ci->cinfo_schema_offset);
	num_tx_rings = _num_tx_rings(ci);       /* # of channel tx rings */
	num_rx_rings = _num_rx_rings(ci);       /* # of channel rx rings */
	num_allocator_rings = _num_allocator_rings(ucs);
	num_event_rings = ucs->csm_num_event_rings;

	/*
	 * if the user requested packet allocation mode for channel, then
	 * check that channel was opened in packet allocation mode and
	 * allocator rings were created.
	 */
	if ((init.ci_ch_mode & CHMODE_USER_PACKET_POOL) &&
	    ((num_allocator_rings < 2) ||
	    !(ci->cinfo_ch_mode & CHMODE_USER_PACKET_POOL))) {
		err = errno = ENXIO;
		goto done;
	}

	/* same check for the event ring, when one was requested */
	if ((init.ci_ch_mode & CHMODE_EVENT_RING) && ((num_event_rings == 0) ||
	    !(ci->cinfo_ch_mode & CHMODE_EVENT_RING))) {
		err = errno = ENXIO;
		goto done;
	}

	/* channel struct has a trailing array of per-ring descriptors */
	chd_sz = CHD_SIZE(num_tx_rings + num_rx_rings + num_allocator_rings +
	    num_event_rings);
	chd = malloc(chd_sz);
	if (chd == NULL) {
		err = errno = ENOMEM;
		goto done;
	}

	bzero(chd, chd_sz);
	chd->chd_fd = fd;
	chd->chd_guard = init.ci_guard;

	/* claim ch_info (will be freed along with the channel itself) */
	CHD_INFO(chd) = ci;
	ci = NULL;

	/* const override */
	*(struct __user_channel_schema **)(uintptr_t)&chd->chd_schema = ucs;

	/* make sure we're running on the right kernel */
	_SCHEMA_VER_VERIFY(chd);

	*(nexus_meta_type_t *)&chd->chd_md_type = CHD_SCHEMA(chd)->csm_md_type;
	*(nexus_meta_subtype_t *)&chd->chd_md_subtype =
	    CHD_SCHEMA(chd)->csm_md_subtype;

	/* optional shared regions: stats, flow advisory, nexus advisory */
	if (CHD_SCHEMA(chd)->csm_stats_ofs != 0) {
		*(void **)(uintptr_t)&chd->chd_nx_stats =
		    _CHANNEL_OFFSET(void *, CHD_INFO(chd)->cinfo_mem_base,
		    CHD_SCHEMA(chd)->csm_stats_ofs);
	}

	if (CHD_SCHEMA(chd)->csm_flowadv_ofs != 0) {
		*(struct __flowadv_entry **)(uintptr_t)&chd->chd_nx_flowadv =
		    _CHANNEL_OFFSET(struct __flowadv_entry *,
		    CHD_INFO(chd)->cinfo_mem_base,
		    CHD_SCHEMA(chd)->csm_flowadv_ofs);
	}

	if (CHD_SCHEMA(chd)->csm_nexusadv_ofs != 0) {
		struct __kern_nexus_adv_metadata *adv_md;

		*(struct __kern_nexus_adv_metadata **)
		(uintptr_t)&chd->chd_nx_adv =
		    _CHANNEL_OFFSET(struct __kern_nexus_adv_metadata *,
		    CHD_INFO(chd)->cinfo_mem_base,
		    CHD_SCHEMA(chd)->csm_nexusadv_ofs);
		adv_md = CHD_NX_ADV_MD(chd);
		/* abort on advisory version/type mismatch, unless defunct */
		if (adv_md->knam_version != NX_ADVISORY_MD_CURRENT_VERSION &&
		    !_CHANNEL_IS_DEFUNCT(chd)) {
			SK_ABORT_WITH_CAUSE("nexus advisory metadata version"
			    " mismatch", NX_ADVISORY_MD_CURRENT_VERSION);
			/* NOTREACHED */
			__builtin_unreachable();
		}
		if (chd->chd_nx_adv->knam_type == NEXUS_ADVISORY_TYPE_NETIF) {
			struct netif_nexus_advisory *netif_adv;
			netif_adv = CHD_NX_ADV_NETIF(adv_md);
			if (netif_adv->nna_version !=
			    NX_NETIF_ADVISORY_CURRENT_VERSION &&
			    !_CHANNEL_IS_DEFUNCT(chd)) {
				SK_ABORT_WITH_CAUSE("nexus advisory "
				    "version mismatch for netif",
				    NX_NETIF_ADVISORY_CURRENT_VERSION);
				/* NOTREACHED */
				__builtin_unreachable();
			}
		} else if (chd->chd_nx_adv->knam_type ==
		    NEXUS_ADVISORY_TYPE_FLOWSWITCH) {
			struct sk_nexusadv *fsw_adv;
			fsw_adv = CHD_NX_ADV_FSW(adv_md);
			if (fsw_adv->nxadv_ver !=
			    NX_FLOWSWITCH_ADVISORY_CURRENT_VERSION &&
			    !_CHANNEL_IS_DEFUNCT(chd)) {
				SK_ABORT_WITH_CAUSE("nexus advisory "
				    "version mismatch for flowswitch",
				    NX_FLOWSWITCH_ADVISORY_CURRENT_VERSION);
				/* NOTREACHED */
				__builtin_unreachable();
			}
		} else if (!_CHANNEL_IS_DEFUNCT(chd)) {
			SK_ABORT_WITH_CAUSE("nexus advisory metadata type"
			    " unknown", NX_ADVISORY_MD_CURRENT_VERSION);
			/* NOTREACHED */
			__builtin_unreachable();
		}
	}

	/* reflect the kernel's view of the attributes back to the caller */
	if (cha != NULL) {
		os_channel_info2attr(chd, cha);
	}

	/*
	 * Initialize the ring descriptors; layout order in chd_rings[] is
	 * TX rings, RX rings, allocator rings, then event rings.
	 */
	ring_offset = 0;
	for (i = 0; i < num_tx_rings; i++) {
		ring_index = ring_offset + i;
		os_channel_init_ring(&chd->chd_rings[ring_index], chd,
		    ring_index);
	}

	ring_offset += num_tx_rings;
	for (i = 0; i < num_rx_rings; i++) {
		ring_index = ring_offset + i;
		os_channel_init_ring(&chd->chd_rings[ring_index], chd,
		    ring_index);
	}

	ring_offset += num_rx_rings;
	for (i = 0; i < num_allocator_rings; i++) {
		ring_index = ring_offset + i;
		os_channel_init_ring(&chd->chd_rings[ring_index], chd,
		    ring_index);
	}

	ring_offset += num_allocator_rings;
	for (i = 0; i < num_event_rings; i++) {
		ring_index = ring_offset + i;
		os_channel_init_ring(&chd->chd_rings[ring_index], chd,
		    ring_index);
	}

	/*
	 * Record the alloc/free ring indices for user-packet-pool mode;
	 * with more than one pair, separate buflet alloc/free rings exist.
	 */
	if (init.ci_ch_mode & CHMODE_USER_PACKET_POOL) {
		chd->chd_sync_flags = CHANNEL_SYNCF_ALLOC | CHANNEL_SYNCF_FREE;
		*__DECONST(uint8_t *, &chd->chd_alloc_ring_idx) =
		    num_tx_rings + num_rx_rings;
		if (num_allocator_rings > 2) {
			chd->chd_sync_flags |= CHANNEL_SYNCF_ALLOC_BUF;
			*__DECONST(uint8_t *, &chd->chd_buf_alloc_ring_idx) =
			    chd->chd_alloc_ring_idx + 1;
			*__DECONST(uint8_t *, &chd->chd_free_ring_idx) =
			    chd->chd_buf_alloc_ring_idx + 1;
			*__DECONST(uint8_t *, &chd->chd_buf_free_ring_idx) =
			    chd->chd_free_ring_idx + 1;
		} else {
			*__DECONST(uint8_t *, &chd->chd_buf_alloc_ring_idx) =
			    CHD_RING_IDX_NONE;
			*__DECONST(uint8_t *, &chd->chd_buf_free_ring_idx) =
			    CHD_RING_IDX_NONE;
			*__DECONST(uint8_t *, &chd->chd_free_ring_idx) =
			    chd->chd_alloc_ring_idx + 1;
		}
	} else {
		*__DECONST(uint8_t *, &chd->chd_alloc_ring_idx) =
		    CHD_RING_IDX_NONE;
		*__DECONST(uint8_t *, &chd->chd_free_ring_idx) =
		    CHD_RING_IDX_NONE;
		*__DECONST(uint8_t *, &chd->chd_buf_alloc_ring_idx) =
		    CHD_RING_IDX_NONE;
		*__DECONST(uint8_t *, &chd->chd_buf_free_ring_idx) =
		    CHD_RING_IDX_NONE;
	}

	/* capture the per-process redzone cookie on first channel creation */
	if (__os_ch_md_redzone_cookie == 0) {
		__os_ch_md_redzone_cookie =
		    CHD_SCHEMA(chd)->csm_md_redzone_cookie;
	}

	/* ensure all stores are globally visible */
	membar_sync();

done:
	/* on failure, unwind everything acquired so far */
	if (err != 0) {
		if (fd != -1) {
			(void) guarded_close_np(fd, &init.ci_guard);
		}
		if (chd != NULL) {
			if (CHD_INFO(chd) != NULL) {
				free(CHD_INFO(chd));
				CHD_INFO(chd) = NULL;
			}
			free(chd);
			chd = NULL;
		}
		if (ci != NULL) {
			free(ci);
			ci = NULL;
		}
		errno = err;
	}
	return chd;
}
757 
758 channel_t
os_channel_create(const uuid_t uuid,const nexus_port_t port)759 os_channel_create(const uuid_t uuid, const nexus_port_t port)
760 {
761 	return os_channel_create_extended(uuid, port, CHANNEL_DIR_TX_RX,
762 	           CHANNEL_RING_ID_ANY, NULL);
763 }
764 
765 int
os_channel_get_fd(const channel_t chd)766 os_channel_get_fd(const channel_t chd)
767 {
768 	return chd->chd_fd;
769 }
770 
771 int
os_channel_read_attr(const channel_t chd,channel_attr_t cha)772 os_channel_read_attr(const channel_t chd, channel_attr_t cha)
773 {
774 	int err;
775 
776 	if ((err = __channel_get_info(chd->chd_fd, CHD_INFO(chd),
777 	    CHD_INFO_SIZE)) == 0) {
778 		os_channel_info2attr(chd, cha);
779 	}
780 
781 	return err;
782 }
783 
784 int
os_channel_write_attr(const channel_t chd,channel_attr_t cha)785 os_channel_write_attr(const channel_t chd, channel_attr_t cha)
786 {
787 	int err = 0;
788 
789 	if (CHD_INFO(chd)->cinfo_tx_lowat.cet_unit !=
790 	    cha->cha_tx_lowat.cet_unit ||
791 	    CHD_INFO(chd)->cinfo_tx_lowat.cet_value !=
792 	    cha->cha_tx_lowat.cet_value) {
793 		if ((err = __channel_set_opt(chd->chd_fd, CHOPT_TX_LOWAT_THRESH,
794 		    &cha->cha_tx_lowat, sizeof(cha->cha_tx_lowat))) != 0) {
795 			goto done;
796 		}
797 
798 		/* update local copy */
799 		CHD_INFO(chd)->cinfo_tx_lowat = cha->cha_tx_lowat;
800 	}
801 
802 	if (CHD_INFO(chd)->cinfo_rx_lowat.cet_unit !=
803 	    cha->cha_rx_lowat.cet_unit ||
804 	    CHD_INFO(chd)->cinfo_rx_lowat.cet_value !=
805 	    cha->cha_rx_lowat.cet_value) {
806 		if ((err = __channel_set_opt(chd->chd_fd, CHOPT_RX_LOWAT_THRESH,
807 		    &cha->cha_rx_lowat, sizeof(cha->cha_rx_lowat))) != 0) {
808 			goto done;
809 		}
810 
811 		/* update local copy */
812 		CHD_INFO(chd)->cinfo_rx_lowat = cha->cha_rx_lowat;
813 	}
814 done:
815 	return err;
816 }
817 
818 int
os_channel_read_nexus_extension_info(const channel_t chd,nexus_type_t * nt,uint64_t * ext)819 os_channel_read_nexus_extension_info(const channel_t chd, nexus_type_t *nt,
820     uint64_t *ext)
821 {
822 	struct nxprov_params *nxp;
823 
824 	nxp = &CHD_INFO(chd)->cinfo_nxprov_params;
825 	if (nt != NULL) {
826 		*nt = nxp->nxp_type;
827 	}
828 	if (ext != NULL) {
829 		*ext = (uint64_t)nxp->nxp_extensions;
830 	}
831 
832 	return 0;
833 }
834 
835 int
os_channel_sync(const channel_t chd,const sync_mode_t mode)836 os_channel_sync(const channel_t chd, const sync_mode_t mode)
837 {
838 	if (__improbable(mode != CHANNEL_SYNC_TX && mode != CHANNEL_SYNC_RX)) {
839 		return EINVAL;
840 	}
841 
842 	return __channel_sync(chd->chd_fd, mode,
843 	           (mode == CHANNEL_SYNC_TX) ? chd->chd_sync_flags :
844 	           (chd->chd_sync_flags &
845 	           ~(CHANNEL_SYNCF_ALLOC | CHANNEL_SYNCF_ALLOC_BUF)));
846 }
847 
848 void
os_channel_destroy(channel_t chd)849 os_channel_destroy(channel_t chd)
850 {
851 	if (chd->chd_fd != -1) {
852 		(void) guarded_close_np(chd->chd_fd, &chd->chd_guard);
853 	}
854 
855 	if (CHD_INFO(chd) != NULL) {
856 		free(CHD_INFO(chd));
857 		CHD_INFO(chd) = NULL;
858 	}
859 
860 	free(chd);
861 }
862 
int
os_channel_is_defunct(channel_t chd)
{
	/* Returns non-zero when the channel is marked defunct. */
	return _CHANNEL_IS_DEFUNCT(chd);
}
868 
869 __attribute__((always_inline, visibility("hidden")))
870 static inline ring_id_t
_ring_id(struct ch_info * cinfo,const ring_id_type_t type)871 _ring_id(struct ch_info *cinfo, const ring_id_type_t type)
872 {
873 	ring_id_t rid = CHANNEL_RING_ID_ANY;    /* make it crash */
874 
875 	switch (type) {
876 	case CHANNEL_FIRST_TX_RING:
877 		rid = cinfo->cinfo_first_tx_ring;
878 		break;
879 
880 	case CHANNEL_LAST_TX_RING:
881 		rid = cinfo->cinfo_last_tx_ring;
882 		break;
883 
884 	case CHANNEL_FIRST_RX_RING:
885 		rid = cinfo->cinfo_first_rx_ring;
886 		break;
887 
888 	case CHANNEL_LAST_RX_RING:
889 		rid = cinfo->cinfo_last_rx_ring;
890 		break;
891 	}
892 
893 	return rid;
894 }
895 
ring_id_t
os_channel_ring_id(const channel_t chd, const ring_id_type_t type)
{
	/* Resolve a symbolic ring-id type against this channel's info. */
	return _ring_id(CHD_INFO(chd), type);
}
901 
902 channel_ring_t
os_channel_tx_ring(const channel_t chd,const ring_id_t rid)903 os_channel_tx_ring(const channel_t chd, const ring_id_t rid)
904 {
905 	struct ch_info *ci = CHD_INFO(chd);
906 
907 	if (__improbable((ci->cinfo_ch_ring_id != CHANNEL_RING_ID_ANY &&
908 	    ci->cinfo_ch_ring_id != rid) ||
909 	    rid < _ring_id(ci, CHANNEL_FIRST_TX_RING) ||
910 	    rid > _ring_id(ci, CHANNEL_LAST_TX_RING))) {
911 		return NULL;
912 	}
913 
914 	return &chd->chd_rings[rid - _ring_id(ci, CHANNEL_FIRST_TX_RING)];
915 }
916 
917 channel_ring_t
os_channel_rx_ring(const channel_t chd,const ring_id_t rid)918 os_channel_rx_ring(const channel_t chd, const ring_id_t rid)
919 {
920 	struct ch_info *ci = CHD_INFO(chd);
921 
922 	if (__improbable((ci->cinfo_ch_ring_id != CHANNEL_RING_ID_ANY &&
923 	    ci->cinfo_ch_ring_id != rid) ||
924 	    rid < _ring_id(ci, CHANNEL_FIRST_RX_RING) ||
925 	    rid > _ring_id(ci, CHANNEL_LAST_RX_RING))) {
926 		return NULL;
927 	}
928 
929 	return &chd->chd_rings[_num_tx_rings(ci) +      /* add tx rings */
930 	       (rid - _ring_id(ci, CHANNEL_FIRST_RX_RING))];
931 }
932 
933 /*
934  * Return 1 if we have pending transmissions in the tx ring. When everything
935  * is complete ring->ring_head == ring->ring_khead.
936  */
937 int
os_channel_pending(const channel_ring_t chrd)938 os_channel_pending(const channel_ring_t chrd)
939 {
940 	struct __user_channel_ring *ring =
941 	    __DECONST(struct __user_channel_ring *, chrd->chrd_ring);
942 	return ring->ring_head != ring->ring_khead;
943 }
944 
uint64_t
os_channel_ring_sync_time(const channel_ring_t chrd)
{
	/* Time of the ring's last sync, as recorded in the shared ring. */
	return chrd->chrd_ring->ring_sync_time;
}
950 
uint64_t
os_channel_ring_notify_time(const channel_ring_t chrd)
{
	/* Time of the ring's last notify, as recorded in the shared ring. */
	return chrd->chrd_ring->ring_notify_time;
}
956 
957 uint32_t
os_channel_available_slot_count(const channel_ring_t chrd)958 os_channel_available_slot_count(const channel_ring_t chrd)
959 {
960 	const struct __user_channel_ring *ring = chrd->chrd_ring;
961 	uint32_t count;
962 	int n;
963 
964 	if (ring->ring_kind == CR_KIND_TX) {
965 		n = ring->ring_head - ring->ring_khead;
966 		if (n < 0) {
967 			n += ring->ring_num_slots;
968 		}
969 		count = (ring->ring_num_slots - n - 1);
970 	} else {
971 		n = ring->ring_tail - ring->ring_head;
972 		if (n < 0) {
973 			n += ring->ring_num_slots;
974 		}
975 		count = n;
976 	}
977 	return __improbable(_CHANNEL_RING_IS_DEFUNCT(chrd)) ? 0 : count;
978 }
979 
980 int
os_channel_advance_slot(channel_ring_t chrd,const channel_slot_t slot)981 os_channel_advance_slot(channel_ring_t chrd, const channel_slot_t slot)
982 {
983 	struct __user_channel_ring *ring =
984 	    __DECONST(struct __user_channel_ring *, chrd->chrd_ring);
985 	slot_idx_t idx;
986 	int err;
987 
988 	idx = _SLOT_INDEX(chrd, slot);
989 	if (__probable(_slot_index_is_valid(ring, idx))) {
990 		ring->ring_head = _CHANNEL_RING_NEXT(ring, idx);
991 		err = 0;
992 	} else {
993 		err = (_CHANNEL_RING_IS_DEFUNCT(chrd) ? ENXIO : EINVAL);
994 	}
995 	return err;
996 }
997 
/*
 * Return the slot following slot0 (or the first available slot when
 * slot0 is NULL), optionally filling *prop with the slot's index,
 * buffer address, metadata pointer and length.  Returns NULL when the
 * ring is exhausted or defunct.
 */
channel_slot_t
os_channel_get_next_slot(const channel_ring_t chrd, const channel_slot_t slot0,
    slot_prop_t *prop)
{
	const struct __user_channel_ring *ring = chrd->chrd_ring;
	const struct __slot_desc *slot;
	slot_idx_t idx;

	if (__probable(slot0 != NULL)) {
		idx = _SLOT_INDEX(chrd, slot0);
		if (__probable(_slot_index_is_valid(ring, idx))) {
			idx = _CHANNEL_RING_NEXT(ring, idx);
		} else if (!_CHANNEL_RING_IS_DEFUNCT(chrd)) {
			/* slot is out of bounds */
			SK_ABORT_WITH_CAUSE("Index out of bounds in gns", idx);
			/* NOTREACHED */
			__builtin_unreachable();
		} else {
			/*
			 * In case of a defunct, pretend as if we've
			 * advanced to the last slot; this will result
			 * in a NULL slot below.
			 */
			idx = ring->ring_tail;
		}
	} else {
		/* no starting slot: begin at the current ring head */
		idx = ring->ring_head;
	}

	if (__probable(idx != ring->ring_tail)) {
		slot = &chrd->chrd_slot_desc[idx];
	} else {
		/* we just advanced to the last slot */
		slot = NULL;
	}

	if (__probable(slot != NULL)) {
		uint16_t ring_kind = ring->ring_kind;
		struct __user_quantum *q;
		mach_vm_address_t baddr;
		uint16_t bdoff;

		/*
		 * TX rings in user-packet-pool mode carry no implicit
		 * metadata; the caller attaches a packet explicitly, so
		 * the slot properties are reported as empty.
		 */
		if (__improbable((ring_kind == CR_KIND_TX) &&
		    (CHD_INFO(chrd->chrd_channel)->cinfo_ch_mode &
		    CHMODE_USER_PACKET_POOL))) {
			if (SD_VALID_METADATA(SLOT_DESC_USD(slot))) {
				SK_ABORT_WITH_CAUSE("Tx slot has attached "
				    "metadata", idx);
				/* NOTREACHED */
				__builtin_unreachable();
			}
			if (prop != NULL) {
				prop->sp_len = 0;
				prop->sp_flags = 0;
				prop->sp_buf_ptr = 0;
				prop->sp_mdata_ptr = 0;
			}
			return __improbable(_CHANNEL_RING_IS_DEFUNCT(chrd)) ?
			       NULL : (channel_slot_t)slot;
		}

		_SLOT_DESC_VERIFY(chrd, SLOT_DESC_USD(slot));
		q = _SLOT_METADATA(chrd, ring, idx);
		_METADATA_VERIFY(chrd, q);

		baddr = _initialize_metadata_address(chrd, q, &bdoff);
		if (__improbable(baddr == 0)) {
			return NULL;
		}
		/* No multi-buflet support for slot based interface */
		if (__probable(prop != NULL)) {
			/* immutable: slot index */
			prop->sp_idx = idx;
			prop->sp_flags = 0;
			prop->sp_buf_ptr = baddr + bdoff;
			prop->sp_mdata_ptr = q;
			/* reset slot length if this is to be used for tx */
			prop->sp_len = (ring_kind == CR_KIND_TX) ?
			    ring->ring_buf_size : q->qum_len;
		}
	}

	return __improbable(_CHANNEL_RING_IS_DEFUNCT(chrd)) ?
	       NULL : (channel_slot_t)slot;
}
1083 
/*
 * Push the caller-provided slot properties (currently the length)
 * into the slot's metadata — quantum or packet, depending on the
 * channel's metadata type.
 */
void
os_channel_set_slot_properties(const channel_ring_t chrd,
    const channel_slot_t slot, const slot_prop_t *prop)
{
	const struct __user_channel_ring *ring = chrd->chrd_ring;
	slot_idx_t idx = _SLOT_INDEX(chrd, slot);

	if (__probable(_slot_index_is_valid(ring, idx))) {
		struct __user_quantum *q;

		_METADATA_VERIFY(chrd, prop->sp_mdata_ptr);
		_SLOT_DESC_VERIFY(chrd, _SLOT_DESC(chrd, idx));

		/*
		 * In the event of a defunct, we'd be accessing zero-filled
		 * memory; this is fine we ignore all changes made to the
		 * region at that time.
		 */
		q = _SLOT_METADATA(chrd, ring, idx);
		q->qum_len = prop->sp_len;
		switch (chrd->chrd_md_type) {
		case NEXUS_META_TYPE_PACKET: {
			struct __user_packet *p = (struct __user_packet *)q;
			/* No multi-buflet support for slot based interface */
			p->pkt_qum_buf.buf_dlen = prop->sp_len;
			p->pkt_qum_buf.buf_doff = 0;
			break;
		}
		default:
			/* quantum metadata: single built-in buflet */
			q->qum_buf[0].buf_dlen = prop->sp_len;
			q->qum_buf[0].buf_doff = 0;
			break;
		}
	} else if (!_CHANNEL_RING_IS_DEFUNCT(chrd)) {
		/* slot is out of bounds */
		SK_ABORT_WITH_CAUSE("Index out of bounds in ssp", idx);
		/* NOTREACHED */
		__builtin_unreachable();
	}
}
1124 
/*
 * Return an encoded packet handle for the metadata attached to `slot',
 * or 0 when there is none (NULL slot, or a slot with no valid
 * metadata).
 */
packet_t
os_channel_slot_get_packet(const channel_ring_t chrd, const channel_slot_t slot)
{
	const struct __user_channel_ring *ring = chrd->chrd_ring;
	struct __user_quantum *q = NULL;

	if (__probable(slot != NULL)) {
		slot_idx_t idx = _SLOT_INDEX(chrd, slot);
		if (__improbable(!_slot_index_is_valid(ring, idx)) &&
		    !_CHANNEL_RING_IS_DEFUNCT(chrd)) {
			/* slot is out of bounds */
			SK_ABORT_WITH_CAUSE("Index out of bounds in sgp", idx);
			/* NOTREACHED */
			__builtin_unreachable();
		}

		if (__probable(SD_VALID_METADATA(_SLOT_DESC(chrd, idx)))) {
			obj_idx_t midx;
			q = _SLOT_METADATA(chrd, ring, idx);
			_METADATA_VERIFY(chrd, q);
			/*
			 * In the event of a defunct, we'd be accessing
			 * zero-filed memory and end up with 0 for midx;
			 * this is fine since we ignore all changes made
			 * to the region at that time.
			 */
			midx = METADATA_IDX(q);
			_SLOT_METADATA_IDX_VERIFY(chrd, q, midx);
		}
	}

	return (q == NULL) ? 0 :
	       SK_PTR_ENCODE(q, chrd->chrd_md_type, chrd->chrd_md_subtype);
}
1159 
1160 void *
os_channel_get_stats_region(const channel_t chd,const channel_stats_id_t id)1161 os_channel_get_stats_region(const channel_t chd, const channel_stats_id_t id)
1162 {
1163 	void *sp = CHD_NX_STATS(chd);
1164 	struct __nx_stats_fsw *nxs_fsw;
1165 	void *ptr = NULL;
1166 
1167 	/* we currently deal only with flowswitch */
1168 	if (sp == NULL ||
1169 	    CHD_SCHEMA(chd)->csm_stats_type != NEXUS_STATS_TYPE_FSW) {
1170 		return NULL;
1171 	}
1172 
1173 	nxs_fsw = sp;
1174 
1175 	switch (id) {
1176 	case CHANNEL_STATS_ID_IP:
1177 		ptr = &nxs_fsw->nxs_ipstat;
1178 		break;
1179 
1180 	case CHANNEL_STATS_ID_IP6:
1181 		ptr = &nxs_fsw->nxs_ip6stat;
1182 		break;
1183 
1184 	case CHANNEL_STATS_ID_TCP:
1185 		ptr = &nxs_fsw->nxs_tcpstat;
1186 		break;
1187 
1188 	case CHANNEL_STATS_ID_UDP:
1189 		ptr = &nxs_fsw->nxs_udpstat;
1190 		break;
1191 
1192 	case CHANNEL_STATS_ID_QUIC:
1193 		ptr = &nxs_fsw->nxs_quicstat;
1194 		break;
1195 
1196 	default:
1197 		ptr = NULL;
1198 		break;
1199 	}
1200 
1201 	return ptr;
1202 }
1203 
1204 void *
os_channel_get_advisory_region(const channel_t chd)1205 os_channel_get_advisory_region(const channel_t chd)
1206 {
1207 	struct __kern_nexus_adv_metadata *adv_md;
1208 	/*
1209 	 * To be backward compatible this API will only return
1210 	 * the advisory region for flowswitch.
1211 	 */
1212 	adv_md = CHD_NX_ADV_MD(chd);
1213 	if (adv_md == NULL ||
1214 	    adv_md->knam_type != NEXUS_ADVISORY_TYPE_FLOWSWITCH) {
1215 		return NULL;
1216 	}
1217 	return CHD_NX_ADV_FSW(adv_md);
1218 }
1219 
/*
 * Compare the flow-advisory entry's UUID against `id', using the
 * widest loads that the caller's pointer alignment allows.
 */
__attribute__((always_inline, visibility("hidden")))
static inline int
_flowadv_id_equal(struct __flowadv_entry *fe, uuid_t id)
{
	/*
	 * Anticipate a nicely (8-bytes) aligned UUID from
	 * caller; the one in fae_id is always 8-byte aligned.
	 */
	if (__probable(IS_P2ALIGNED(id, sizeof(uint64_t)))) {
		uint64_t *id_64 = (uint64_t *)(uintptr_t)id;
		return fe->fae_id_64[0] == id_64[0] &&
		       fe->fae_id_64[1] == id_64[1];
	} else if (__probable(IS_P2ALIGNED(id, sizeof(uint32_t)))) {
		uint32_t *id_32 = (uint32_t *)(uintptr_t)id;
		return fe->fae_id_32[0] == id_32[0] &&
		       fe->fae_id_32[1] == id_32[1] &&
		       fe->fae_id_32[2] == id_32[2] &&
		       fe->fae_id_32[3] == id_32[3];
	}

	/*
	 * NOTE(review): the aligned paths above return non-zero on
	 * equality, so UUID_COMPARE is presumably defined to do the
	 * same (i.e. NOT the 0-on-equal uuid_compare(3) convention) —
	 * confirm against the macro's definition.
	 */
	return UUID_COMPARE(fe->fae_id, id);
}
1242 
1243 int
os_channel_flow_admissible(const channel_ring_t chrd,uuid_t flow_id,const flowadv_idx_t flow_index)1244 os_channel_flow_admissible(const channel_ring_t chrd, uuid_t flow_id,
1245     const flowadv_idx_t flow_index)
1246 {
1247 	const struct __user_channel_ring *ring = chrd->chrd_ring;
1248 	const struct channel *chd = chrd->chrd_channel;
1249 	struct __flowadv_entry *fe = CHD_NX_FLOWADV(chd);
1250 
1251 	/*
1252 	 * Currently, flow advisory is on a per-nexus port basis.
1253 	 * To anticipate for future requirements, we use the ring
1254 	 * as parameter instead, even though we use it only to
1255 	 * check if this is a TX ring for now.
1256 	 */
1257 	if (__improbable(CHD_NX_FLOWADV(chd) == NULL)) {
1258 		return ENXIO;
1259 	} else if (__improbable(ring->ring_kind != CR_KIND_TX ||
1260 	    flow_index >= CHD_PARAMS(chd)->nxp_flowadv_max)) {
1261 		return EINVAL;
1262 	}
1263 
1264 	/*
1265 	 * Rather than checking if the UUID is all zeroes, check
1266 	 * against fae_flags since the presence of FLOWADV_VALID
1267 	 * means fae_id is non-zero.  This avoids another round of
1268 	 * comparison against zeroes.
1269 	 */
1270 	fe = &CHD_NX_FLOWADV(chd)[flow_index];
1271 	if (__improbable(fe->fae_flags == 0 || !_flowadv_id_equal(fe, flow_id))) {
1272 		return ENOENT;
1273 	}
1274 
1275 	return __improbable((fe->fae_flags & FLOWADVF_SUSPENDED) != 0) ?
1276 	       ENOBUFS: 0;
1277 }
1278 
1279 channel_attr_t
os_channel_attr_create(void)1280 os_channel_attr_create(void)
1281 {
1282 	struct channel_attr *cha;
1283 
1284 	cha = malloc(sizeof(*cha));
1285 	if (cha != NULL) {
1286 		bzero(cha, sizeof(*cha));
1287 	}
1288 	return cha;
1289 }
1290 
1291 channel_attr_t
os_channel_attr_clone(const channel_attr_t cha)1292 os_channel_attr_clone(const channel_attr_t cha)
1293 {
1294 	struct channel_attr *ncha;
1295 
1296 	ncha = os_channel_attr_create();
1297 	if (ncha != NULL && cha != NULL) {
1298 		bcopy(cha, ncha, sizeof(*ncha));
1299 		ncha->cha_key = NULL;
1300 		ncha->cha_key_len = 0;
1301 		if (cha->cha_key != NULL && cha->cha_key_len != 0 &&
1302 		    os_channel_attr_set_key(ncha, cha->cha_key,
1303 		    cha->cha_key_len) != 0) {
1304 			os_channel_attr_destroy(ncha);
1305 			ncha = NULL;
1306 		}
1307 	}
1308 
1309 	return ncha;
1310 }
1311 
/*
 * Set a single attribute on `cha'.  Returns 0 on success, ENOTSUP for
 * attributes that are read-only through this API, and EINVAL for an
 * unknown attribute or an out-of-range value.
 */
int
os_channel_attr_set(const channel_attr_t cha, const channel_attr_type_t type,
    const uint64_t value)
{
	int err = 0;

	switch (type) {
	/* these attributes cannot be set through this API */
	case CHANNEL_ATTR_TX_RINGS:
	case CHANNEL_ATTR_RX_RINGS:
	case CHANNEL_ATTR_TX_SLOTS:
	case CHANNEL_ATTR_RX_SLOTS:
	case CHANNEL_ATTR_SLOT_BUF_SIZE:
	case CHANNEL_ATTR_SLOT_META_SIZE:
	case CHANNEL_ATTR_NEXUS_EXTENSIONS:
	case CHANNEL_ATTR_NEXUS_MHINTS:
	case CHANNEL_ATTR_NEXUS_IFINDEX:
	case CHANNEL_ATTR_NEXUS_STATS_SIZE:
	case CHANNEL_ATTR_NEXUS_FLOWADV_MAX:
	case CHANNEL_ATTR_NEXUS_META_TYPE:
	case CHANNEL_ATTR_NEXUS_META_SUBTYPE:
	case CHANNEL_ATTR_NEXUS_CHECKSUM_OFFLOAD:
	case CHANNEL_ATTR_NEXUS_ADV_SIZE:
	case CHANNEL_ATTR_MAX_FRAGS:
	case CHANNEL_ATTR_NUM_BUFFERS:
		err = ENOTSUP;
		break;

	case CHANNEL_ATTR_EXCLUSIVE:
		cha->cha_exclusive = (uint32_t)value;
		break;

	case CHANNEL_ATTR_NO_AUTO_SYNC:
		/* only a non-zero value is accepted for this attribute */
		if (value == 0) {
			err = ENOTSUP;
		}
		break;

	case CHANNEL_ATTR_MONITOR:
		/* only the known monitor modes are valid */
		switch (value) {
		case CHANNEL_MONITOR_OFF:
		case CHANNEL_MONITOR_NO_COPY:
		case CHANNEL_MONITOR_COPY:
			cha->cha_monitor = (uint32_t)value;
			goto done;
		}
		err = EINVAL;
		break;

	case CHANNEL_ATTR_TX_LOWAT_UNIT:
	case CHANNEL_ATTR_RX_LOWAT_UNIT:
		/* low-watermark unit must be bytes or slots */
		switch (value) {
		case CHANNEL_THRESHOLD_UNIT_BYTES:
		case CHANNEL_THRESHOLD_UNIT_SLOTS:
			if (type == CHANNEL_ATTR_TX_LOWAT_UNIT) {
				cha->cha_tx_lowat.cet_unit =
				    (channel_threshold_unit_t)value;
			} else {
				cha->cha_rx_lowat.cet_unit =
				    (channel_threshold_unit_t)value;
			}
			goto done;
		}
		err = EINVAL;
		break;

	case CHANNEL_ATTR_TX_LOWAT_VALUE:
		cha->cha_tx_lowat.cet_value = (uint32_t)value;
		break;

	case CHANNEL_ATTR_RX_LOWAT_VALUE:
		cha->cha_rx_lowat.cet_value = (uint32_t)value;
		break;

	case CHANNEL_ATTR_USER_PACKET_POOL:
		cha->cha_user_packet_pool = (value != 0);
		break;

	case CHANNEL_ATTR_NEXUS_DEFUNCT_OK:
		cha->cha_nexus_defunct_ok = (value != 0);
		break;

	case CHANNEL_ATTR_FILTER:
		cha->cha_filter = (uint32_t)value;
		break;

	case CHANNEL_ATTR_EVENT_RING:
		cha->cha_enable_event_ring = (value != 0);
		break;

	case CHANNEL_ATTR_LOW_LATENCY:
		cha->cha_low_latency = (value != 0);
		break;

	default:
		err = EINVAL;
		break;
	}
done:
	return err;
}
1412 
1413 int
os_channel_attr_set_key(const channel_attr_t cha,const void * key,const uint32_t key_len)1414 os_channel_attr_set_key(const channel_attr_t cha, const void *key,
1415     const uint32_t key_len)
1416 {
1417 	int err = 0;
1418 
1419 	if ((key == NULL && key_len != 0) || (key != NULL && key_len == 0) ||
1420 	    (key_len != 0 && key_len > NEXUS_MAX_KEY_LEN)) {
1421 		err = EINVAL;
1422 		goto done;
1423 	}
1424 	cha->cha_key_len = 0;
1425 	if (key_len == 0 && cha->cha_key != NULL) {
1426 		free(cha->cha_key);
1427 		cha->cha_key = NULL;
1428 	} else if (key != NULL && key_len != 0) {
1429 		if (cha->cha_key != NULL) {
1430 			free(cha->cha_key);
1431 		}
1432 		if ((cha->cha_key = malloc(key_len)) == NULL) {
1433 			err = ENOMEM;
1434 			goto done;
1435 		}
1436 		cha->cha_key_len = key_len;
1437 		bcopy(key, cha->cha_key, key_len);
1438 	}
1439 done:
1440 	return err;
1441 }
1442 
/*
 * Read a single attribute from `cha' into *value.  Returns 0 on
 * success, or EINVAL for an unknown attribute type.
 */
int
os_channel_attr_get(const channel_attr_t cha, const channel_attr_type_t type,
    uint64_t *value)
{
	int err = 0;

	switch (type) {
	case CHANNEL_ATTR_TX_RINGS:
		*value = cha->cha_tx_rings;
		break;

	case CHANNEL_ATTR_RX_RINGS:
		*value = cha->cha_rx_rings;
		break;

	case CHANNEL_ATTR_TX_SLOTS:
		*value = cha->cha_tx_slots;
		break;

	case CHANNEL_ATTR_RX_SLOTS:
		*value = cha->cha_rx_slots;
		break;

	case CHANNEL_ATTR_SLOT_BUF_SIZE:
		*value = cha->cha_buf_size;
		break;

	case CHANNEL_ATTR_SLOT_META_SIZE:
		*value = cha->cha_meta_size;
		break;

	case CHANNEL_ATTR_NEXUS_STATS_SIZE:
		*value = cha->cha_stats_size;
		break;

	case CHANNEL_ATTR_NEXUS_FLOWADV_MAX:
		*value = cha->cha_flowadv_max;
		break;

	case CHANNEL_ATTR_EXCLUSIVE:
		*value = cha->cha_exclusive;
		break;

	case CHANNEL_ATTR_NO_AUTO_SYNC:
		/* always reported as set (see os_channel_attr_set) */
		*value = 1;
		break;

	case CHANNEL_ATTR_MONITOR:
		*value = cha->cha_monitor;
		break;

	case CHANNEL_ATTR_TX_LOWAT_UNIT:
		*value = cha->cha_tx_lowat.cet_unit;
		break;

	case CHANNEL_ATTR_TX_LOWAT_VALUE:
		*value = cha->cha_tx_lowat.cet_value;
		break;

	case CHANNEL_ATTR_RX_LOWAT_UNIT:
		*value = cha->cha_rx_lowat.cet_unit;
		break;

	case CHANNEL_ATTR_RX_LOWAT_VALUE:
		*value = cha->cha_rx_lowat.cet_value;
		break;

	case CHANNEL_ATTR_NEXUS_TYPE:
		*value = cha->cha_nexus_type;
		break;

	case CHANNEL_ATTR_NEXUS_EXTENSIONS:
		*value = cha->cha_nexus_extensions;
		break;

	case CHANNEL_ATTR_NEXUS_MHINTS:
		*value = cha->cha_nexus_mhints;
		break;

	case CHANNEL_ATTR_NEXUS_IFINDEX:
		*value = cha->cha_nexus_ifindex;
		break;

	case CHANNEL_ATTR_NEXUS_META_TYPE:
		*value = cha->cha_nexus_meta_type;
		break;

	case CHANNEL_ATTR_NEXUS_META_SUBTYPE:
		*value = cha->cha_nexus_meta_subtype;
		break;

	case CHANNEL_ATTR_NEXUS_CHECKSUM_OFFLOAD:
		*value = cha->cha_nexus_checksum_offload;
		break;

	case CHANNEL_ATTR_USER_PACKET_POOL:
		*value = (cha->cha_user_packet_pool != 0);
		break;

	case CHANNEL_ATTR_NEXUS_ADV_SIZE:
		*value = cha->cha_nexusadv_size;
		break;

	case CHANNEL_ATTR_NEXUS_DEFUNCT_OK:
		*value = cha->cha_nexus_defunct_ok;
		break;

	case CHANNEL_ATTR_EVENT_RING:
		*value = (cha->cha_enable_event_ring != 0);
		break;

	case CHANNEL_ATTR_MAX_FRAGS:
		*value = cha->cha_max_frags;
		break;

	case CHANNEL_ATTR_NUM_BUFFERS:
		*value = cha->cha_num_buffers;
		break;

	case CHANNEL_ATTR_LOW_LATENCY:
		*value = (cha->cha_low_latency != 0);
		break;

	default:
		err = EINVAL;
		break;
	}

	return err;
}
1573 
1574 int
os_channel_attr_get_key(const channel_attr_t cha,void * key,uint32_t * key_len)1575 os_channel_attr_get_key(const channel_attr_t cha, void *key,
1576     uint32_t *key_len)
1577 {
1578 	int err = 0;
1579 
1580 	if (key_len == NULL) {
1581 		err = EINVAL;
1582 		goto done;
1583 	} else if (key == NULL || cha->cha_key == NULL) {
1584 		*key_len = (cha->cha_key != NULL) ? cha->cha_key_len : 0;
1585 		goto done;
1586 	}
1587 
1588 	if (*key_len >= cha->cha_key_len) {
1589 		bcopy(cha->cha_key, key, cha->cha_key_len);
1590 		*key_len = cha->cha_key_len;
1591 	} else {
1592 		err = ENOMEM;
1593 	}
1594 done:
1595 	return err;
1596 }
1597 
/*
 * Translate the cached kernel-provided channel info into the caller's
 * attribute object.  The caller-owned key pointer/length in `cha' are
 * preserved across the wipe.
 */
__attribute__((visibility("hidden")))
static void
os_channel_info2attr(struct channel *chd, channel_attr_t cha)
{
	struct ch_info *cinfo = CHD_INFO(chd);
	/* Save these first before we wipe out the attribute */
	uint32_t cha_key_len = cha->cha_key_len;
	void *cha_key = cha->cha_key;
	uint32_t caps;

	/* the public attribute constants must mirror the private ones */
	_CASSERT(NEXUS_META_TYPE_INVALID == CHANNEL_NEXUS_META_TYPE_INVALID);
	_CASSERT(NEXUS_META_TYPE_QUANTUM == CHANNEL_NEXUS_META_TYPE_QUANTUM);
	_CASSERT(NEXUS_META_TYPE_PACKET == CHANNEL_NEXUS_META_TYPE_PACKET);
	_CASSERT(NEXUS_META_SUBTYPE_INVALID ==
	    CHANNEL_NEXUS_META_SUBTYPE_INVALID);
	_CASSERT(NEXUS_META_SUBTYPE_PAYLOAD ==
	    CHANNEL_NEXUS_META_SUBTYPE_PAYLOAD);
	_CASSERT(NEXUS_META_SUBTYPE_RAW == CHANNEL_NEXUS_META_SUBTYPE_RAW);

	bzero(cha, sizeof(*cha));
	cha->cha_tx_rings = CHD_PARAMS(chd)->nxp_tx_rings;
	cha->cha_rx_rings = CHD_PARAMS(chd)->nxp_rx_rings;
	cha->cha_tx_slots = CHD_PARAMS(chd)->nxp_tx_slots;
	cha->cha_rx_slots = CHD_PARAMS(chd)->nxp_rx_slots;
	cha->cha_buf_size = CHD_PARAMS(chd)->nxp_buf_size;
	cha->cha_meta_size = CHD_PARAMS(chd)->nxp_meta_size;
	cha->cha_stats_size = CHD_PARAMS(chd)->nxp_stats_size;
	cha->cha_flowadv_max = CHD_PARAMS(chd)->nxp_flowadv_max;
	cha->cha_exclusive = !!(cinfo->cinfo_ch_mode & CHMODE_EXCLUSIVE);
	cha->cha_user_packet_pool = !!(cinfo->cinfo_ch_mode &
	    CHMODE_USER_PACKET_POOL);
	cha->cha_nexus_defunct_ok = !!(cinfo->cinfo_ch_mode &
	    CHMODE_DEFUNCT_OK);
	cha->cha_nexusadv_size = CHD_PARAMS(chd)->nxp_nexusadv_size;
	/* collapse the two monitor mode bits into the public enum */
	if (cinfo->cinfo_ch_mode & CHMODE_MONITOR) {
		cha->cha_monitor =
		    (cinfo->cinfo_ch_mode & CHMODE_MONITOR_NO_COPY) ?
		    CHANNEL_MONITOR_NO_COPY : CHANNEL_MONITOR_COPY;
	} else {
		cha->cha_monitor = CHANNEL_MONITOR_OFF;
	}
	/* restore the caller-owned key saved above */
	cha->cha_key_len = cha_key_len;
	cha->cha_key = cha_key;
	cha->cha_tx_lowat = cinfo->cinfo_tx_lowat;
	cha->cha_rx_lowat = cinfo->cinfo_rx_lowat;
	cha->cha_nexus_type = CHD_PARAMS(chd)->nxp_type;
	cha->cha_nexus_extensions = CHD_PARAMS(chd)->nxp_extensions;
	cha->cha_nexus_mhints = CHD_PARAMS(chd)->nxp_mhints;
	cha->cha_nexus_ifindex = CHD_PARAMS(chd)->nxp_ifindex;
	cha->cha_nexus_meta_type = chd->chd_md_type;
	cha->cha_nexus_meta_subtype = chd->chd_md_subtype;
	cha->cha_enable_event_ring =
	    (cinfo->cinfo_ch_mode & CHMODE_EVENT_RING) != 0;
	cha->cha_low_latency =
	    (cinfo->cinfo_ch_mode & CHMODE_LOW_LATENCY) != 0;

	caps = CHD_PARAMS(chd)->nxp_capabilities;
	if (caps & NXPCAP_CHECKSUM_PARTIAL) {
		cha->cha_nexus_checksum_offload =
		    CHANNEL_NEXUS_CHECKSUM_PARTIAL;
	} else {
		cha->cha_nexus_checksum_offload = 0;
	}
	cha->cha_max_frags = CHD_PARAMS(chd)->nxp_max_frags;
	cha->cha_num_buffers = cinfo->cinfo_num_bufs;
}
1664 
1665 void
os_channel_attr_destroy(channel_attr_t cha)1666 os_channel_attr_destroy(channel_attr_t cha)
1667 {
1668 	if (cha->cha_key != NULL) {
1669 		free(cha->cha_key);
1670 		cha->cha_key = NULL;
1671 	}
1672 	free(cha);
1673 }
1674 
/*
 * Allocate a packet from the channel's user packet pool, storing the
 * encoded handle in *ph.  Requires CHMODE_USER_PACKET_POOL.  Returns
 * 0 on success, ENOTSUP without a packet pool, ENOMEM when the alloc
 * ring is empty even after a sync, and ENXIO on a defunct channel.
 */
int
os_channel_packet_alloc(const channel_t chd, packet_t *ph)
{
	struct __user_channel_ring *ring;
	struct channel_ring_desc *chrd;
	struct __user_quantum *q;
	slot_idx_t idx;
	mach_vm_address_t baddr;
	uint16_t bdoff;
	struct ch_info *ci = CHD_INFO(chd);

	if (__improbable((ci->cinfo_ch_mode & CHMODE_USER_PACKET_POOL) == 0)) {
		return ENOTSUP;
	}

	chrd = &chd->chd_rings[chd->chd_alloc_ring_idx];
	ring = __DECONST(struct __user_channel_ring *, chrd->chrd_ring);
	idx = ring->ring_head;

	if (__improbable(idx == ring->ring_tail)) {
		/*
		 * do a sync to get more packets;
		 * since we are paying the cost of a syscall do a sync for
		 * free ring as well.
		 */
		int err;
		err = __channel_sync(chd->chd_fd, CHANNEL_SYNC_UPP,
		    (chd->chd_sync_flags & ~CHANNEL_SYNCF_ALLOC_BUF));
		if (__improbable(err != 0)) {
			/* a sync failure is only tolerated when defunct */
			if (!_CHANNEL_IS_DEFUNCT(chd)) {
				SK_ABORT_WITH_CAUSE("packet pool alloc "
				    "sync failed", err);
				/* NOTREACHED */
				__builtin_unreachable();
			}
			return err;
		}
	}

	/* still empty after the sync: out of packets (or defunct) */
	if (__improbable(idx == ring->ring_tail)) {
		return __improbable(_CHANNEL_IS_DEFUNCT(chd)) ?
		       ENXIO : ENOMEM;
	}

	_SLOT_DESC_VERIFY(chrd, _SLOT_DESC(chrd, idx));
	q = _SLOT_METADATA(chrd, ring, idx);
	_METADATA_VERIFY(chrd, q);

	/* hand the metadata to the caller and detach it from the slot */
	*ph = SK_PTR_ENCODE(q, chrd->chrd_md_type, chrd->chrd_md_subtype);
	_SLOT_DETACH_METADATA(_SLOT_DESC(chrd, idx));

	/*
	 * Initialize the metadata buffer address. In the event of a
	 * defunct, we'd be accessing zero-filled memory; this is fine
	 * since we ignore all changes made to region at that time.
	 */
	baddr = _initialize_metadata_address(chrd, q, &bdoff);
	if (__improbable(baddr == 0)) {
		return ENXIO;
	}
	ring->ring_head = _CHANNEL_RING_NEXT(ring, idx);
	return __improbable(_CHANNEL_IS_DEFUNCT(chd)) ? ENXIO : 0;
}
1738 
/*
 * Return a packet previously obtained from the user packet pool to the
 * channel's free ring.  Returns 0 on success, ENOTSUP without a packet
 * pool, and ENXIO when the free ring is defunct.
 */
int
os_channel_packet_free(const channel_t chd, packet_t ph)
{
	struct __user_channel_ring *ring;
	struct channel_ring_desc *chrd;
	slot_idx_t idx;
	obj_idx_t midx;
	struct ch_info *ci = CHD_INFO(chd);

	if (__improbable((ci->cinfo_ch_mode & CHMODE_USER_PACKET_POOL) == 0)) {
		return ENOTSUP;
	}

	chrd = &chd->chd_rings[chd->chd_free_ring_idx];
	ring = __DECONST(struct __user_channel_ring *, chrd->chrd_ring);

	idx = ring->ring_head;
	if (__improbable(idx == ring->ring_tail)) {
		/*
		 * do a sync to reclaim space in free ring;
		 */
		int err;
		err = __channel_sync(chd->chd_fd, CHANNEL_SYNC_UPP,
		    CHANNEL_SYNCF_FREE);
		if (__improbable(err != 0) && !_CHANNEL_IS_DEFUNCT(chd)) {
			SK_ABORT_WITH_CAUSE("packet pool free "
			    "sync failed", err);
			/* NOTREACHED */
			__builtin_unreachable();
		}
	}

	/* still no space after the sync is fatal unless defunct */
	if (__improbable(idx == ring->ring_tail) && !_CHANNEL_IS_DEFUNCT(chd)) {
		SK_ABORT("no free ring space");
		/* NOTREACHED */
		__builtin_unreachable();
	}

	/*
	 * In the event of a defunct, midx will be 0 and we'll end up
	 * attaching it to the slot; this is fine since we ignore all
	 * changes made to the slot descriptors at that time.
	 */
	midx = METADATA_IDX(QUM_ADDR(ph));
	_SLOT_METADATA_IDX_VERIFY(chrd, QUM_ADDR(ph), midx);
	_SLOT_ATTACH_METADATA(_SLOT_DESC(chrd, idx), midx);
	ring->ring_head = _CHANNEL_RING_NEXT(ring, idx);

	return __improbable(_CHANNEL_RING_IS_DEFUNCT(chrd)) ? ENXIO : 0;
}
1789 
/*
 * Attach a finalized packet (from the user packet pool) to a TX slot.
 * Returns 0 on success, ENOTSUP without a packet pool, and ENXIO when
 * the ring is defunct; other misuse aborts the process.
 */
int
os_channel_slot_attach_packet(const channel_ring_t chrd,
    const channel_slot_t slot, packet_t ph)
{
	slot_idx_t idx;
	obj_idx_t midx;

	if (__improbable((chrd->chrd_channel->chd_info->cinfo_ch_mode &
	    CHMODE_USER_PACKET_POOL) == 0)) {
		return ENOTSUP;
	}

	/* only finalized packets may be handed to a slot */
	if (__improbable(!__packet_is_finalized(ph))) {
		if (!_CHANNEL_RING_IS_DEFUNCT(chrd)) {
			SK_ABORT("packet not finalized");
			/* NOTREACHED */
			__builtin_unreachable();
		}
		goto done;
	}

	idx = _SLOT_INDEX(chrd, slot);
	if (__improbable(!_slot_index_is_valid(chrd->chrd_ring, idx))) {
		if (!_CHANNEL_RING_IS_DEFUNCT(chrd)) {
			SK_ABORT_WITH_CAUSE("Invalid slot", slot);
			/* NOTREACHED */
			__builtin_unreachable();
		}
		goto done;
	}

	/* the slot must be empty; double-attach is a caller bug */
	if (__improbable(SD_VALID_METADATA(SLOT_DESC_USD(slot)))) {
		if (!_CHANNEL_RING_IS_DEFUNCT(chrd)) {
			SK_ABORT_WITH_CAUSE("Slot has attached packet", slot);
			/* NOTREACHED */
			__builtin_unreachable();
		}
		goto done;
	}

	/*
	 * In the event of a defunct, midx will be 0 and we'll end up
	 * attaching it to the slot; this is fine since we ignore all
	 * changes made to the slot descriptors at that time.
	 */
	midx = METADATA_IDX(QUM_ADDR(ph));
	_SLOT_METADATA_IDX_VERIFY(chrd, QUM_ADDR(ph), midx);
	_SLOT_ATTACH_METADATA(SLOT_DESC_USD(slot), midx);

done:
	return __improbable(_CHANNEL_RING_IS_DEFUNCT(chrd)) ? ENXIO : 0;
}
1842 
1843 int
os_channel_slot_detach_packet(const channel_ring_t chrd,const channel_slot_t slot,packet_t ph)1844 os_channel_slot_detach_packet(const channel_ring_t chrd,
1845     const channel_slot_t slot, packet_t ph)
1846 {
1847 	slot_idx_t idx;
1848 
1849 	if (__improbable((chrd->chrd_channel->chd_info->cinfo_ch_mode &
1850 	    CHMODE_USER_PACKET_POOL) == 0)) {
1851 		return ENOTSUP;
1852 	}
1853 
1854 	idx = _SLOT_INDEX(chrd, slot);
1855 	if (__improbable(!_slot_index_is_valid(chrd->chrd_ring, idx))) {
1856 		if (!_CHANNEL_RING_IS_DEFUNCT(chrd)) {
1857 			SK_ABORT_WITH_CAUSE("Invalid slot", slot);
1858 			/* NOTREACHED */
1859 			__builtin_unreachable();
1860 		}
1861 		goto done;
1862 	}
1863 
1864 	if (__improbable(!SD_VALID_METADATA(SLOT_DESC_USD(slot)))) {
1865 		if (!_CHANNEL_RING_IS_DEFUNCT(chrd)) {
1866 			SK_ABORT_WITH_CAUSE("Slot has no attached packet",
1867 			    slot);
1868 			/* NOTREACHED */
1869 			__builtin_unreachable();
1870 		}
1871 		goto done;
1872 	}
1873 
1874 	if (__improbable(ph != SK_PTR_ENCODE(_SLOT_METADATA(chrd,
1875 	    chrd->chrd_ring, idx), chrd->chrd_md_type,
1876 	    chrd->chrd_md_subtype))) {
1877 		if (!_CHANNEL_RING_IS_DEFUNCT(chrd)) {
1878 			SK_ABORT("packet handle mismatch");
1879 			/* NOTREACHED */
1880 			__builtin_unreachable();
1881 		}
1882 		goto done;
1883 	}
1884 
1885 	if (__improbable(!__packet_is_finalized(ph))) {
1886 		if (!_CHANNEL_RING_IS_DEFUNCT(chrd)) {
1887 			SK_ABORT("packet not finalized");
1888 			/* NOTREACHED */
1889 			__builtin_unreachable();
1890 		}
1891 		goto done;
1892 	}
1893 
1894 	/*
1895 	 * In the event of a defunct, we ignore any changes made to
1896 	 * the slot descriptors, and so doing this is harmless.
1897 	 */
1898 	_SLOT_DETACH_METADATA(SLOT_DESC_USD(slot));
1899 
1900 done:
1901 	return __improbable(_CHANNEL_RING_IS_DEFUNCT(chrd)) ? ENXIO : 0;
1902 }
1903 
/*
 * Drain the packet alloc ring down to its current working-set size,
 * returning each surplus packet to the pool via os_channel_packet_free().
 *
 * Returns 0 on success; ENXIO if a bad buffer count is seen on a defunct
 * ring; or the error from os_channel_packet_free() on a defunct channel.
 * On a live (non-defunct) channel, any inconsistency aborts the process.
 */
__attribute__((visibility("hidden")))
static inline int
os_channel_purge_packet_alloc_ring(const channel_t chd)
{
	struct __user_channel_ring *ring;
	struct channel_ring_desc *chrd;
	uint32_t curr_ws;
	slot_idx_t idx;
	packet_t ph;
	int npkts, err;

	chrd = &chd->chd_rings[chd->chd_alloc_ring_idx];
	ring = __DECONST(struct __user_channel_ring *, chrd->chrd_ring);
	idx = ring->ring_head;

	/* calculate the number of packets in alloc pool */
	npkts = ring->ring_tail - idx;
	if (npkts < 0) {
		/* tail wrapped around the ring; adjust the distance */
		npkts += ring->ring_num_slots;
	}

	/* free only the surplus beyond the ring's working-set size */
	curr_ws = ring->ring_alloc_ws;
	while ((uint32_t)npkts-- > curr_ws) {
		struct __user_quantum *q;

		/* validate the slot descriptor and its metadata first */
		_SLOT_DESC_VERIFY(chrd, _SLOT_DESC(chrd, idx));
		q = _SLOT_METADATA(chrd, ring, idx);
		_METADATA_VERIFY(chrd, q);

		/* encode a packet handle before clearing the slot */
		ph = SK_PTR_ENCODE(q, chrd->chrd_md_type,
		    chrd->chrd_md_subtype);
		_SLOT_DETACH_METADATA(_SLOT_DESC(chrd, idx));

		/*
		 * Initialize the metadata buffer address. In the event of a
		 * defunct, we'd be accessing zero-filled memory; this is fine
		 * since we ignore all changes made to region at that time.
		 */
		if (chrd->chrd_md_type == NEXUS_META_TYPE_PACKET) {
			struct __user_packet *p = (struct __user_packet *)q;
			uint16_t bcnt = p->pkt_bufs_cnt;
			uint16_t bmax = p->pkt_bufs_max;

			/* zero buf counts are only tolerable when defunct */
			if (__improbable((bcnt == 0) || (bmax == 0))) {
				if (!_CHANNEL_RING_IS_DEFUNCT(chrd)) {
					SK_ABORT("pkt pool purge, bad bufcnt");
					/* NOTREACHED */
					__builtin_unreachable();
				} else {
					return ENXIO;
				}
			}
			/*
			 * alloc ring will not have multi-buflet packets.
			 */
			_PKT_BUFCNT_VERIFY(chrd, bcnt, 1);
		}
		/* rewrite buf_addr in place (field is nominally const) */
		*(mach_vm_address_t *) (uintptr_t)&q->qum_buf[0].buf_addr =
		    _CHANNEL_RING_BUF(chrd, ring, q->qum_buf[0].buf_idx);
		/* advance head before freeing, so the slot is consumed */
		idx = _CHANNEL_RING_NEXT(ring, idx);
		ring->ring_head = idx;
		err = os_channel_packet_free(chd, ph);
		if (__improbable(err != 0)) {
			if (!_CHANNEL_IS_DEFUNCT(chd)) {
				SK_ABORT_WITH_CAUSE("packet pool purge "
				    "free failed", err);
				/* NOTREACHED */
				__builtin_unreachable();
			}
			return err;
		}
	}

	return 0;
}
1979 
/*
 * Drain the buflet alloc ring down to its current working-set size,
 * returning each surplus buflet to the pool via os_channel_buflet_free().
 *
 * Returns 0 on success; ENXIO if a chained buflet is seen on a defunct
 * channel; or the error from os_channel_buflet_free() on a defunct
 * channel.  On a live channel, any inconsistency aborts the process.
 */
__attribute__((visibility("hidden")))
static inline int
os_channel_purge_buflet_alloc_ring(const channel_t chd)
{
	struct __user_channel_ring *ring;
	struct channel_ring_desc *chrd;
	uint32_t curr_ws;
	slot_idx_t idx;
	int nbfts, err;

	chrd = &chd->chd_rings[chd->chd_buf_alloc_ring_idx];
	ring = __DECONST(struct __user_channel_ring *, chrd->chrd_ring);
	idx = ring->ring_head;

	/* calculate the number of buflets in alloc pool */
	nbfts = ring->ring_tail - idx;
	if (nbfts < 0) {
		/* tail wrapped around the ring; adjust the distance */
		nbfts += ring->ring_num_slots;
	}

	/* free only the surplus beyond the ring's working-set size */
	curr_ws = ring->ring_alloc_ws;
	while ((uint32_t)nbfts-- > curr_ws) {
		struct __user_buflet *ubft;
		obj_idx_t nbft_idx;

		_SLOT_DESC_VERIFY(chrd, _SLOT_DESC(chrd, idx));
		ubft = _SLOT_BFT_METADATA(chrd, ring, idx);
		_SLOT_DETACH_METADATA(_SLOT_DESC(chrd, idx));

		/*
		 * Initialize the buflet metadata buffer address.
		 * (Written through a cast since the field is nominally
		 * const in the user-visible layout.)
		 */
		*(mach_vm_address_t *)(uintptr_t)&(ubft->buf_addr) =
		    _CHANNEL_RING_BUF(chrd, ring, ubft->buf_idx);
		if (__improbable(ubft->buf_addr == 0)) {
			SK_ABORT_WITH_CAUSE("buflet with NULL buffer",
			    ubft->buf_idx);
			/* NOTREACHED */
			__builtin_unreachable();
		}

		/* alloc-ring buflets must not be chained to a next buflet */
		nbft_idx = ubft->buf_nbft_idx;
		if (__improbable(nbft_idx != OBJ_IDX_NONE)) {
			if (_CHANNEL_IS_DEFUNCT(chd)) {
				return ENXIO;
			} else {
				SK_ABORT_WITH_CAUSE("buflet with invalid nidx",
				    nbft_idx);
				/* NOTREACHED */
				__builtin_unreachable();
			}
		}

		/* advance head before freeing, so the slot is consumed */
		idx = _CHANNEL_RING_NEXT(ring, idx);
		ring->ring_head = idx;
		err = os_channel_buflet_free(chd, ubft);
		if (__improbable(err != 0)) {
			if (!_CHANNEL_IS_DEFUNCT(chd)) {
				SK_ABORT_WITH_CAUSE("buflet pool purge "
				    "free failed", err);
				/* NOTREACHED */
				__builtin_unreachable();
			}
			return err;
		}
	}

	return 0;
}
2049 
2050 int
os_channel_packet_pool_purge(const channel_t chd)2051 os_channel_packet_pool_purge(const channel_t chd)
2052 {
2053 	struct ch_info *ci = CHD_INFO(chd);
2054 	int err;
2055 
2056 	if (__improbable((ci->cinfo_ch_mode & CHMODE_USER_PACKET_POOL) == 0)) {
2057 		return ENOTSUP;
2058 	}
2059 
2060 	err = __channel_sync(chd->chd_fd, CHANNEL_SYNC_UPP,
2061 	    (chd->chd_sync_flags | ~CHANNEL_SYNCF_FREE | CHANNEL_SYNCF_PURGE));
2062 	if (__improbable(err != 0)) {
2063 		if (!_CHANNEL_IS_DEFUNCT(chd)) {
2064 			SK_ABORT_WITH_CAUSE("packet pool purge sync failed",
2065 			    err);
2066 			/* NOTREACHED */
2067 			__builtin_unreachable();
2068 		}
2069 		return err;
2070 	}
2071 
2072 	err = os_channel_purge_packet_alloc_ring(chd);
2073 	if (__improbable(err != 0)) {
2074 		return err;
2075 	}
2076 
2077 	if (_num_allocator_rings(CHD_SCHEMA(chd)) > 2) {
2078 		err = os_channel_purge_buflet_alloc_ring(chd);
2079 		if (__improbable(err != 0)) {
2080 			return err;
2081 		}
2082 	}
2083 
2084 	err = __channel_sync(chd->chd_fd, CHANNEL_SYNC_UPP, CHANNEL_SYNCF_FREE);
2085 	if (__improbable(err != 0)) {
2086 		if (!_CHANNEL_IS_DEFUNCT(chd)) {
2087 			SK_ABORT_WITH_CAUSE("packet pool free sync failed",
2088 			    err);
2089 			/* NOTREACHED */
2090 			__builtin_unreachable();
2091 		}
2092 		return err;
2093 	}
2094 
2095 	return __improbable(_CHANNEL_IS_DEFUNCT(chd)) ? ENXIO : 0;
2096 }
2097 
/*
 * Dequeue the next event from the channel's event ring.
 *
 * On success, *ehandle receives an encoded handle for the event object,
 * *etype its event type, and *nevents the number of events it carries;
 * the caller must release the handle with os_channel_event_free().
 *
 * Returns 0 on success; EINVAL on NULL out-parameters; ENOTSUP if the
 * channel was not opened with an event ring; ENODATA if the ring is
 * empty; ENXIO if the channel is defunct.
 */
int
os_channel_get_next_event_handle(const channel_t chd,
    os_channel_event_handle_t *ehandle, os_channel_event_type_t *etype,
    uint32_t *nevents)
{
	struct __kern_channel_event_metadata *emd;
	struct __user_channel_ring *ring;
	struct channel_ring_desc *chrd;
	struct __user_quantum *qum;
	mach_vm_address_t baddr;
	uint16_t bdoff;
	slot_idx_t idx;
	struct __user_channel_schema *csm = CHD_SCHEMA(chd);
	struct ch_info *ci = CHD_INFO(chd);

	if (__improbable((ehandle == NULL) || (etype == NULL) ||
	    (nevents == NULL))) {
		return EINVAL;
	}
	if (__improbable((ci->cinfo_ch_mode & CHMODE_EVENT_RING) == 0)) {
		return ENOTSUP;
	}
	*ehandle = NULL;
	/* the event ring follows the TX, RX, and allocator rings */
	chrd = &chd->chd_rings[_num_tx_rings(ci) + _num_rx_rings(ci) +
	    _num_allocator_rings(csm)];
	ring = __DECONST(struct __user_channel_ring *, chrd->chrd_ring);
	idx = ring->ring_head;

	/* head == tail means no pending events */
	if (__improbable(idx == ring->ring_tail)) {
		return __improbable(_CHANNEL_IS_DEFUNCT(chd)) ?
		       ENXIO : ENODATA;
	}
	/* validate and claim the metadata at the head slot */
	_SLOT_DESC_VERIFY(chrd, _SLOT_DESC(chrd, idx));
	qum = _SLOT_METADATA(chrd, ring, idx);
	_METADATA_VERIFY(chrd, qum);
	_SLOT_DETACH_METADATA(_SLOT_DESC(chrd, idx));

	/* resolve the buffer address; 0 indicates a defunct region */
	baddr = _initialize_metadata_address(chrd, qum, &bdoff);
	if (__improbable(baddr == 0)) {
		return ENXIO;
	}
	*ehandle = SK_PTR_ENCODE(qum, chrd->chrd_md_type,
	    chrd->chrd_md_subtype);
	/* event metadata lives at the buffer's data offset */
	emd = (void *)(baddr + bdoff);
	*etype = emd->emd_etype;
	*nevents = emd->emd_nevents;
	ring->ring_head = _CHANNEL_RING_NEXT(ring, idx);
	return __improbable(_CHANNEL_IS_DEFUNCT(chd)) ? ENXIO : 0;
}
2147 
2148 int
os_channel_event_free(const channel_t chd,os_channel_event_handle_t ehandle)2149 os_channel_event_free(const channel_t chd, os_channel_event_handle_t ehandle)
2150 {
2151 	return os_channel_packet_free(chd, (packet_t)ehandle);
2152 }
2153 
2154 int
os_channel_get_interface_advisory(const channel_t chd,struct ifnet_interface_advisory * advisory)2155 os_channel_get_interface_advisory(const channel_t chd,
2156     struct ifnet_interface_advisory *advisory)
2157 {
2158 	struct __kern_netif_intf_advisory *intf_adv;
2159 	struct __kern_nexus_adv_metadata *adv_md;
2160 	nexus_advisory_type_t adv_type;
2161 
2162 	/*
2163 	 * Interface advisory is only supported for netif and flowswitch.
2164 	 */
2165 	adv_md = CHD_NX_ADV_MD(chd);
2166 	if (adv_md == NULL) {
2167 		return ENOENT;
2168 	}
2169 	adv_type = adv_md->knam_type;
2170 	if (__improbable(adv_type != NEXUS_ADVISORY_TYPE_NETIF &&
2171 	    adv_type != NEXUS_ADVISORY_TYPE_FLOWSWITCH)) {
2172 		return _CHANNEL_IS_DEFUNCT(chd) ? ENXIO : ENOENT;
2173 	}
2174 	if (adv_type == NEXUS_ADVISORY_TYPE_NETIF) {
2175 		intf_adv = &(CHD_NX_ADV_NETIF(adv_md))->__kern_intf_adv;
2176 	} else {
2177 		intf_adv = &(CHD_NX_ADV_FSW(adv_md))->_nxadv_intf_adv;
2178 	}
2179 	if (intf_adv->cksum != os_cpu_copy_in_cksum(&intf_adv->adv, advisory,
2180 	    sizeof(*advisory), 0)) {
2181 		return _CHANNEL_IS_DEFUNCT(chd) ? ENXIO : EAGAIN;
2182 	}
2183 	return 0;
2184 }
2185 
2186 int
os_channel_configure_interface_advisory(const channel_t chd,boolean_t enable)2187 os_channel_configure_interface_advisory(const channel_t chd, boolean_t enable)
2188 {
2189 	uint32_t value = enable;
2190 
2191 	return __channel_set_opt(chd->chd_fd, CHOPT_IF_ADV_CONF,
2192 	           &value, sizeof(value));
2193 }
2194 
/*
 * Allocate a buflet from the channel's buflet alloc ring, syncing with
 * the kernel once if the ring is empty.  On success *bft receives the
 * buflet with its buffer address initialized.
 *
 * Returns 0 on success; ENOTSUP without a user packet pool or without
 * dedicated buflet rings; ENOMEM if the ring is still empty after the
 * sync; ENXIO if the channel is defunct; or the sync error.
 */
int
os_channel_buflet_alloc(const channel_t chd, buflet_t *bft)
{
	struct __user_channel_ring *ring;
	struct channel_ring_desc *chrd;
	struct __user_buflet *ubft;
	obj_idx_t nbft_idx;
	slot_idx_t idx;
	struct ch_info *ci = CHD_INFO(chd);

	if (__improbable((ci->cinfo_ch_mode & CHMODE_USER_PACKET_POOL) == 0)) {
		return ENOTSUP;
	}

	/* buflet alloc/free rings exist only with >= 4 allocator rings */
	if (__improbable(_num_allocator_rings(CHD_SCHEMA(chd)) < 4)) {
		return ENOTSUP;
	}

	chrd = &chd->chd_rings[chd->chd_buf_alloc_ring_idx];
	ring = __DECONST(struct __user_channel_ring *, chrd->chrd_ring);
	idx = ring->ring_head;

	if (__improbable(idx == ring->ring_tail)) {
		/*
		 * do a sync to get more buflets;
		 */
		int err;
		err = __channel_sync(chd->chd_fd, CHANNEL_SYNC_UPP,
		    CHANNEL_SYNCF_ALLOC_BUF | CHANNEL_SYNCF_FREE);
		if (__improbable(err != 0)) {
			if (!_CHANNEL_IS_DEFUNCT(chd)) {
				SK_ABORT_WITH_CAUSE("buflet pool alloc "
				    "sync failed", err);
				/* NOTREACHED */
				__builtin_unreachable();
			}
			return err;
		}
	}

	/* re-check: the sync advances ring_tail, not our cached head */
	if (__improbable(idx == ring->ring_tail)) {
		return __improbable(_CHANNEL_IS_DEFUNCT(chd)) ?
		       ENXIO : ENOMEM;
	}

	/* validate and claim the buflet at the head slot */
	_SLOT_DESC_VERIFY(chrd, _SLOT_DESC(chrd, idx));
	ubft = _SLOT_BFT_METADATA(chrd, ring, idx);
	_SLOT_DETACH_METADATA(_SLOT_DESC(chrd, idx));

	/*
	 * Initialize the buflet metadata buffer address.
	 * (Written through a cast since the field is nominally const
	 * in the user-visible layout.)
	 */
	*(mach_vm_address_t *)(uintptr_t)&(ubft->buf_addr) =
	    _CHANNEL_RING_BUF(chrd, ring, ubft->buf_idx);
	if (__improbable(ubft->buf_addr == 0)) {
		SK_ABORT_WITH_CAUSE("buflet alloc with NULL buffer",
		    ubft->buf_idx);
		/* NOTREACHED */
		__builtin_unreachable();
	}
	/* a freshly allocated buflet must not be chained to a next buflet */
	nbft_idx = ubft->buf_nbft_idx;
	if (__improbable(nbft_idx != OBJ_IDX_NONE)) {
		if (_CHANNEL_IS_DEFUNCT(chd)) {
			return ENXIO;
		} else {
			SK_ABORT_WITH_CAUSE("buflet alloc with invalid nidx",
			    nbft_idx);
			/* NOTREACHED */
			__builtin_unreachable();
		}
	}
	ring->ring_head = _CHANNEL_RING_NEXT(ring, idx);
	*bft = ubft;
	return __improbable(_CHANNEL_IS_DEFUNCT(chd)) ? ENXIO : 0;
}
2270 
/*
 * Return a buflet to the channel's buflet free ring, syncing once with
 * the kernel if the ring has no space.
 *
 * Returns 0 on success, ENOTSUP without a user packet pool or dedicated
 * buflet rings, ENXIO if the ring is defunct.  On a live channel, a
 * failed sync or a still-full ring aborts the process; on a defunct one
 * the attach below lands in ignored slot-descriptor memory, which is
 * harmless.
 */
int
os_channel_buflet_free(const channel_t chd, buflet_t ubft)
{
	struct __user_channel_ring *ring;
	struct channel_ring_desc *chrd;
	slot_idx_t idx;
	obj_idx_t midx;
	struct ch_info *ci = CHD_INFO(chd);

	if (__improbable((ci->cinfo_ch_mode & CHMODE_USER_PACKET_POOL) == 0)) {
		return ENOTSUP;
	}

	/* buflet alloc/free rings exist only with >= 4 allocator rings */
	if (__improbable(_num_allocator_rings(CHD_SCHEMA(chd)) < 4)) {
		return ENOTSUP;
	}

	chrd = &chd->chd_rings[chd->chd_buf_free_ring_idx];
	ring = __DECONST(struct __user_channel_ring *, chrd->chrd_ring);

	idx = ring->ring_head;
	if (__improbable(idx == ring->ring_tail)) {
		/*
		 * do a sync to reclaim space in free ring;
		 */
		int err;
		err = __channel_sync(chd->chd_fd, CHANNEL_SYNC_UPP,
		    CHANNEL_SYNCF_FREE);
		if (__improbable(err != 0) && !_CHANNEL_IS_DEFUNCT(chd)) {
			SK_ABORT_WITH_CAUSE("buflet pool free "
			    "sync failed", err);
			/* NOTREACHED */
			__builtin_unreachable();
		}
	}

	/* re-check: the sync advances ring_tail, not our cached head */
	if (__improbable(idx == ring->ring_tail) && !_CHANNEL_IS_DEFUNCT(chd)) {
		SK_ABORT("no ring space in buflet free ring");
		/* NOTREACHED */
		__builtin_unreachable();
	}

	/* attach the buflet's metadata index to the head slot */
	midx = _BFT_INDEX(chrd, ubft);
	_SLOT_BFT_METADATA_IDX_VERIFY(chrd, ubft, midx);
	_SLOT_ATTACH_METADATA(_SLOT_DESC(chrd, idx), midx);
	ring->ring_head = _CHANNEL_RING_NEXT(ring, idx);

	/*
	 * NOTE(review): this checks ring-level defunct, while the sibling
	 * alloc path checks channel-level defunct -- presumably equivalent
	 * here; confirm against _CHANNEL_RING_IS_DEFUNCT's definition.
	 */
	return __improbable(_CHANNEL_RING_IS_DEFUNCT(chrd)) ? ENXIO : 0;
}
2320